\input{./preamble.tex}

\begin{document}
\maketitle
\tableofcontents
\section{Sheet 6}
\subsection{Exercise 35}
Let $M \in \mathbb{R}^{n \times n}$ with $\|M\| < 1$. Show that $I - M$ is
regular and
\begin{equation}
	\|\left( I - M \right)^{-1}\| \le \frac{1}{1-\|M\|}. \label{eq: ineq}
\end{equation}
Suppose $(I-M)x = 0$ for some $x \in \mathbb{R}^{n}$. Then
\begin{align}
	&\left( I - M \right)x = 0\\
	\Leftrightarrow\; &Ix - Mx = 0\\
	\Leftrightarrow\; &Mx = x.
\end{align}
But since $\|M\|<1$, we have $\|Mx\| \le \|M\|\|x\| < \|x\|$ for all
$x \neq 0$, so $Mx = x$ forces $x = 0$. This means that
\begin{align}
	\ker\left(I-M \right) = \{0\},
\end{align}
so $I-M$ is regular. The inequality is derived from the following
observation,
\begin{align}
	\left( I-M \right)^{-1} - \left( I-M \right)^{-1} M
	= \left( I-M \right)^{-1}\left( I-M \right) = I,
\end{align}
that is, $(I-M)^{-1} = I + (I-M)^{-1}M$. Then we estimate
\begin{align}
	\|\left(I-M \right)^{-1}\|
	&= \|I+\left( I-M \right)^{-1} M\|\\
	&\le \|I\| + \|(I-M)^{-1}\|\|M\|,
\end{align}
and rearranging (using $\|I\| = 1$ for an induced norm) gives
\begin{align}
	\|\left( I-M \right)^{-1}\| - \|(I-M)^{-1}\|\|M\| &\le 1\\
	\|(I-M)^{-1}\|\left(1-\|M\| \right) &\le 1\\
	\|\left( I-M \right)^{-1}\| &\le \frac{1}{1-\|M\|}.
\end{align}
Now let $A,B \in \mathbb{R}^{n \times n}$ with $\|I-BA\|<1$. Show that $A$
and $B$ are regular and that
\begin{align}
	\|B^{-1}\| \le \frac{\|A\|}{1-\|I-BA\|}, \qquad
	\|A^{-1}\| \le \frac{\|B\|}{1-\|I-BA\|}.
\end{align}
We know that for $M \in \mathbb{R}^{n \times n}$ with $\|M\|<1$ the matrix
$I-M$ is regular and the inequality \eqref{eq: ineq} holds. Set $M = I-BA$;
then $I-M = BA$ is regular. Because $BA$ is regular, so are $A$ and $B$.
Now note that $A^{-1} = (BA)^{-1}B$ and $B^{-1} = A(BA)^{-1}$, and that by
\eqref{eq: ineq}
\begin{align}
	\|(BA)^{-1}\| \le \frac{1}{1-\|I-BA\|}.
\end{align}
Then by submultiplicativity we have for $A$
\begin{align}
	\|A^{-1}\| \le \|(BA)^{-1}\|\|B\| \le \frac{\|B\|}{1-\|I-BA\|},
\end{align}
and for $B$
\begin{align}
	\|B^{-1}\| \le \|A\|\|(BA)^{-1}\| \le \frac{\|A\|}{1-\|I-BA\|}.
\end{align}
\subsection{Exercise 36}
Let $f:\mathbb{R}^{2}\to \mathbb{R}$, $f(x, y) = x^4 + 2x^2y^2 + y^4$. Show
that the local Newton algorithm converges to the unique global minimum of
$f$ for every $(x^0, y^0) \in \mathbb{R}^{2}\backslash\{(0, 0)^T\}$.
First we determine the minimum. Note that $f(x, y) = \left( x^2 + y^2
\right)^2 \ge 0$ for all $(x, y)^T \in \mathbb{R}^{2}$, with equality if and
only if $(x, y)^T = (0, 0)^T$. Hence $(0, 0)^T$ is the unique global
minimum. The Hessian of $f$ is
\begin{align}
	\nabla^2 f(x,y) = \begin{pmatrix}12x^2+4y^2 & 8xy\\ 8xy & 12y^2 + 4x^2
	\end{pmatrix}.
\end{align}
Now note that the Hessian at the minimum, $\nabla^2f(0,0)$, is the zero
matrix, which is singular. But for starting vectors $(x, y)^T \neq (0,
0)^T$, all the local Newton algorithm needs is a solution of the equation
$\nabla^2f(x^k)d_k = -\nabla f(x^k)$. So we need to show that
$\nabla^2f(x,y)$ is regular for all $(x, y)^T \neq (0,0)^T$; for this we
look at the determinant of the Hessian,
\begin{align}
	\det(\nabla^2 f(x,y)) = 48\left(x^2 + y^2\right)^2 > 0
	\quad \forall (x, y)^T \neq (0, 0)^T.
\end{align}
Indeed, solving the system $\nabla^2f(x,y)d = -\nabla f(x,y)$ with
$\nabla f(x,y) = 4(x^2+y^2)(x, y)^T$ gives $d = -(\frac{x}{3},
\frac{y}{3})^T$, so each Newton step satisfies
\begin{align}
	(x^{k+1}, y^{k+1})^T = \frac{2}{3}(x^{k}, y^{k})^T
	\xrightarrow{k \to \infty} (0, 0)^T.
\end{align}
This means that the sequence $(x^k)_{k\ge 0}$ produced by the local Newton
algorithm converges to the unique global minimum of $f$ given by $(0, 0)^T$.
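As a quick sanity check (not part of the exercise), the iteration can be
reproduced numerically. The following is a minimal sketch, assuming Python
with NumPy is available; the starting point is an arbitrary choice:
\begin{verbatim}
import numpy as np

def grad(z):
    x, y = z
    # gradient of f(x, y) = (x^2 + y^2)^2
    return 4.0 * (x**2 + y**2) * np.array([x, y])

def hess(z):
    x, y = z
    # Hessian of f, as computed above
    return np.array([[12*x**2 + 4*y**2, 8*x*y],
                     [8*x*y,            12*y**2 + 4*x**2]])

z = np.array([1.0, -2.0])   # any start != (0, 0)
for k in range(10):
    d = np.linalg.solve(hess(z), -grad(z))   # local Newton step
    z = z + d                                # equals (2/3) * previous z
    print(k, z)
\end{verbatim}
Each iterate is exactly $\frac{2}{3}$ of the previous one, matching the
closed-form step $d = -(\frac{x}{3}, \frac{y}{3})^T$ computed above.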
\subsection{Exercise 37}
Show that the local Newton algorithm is invariant under affine-linear
transformations: for a regular matrix $A \in \mathbb{R}^{n \times n}$ and $c
\in \mathbb{R}^{n}$, let $(x^k)_{k\ge 0}$ be the sequence generated by the
local Newton algorithm for minimizing $f$ with starting vector $x^0$, and
let $(y^k)_{k\ge 0}$ be the sequence generated by the local Newton algorithm
for the function $g(y) := f(Ay +c)$ with starting vector $y^0$. Then
\begin{align}
	x^0 = Ay^0 + c \implies x^k = Ay^k + c \quad \forall k \ge 0.
\end{align}
First of all we calculate the gradient and the Hessian of $g$ by the chain
rule:
\begin{align}
	\nabla g(y) &= A^T \nabla f(Ay+c),\\
	\nabla^2 g(y) &= A^T \nabla^2 f(Ay+c) A.
\end{align}
We proceed by induction. The base case $x^0 = Ay^0 + c$ holds by assumption,
so suppose $x^{k} = Ay^{k} + c$. Then
\begin{align}
	x^{k+1}
	&= x^{k} + d_k = x^{k} - \left( \nabla^2 f(x^{k})\right)^{-1}
	\nabla f(x^{k})\\
	&=Ay^k + c - \left( \nabla^2 f(Ay^{k}+c) \right)^{-1} \nabla f(Ay^{k}+c)\\
	&=Ay^{k} + c - A A^{-1}\left( \nabla^2 f(Ay^{k}+c) \right)^{-1}
	A^{-T} A^T \nabla f(Ay^{k}+c)\\
	&=A\left(y^{k} - \left( A^{T}\nabla^2 f(Ay^{k}+c)A\right)^{-1}A^T \nabla
	f(Ay^{k}+c ) \right) +c \\
	&=A\left(y^{k} - \left( \nabla^2 g(y^{k})\right)^{-1}\nabla g(y^{k})
	\right) +c \\
	&= Ay^{k+1} +c,
\end{align}
which completes the induction.
\subsection{Exercise 38}
Let $M \in \mathbb{R}^{n \times n}$ be a regular matrix and $\{M_k\}_{k\ge 0}
\subset \mathbb{R}^{n \times n}$ a sequence of matrices which converges to
$M$ as $k \to \infty$. Show that there exists a $k_0 \ge 0$ such that $M_k$
is regular for all $k\ge k_0$ and the sequence $\{M_k^{-1}\}_{k\ge k_0}$
converges to $M^{-1}$.
\newline
Write $M_k = M\left(I - M^{-1}(M - M_k)\right)$. Since $M_k \to M$, there is
a $k_0 \ge 0$ such that $\|M - M_k\| < \frac{1}{2\|M^{-1}\|}$ for all $k \ge
k_0$, hence
\begin{align}
	\|M^{-1}(M-M_k)\| \le \|M^{-1}\|\|M-M_k\| < \frac{1}{2} < 1.
\end{align}
By Exercise 35 the factor $I - M^{-1}(M-M_k)$ is then regular, and therefore
so is $M_k$, with
\begin{align}
	\|M_k^{-1}\| \le \frac{\|M^{-1}\|}{1 - \|M^{-1}\|\|M-M_k\|}
	\le 2\|M^{-1}\|,
\end{align}
so the inverses are uniformly bounded for $k \ge k_0$. From the identity
$M_k^{-1} - M^{-1} = M_k^{-1}\left( M - M_k \right)M^{-1}$ we then get
\begin{align}
	\|M_k^{-1} - M^{-1}\| \le \|M_k^{-1}\|\|M - M_k\|\|M^{-1}\|
	\le 2\|M^{-1}\|^{2}\|M - M_k\| \xrightarrow{k \to \infty} 0,
\end{align}
which means that $\{M^{-1}_k\}_{k\ge k_0}$ converges to $M^{-1}$.
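The statement can also be illustrated numerically. A small sketch, assuming
Python with NumPy; the matrix $M$, the perturbation $E$ and the $1/k$
scaling are arbitrary choices (for this seed $M$ is safely regular):
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
n = 4
M = 10 * np.eye(n) + rng.standard_normal((n, n))  # regular matrix M
E = rng.standard_normal((n, n))                   # fixed perturbation

for k in [1, 2, 4, 8, 16, 32]:
    Mk = M + E / k    # M_k -> M as k -> infinity
    err = np.linalg.norm(np.linalg.inv(Mk) - np.linalg.inv(M))
    print(k, err)     # decays proportionally to ||M - M_k||
\end{verbatim}
The printed error roughly halves each time $k$ doubles, consistent with the
bound $\|M_k^{-1} - M^{-1}\| \le 2\|M^{-1}\|^{2}\|M - M_k\|$ derived above.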
\subsection{Exercise 39}
Let $H \in \mathbb{R}^{n \times n}$ be regular and $u, v \in \mathbb{R}^{n}$
arbitrary. Show that $H+uv^T$ is regular $\Leftrightarrow$ $1+v^TH^{-1}u \neq
0$, and that in this case the Sherman-Morrison formula holds:
\begin{align}
	\left( H + uv^T \right)^{-1} = \left( I - \frac{1}{1+v^TH^{-1} u} H^{-1}
	uv^T\right)H^{-1}. \label{eq: smf}
\end{align}
By the matrix determinant lemma,
\begin{align}
	\det\left( H + uv^T \right) =(1+v^T H^{-1} u)\det(H).
\end{align}
Since $\det(H) \neq 0$, we have $\det(H+uv^T) \neq 0$ if and only if
$1+v^TH^{-1}u \neq 0$, which proves the equivalence. Now we need to check
that the claimed inverse really holds, which is done by simply multiplying:
\begin{align}
	&\left( H+uv^{T} \right) \left( H^{-1} - \frac{H^{-1} uv^T
	H^{-1}}{1+v^{T}H^{-1}u} \right) = \\
	&=H H^{-1} + uv^T H^{-1} - H \frac{H^{-1}uv^T H^{-1}}{1+v^TH^{-1}u} -
	uv^{T}\frac{H^{-1}uv^TH^{-1}}{1+v^{T}H^{-1}u}\\
	&=I + uv^T H^{-1} - \frac{uv^T H^{-1} + uv^T H^{-1}uv^T
	H^{-1}}{1+v^TH^{-1}u}\\
	&=I+uv^TH^{-1} - \frac{u\left( 1+v^{T}H^{-1}u \right)
	v^{T}H^{-1}}{1+v^{T}H^{-1} u}\\
	&=I + uv^{T}H^{-1} - uv^{T}H^{-1} \\
	&= I.
\end{align}
Since these are square matrices, $AB=I$ implies $BA=I$, so the right inverse
is indeed the inverse.
\subsection{Exercise 40}
Consider the quadratic optimization problem
\begin{align}
	\min \quad &f(x):=\gamma + c^{T}x + \frac{1}{2}x^{T}Qx, \label{eq: opp}\\
	\text{s.t.}\quad &h(x) := b^{T}x = 0, \nonumber
\end{align}
with $Q \in \mathbb{R}^{n \times n}$ SPD, $b, c \in \mathbb{R}^{n}$, $b\neq
0$ and $\gamma \in \mathbb{R}$. For a given $\alpha > 0$ find the minimum
$x^*(\alpha)$ of the penalty function
\begin{align}
	P(x;\alpha) := f(x) + \frac{\alpha}{2}\left( h(x) \right)^{2},
\end{align}
determine $x^* := \lim_{\alpha \to \infty}x^{*}(\alpha)$, and prove that
$x^{*}$ is the unique optimal solution of the optimization problem
\eqref{eq: opp}. We start by calculating the minimum of $P(\cdot;\alpha)$:
\begin{align}
	\nabla_x P(x;\alpha)
	&= \nabla f(x) + \frac{\alpha}{2}\nabla \left( h(x) \right)^{2}\\
	&= c + Qx + \frac{\alpha}{2}\, 2 h(x) \nabla h(x) \\
	&= c + Qx + \alpha (b^T x)\, b = 0,\\
	\nonumber\\
	Qx + \alpha b b^{T} x &= -c, \\
	\left( Q + \alpha b b^T \right) x &= -c.
\end{align}
Since $Q$ is SPD and $\alpha bb^T$ is positive semidefinite, $Q + \alpha
bb^T$ is SPD and $P(\cdot;\alpha)$ is strictly convex, so this stationary
point is its unique minimum. Using the Sherman-Morrison formula
\eqref{eq: smf} with $H = Q$, $u=\alpha b$ and $v = b$ we get
\begin{align}
	x^{*}(\alpha) = \left( \frac{\alpha}{1+\alpha b^{T}Q^{-1}b}Q^{-1} bb^{T}
	-I\right) Q^{-1} c.
\end{align}
Using the standard limit $\frac{\alpha}{1+k\alpha} \to \frac{1}{k}$ as
$\alpha \to \infty$ (here with $k = b^{T}Q^{-1}b > 0$), the limit is
\begin{align}
	x^{*} &= \lim_{\alpha \to \infty}x^{*}(\alpha)\\
	&= \left(\frac{Q^{-1}bb^{T}}{b^{T}Q^{-1}b} -I \right) Q^{-1} c.
\end{align}
To show that $x^{*}$ is the unique optimal solution of the optimization
problem \eqref{eq: opp} we show that it is feasible and satisfies the KKT
conditions; since $f$ is strictly convex ($\nabla^2 f = Q \succ 0$) and the
constraint is linear, the KKT conditions are sufficient and the minimizer is
unique. First, feasibility, i.e.\ $b^{T}x^{*} = 0$:
\begin{align}
	b^{T}x^{*} &= \left(\frac{b^{T}Q^{-1}bb^{T}Q^{-1}}{b^{T}Q^{-1}b}-
	b^{T}Q^{-1} \right) c \\
	&= \left( b^{T}Q^{-1} - b^{T}Q^{-1} \right) c \\
	&= 0.
\end{align}
For the KKT conditions we need $\nabla f(x^{*}) + \lambda \nabla h(x^{*}) =
0$ for some $\lambda \in \mathbb{R}$. Indeed,
\begin{align}
	\nabla f(x^{*}) = c + Qx^{*}
	= c + \frac{bb^{T}Q^{-1}c}{b^{T}Q^{-1}b} - c
	= \frac{b^{T}Q^{-1}c}{b^{T}Q^{-1}b}\, b,
\end{align}
so the condition holds with $\lambda = -\frac{b^{T}Q^{-1}c}{b^{T}Q^{-1}b}$,
and $x^{*}$ is the unique optimal solution of \eqref{eq: opp}.
\end{document}