notes

uni notes
git clone git://popovic.xyz/notes.git
Log | Files | Refs

sesh6.tex (9080B)


      1 \input{./preamble.tex}
      2 
      3 \begin{document}
      4 \maketitle
      5 \tableofcontents
      6 \section{Sheet 6}
      7 \subsection{Exercise 35}
      8 Let $M \in \mathbb{R}^{n \times  n}$ with $\|M\| < 1$. Show that $I - M$ is
      9 regular and
     10 \begin{equation}
     11     \|\left( I - M \right)^{-1} \| \le \frac{1}{1-\|M\|}. \label{eq: ineq}
     12 \end{equation}
     13 Suppose $I-M$ is singular, then for some nonzero $x \in \mathbb{R}^{n}$ we have that
     14 \begin{align}
     15     &\left( I - M \right)x = 0\\
     16     \Leftrightarrow  &Ix - Mx = 0\\
     17     \Leftrightarrow & Mx = x.
     18 \end{align}
     19 But since $\|M\|<1$ then $\forall x \in \mathbb{R}^{n}\backslash \{0\}$ we have that $\|Mx\|
     20 \le \|M\|\|x\| < \|x\|$, so $Mx = x$ has no nonzero solution. This means that
     21 \begin{align}
     22     \text{ker}\left(I-M \right) = \{0\},
     23 \end{align}
     24 so $I-M$ is regular. The identity on the other hand is derived by the
     25 following observation
     26 \begin{align}
     27     \left( I-M \right)^{-1} - \left( I-M \right)^{-1} M = \left( I-M
     28     \right)^{-1}\left( I-M \right) = I,
     29 \end{align}
     30 Then we calculate
     31 \begin{align}
     32     \|\left(I-M \right)^{-1}\|
     33     &= \|I+\left( I-M \right)^{-1} M\|\\
     34     &\le \|I\| + \|(I-M)^{-1}\|\|M\|,
     35 \end{align}
     36 rearranging gives
     37 \begin{align}
     38     \|\left( I-M \right)^{-1}\| - \|(I-M)^{-1}\|\|M\| \le \|I\|\\
     39     \|(I-M)^{-1}\|\left(1-\|M\|  \right)  \le 1\\
     40     \|\left( I-M \right)^{-1}\| \le \frac{1}{1-\|M\|}.
     41 \end{align}
     42 Now let $A,B \in \mathbb{R}^{n \times n}$ with  $\|I-BA\|<1$. Show that $A$
     43 and $B$ are regular and that
     44 \begin{align}
     45     \|B^{-1}\| \le \frac{\|A\|}{1-\|I-BA\|}\\
     46     \|A^{-1}\| \le \frac{\|B\|}{1-\|I-BA\|}.
     47 \end{align}
     48 We know that for $M \in \mathbb{R}^{n \times  n}$ with $\|M\|<1$ then $I-M$
     49 is regular and the inequality in \ref{eq: ineq} holds. Set $M =
     50 I-BA$ then $I-M=BA$ is regular. Because $BA$ is regular so are $A$ and $B$.
     51 Now note that $A^{-1} = \left( BA \right)^{-1}B$ and $B^{-1} = A\left( BA
     52 \right)^{-1}$. Furthermore, by \ref{eq: ineq} with $M = I-BA$,
     53 \begin{align}
     54     \|\left(BA  \right)^{-1}\| \le \frac{1}{1-\|I-BA\|}.
     55 \end{align}
     56 Then for $A$ we have
     57 \begin{align}
     58     \|A^{-1}\|\le \|\left( BA \right)^{-1}\|\|B\|\le
     59     \frac{\|B\|}{1-\|I-BA\|}.
     60 \end{align}
     61 and for $B$
     62 \begin{align}
     63     \|B^{-1}\|\le \|A\|\|\left( BA \right)^{-1}\|\le
     64     \frac{\|A\|}{1-\|I-BA\|}.
     65 \end{align}
     66 \subsection{Exercise 36}
     67 Let $f:\mathbb{R}^{2}\to \mathbb{R}$, $f(x, y) = x^4 + 2x^2y^2 + y^4$. Show
     68 that the local Newton algorithm converges to the unique global minimum of
     69 $f$ for every $(x^0, y^0) \in \mathbb{R}^{2}\backslash\{(0, 0)^T\}$.
     70 First we determine the minimum $x^*$. Note that $f(x, y) = \left( x^2 + y^2
     71 \right)^2 \ge 0$ for all $(x, y)^T \in \mathbb{R}^{2}$, with equality if and
     72 only if $(x, y)^T = (0 ,0)^T$, so the unique global minimum is $(0, 0)^T$. The
     73 Hessian of $f$ is
     74 \begin{align}
     75     \nabla^2 f(x,y) = \begin{pmatrix}12x^2+4y^2 & 8xy\\ 8xy & 12y^2 + 4x^2
     76     \end{pmatrix}.
     77 \end{align}
     78 Now note that the Hessian at the minimum $\nabla^2f(0,0)$ is the zero matrix
     79 which is singular. But considering starting vectors $(x, y)^T \neq (0,
     80 0)^T$, all we need in the local Newton algorithm is the solution to the equation
     81 $\nabla^2f(x^k)d_k = -\nabla f(x^k)$. Meaning we need to show that
     82 $\nabla^2f(x,y)$ is regular for all $(x, y)^T \neq (0,0)^T$, in this
     83 case we look at the determinant of the Hessian
     84 \begin{align}
     85     \det(\nabla^2 f(x,y)) = 48\cdot(x^2 + y^2)^2 > 0 \;\; \forall (x, y)^T \neq (0,
     86     0)^T.
     87 \end{align}
     88 This means that the sequence $(x^k)_{k\ge 0}$ produced by the local Newton
     89 algorithm converges to the unique global minimum of $f$ given by $(0, 0)^T$.
     90 Indeed if we calculate the solution of the system $\nabla^2f(x,y)d =
     91 -\nabla f(x,y)$ we get that $d = (-\frac{x}{3}, -\frac{y}{3})^T$.
     92 
     93 \subsection{Exercise 37}
     94 Show that the local Newton Algorithm is invariant to affine-linear
     95 transformation, for a regular matrix $A \in \mathbb{R}^{n \times n}$ and $c
     96 \in \mathbb{R}^{n}$, $(x^k)_{k\ge 0}$ the sequence generated by the local
     97 Newton algorithm for minimizing $f$ with starting vector $x^0$. Then let
     98 $(y^k)_{k\ge 0}$ the sequence generated by the local Newton algorithm for the
     99 function $g(y) := f(Ay +c)$ with starting vector $y^0$, then
    100 \begin{align}
    101     x^0 = Ay^0 + c \;\; \implies  x^k = Ay^k + c \;\;\; \forall k \ge 0.
    102 \end{align}
    103 First of all we calculate the gradient and the hessian for $g$
    104 \begin{align}
    105     \nabla g(y) = A^T \nabla f(Ay+c)\\
    106     \nabla^2 g(y) = A^T \nabla^2 f(Ay+c) A.
    107 \end{align}
    108 Now we need to prove that $x^{k+1} = Ay^{k+1} + c$
    109 \begin{align}
    110     x^{k+1}
    111     &= x^{k} + d_k = x^{k} - \left( \nabla^2 f(x^{k})\right)^{-1}
    112     \nabla f(x^{k})\\
    113     &=Ay^k + c - \left( \nabla^2 f(Ay^{k}+c )\right)^{-1} \nabla f(Ay^{k}+c)\\
    114     &=Ay^{k} + c - A A^{-1}\left( \nabla^2 f(Ay^{k}+c )\right)^{-1} \nabla f(Ay^{k}+c)\\
    115     &=Ay^{k} + c - A A^{-1}\left( \nabla^2 f(Ay^{k}+c )\right)^{-1} A^{-T} A^{T} \nabla f(Ay^{k}+c)\\
    116     &=A\left(y^{k} - \left( A^{T}\nabla^2 f(Ay^{k}+c)A\right)^{-1}A^T \nabla
    117     f(Ay^{k}+c ) \right)  +c \\
    118     &= Ay^{k+1} +c.
    119 \end{align}
    120 by that the induction is finished.
    121 \subsection{Exercise 38}
    122 Let $M \in \mathbb{R}^{n \times n}$ be a regular matrix and $\{M_k\}_{k\ge 0}
    123 \subset \mathbb{R}^{n \times n}$ a sequence of matrices which converges to $M$ as $k
    124 \to \infty $. Show that there exists a $k_0 \ge 0$ such that $M_k$ is regular
    125 for all $k\ge k_0$ and the sequence $\{M_k^{-1}\}_{k\ge 0}$ converges to
    126 $M^{-1}$.
    127 \newline
    128 The determinant is a polynomial in the matrix entries, hence continuous, so
    129 $\text{det}(M_k) \to \text{det}(M) \neq 0$ as $k \to \infty$. Therefore there
    130 exists a $k_0 \ge 0$ such that $\text{det}(M_k) \neq 0$, i.e.\ $M_k$ is
    131 regular, for all $k \ge k_0$. For those $k$ Cramer's rule gives $M_k^{-1} =
    132 \frac{\text{adj}(M_k)}{\text{det}(M_k)}$, and the adjugate is also a
    133 polynomial in the entries, hence continuous. This means that
    134 $\{M^{-1}_k\}_{k\ge k_0}$ converges to
    135 $\frac{\text{adj}(M)}{\text{det}(M)} = M^{-1}$.
    136 \subsection{Exercise 39}
    137 Let $H \in \mathbb{R}^{n \times n}$ be regular $u, v \in \mathbb{R}^{n}$
    138 arbitrary. Show that $H+uv^T$ regular $\Leftrightarrow$  $1+v^TH^{-1}u \neq
    139 0$, then the Sherman-Morrison formula holds
    140 \begin{align}
    141     \left( H + uv^T \right)^{-1} = \left( I - \frac{1}{1+v^TH^{-1} u} H^{-1}
    142     uv^T\right)H^{-1} \label{eq: smf}
    143 \end{align}
    144 Let $1+ v^TH^{-1}u =0$ then
    145 \begin{align}
    146     \text{det}\left( H + uv^T \right) =(1+v^T H^{-1} u)\text{det}(H) = 0.
    147 \end{align}
    148 This means that $H + uv^T$ is not invertible. Now we need to check if the inverse
    149 really holds which is done by simply multiplying
    150 \begin{align}
    151     &\left( H+uv^{T} \right) \left( H^{-1} - \frac{H^{-1} uv^T
    152     H^{-1}}{1+v^{T}H^{-1}u} \right) \\
    153     &=H H^{-1} + uv^T H^{-1}  - H \frac{H^{-1}uv^T H^{-1}}{1+v^TH^{-1}u} -
    154     uv^{T}\frac{H^{-1}uv^TH^{-1}}{1+v^{T}H^{-1}u}\\
    155     &=I + uv^T H^{-1} - \frac{uv^T H^{-1} + uv^T H^{-1}uv^T
    156     H^{-1}}{1+v^TH^{-1}u}\\
    157     &=I+uv^TH^{-1} - \frac{u\left( 1+v^{T}H^{-1}u \right)
    158     v^{T}H^{-1}}{1+v^{T}H^{-1} u}\\
    159     &=I + uv^{T}H^{-1} - uv^{T}H^{-1} \\
    160     &= I
    161 \end{align}
    162 Since these are square matrices $AB=I$ is the same as $BA=I$.
    163 \subsection{Exercise 40}
    164 Consider the quadratic optimization problem
    165 \begin{align}
    166     \min \quad &f(x):=\gamma + c^{T}x + \frac{1}{2}x^{T}Qx, \label{eq: opp}\\
    167     \text{s.t.}\quad &h(x) := b^{T}x = 0, \nonumber
    168 \end{align}
    169 with $Q \in \mathbb{R}^{n \times n}$ SPD , $b, c \in \mathbb{R}^{n}$, $b\neq
    170 0$ and $\gamma \in \mathbb{R}$. For a given $\alpha > 0$ find the minimum
    171 $x^*(\alpha)$ of the penalty function
    172 \begin{align}
    173     P(x;\alpha) := f(x) + \frac{\alpha}{2}\left( h(x) \right)^{2}
    174 \end{align}
    175 determine $x^* := \lim_{\alpha \to \infty}x^{*}(\alpha)$ and prove that
    176 $x^{*}$ is a unique optimal solution of the optimization problem in \ref{eq:
    177 opp}. We start with calculating the minimum of $P(x;\alpha)$
    178 \begin{align}
    179     \nabla P(x;\alpha)
    180     &= \nabla f(x) + \frac{\alpha}{2}\nabla \left( h(x) \right)^{2}\\
    181     &= c + Qx + \frac{\alpha}{2} 2 h(x) \nabla h(x) \\
    182     &=c  + Qx + \alpha b^T x b = 0\\
    183     \nonumber\\
    184     Qx + \alpha b b ^T x = -c \\
    185     \left( Q + \alpha b b^T \right) x = -c.
    186 \end{align}
    187 Using the Sherman-Morrison formula in \ref{eq: smf} for $H = Q$, $u=\alpha
    188 b$ and $v = b$ we get
    189 \begin{align}
    190     x^{*}(\alpha) = \left( \frac{\alpha}{1+\alpha b^{T}Q^{-1}b}Q^{-1} bb^{T}
    191     -I\right) Q^{-1} c
    192 \end{align}
    193 The limit is then (the standard limit $\frac{x}{1+kx} \to \frac{1}{k}$ as $x$
    194 goes to infinity)
    195 \begin{align}
    196     x^{*} &= \lim_{\alpha \to \infty}x^{*}(\alpha)\\
    197           &= \left(\frac{Q^{-1}bb^{T}}{b^{T}Q^{-1}b} -I \right) Q^{-1} c.
    198 \end{align}
    199 To show that $x^{*}$ is a unique solution of the optimization problem
    200 \ref{eq: opp} we need to show that it satisfies the optimality condition and
    201 that it is unique. Now it is unique because $Q$ is SPD meaning it is regular
    202 and invertible and $\nabla^2 f = Q>0$. Furthermore $x^{*}$ satisfies the
    203 KKT conditions of the problem, hence $x^{*}$ is a minimum of the optimization
    204 problem. Now we show that $b^{T}x^{*} = 0$:
    205 \begin{align}
    206     b^{T}x^{*} &= \left(\frac{b^{T}Q^{-1}bb^{T}Q^{-1}}{b^{T}Q^{-1}b}-
    207     b^{T}Q^{-1} \right) c \\
    208     &= \left( b^{T}Q^{-1} - b^{T}Q^{-1} \right) c \\
    209     &= 0.
    210 \end{align}
    211 \end{document}
    212 
    213