\input{./preamble.tex}

\begin{document}
\maketitle
\tableofcontents
\section{Sheet 1}
\subsection{Exercise 1}
Let $X \subseteq \mathbb{R}^{n}$ be a nonempty convex set and
$f:\mathbb{R}^{n} \to \mathbb{R}$ a convex function. Show that every local
minimum of $f$ w.r.t.\ $X$ is a global minimum of $f$ w.r.t.\ $X$.
\newline

Let $x^{*} \in X$ be a local minimum of $f$ w.r.t.\ $X$. Then there is an
$\varepsilon$-ball $B_{\varepsilon}(x^{*}) = \{x \in \mathbb{R}^{n}:\ \|x^{*}- x\|\le
\varepsilon\}$ such that $f(x^{*}) \le f(x)$ for all $x \in
B_\varepsilon(x^{*}) \cap X$. Now suppose $x^{*}$ is not a global minimum.
Then there is an $x_{0} \in X$ such that $f(x_{0}) < f(x^{*})$. Since $X$ is
convex, the line segment connecting $x_{0}$ and $x^{*}$ lies in $X$. Its
elements have the form
\begin{align}
    L = \left\{\left( 1-\lambda \right)x_0 + \lambda x^{*}:\ \lambda \in[0,1]
    \right\}
    \subseteq X.
\end{align}
Now for $z \in L$ with $z = (1-\lambda)x_0 + \lambda x^{*}$ and $\lambda \in
[0, 1]$ it holds that
\begin{align}
    \|x^{*}- \left( (1-\lambda)x_0 + \lambda x^{*} \right)\|
    = (1-\lambda) \|x^{*}- x_{0}\| \le \varepsilon,
\end{align}
provided $1-\lambda \le \varepsilon/\|x^{*}-x_0\|$, i.e.\ for $\lambda$
sufficiently close to $1$. Fix such a $\lambda \in (0,1)$; then $z \in
B_\varepsilon(x^{*}) \cap X$, and by convexity of $f$
\begin{align}
    f\left( \left( 1-\lambda \right)x_{0} +\lambda x^{*} \right)
    &\le (1-\lambda) f(x_0) + \lambda f(x^{*})\\
    &< (1-\lambda) f(x^{*}) + \lambda f(x^{*})\\
    &= f(x^{*}).
\end{align}
This means we found an $\tilde{x} = (1-\lambda)x_0 + \lambda x^{*} \in
B_\varepsilon(x^{*}) \cap X$ such that $f(\tilde{x}) < f(x^{*})$, which is a
contradiction since $x^{*}$ is a local minimum on $B_\varepsilon(x^{*}) \cap X$.
This means that $x^{*}$ is a global minimum of $f$ w.r.t.\ $X$.
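As a quick sanity check (an added example, not part of the sheet): take
$n=1$, $X = [0, 3]$ and $f(x) = (x-1)^{2}$, both convex. The only local
minimum is $x^{*} = 1$, and indeed
\begin{align}
    f(x) = (x-1)^{2} \ge 0 = f(x^{*}) \qquad \forall x \in X,
\end{align}
so the local minimum is global. Convexity is needed: the nonconvex $f(x) =
x^{3} - 3x$ on $X = [-3, 3]$ has a local minimum at $x = 1$ with $f(1) = -2$,
while the global minimum is $f(-3) = -18$.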
\subsection{Exercise 2}
Let $X \subseteq \mathbb{R}^{n}$ be nonempty and $x_0 \in X$. Show that
\begin{enumerate}
    \item $T_X(x_0)$ is a nonempty closed cone
    \item If $X$ is convex, then $T_X(x_0) = \text{cl}\left( \bigcup_{\lambda
        \ge 0 } \lambda \left( X - x_0 \right)  \right)$
    \item If $X$ is convex, then $\left( T_X(x_0) \right)^{*} = - N_X(x_0)$
\end{enumerate}
The statements will be proven in order. Starting with 1., the
Bouligand tangent cone to $X$ at $x_0$ is defined as
\begin{align}
    T_X(x_0) = \{d \in \mathbb{R}^{n}: \exists (x^{k})_{k\ge 0}\subset X, \exists
    (t_k)_{k\ge 0}\searrow 0 :\ \frac{x^{k} -x_0}{t_k}\to d\}.
\end{align}
Now $T_X(x_0)$ is nonempty since for $x^{k} = x_0$ for all $k$ and the
sequence $t_k = \frac{1}{k}$ we have
\begin{align}
    \frac{x^{k}-x_0}{t_k} = 0 \to 0, \quad\text{so } 0 \in T_X(x_0).
\end{align}
To show that $T_X(x_0)$ is closed, consider a sequence $(d_n)_{n\ge 0}\subset
T_X(x_0)$ with limit $d \in \mathbb{R}^{n}$; we need to show that $d \in
T_X(x_0)$. For every $n \ge 0$ there exist a sequence $(x^{n,k})_{k\ge 0}
\subset X$ and a sequence $(t_{n, k})_{k\ge 0}$ with $t_{n, k} \searrow 0$ as
$k \to \infty$ such that
\begin{align}
    \frac{x^{n,k}-x_0}{t_{n,k}} \to d_n \quad (k \to \infty) \qquad \forall n\ge 0.
\end{align}
Now for every $m \in \mathbb{N}$ pick first an $n_m$ with $\|d_{n_m} - d\|
\le \frac{1}{m}$ and then a $k_m$ with
\begin{align}
    \left\| \frac{x^{n_m,k_m}-x_0}{t_{n_m,k_m}} - d_{n_m} \right\| \le
    \frac{1}{m} \quad\text{and}\quad t_{n_m,k_m}\le \frac{1}{m}.
\end{align}
Setting $\tilde{x}^{m} = x^{n_m,k_m} \in X$ and $\tilde{t}_m = t_{n_m,k_m}$
(so that $\tilde{t}_m \to 0$; pass to a subsequence to get $\tilde{t}_m
\searrow 0$), the triangle inequality gives
\begin{align}
    \left\| \frac{\tilde{x}^{m}-x_0}{\tilde{t}_m} - d \right\|
    \le \left\| \frac{\tilde{x}^{m}-x_0}{\tilde{t}_m} - d_{n_m} \right\|
    + \left\| d_{n_m} - d \right\|
    \le \frac{2}{m} \to 0,
\end{align}
meaning that $d \in T_X(x_0)$.
Finally, $T_X(x_0)$ is a cone: $0 \in T_X(x_0)$ was shown above, and for all
$d \in T_X(x_0)$ and $\lambda > 0$ we have $\lambda d \in T_X(x_0)$ by the
choice $t_{k,\lambda}= \frac{1}{\lambda}t_k$, since
\begin{align}
    \frac{x^{k}- x_0}{t_{k,\lambda}} = \lambda \frac{x^{k}-x_0}{t_k}\to
    \lambda d.
\end{align}
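As an added illustration (not part of the sheet): for $X =
\mathbb{R}^{n}_{\geqq 0}$ and $x_0 = 0$, every $d \geqq 0$ is obtained from
$x^{k} = t_k d \in X$ with $t_k = \frac{1}{k}$, while a $d$ with a negative
component cannot arise as a limit of $\frac{x^{k}}{t_k}$ with $x^{k} \geqq
0$, so
\begin{align}
    T_X(0) = \mathbb{R}^{n}_{\geqq 0}.
\end{align}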
For number 2, $X$ is additionally convex. For the inclusion ``$\supseteq$'',
let $d = \lambda(x - x_0)$ with $x \in X$ and $\lambda \ge 0$. Choose $t_k
\searrow 0$ with $t_k \lambda \le 1$ and set $x^{k} = x_0 + t_k\lambda(x -
x_0)$; by convexity $x^{k} \in X$, and $\frac{x^{k}-x_0}{t_k} = \lambda(x -
x_0) \to d$, so $\bigcup_{\lambda \ge 0}\lambda(X - x_0) \subseteq T_X(x_0)$.
Since $T_X(x_0)$ is closed by part 1, also $\text{cl}\left(
\bigcup_{\lambda\ge 0} \lambda(X-x_0) \right) \subseteq T_X(x_0)$. For
``$\subseteq$'', every $d \in T_X(x_0)$ satisfies $\frac{x^{k}-x_0}{t_k} =
\frac{1}{t_k}(x^{k}-x_0) \in \bigcup_{\lambda\ge 0} \lambda(X-x_0)$, so the
limit $d$ lies in the closure.
For number 3 we simply calculate
\begin{align}
    -\left( T_X(x_0) \right)^{*}
    &= -\{s \in \mathbb{R}^{n}: s^{T}d \ge 0 \;\forall d \in T_X(x_0)\}\\
    &= \{s \in \mathbb{R}^{n}: s^{T}d \le 0 \;\forall d \in T_X(x_0)\}\\
    &= \{s \in \mathbb{R}^{n}: s^{T}(x-x_0) \le 0 \;\forall x \in X\} = N_X(x_0).
\end{align}
The last equality uses part 2: on the one hand $X - x_0 \subseteq T_X(x_0)$;
on the other hand every $d \in T_X(x_0)$ is a limit of elements $\lambda(x -
x_0)$, and the inequality $s^{T}d \le 0$ is preserved under nonnegative
scaling and limits.
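Again for the added example $X = \mathbb{R}^{n}_{\geqq 0}$, $x_0 = 0$: there
$T_X(0) = \mathbb{R}^{n}_{\geqq 0}$ is self-dual, $\left( T_X(0) \right)^{*}
= \mathbb{R}^{n}_{\geqq 0}$, while $N_X(0) = \{s: s^{T}x \le 0 \;\forall x
\geqq 0\} = \mathbb{R}^{n}_{\leqq 0}$, and indeed $\left( T_X(0) \right)^{*}
= -N_X(0)$.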
\subsection{Exercise 3}
Let $X \subseteq \mathbb{R}^{n}$ be nonempty and $\text{dist}_X: \mathbb{R}^{n}
\to \mathbb{R}$, with $\text{dist}_X(y) = \inf \{\|y-x\|: x\in X\}$. Then consider the
directional derivative of a function $f: \mathbb{R}^{n}\to \mathbb{R}$ at
$x_0 \in \mathbb{R}^{n}$ in direction $d$
\begin{align}
    f'(x_0; d) = \lim_{t \downarrow 0} \frac{f(x_0 + t d) - f(x_0)}{t}.
\end{align}
Show that if $X \subseteq \mathbb{R}^{n}$ is nonempty and convex then the
tangent cone can be written as
\begin{align}
    T_X(x_0) = \{d \in \mathbb{R}^{n}:\; \left( \text{dist}_X
    \right)'(x_0;d) = 0\}.
\end{align}
First we note that $\text{dist}_X(x_0) = 0$ because $x_0 \in X$, so
\begin{align}
    (\text{dist}_X)'(x_0;d) = \lim_{t \downarrow 0} \frac{\text{dist}_X(x_0+t
    d)}{t},
\end{align}
which vanishes in particular whenever $x_0 + t d \in X$ for all small $t >
0$. Since $X$ is convex we have by Exercise 2 that
\begin{align}
    T_X(x_0) &= \text{cl}\left( \bigcup_{\lambda \ge 0}\lambda\left( X-x_0 \right)  \right)
    \\
             &= \text{cl}\left( \{\lambda (x - x_0): x\in X, \lambda \ge 0\}  \right).
\end{align}
For $d = \lambda (x - x_0)$ with $\lambda \ge 0$ and $x \in X$, convexity
gives $x_0 + t d \in X$ for $t\lambda \le 1$, hence $(\text{dist}_X)'(x_0;d)
= 0$. Moreover $\text{dist}_X$ is $1$-Lipschitz, so $d \mapsto
(\text{dist}_X)'(x_0;d)$ is $1$-Lipschitz as well and the property passes to
the closure, i.e.\ it holds for all $d \in T_X(x_0)$. Conversely, if
$(\text{dist}_X)'(x_0;d) = 0$, then for any $t_k \searrow 0$ there exist
$x^{k} \in X$ with $\|x_0 + t_k d - x^{k}\| \le \text{dist}_X(x_0 + t_k d) +
\frac{t_k}{k}$, so $\frac{x^{k}-x_0}{t_k} \to d$ and $d \in T_X(x_0)$.
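A small added example: for $X = [0,\infty) \subseteq \mathbb{R}$ and $x_0 =
0$ we get $\text{dist}_X(td) = 0$ for $d \ge 0$ and $\text{dist}_X(td) =
t|d|$ for $d < 0$ ($t > 0$), so
\begin{align}
    (\text{dist}_X)'(0;d) =
    \begin{cases}
        0, & d \ge 0,\\
        |d|, & d < 0,
    \end{cases}
\end{align}
and the characterization recovers $T_X(0) = [0,\infty)$.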
\subsection{Exercise 4}
We consider the general constrained optimization problem
\begin{align}
    \text{min}\quad & f(x),\\
    \text{s.t.}\quad & g_i(x) \le 0, \; i=0,\ldots,m\nonumber\\
    &h_j(x) = 0, \; j=0,\ldots,p\nonumber\\
    & x \in \mathbb{R}^{n}\nonumber
\end{align}
for $f, g_i, h_j: \mathbb{R}^{n}\to \mathbb{R}$, $i=0,\ldots,m$ and
$j=0,\ldots,p$ continuously differentiable. Let $x_0$ be a feasible element
of the problem and
\begin{align}
    X_{\text{lin}} := \{ x \in \mathbb{R}^{n}:\quad
    &g_i(x_0) + \nabla g_i(x_0)^{T}(x-x_0) \le 0, \; i=0,\ldots,m\\
    &h_j(x_0) + \nabla h_j(x_0)^{T}(x-x_0) = 0, \; j=0,\ldots,p\}.
\end{align}
Show that $T_\text{lin}(x_0) = T_{X_\text{lin}}(x_0)$.
\newline
As a reminder,
\begin{align}
    T_{\text{lin}}(x_0) := \{ d \in \mathbb{R}^{n}:\quad
    &\nabla g_i(x_0)^{T}d \le 0 \;\forall i \in \mathcal{A}(x_0)\\
    &\nabla h_j(x_0)^{T}d = 0, \; j=0,\ldots,p\},
\end{align}
where $\mathcal{A}(x_0) = \{i=0,\ldots,m:\, g_i(x_0) = 0\}$. First we show that
$X_\text{lin}$ is convex; then we can use the second part of Exercise 2. So
for all $x, y \in X_{\text{lin}}$ and $\lambda \in [0,1]$ we need to show
that $z=(1-\lambda)x + \lambda y \in X_{\text{lin}}$; this is done by
checking the conditions
\begin{align}
    &g_i(x_0) + \nabla g_i(x_0)^{T} (z-x_0)\\
    &=g_i(x_0) + \nabla g_i(x_0)^{T} \left( (1-\lambda)(x-x_0) + \lambda (y-x_0) \right)\\
    &=(1-\lambda)\left( g_i(x_0) + \nabla g_i(x_0)^{T} (x-x_0) \right)
    + \lambda\left( g_i(x_0) + \nabla g_i(x_0)^{T} (y-x_0) \right)\\
    &\le (1-\lambda)\cdot 0 + \lambda \cdot 0 = 0
\end{align}
and similarly for $h_j$
\begin{align}
    &h_j(x_0) + \nabla h_j(x_0)^{T} (z-x_0)\\
    &=(1-\lambda)\left( h_j(x_0) + \nabla h_j(x_0)^{T} (x-x_0) \right)
    + \lambda\left( h_j(x_0) + \nabla h_j(x_0)^{T} (y-x_0) \right)\\
    &= (1-\lambda)\cdot 0 + \lambda \cdot 0 = 0.
\end{align}
Thereby $X_{\text{lin}}$ is convex and, by Exercise 2,
\begin{align}
    T_{X_\text{lin}}(x_0) = \text{cl}\left( \bigcup_{\lambda \ge 0 }\lambda \left(
    X_\text{lin} - x_0 \right) \right).
\end{align}
It remains to show $\bigcup_{\lambda \ge 0 }\lambda \left( X_\text{lin} - x_0
\right) = T_\text{lin}(x_0)$; since $T_\text{lin}(x_0)$ is polyhedral and
hence closed, taking the closure then changes nothing.
For ``$\subseteq$'', let $d = \lambda(x - x_0)$ with $x \in X_\text{lin}$ and
$\lambda \ge 0$: for $i \in \mathcal{A}(x_0)$ we have $g_i(x_0) = 0$, so
$\nabla g_i(x_0)^{T}d = \lambda\left( g_i(x_0) + \nabla g_i(x_0)^{T}(x-x_0)
\right) \le 0$, and analogously $\nabla h_j(x_0)^{T}d = 0$ using $h_j(x_0) =
0$. For ``$\supseteq$'', let $d \in T_\text{lin}(x_0)$ and $t > 0$ small:
then $g_i(x_0) + t\nabla g_i(x_0)^{T}d \le 0$ also holds for the inactive $i$
(there $g_i(x_0) < 0$), so $x_0 + t d \in X_\text{lin}$ and $d =
\frac{1}{t}\left( (x_0 + td) - x_0 \right)$ lies in the union.
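As an added sanity check: for a single affine constraint $g(x) = a^{T}x -
\beta$ with $a \neq 0$ and no equality constraints, the linearization is
exact, $X_\text{lin} = X = \{x: a^{T}x \le \beta\}$, and at an active point
$x_0$ (i.e.\ $a^{T}x_0 = \beta$) both sides reduce to the same half-space,
\begin{align}
    T_{X_\text{lin}}(x_0) = \{d \in \mathbb{R}^{n}: a^{T}d \le 0\} = T_\text{lin}(x_0).
\end{align}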
\subsection{Exercise 5}
Let $g: \mathbb{R}^{2} \to \mathbb{R}^{4}$ with
\begin{align}
    g(x, y) =
    \begin{pmatrix}
        \pi - 2x\\
        -y - 1\\
        2x - 3\pi\\
        y - \sin(x)
    \end{pmatrix}
\end{align}
and
\begin{align}
    X = \{(x,y) \in \mathbb{R}^{2}: g(x,y) \leqq 0\}.
\end{align}
The set $X$ imposes the following constraints on $x, y$:
\begin{align}
    \pi - 2x \le 0 \;&\Rightarrow\; x \ge \frac{\pi}{2}\\
    -y - 1 \le 0 \;&\Rightarrow\; y \ge -1\\
    2x - 3\pi \le 0 \;&\Rightarrow\; x \le \frac{3\pi}{2}\\
    y - \sin\left( x \right) \le 0 \;&\Rightarrow\; y \le \sin(x),
\end{align}
meaning $x \in [\frac{\pi}{2}, \frac{3\pi}{2}]$ and $-1 \le y \le \sin(x)$
(in particular $y \in [-1, 1]$).
The tangent cones of $X$ w.r.t.\ $x_1 = (\frac{\pi}{2}, 1)^{T},
x_2=(\pi,0)^{T}$ and $x_3 = (\frac{3\pi}{2}, -1)^{T}$ are
\begin{align}
    T_X(x_1) &= \{d \in \mathbb{R}^{2}: d_1 \ge 0,\ d_2 \le 0\} \\
    T_X(x_2) &= \{d \in \mathbb{R}^{2}: d_1 + d_2 \le 0\} \\
    T_X(x_3) &= \{(-\lambda, 0)^{T}: \lambda \ge 0\}.
\end{align}
Here at $x_1$ the constraints $x \ge \frac{\pi}{2}$ and $y \le \sin(x)$
(boundary slope $\cos(\frac{\pi}{2}) = 0$) are active, at $x_2$ only $y \le
\sin(x)$ (slope $\cos(\pi) = -1$) is active, and at $x_3$ the curve $y =
\sin(x)$ touches the corner of $y \ge -1$ and $x \le \frac{3\pi}{2}$
tangentially, so only the direction $(-1, 0)^{T}$ remains.
Graphically this is represented in the following figure.
\begin{figure}[H]
    \centering
    \includegraphics[width=0.4\textwidth]{./build/pics/sesh1_ex5.png}
    \caption{Graphical representation of $X$ and tangent cones $T_X(x_i)$, $i=1, 2,
    3$}
    \label{fig:sesh1_ex5}
\end{figure}
Then we look at the linearized tangent cones $T_\text{lin}(x_i) = \{d\in \mathbb{R}^{2}:
\nabla g_j(x_i)^{T} d \le 0 \;\forall j \in \mathcal{A}(x_i)\}$. First we
calculate the gradients of the components of $g$:
\begin{align}
    \nabla g_1(x, y) = (-2 ,0)^{T}\qquad \nabla g_2(x,y) = (0, -1)^{T}\\
    \nabla g_3(x, y) = (2 ,0)^{T}\qquad \nabla g_4(x,y) = (-\cos(x), 1)^{T}
\end{align}
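Only $\nabla g_4$ depends on the point, so we record its values at the three
points of interest:
\begin{align}
    \nabla g_4(x_1) = (0, 1)^{T}, \qquad
    \nabla g_4(x_2) = (1, 1)^{T}, \qquad
    \nabla g_4(x_3) = (0, 1)^{T}.
\end{align}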
Then for all $j \in \{ 1, 2, 3, 4\} $ and $i \in \{1, 2, 3\}$ we check
whether $g_j(x_i) = 0$, construct $\mathcal{A}(x_i)$, and then determine the
$d$ satisfying the conditions.
For $x_1$ we have
\begin{align}
    g_1(x_1) = 0,\;\;
    g_2(x_1) < 0,\;\;
    g_3(x_1) < 0,\;\;
    g_4(x_1) = 0 \;\;\\
    \Rightarrow \mathcal{A}(x_1) = \{1, 4\}
\end{align}
Then
\begin{align}
    T_{\text{lin}}(x_1)
    &= \{d \in \mathbb{R}^{2} : (-2, 0)d \le 0,\, (0, 1)d \le 0\}\\
    &=\{d \in \mathbb{R}^{2}: d_1 \ge 0,\ d_2 \le 0\} = T_X(x_1).
\end{align}
For $x_2$ we have
\begin{align}
    g_1(x_2) < 0,\;\;
    g_2(x_2) < 0,\;\;
    g_3(x_2) < 0,\;\;
    g_4(x_2) = 0 \;\;\\
    \Rightarrow \mathcal{A}(x_2) = \{4\}
\end{align}
Then
\begin{align}
    T_{\text{lin}}(x_2)
    &= \{d \in \mathbb{R}^{2} : (1, 1)d \le 0\}\\
    &=\{d \in \mathbb{R}^{2}: d_1 + d_2 \le 0\} = T_X(x_2).
\end{align}
For $x_3$ we have
\begin{align}
    g_1(x_3) < 0,\;\;
    g_2(x_3) = 0,\;\;
    g_3(x_3) = 0,\;\;
    g_4(x_3) = 0 \;\;\\
    \Rightarrow \mathcal{A}(x_3) = \{2, 3, 4\}
\end{align}
Then
\begin{align}
    T_{\text{lin}}(x_3)
    &= \{d \in \mathbb{R}^{2} : (0, -1)d \le 0,\, (2, 0)d\le 0,\, (0, 1)d \le 0\}\\
    &=\{(-\lambda, 0)^{T}: \lambda \ge 0 \} = T_X(x_3).
\end{align}
We conclude that $T_X(x_i) = T_{\text{lin}}(x_i)$ holds at all three points
$i=1, 2, 3$.
\subsection{Exercise 6}
Let $A \in \mathbb{R}^{m \times n}$ and $b \in \mathbb{R}^{m}$. Prove using
the strong duality theorem of linear optimization that the following
statements are equivalent:
\begin{enumerate}
    \item The system $Ax = b$ has a solution $x \leqq 0$
    \item It holds $b^{T}d \ge 0$ for all $d \in \mathbb{R}^{m}$ with $A^{T}d
        \leqq 0$
\end{enumerate}
Note that statement 2 is equivalent to $\neg \exists d \in \mathbb{R}^{m}:
A^{T}d \leqq 0$ and $b^{T}d < 0$. Consider the following primal-dual pair
\begin{align}
    \text{max}\quad & 0^{T}x\\
    \text{s.t.:}\quad & Ax=b\nonumber\\
                      &x\leqq 0\nonumber
\end{align}
and
\begin{align}
    \text{min}\quad & b^{T}d\\
    \text{s.t.:}\quad & A^{T}d\leqq 0\nonumber\\
                      &d \in \mathbb{R}^{m}.\nonumber
\end{align}
Suppose first that $Ax = b$ has a solution $x \leqq 0$. Then the primal is
feasible, and since every feasible point has objective value $0^{T}x=0$, the
primal optimal value is $0$. By strong duality the dual also has optimal
value $0$, i.e.\ $\min\{b^{T}d : A^{T}d \leqq 0\} = 0$, so $b^{T}d \ge 0$ for
all $d$ with $A^{T}d \leqq 0$, which is statement 2.\newline
Conversely, assume statement 2. The dual is feasible ($d = 0$ with objective
value $0$) and its objective is bounded below by $0$ on the feasible set, so
the dual has optimal value $0$, attained at $d = 0$. By strong duality the
primal then also has an optimal solution; in particular it is feasible, i.e.\
there exists an $x \leqq 0$ with $Ax = b$, which is statement 1. Thereby the
two statements above are equivalent.
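A one-dimensional added example ($m = n = 1$): for $A = (1)$ and $b = -2$,
statement 1 holds with $x = -2 \leqq 0$, and indeed $b^{T}d = -2d \ge 0$ for
all $d \le 0$. For $b = 2$ statement 1 fails, and $d = -1$ satisfies $A^{T}d
= -1 \leqq 0$ with $b^{T}d = -2 < 0$, so statement 2 fails as well.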

\end{document}