\include{./preamble.tex}

\begin{document}
\maketitle
\tableofcontents
\section{Sheet 1}
\subsection{Exercise 1}
Let $X \subseteq \mathbb{R}^{n}$ be a nonempty convex set and
$f:\mathbb{R}^{n} \to \mathbb{R}$ a convex function. Show that every local
minimum of $f$ w.r.t.\ $X$ is a global minimum of $f$ w.r.t.\ $X$.
\newline

Let $x^{*} \in X$ be a local minimum of $f$ w.r.t.\ $X$. Then there is an
$\varepsilon$-ball $B_{\varepsilon}(x^{*}) = \{x \in \mathbb{R}^{n}:\ \|x^{*}- x\|\le
\varepsilon\}$ such that $f(x^{*}) \le f(x)$ for all $x \in
B_\varepsilon(x^{*}) \cap X$. Now suppose $x^{*}$ is not a global minimum.
Then there is an $x_{0} \in X$ such that $f(x_{0}) < f(x^{*})$. Since $X$ is
convex, the line segment connecting $x_{0}$ and $x^{*}$,
\begin{align}
	L = \left\{ \left( 1-\lambda \right)x_0 + \lambda x^{*}:\ \lambda \in[0,1]
	\right\},
\end{align}
is contained in $X$. Now for $z = (1-\lambda)x_0 + \lambda x^{*} \in L$ it
holds that
\begin{align}
	\|x^{*}- z\| = (1-\lambda) \|x^{*}- x_{0}\| \le \varepsilon
\end{align}
for all $\lambda$ sufficiently close to $1$, so such $z$ lie in
$B_\varepsilon(x^{*}) \cap X$. For these $z$ with $\lambda \in (0,1)$,
convexity of $f$ together with $f(x_0) < f(x^{*})$ gives
\begin{align}
	f\left( \left( 1-\lambda \right)x_{0} +\lambda x^{*} \right)
	&\le (1-\lambda) f(x_0) + \lambda f(x^{*})\\
	&< (1-\lambda) f(x^{*}) + \lambda f(x^{*})\\
	&= f(x^{*}).
\end{align}
This means that we found an $\tilde{x} = (1-\lambda)x_0 + \lambda x^{*} \in
B_\varepsilon(x^{*}) \cap X$ such that $f(\tilde{x}) < f(x^{*})$, which is a
contradiction since $x^{*}$ is a local minimum on $B_\varepsilon(x^{*}) \cap
X$. Hence $x^{*}$ is a global minimum of $f$ w.r.t.\ $X$.
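To make ``sufficiently close to $1$'' explicit (a small worked bound, under
the assumption $\|x^{*}-x_0\| > \varepsilon$; otherwise $x_0$ itself already
lies in $B_\varepsilon(x^{*})$ and contradicts local minimality directly), one
admissible choice is
\begin{align}
	\lambda = 1 - \frac{\varepsilon}{\|x^{*}-x_0\|} \in (0,1)
	\quad\Rightarrow\quad
	\|x^{*}-\tilde{x}\| = (1-\lambda)\|x^{*}-x_0\| = \varepsilon,
\end{align}
so the constructed $\tilde{x}$ indeed lies in $B_\varepsilon(x^{*})$.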
\subsection{Exercise 2}
Let $X \subseteq \mathbb{R}^{n}$ be a nonempty set and $x_0 \in X$. Show that
\begin{enumerate}
	\item $T_X(x_0)$ is a nonempty closed cone,
	\item if $X$ is convex, then $T_X(x_0) = \text{cl}\left( \bigcup_{\lambda
		\ge 0 } \lambda \left( X - x_0 \right) \right)$,
	\item if $X$ is convex, then $\left( T_X(x_0) \right)^{*} = - N_X(x_0)$.
\end{enumerate}
The statements will be proven in order. Starting with 1., the
Bouligand tangent cone to $X$ is defined as
\begin{align}
	T_X(x_0) = \left\{ d \in \mathbb{R}^{n}: \exists (x^{k})_{k\ge 0}\subset X,\ \exists
	(t_k)_{k\ge 0}\searrow 0 :\ \frac{x^{k} -x_0}{t_k}\to d \right\}.
\end{align}
Now $T_X(x_0)$ is nonempty, since for $x^{k} = x_0$ for all $k$ and the sequence $t_k =
\frac{1}{k}$ we have
\begin{align}
	\frac{x^{k}-x_0}{t_k} \to 0 \in T_X(x_0).
\end{align}
To show that $T_X(x_0)$ is closed, consider a sequence $(d_n)_{n\ge 0}\subset
T_X(x_0)$ with limit $d \in \mathbb{R}^{n}$; we need to show that $d \in
T_X(x_0)$. For every $n \ge 0$ there exist a sequence $(x^{n,k})_{k\ge 0}
\subset X$ and a sequence $(t_{n,k})_{k\ge 0} \searrow 0$ (as $k \to \infty$)
such that
\begin{align}
	\frac{x^{n,k}-x_0}{t_{n,k}} \to d_n \quad (k \to \infty) \qquad \forall n \ge 0.
\end{align}
For each $n \ge 1$ pick an index $k_n$ large enough that $t_{n,k_n} \le
\frac{1}{n}$ and
\begin{align}
	\left\|\frac{x^{n,k_n}-x_0}{t_{n,k_n}} - d_n\right\| \le \frac{1}{n}.
\end{align}
Then, with the diagonal sequences $y^{n} := x^{n,k_n} \in X$ and $s_n :=
t_{n,k_n}$ (which tends to $0$; pass to a subsequence to make it decreasing),
\begin{align}
	\left\|\frac{y^{n}-x_0}{s_n} - d\right\|
	\le \left\|\frac{x^{n,k_n}-x_0}{t_{n,k_n}} - d_n\right\|
	+ \left\| d_n - d\right\|
	\le \frac{1}{n} + \left\| d_n - d \right\| \to 0,
\end{align}
meaning that $d \in T_X(x_0)$.
Finally, $T_X(x_0)$ is indeed a cone: $0 \in T_X(x_0)$ was shown above, and
for all $d \in T_X(x_0)$ and $\lambda > 0$ we have $\lambda d \in T_X(x_0)$ by
the choice $t_{k,\lambda} = \frac{1}{\lambda}t_k$, since
\begin{align}
	\frac{x^{k}- x_0}{t_{k,\lambda}} = \lambda \frac{x^{k}-x_0}{t_k}\to
	\lambda d.
\end{align}
For number 2, $X$ is additionally convex. Write $C := \bigcup_{\lambda \ge 0}
\lambda \left( X - x_0 \right)$. For $C \subseteq T_X(x_0)$, let $d =
\lambda(x-x_0)$ with $x \in X$ and $\lambda > 0$ (the case $\lambda = 0$ gives
$d = 0 \in T_X(x_0)$). By convexity $x^{k} := x_0 + \frac{1}{k}(x-x_0) \in X$,
so with $t_k := \frac{1}{\lambda k}$
\begin{align}
	\frac{x^{k}-x_0}{t_k} = \lambda(x - x_0) \to d.
\end{align}
Since $T_X(x_0)$ is closed by 1., also $\text{cl}(C) \subseteq T_X(x_0)$.
Conversely, if $d \in T_X(x_0)$ with $\frac{x^{k}-x_0}{t_k} \to d$, then every
element $\frac{x^{k}-x_0}{t_k} = \frac{1}{t_k}\left( x^{k}-x_0 \right)$ of the
approximating sequence lies in $C$, hence $d \in \text{cl}(C)$. Together,
\begin{align}
	T_X(x_0) = \text{cl}\left(\bigcup_{\lambda\ge 0} \lambda(X-x_0) \right).
\end{align}
For number 3 we simply calculate
\begin{align}
	-\left( T_X(x_0) \right)^{*}
	&= -\{s \in \mathbb{R}^{n}: s^{T}d \ge 0 \;\forall d \in T_X(x_0)\}\\
	&= \{s \in \mathbb{R}^{n}: s^{T}d \le 0 \;\forall d \in T_X(x_0)\}\\
	&= \{s \in \mathbb{R}^{n}: s^{T}(x-x_0) \le 0 \;\forall x \in X\} = N_X(x_0).
\end{align}
The last equality holds by 2.: every $x - x_0$ with $x \in X$ lies in
$T_X(x_0)$, and conversely every $d \in T_X(x_0)$ is a limit of nonnegative
multiples $\lambda(x-x_0)$, so $s^{T}d \le 0$ for all tangent directions is
equivalent to $s^{T}(x-x_0) \le 0$ for all $x \in X$. Multiplying by $-1$
gives $\left( T_X(x_0) \right)^{*} = -N_X(x_0)$.
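As a quick sanity check of 2.\ and 3.\ (a concrete example, not part of the
exercise sheet): take $X = \mathbb{R}^{2}_{\ge 0}$ and $x_0 = 0$. Then
\begin{align}
	T_X(0) &= \text{cl}\Big( \bigcup_{\lambda \ge 0} \lambda X \Big)
	= \mathbb{R}^{2}_{\ge 0},\\
	N_X(0) &= \{s \in \mathbb{R}^{2}: s^{T}x \le 0 \;\forall x \in X\}
	= \mathbb{R}^{2}_{\le 0},\\
	\left( T_X(0) \right)^{*} &= \{s \in \mathbb{R}^{2}: s^{T}d \ge 0
	\;\forall d \in \mathbb{R}^{2}_{\ge 0}\} = \mathbb{R}^{2}_{\ge 0} = -N_X(0),
\end{align}
consistent with both identities.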
\subsection{Exercise 3}
Let $X \subseteq \mathbb{R}^{n}$ be nonempty and $\text{dist}_X: \mathbb{R}^{n}
\to \mathbb{R}$, with $\text{dist}_X(y) = \inf \{\|y-x\|: x\in X\}$. Consider
the directional derivative of a function $f: \mathbb{R}^{n}\to \mathbb{R}$ at
$x_0 \in \mathbb{R}^{n}$ in direction $d$,
\begin{align}
	f'(x_0; d) = \lim_{t \downarrow 0} \frac{f(x_0 + t d) - f(x_0)}{t}.
\end{align}
Show that if $X \subseteq \mathbb{R}^{n}$ is nonempty and convex, then for
$x_0 \in X$ the tangent cone can be written as
\begin{align}
	T_X(x_0) = \{d \in \mathbb{R}^{n}:\; \left( \text{dist}_X
	\right)'(x_0;d) = 0\}.
\end{align}
First we note that $\text{dist}_X(x_0) = 0$ (as $x_0 \in X$), hence
\begin{align}
	(\text{dist}_X)'(x_0;d) = \lim_{t \downarrow 0} \frac{\text{dist}_X(x_0+t
	d)}{t} \ge 0,
\end{align}
where the limit exists because $\text{dist}_X$ is convex for convex $X$. If
$d = \lambda(x-x_0)$ with $x \in X$ and $\lambda \ge 0$, then by convexity
$x_0 + td \in X$ for all sufficiently small $t > 0$, so $\text{dist}_X(x_0 +
td) = 0$ near $0$ and $(\text{dist}_X)'(x_0;d) = 0$. Moreover $\text{dist}_X$
is $1$-Lipschitz, so $d \mapsto (\text{dist}_X)'(x_0;d)$ is $1$-Lipschitz as
well, and the set on the right-hand side is closed. It therefore contains
$\text{cl}\left( \bigcup_{\lambda \ge 0}\lambda(X-x_0) \right)$, which equals
$T_X(x_0)$ by exercise 2. Conversely, let $(\text{dist}_X)'(x_0;d) = 0$ and
$t_k \searrow 0$. Choosing $x^{k} \in X$ with $\|x_0 + t_k d - x^{k}\| \le
\text{dist}_X(x_0 + t_k d) + t_k^{2}$ yields
\begin{align}
	\left\| \frac{x^{k}-x_0}{t_k} - d \right\|
	= \frac{\left\| x^{k} - (x_0 + t_k d) \right\|}{t_k}
	\le \frac{\text{dist}_X(x_0 + t_k d)}{t_k} + t_k \to 0,
\end{align}
so $d \in T_X(x_0)$. Both inclusions together prove the claimed identity.
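For a concrete one-dimensional illustration (not part of the exercise sheet):
take $X = [0, \infty) \subset \mathbb{R}$ and $x_0 = 0$. For $t > 0$ we have
$\text{dist}_X(td) = 0$ if $d \ge 0$ and $\text{dist}_X(td) = t|d|$ if $d < 0$,
so
\begin{align}
	(\text{dist}_X)'(0; d) =
	\begin{cases}
		0, & d \ge 0,\\
		|d|, & d < 0,
	\end{cases}
\end{align}
and $\{d \in \mathbb{R} : (\text{dist}_X)'(0;d) = 0\} = [0,\infty) = T_X(0)$,
as the identity predicts.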
\subsection{Exercise 4}
We consider the general constrained optimization problem
\begin{align}
	\text{min}\quad & f(x),\\
	\text{s.t.}\quad & g_i(x) \le 0, \quad i=1,\ldots,m,\nonumber\\
	&h_j(x) = 0, \quad j=1,\ldots,p,\nonumber\\
	& x \in \mathbb{R}^{n},\nonumber
\end{align}
for $f, g_i, h_j: \mathbb{R}^{n}\to \mathbb{R}$, $i=1,\ldots,m$ and
$j=1,\ldots,p$ continuously differentiable. Let $x_0$ be a feasible point
of the problem and
\begin{align}
	X_{\text{lin}} := \{ x \in \mathbb{R}^{n}:\quad
	&g_i(x_0) + \nabla g_i(x_0)^{T}(x-x_0) \le 0, \quad i=1,\ldots,m,\\
	&h_j(x_0) + \nabla h_j(x_0)^{T}(x-x_0) = 0, \quad j=1,\ldots,p\}.
\end{align}
Show that $T_\text{lin}(x_0) = T_{X_\text{lin}}(x_0)$.
\newline
As a reminder,
\begin{align}
	T_{\text{lin}}(x_0) := \{ d \in \mathbb{R}^{n}:\quad
	&\nabla g_i(x_0)^{T}d \le 0 \quad \forall i \in \mathcal{A}(x_0),\\
	&\nabla h_j(x_0)^{T}d = 0, \quad j=1,\ldots,p\},
\end{align}
where $\mathcal{A}(x_0) = \{i \in \{1,\ldots,m\} : g_i(x_0) = 0\}$ is the
active set. First we show that $X_\text{lin}$ is convex; then we can use the
second part of exercise 2. For $x, y \in X_{\text{lin}}$ and $\lambda \in
[0,1]$ we need to show that $z=(1-\lambda)x + \lambda y \in X_{\text{lin}}$.
Since the defining functions are affine,
\begin{align}
	&g_i(x_0) + \nabla g_i(x_0)^{T} (z-x_0)\\
	&= (1-\lambda)\left( g_i(x_0) + \nabla g_i(x_0)^{T} (x-x_0) \right)
	+ \lambda \left( g_i(x_0) + \nabla g_i(x_0)^{T} (y-x_0) \right)\\
	&\le (1-\lambda)\cdot 0 + \lambda \cdot 0 = 0,
\end{align}
and similarly for $h_j$,
\begin{align}
	&h_j(x_0) + \nabla h_j(x_0)^{T} (z-x_0)\\
	&= (1-\lambda)\left( h_j(x_0) + \nabla h_j(x_0)^{T} (x-x_0) \right)
	+ \lambda \left( h_j(x_0) + \nabla h_j(x_0)^{T} (y-x_0) \right)\\
	&= (1-\lambda)\cdot 0 + \lambda \cdot 0 = 0.
\end{align}
Thereby $X_\text{lin}$ is convex, and exercise 2 gives
\begin{align}
	T_{X_\text{lin}}(x_0) = \text{cl}\left( \bigcup_{\lambda \ge 0 }\lambda
	\left( X_\text{lin} - x_0 \right) \right).
\end{align}
It remains to identify this set with $T_\text{lin}(x_0)$. If $d = \lambda(x -
x_0)$ with $x \in X_\text{lin}$ and $\lambda \ge 0$, then for $i \in
\mathcal{A}(x_0)$ (so $g_i(x_0) = 0$) and all $j$ (note $h_j(x_0) = 0$ since
$x_0$ is feasible)
\begin{align}
	\nabla g_i(x_0)^{T} d = \lambda \left( g_i(x_0) + \nabla g_i(x_0)^{T}(x -
	x_0) \right) \le 0, \qquad
	\nabla h_j(x_0)^{T} d = 0,
\end{align}
so $\bigcup_{\lambda \ge 0}\lambda(X_\text{lin} - x_0) \subseteq
T_\text{lin}(x_0)$. Conversely, for $d \in T_\text{lin}(x_0)$ and $t > 0$
small enough that $g_i(x_0) + t \nabla g_i(x_0)^{T} d \le 0$ also for the
inactive $i$ (possible since $g_i(x_0) < 0$ there), the point $x = x_0 + t d$
lies in $X_\text{lin}$ and $d = \frac{1}{t}(x - x_0)$. Finally,
$T_\text{lin}(x_0)$ is a polyhedral cone and in particular closed, so taking
the closure changes nothing:
\begin{align}
	T_{X_\text{lin}}(x_0) = \text{cl}\left( \bigcup_{\lambda \ge 0 }\lambda
	\left( X_\text{lin} - x_0 \right) \right) = T_\text{lin}(x_0).
\end{align}
\subsection{Exercise 5}
Let $g: \mathbb{R}^{2} \to \mathbb{R}^{4}$ with
\begin{align}
	g(x, y) =
	\begin{pmatrix}
	\pi - 2x\\
	-y - 1\\
	2x - 3\pi\\
	y - \sin(x)
	\end{pmatrix}
\end{align}
and
\begin{align}
	X = \{(x,y) \in \mathbb{R}^{2}: g(x,y) \leqq 0\}.
\end{align}
The set $X$ imposes the following constraints on $x, y$:
\begin{align}
	\pi - 2x \le 0 \;&\Rightarrow\; x \ge \frac{\pi}{2},\\
	-y - 1 \le 0 \;&\Rightarrow\; y \ge -1,\\
	2x - 3\pi \le 0 \;&\Rightarrow\; x \le \frac{3\pi}{2},\\
	y - \sin\left( x \right) \le 0 \;&\Rightarrow\; y \le \sin(x) \le 1,
\end{align}
meaning $x \in [\frac{\pi}{2}, \frac{3\pi}{2}]$ and $-1 \le y \le \sin(x)$
(in particular $y \in [-1, 1]$).
The tangent cones of $X$ w.r.t.\ $x_1 = (\frac{\pi}{2}, 1)^{T}$,
$x_2=(\pi,0)^{T}$ and $x_3 = (\frac{3\pi}{2}, -1)^{T}$ are
\begin{align}
	T_X(x_1) &= \{d \in \mathbb{R}^{2}: d_1 \ge 0,\ d_2 \le 0\},\\
	T_X(x_2) &= \{d \in \mathbb{R}^{2}: d_1 + d_2 \le 0\},\\
	T_X(x_3) &= \{d \in \mathbb{R}^{2}: d_1 \le 0,\ d_2 = 0\}.
\end{align}
At $x_1$ the boundary curve $y = \sin(x)$ has slope $\cos(\frac{\pi}{2}) = 0$;
at $x_2$ it has slope $\cos(\pi) = -1$, giving the half-plane below the
tangent line; at $x_3$ the constraints $y \ge -1$ and $y \le \sin(x)$ touch to
second order, which forces $d_2 = 0$. Graphically this is represented as
follows.
\begin{figure}[H]
	\centering
	\includegraphics[width=0.4\textwidth]{./build/pics/sesh1_ex5.png}
	\caption{Graphical representation of $X$ and the tangent cones $T_X(x_i)$,
	$i=1, 2, 3$}
	\label{fig:sesh1_ex5}
\end{figure}
Then we look at the linearized tangent cones $T_\text{lin}(x_i) = \{d\in
\mathbb{R}^{2}: \nabla g_j(x_i)^{T} d \le 0 \;\forall j \in
\mathcal{A}(x_i)\}$. First we calculate the gradients of the entries of $g$,
\begin{align}
	\nabla g_1(x, y) = (-2 ,0)^{T}\qquad \nabla g_2(x,y) = (0, -1)^{T}\\
	\nabla g_3(x, y) = (2 ,0)^{T}\qquad \nabla g_4(x,y) = (-\cos(x), 1)^{T},
\end{align}
then for all $j \in \{ 1, 2, 3, 4\} $ and $i \in \{1, 2, 3\}$ we check whether
$g_j(x_i) = 0$, construct $\mathcal{A}(x_i)$, and determine the $d$ satisfying
the active gradient conditions.
For $x_1$ we have
\begin{align}
	g_1(x_1) = 0,\;\;
	g_2(x_1) \neq 0,\;\;
	g_3(x_1) \neq 0,\;\;
	g_4(x_1) = 0
	\;\;\Rightarrow\;\; \mathcal{A}(x_1) = \{1, 4\}.
\end{align}
Then, with $\nabla g_4(x_1) = (0, 1)^{T}$,
\begin{align}
	T_{\text{lin}}(x_1)
	&= \{d \in \mathbb{R}^{2} : (-2, 0)d \le 0,\ (0, 1)d \le 0\}\\
	&= \{d \in \mathbb{R}^{2}: d_1 \ge 0,\ d_2 \le 0\}.
\end{align}
For $x_2$ we have
\begin{align}
	g_1(x_2) \neq 0,\;\;
	g_2(x_2) \neq 0,\;\;
	g_3(x_2) \neq 0,\;\;
	g_4(x_2) = 0
	\;\;\Rightarrow\;\; \mathcal{A}(x_2) = \{4\}.
\end{align}
Then, with $\nabla g_4(x_2) = (1, 1)^{T}$,
\begin{align}
	T_{\text{lin}}(x_2)
	= \{d \in \mathbb{R}^{2} : (1, 1)d \le 0\}
	= \{d \in \mathbb{R}^{2}: d_1 + d_2 \le 0\}.
\end{align}
For $x_3$ we have
\begin{align}
	g_1(x_3) \neq 0,\;\;
	g_2(x_3) = 0,\;\;
	g_3(x_3) = 0,\;\;
	g_4(x_3) = 0
	\;\;\Rightarrow\;\; \mathcal{A}(x_3) = \{2, 3, 4\}.
\end{align}
Then, with $\nabla g_4(x_3) = (0, 1)^{T}$,
\begin{align}
	T_{\text{lin}}(x_3)
	&= \{d \in \mathbb{R}^{2} : (0, -1)d \le 0,\ (2, 0)d\le 0,\ (0, 1)d \le 0\}\\
	&= \{d \in \mathbb{R}^{2}: d_1 \le 0,\ d_2 = 0\}.
\end{align}
Comparing with the tangent cones computed above, we conclude that $T_X(x_i) =
T_{\text{lin}}(x_i)$ for all $i = 1, 2, 3$; note that at $x_3$ this holds even
though the active gradients are linearly dependent there.
\subsection{Exercise 6}
Let $A \in \mathbb{R}^{n \times m}$ and $b \in \mathbb{R}^{n}$. Prove, using
the strong duality theorem of linear optimization, that the following
statements are equivalent:
\begin{enumerate}
	\item the system $Ax = b$ has a solution $x \leqq 0$,
	\item it holds that $b^{T}d \ge 0$ for all $d \in \mathbb{R}^{n}$ with
	$A^{T}d \leqq 0$.
\end{enumerate}
Consider the following primal--dual pair of linear programs:
\begin{align}
	\text{max}\quad & 0^{T}x\\
	\text{s.t.:}\quad & Ax=b,\nonumber\\
	&x\leqq 0,\nonumber\\
	\intertext{and}
	\text{min}\quad & b^{T}d\\
	\text{s.t.:}\quad & A^{T}d\leqq 0,\nonumber\\
	&d \in \mathbb{R}^{n}.\nonumber
\end{align}
Note that the dual is always feasible, since $d = 0$ satisfies $A^{T}d \leqq
0$. Now assume statement 1, i.e., the primal is feasible. Its objective is
identically zero, so the primal optimal value is $0$. By strong duality the
dual then also has optimal value $0$, i.e.,
\begin{align}
	\min\{b^{T}d : A^{T}d \leqq 0\} = 0,
\end{align}
which means exactly that $b^{T}d \ge 0$ for all $d$ with $A^{T}d \leqq 0$;
this is statement 2.\newline
Conversely, assume statement 2. Then the dual is feasible and its objective is
bounded below by $0$; this bound is attained at $d = 0$, so the dual has
optimal value $0$. By strong duality the primal must then be feasible with the
same optimal value, i.e., there exists an $x \leqq 0$ with $Ax = b$; this is
statement 1. Thereby the two statements are equivalent.
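As a small numerical sanity check (a made-up instance, not part of the
exercise sheet): take
\begin{align}
	A = \begin{pmatrix} 1 & 0\\ 0 & 1 \end{pmatrix}, \qquad
	b = \begin{pmatrix} -1\\ -2 \end{pmatrix}.
\end{align}
Then $x = (-1, -2)^{T} \leqq 0$ solves $Ax = b$, and indeed every $d$ with
$A^{T}d = d \leqq 0$ satisfies $b^{T}d = -d_1 - 2d_2 \ge 0$. Replacing $b$ by
$(1, -2)^{T}$ breaks statement 1 (the unique solution has $x_1 = 1 > 0$), and
correspondingly $d = (-1, 0)^{T}$ satisfies $A^{T}d \leqq 0$ but $b^{T}d = -1
< 0$, breaking statement 2 as well.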
By the duality $0$ must be the optimal of the 318 dual problem, which means $b^{T}d =0 $ so there is no $d \in \mathbb{R}^{m}: 319 b^{T}d >0$.\newline 320 On the other hand $\neg \exists d \in \mathbb{R}^{m}: Ad \le 0$ and $b^{T}d 321 >0$ so $\forall d' \in \mathbb{R}^{m}$ we have that $Ad' > 0$ or $b^{T}d \le 322 0$ such that $b^{T}d \leq 0 $ for all $d \in \mathbb{R}^{m}$. So we can 323 conclude that there exists a solution because the primal has at least one 324 feasible $x \in \mathbb{R}^{m}: Ax = 0, x\geqq 0$. Thereby the two statements 325 above are equivalent. 326 327 \end{document} 328 329