\input{./preamble.tex}

\begin{document}
\maketitle
\tableofcontents

\section{Sheet 3}
\subsection{Exercise 13}
\subsubsection{Part a}
Solve
\begin{align}
	\text{min}\quad & -x_1 - 2x_2,\\
	\text{s.t.}\quad & x_1^{2} + x_2^{2} \le 4 \nonumber\\
	&x_1\ge 0, \; x_2 \ge 0. \nonumber
\end{align}
Rewriting it in reduced notation:
\begin{align}
	\text{min}\quad & -x_1 - 2x_2,\\
	\text{s.t.}\quad & g_1(x_1, x_2) = x_1^{2} + x_2^{2} - 4 \le 0 \nonumber\\
	&g_2(x_1,x_2) = -x_1 \le 0\nonumber\\
	&g_3(x_1,x_2) = -x_2 \le 0.\nonumber
\end{align}
We know that a KKT point $(x, \lambda)$ of the problem satisfies
\begin{align}
	\nabla_x L(x, \lambda) = 0,\\
	\lambda\geqq 0,\quad \lambda^{T}g(x) = 0.
\end{align}
Since the objective is strictly decreasing in both $x_1$ and $x_2$, the
solution has $x_1 > 0$ and $x_2 > 0$, so the complementarity conditions
$-\lambda_2 x_1 = 0$ and $-\lambda_3 x_2 = 0$ force $\lambda_2 = \lambda_3
= 0$. We are left with a system of three equations in the three unknowns
$x_1, x_2, \lambda_1$:
\begin{align}
	&\nabla f(x) + \nabla(\lambda^{T}g(x)) = 0, \quad \lambda_1 g_1(x) = 0\\
	&\begin{pmatrix}
		-1 + 2\lambda_1 x_1\\
		-2 + 2\lambda_1 x_2\\
		\lambda_1(x_1^{2}+x_2^{2}-4)
	\end{pmatrix}
	=\begin{pmatrix} 0\\0\\0 \end{pmatrix}.
\end{align}
Solving the first and second equation we get
\begin{align}
	x_1 = \frac{1}{2\lambda_1},\quad x_2 = \frac{1}{\lambda_1}\\
	\Rightarrow x_2 = 2x_1.
\end{align}
Note that $\lambda_1 \neq 0$, since otherwise the first equation would read
$-1 = 0$; hence complementarity gives $g_1(x) = 0$. Plugging $x_2 = 2x_1$
into $g_1(x) = 0$ and taking the positive root (because $x_1 \ge 0$,
$x_2 \ge 0$) we get
\begin{align}
	x_1^{2} + 4x_1^{2} - 4 = 0
	\Rightarrow x_1 = \frac{2}{\sqrt{5}}, \quad x_2 = \frac{4}{\sqrt{5}}.
\end{align}
The solution of the optimization problem is $x^{*}=(\frac{2}{\sqrt{5}},
\frac{4}{\sqrt{5}})^{T}$, with multiplier $\lambda_1^{*} =
\frac{\sqrt{5}}{4}$.
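As a quick numerical cross-check, a minimal sketch (assuming
\texttt{numpy} and \texttt{scipy} are available; not part of the analytic
argument) recovers the same point:
\begin{verbatim}
# Sanity check for Exercise 13a: solve the NLP numerically.
import numpy as np
from scipy.optimize import minimize

res = minimize(
    lambda x: -x[0] - 2 * x[1],              # objective -x1 - 2*x2
    x0=[1.0, 1.0],
    bounds=[(0, None), (0, None)],           # x1 >= 0, x2 >= 0
    constraints=[{"type": "ineq",            # scipy convention: fun(x) >= 0
                  "fun": lambda x: 4 - x[0]**2 - x[1]**2}],
)
print(res.x)                                 # approx [0.8944, 1.7889]
print(2 / np.sqrt(5), 4 / np.sqrt(5))        # 0.8944..., 1.7888...
\end{verbatim}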
\subsubsection{Part b}
Verify whether $x=(2,4)^{T}$ is an optimal solution of the optimization
problem
\begin{align}
	\text{min}\quad & (x_1-4)^{2} + (x_2-3)^{2},\\
	\text{s.t.}\quad & x_1^{2}\le x_2 \nonumber\\
	&x_2 \le 4 \nonumber
\end{align}
and determine a KKT point, i.e.\ consider
\begin{align}
	\text{min}\quad & (x_1-4)^{2} + (x_2-3)^{2},\\
	\text{s.t.}\quad &g_1(x) = x_1^{2} - x_2\le 0 \nonumber\\
	&g_2(x) = x_2 - 4 \le 0. \nonumber
\end{align}
We again use the KKT optimality conditions. Stationarity reads
\begin{align}
	\nabla f(x) + \nabla (\lambda^{T}g(x)) = 0\\
	\begin{pmatrix}
		2(x_1-4) + 2\lambda_1x_1\\
		2(x_2-3) - \lambda_1 + \lambda_2
	\end{pmatrix} =
	\begin{pmatrix} 0\\0 \end{pmatrix}.
\end{align}
Both constraints are active at $x=(2,4)^{T}$, and substituting gives
\begin{align}
	\begin{pmatrix}
		-4 + 4\lambda_1\\
		2 - \lambda_1 + \lambda_{2}
	\end{pmatrix}
	=
	\begin{pmatrix}
		0\\0
	\end{pmatrix},
\end{align}
i.e.\ $\lambda_1 = 1$, $\lambda_2 = -1$. Since $\lambda_2 < 0$ violates
dual feasibility $\lambda \geqq 0$, the point $x=(2,4)^{T}$ is \emph{not}
a KKT point; as LICQ holds there ($\nabla g_1(2,4) = (4,-1)^{T}$ and
$\nabla g_2(2,4) = (0,1)^{T}$ are linearly independent), the KKT
conditions are necessary and $x=(2,4)^{T}$ is not an optimal solution.

To determine a KKT point, try $g_1$ active and $g_2$ inactive, i.e.\
$x_2 = x_1^{2}$ and $\lambda_2 = 0$. The second stationarity equation
gives $\lambda_1 = 2(x_1^{2}-3)$, and substituting into the first yields
\begin{align}
	2x_1^{3} - 5x_1 - 4 = 0,
\end{align}
whose unique real root is $x_1 \approx 1.89$. Then $x_2 = x_1^{2} \approx
3.56 < 4$ and $\lambda_1 = 2(x_1^{2}-3) \approx 1.12 \ge 0$, so
$\left( x^{*}, \lambda^{*} \right) \approx \left( (1.89, 3.56)^{T},
(1.12, 0)^{T} \right)$ is a KKT point.
\subsection{Exercise 14}
Solve the following optimization problem
\begin{align}
	\text{min}\quad & \sum_{i=1}^{n} \left( x_i - a_i \right)^{2},\\
	\text{s.t.}\quad & \sum_{i=1}^{n} x_i^{2} \le 1 \nonumber\\
	& \sum_{i=1}^{n} x_i = 0. \nonumber
\end{align}
To solve this we use the KKT optimality conditions for $g(x) =
\sum_{i=1}^{n}x_i^{2} - 1$ and $h(x) = \sum_{i=1}^{n}x_i$:
\begin{align}
	&\nabla f(x) + \lambda \nabla g(x) + \mu \nabla h(x) = 0\\
	&\lambda \ge 0,\; g(x) \le 0,\; \lambda g(x) = 0, \;h(x) = 0.
\end{align}
From the first equation we get
\begin{align}
	&2x_i - 2a_i + 2 \lambda x_i + \mu = 0 \\
	&2\left( 1+\lambda \right) x_i + \mu - 2a_i = 0\\
	&x_i = \frac{2a_i - \mu}{2(1+\lambda)}
	\qquad \forall i \in \{1,\ldots,n\}.
\end{align}
By substituting the derived expression for $x_i$ into $h(x) = 0$ we get
\begin{align}
	&\sum_i \frac{2a_i - \mu}{2(1+\lambda)} = 0\\
	\Rightarrow\; & \mu = \frac{2}{n}\sum_i a_i = 2\left\langle a
	\right\rangle,
\end{align}
where $\left\langle a \right\rangle$ denotes the mean of
$a=(a_1,\ldots,a_n)^{T}$. Hence $2a_i - \mu = 2(a_i - \left\langle a
\right\rangle)$ and
\begin{align}
	x_i = \frac{a_i - \left\langle a \right\rangle}{1+\lambda}.
\end{align}
Assume the inequality constraint is active (the case $\lambda > 0$); then
$g(x) = 0$ gives
\begin{align}
	&\sum_i \left( \frac{a_i - \left\langle a \right\rangle}{1+\lambda}
	\right)^{2} = 1\\
	\Rightarrow\; &(1+\lambda)^{2} = \sum_i \left( a_i - \left\langle a
	\right\rangle \right)^{2}.
\end{align}
Since $\lambda \ge 0 $ we have $(1+\lambda) \ge 1$, so we take the
positive root:
\begin{align}
	\lambda = \sqrt{\sum_i \left( a_i - \left\langle a \right\rangle
	\right)^{2}} - 1,
\end{align}
which is indeed nonnegative whenever $\sum_i (a_i - \left\langle a
\right\rangle)^{2} \ge 1$. Then $x_i$ becomes
\begin{align}
	x_i = \frac{a_i - \left\langle a \right\rangle}{\sqrt{\sum_j \left(
	a_j - \left\langle a \right\rangle \right)^{2}}},
\end{align}
i.e.\ the centered vector $a - \left\langle a \right\rangle \mathbf{1}$
scaled onto the unit sphere. (If instead $\sum_i (a_i - \left\langle a
\right\rangle)^{2} \le 1$, then $\lambda = 0$ and $x_i = a_i -
\left\langle a \right\rangle$ already satisfies both constraints.)
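The closed form can be checked numerically; the following minimal sketch
(assuming \texttt{numpy} and \texttt{scipy}, with an arbitrary test vector
$a$ chosen so that the inequality constraint is active) compares it against
an SLSQP solve:
\begin{verbatim}
# Sanity check for Exercise 14: formula vs. numerical solver.
import numpy as np
from scipy.optimize import minimize

a = np.array([2.0, -1.0, 3.0, 0.5])   # arbitrary test data
ac = a - a.mean()                      # centered vector a_i - <a>
x_formula = ac / np.linalg.norm(ac)    # derived solution (||ac|| >= 1 here)

res = minimize(
    lambda x: np.sum((x - a) ** 2),
    x0=np.zeros_like(a),
    constraints=[
        {"type": "ineq", "fun": lambda x: 1 - np.sum(x ** 2)},  # g(x) <= 0
        {"type": "eq", "fun": lambda x: np.sum(x)},             # h(x) = 0
    ],
)
print(np.max(np.abs(res.x - x_formula)))   # ~ 1e-7
\end{verbatim}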
\subsection{Exercise 15}
Consider the function
\begin{align}
	f : \;\;\mathbb{R}^{2}&\to \mathbb{R}\\
	(x_1, x_2) &\mapsto 3x_1^{4}-4x_1^{2}x_2 + x_2^{2}.
\end{align}
Prove that the following statements for $x^{*}=(0,0)^{T}$ are true:
\begin{enumerate}
	\item $x^{*}$ is a critical point of $f$,
	\item $x^{*}$ is a strict local minimum of $f$ along any line going
		through the origin,
	\item $x^{*}$ is not a local minimum of $f$.
\end{enumerate}
For 1.\ we check that $\nabla f(x^{*}) = 0$, so that $x^{*}$ is a
critical point:
\begin{align}
	& \nabla f(x) =
	\begin{pmatrix}
		12x_1^{3} - 8 x_1 x_2\\
		-4x_1^{2} + 2x_2
	\end{pmatrix} \\
	& \nabla f(x^{*}) =
	\begin{pmatrix}
		0\\
		0
	\end{pmatrix}.
\end{align}
For 2.\ we need to show that $f$ restricted to any line through the
origin has a strict local minimum at $0$. We start with the lines $x_2 =
m x_1$ for $m \neq 0$:
\begin{align}
	f(x_1, x_2=mx_1) = 3x_1^{4} - 4mx_1^{3} + m^{2}x_1^{2}.
\end{align}
Set $g(x) = f(x, mx)$. We need to check that $x=0$ is a strict local
minimum of $g$:
\begin{align}
	&g'(x) = 12x^{3}-12mx^{2} + 2m^{2}x\\
	&g''(x) = 36x^{2} - 24m x + 2m^{2}\\
	&g'(0) = 0, \qquad g''(0) = 2m^{2}>0.
\end{align}
So $f$ has a strict local minimum along the lines $x_2 = mx_1$ with $m
\neq 0$. Along the $x_1$-axis ($m = 0$) we have $f(x_1, 0) = 3x_1^{4} > 0
= f(0,0)$ for all $x_1 \neq 0$, again a strict local minimum. Finally we
check the lines $x_1 = mx_2$ (which for $m=0$ cover the remaining line,
the $x_2$-axis):
\begin{align}
	f(x_1=mx_2, x_{2}) = 3m^{4}x_2^{4}-4m^{2}x_2^{3} + x_2^{2}.
\end{align}
Set $g(x) = f(mx, x)$:
\begin{align}
	& g'(x) = 12m^{4}x^{3}-12m^{2}x^{2}+2x\\
	& g''(x) = 36m^{4}x^{2} - 24m^{2}x + 2\\
	& g'(0) = 0, \qquad g''(0) = 2 > 0.
\end{align}
For 3.\ we need to show that $x^{*}=(0,0)^{T}$ is not a local minimum of
$f$. Consider the parabola $x_2 = 2x_1^{2}$:
\begin{align}
	f(x_1, 2x_1^{2}) &= 3x_1^{4} - 8x_1^{4} + 4x_1^{4}\\
	&= -x_1^{4} < f(x^{*}) = 0\qquad \forall x_1 \in
	\mathbb{R}\setminus \{0\}.
\end{align}
Thus every neighborhood of $x^{*}$ contains points with function values
smaller than $f(x^{*}) = 0$.
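The behavior along the parabola can be illustrated with a few lines of
plain Python (a minimal sketch, no packages assumed):
\begin{verbatim}
# Exercise 15.3: f is negative along x2 = 2*x1^2, arbitrarily
# close to the origin, although f(0, 0) = 0.
f = lambda x1, x2: 3 * x1**4 - 4 * x1**2 * x2 + x2**2

for t in [1e-1, 1e-2, 1e-3]:
    print(t, f(t, 2 * t**2))   # prints -t**4 < 0 each time
\end{verbatim}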
\subsection{Exercise 16}
\subsubsection{Part a}
Formulate a statement concerning the solutions of the optimization problem
\begin{align}
	\text{max}\quad & x_1,\\
	\text{s.t.}\quad & x_1^{2} + x_2^{2} \le 1 \nonumber\\
	&(x_1 - 1)^{2} + x_2^{2} \ge 1 \nonumber\\
	&x_1 + x_2 \ge 1\nonumber
\end{align}
using geometric arguments and verify this statement by analytical
arguments.
\newline
Let $X$ be the feasible set
\begin{align}
	X = \left\{ \left( x_1, x_2 \right) \in \mathbb{R}^{2}: x_1^{2}+x_2^{2}
	\le 1,\; (x_1 -1)^{2} + x_2^{2} \ge 1,\; x_1 +x_2 \ge 1 \right\}.
\end{align}
Then $X$ is the red region in the plot below.
\begin{figure}[H]
	\centering
	\begin{tikzpicture}[yscale=1, xscale=1]
		\begin{axis}[
			xmin=-2.3, xmax=2.3,
			ymin=-2.3, ymax=2.3,
			xlabel=$x_1$, ylabel=$x_2$,
			axis lines = middle,
			]
			\addplot[domain=-1:2, samples=100, color=black, name path=A]{-x+1};
			\addplot[domain=-1:1, samples=100, color=black, name
			path=B1]{sqrt(1-x^2)};
			\addplot[domain=-1:1, samples=100, color=black, name
			path=B2]{-sqrt(1-x^2)};
			\addplot[domain=0:2, samples=100, color=black, name
			path=C1]{sqrt(1-(x-1)^2)};
			\addplot[domain=0:2, samples=100, color=black, name
			path=C2]{-sqrt(1-(x-1)^2)};

			\addplot[domain=-2:2, samples=100, color=orange, name
			path=D, opacity=0]{2};

			\addplot[domain=-2:2, samples=100, color=orange, name
			path=E, opacity=0]{-2};

			\addplot[domain=-2:2, samples=100, color=orange, name
			path=F, opacity=0]{0};

			\addplot[domain=-2:2, samples=100, color=black, name
			path=G, opacity=1]{x};

			\draw[red] (axis cs:0.5, 2) -- (axis cs:0.5, -2);

			\addplot[fill=orange,fill opacity=0.3] fill between[of=A and D,soft
			clip={domain=-1:2},];
			\addplot[fill=blue,fill opacity=0.3] fill between[of=B1 and F,soft
			clip={domain=-1:1},];

			\addplot[fill=blue,fill opacity=0.3] fill between[of=B2 and F,soft
			clip={domain=-1:1},];

			\addplot[fill=green,fill opacity=0.1] fill between[of=D and E,soft
			clip={domain=-2:0},];

			\addplot[fill=green,fill opacity=0.1] fill between[of=C2 and E,soft
			clip={domain=0:2},];

			\addplot[fill=green,fill opacity=0.1] fill between[of=C1 and D,soft
			clip={domain=0:2},];

			\addplot[fill=red,fill opacity=1] fill between[of=A and B1,soft
			clip={domain=0:0.2928},];

			\addplot[fill=red,fill opacity=1] fill between[of=C1 and B1,soft
			clip={domain=0.2928:1/2},];

		\end{axis}
	\end{tikzpicture}
	\caption{Red: the feasible set $X$; blue: $x_1^{2}+ x_2^{2} \le 1$;
	green: $(x_1-1)^{2} +x_2^{2} \ge 1$; orange: $x_1 + x_2 \ge 1$. The red
	vertical line marks $x_1 = \frac{1}{2}$.}
	\label{fig: ex16a}
\end{figure}
Since $f(x_1, x_2) = x_1$ only depends on $x_1$, the maximum is attained
at the rightmost point of the red region. Geometrically that region is
bounded on the right by the circle $(x_1-1)^{2}+x_2^{2}=1$, and the
largest attainable value is $x_1 = \frac{1}{2}$, at the upper intersection
$x^{*} = (\frac{1}{2}, \frac{\sqrt{3}}{2})^{T}$ of the two circles (the
red vertical line in the plot).

For the analytical argument we use the KKT optimality conditions; to this
end we transform the maximization problem into a minimization problem by
multiplying the objective function $f$ by $-1$:
\begin{align}
	\text{min}\quad & -x_1,\\
	\text{s.t.}\quad & g_1\left(x \right) = x_1^{2} + x_2^{2} -1 \le 0 \nonumber\\
	&g_2(x) = 1- (x_1 - 1)^{2} - x_2^{2} \le 0 \nonumber\\
	&g_3(x) = 1- x_1 - x_2 \le 0.\nonumber
\end{align}
At the candidate $x^{*} = (\frac{1}{2}, \frac{\sqrt{3}}{2})^{T}$ the
constraints $g_1$ and $g_2$ are active while $g_3(x^{*}) =
\frac{1-\sqrt{3}}{2} < 0$ is inactive, so complementarity forces
$\lambda_3 = 0$. Stationarity $\nabla_x L(x, \lambda) = 0$ reads
\begin{align}
	\begin{pmatrix}
		-1 + 2\lambda_1x_1 - 2\lambda_2(x_1 -1) - \lambda_3\\
		2\lambda_1 x_2 - 2\lambda_2 x_2 - \lambda_3
	\end{pmatrix}
	=
	\begin{pmatrix} 0\\0 \end{pmatrix}.
\end{align}
The second component gives $\sqrt{3}(\lambda_1 - \lambda_2) = 0$, i.e.\
$\lambda_1 = \lambda_2$, and the first component then gives $-1 +
\lambda_1 + \lambda_2 = 0$, i.e.\ $\lambda_1 = \lambda_2 = \frac{1}{2} \ge
0$. Hence $\left( x^{*}, \lambda^{*} \right) = \left( (\frac{1}{2},
\frac{\sqrt{3}}{2})^{T}, (\frac{1}{2}, \frac{1}{2}, 0)^{T} \right)$ is a
KKT point. Moreover, adding $g_1(x) \le 0$ and $g_2(x) \le 0$ yields
$2x_1 - 1 \le 0$ for every feasible point, so $x_1 \le \frac{1}{2}$ on all
of $X$ and $x^{*}$ is indeed a global maximizer with optimal value
$\frac{1}{2}$.
\subsubsection{Part b}
Verify whether $x^{*} = (1, 1)^{T}$ fulfills the constraint qualifications
LICQ, MFCQ and ABADIE-CQ for
\begin{align}
	\text{min}\quad & x_1,\\
	\text{s.t.}\quad & g_1\left(x \right) = x_1 + x_2 -2 \le 0 \nonumber\\
	&g_2(x) = 1-x_1x_2 \le 0 \nonumber\\
	&g_3(x) = -x_1 \le 0\nonumber\\
	&g_4(x) = -x_2 \le 0.\nonumber
\end{align}
At $x^{*}$ the constraints $g_1$ and $g_2$ are active and $g_3, g_4$ are
inactive. The feasible set is in fact the single point $\{x^{*}\}$: the
constraints $g_2, g_3, g_4$ force $x_1, x_2 > 0$ and $x_1 x_2 \ge 1$, so
by the AM--GM inequality $x_1 + x_2 \ge 2\sqrt{x_1 x_2} \ge 2$, while
$g_1$ demands $x_1 + x_2 \le 2$; equality throughout gives $x_1 = x_2 =
1$. Consequently the tangent cone is trivial:
\begin{align}
	T_X(x^{*}) = \left\{ \begin{pmatrix} 0\\0 \end{pmatrix} \right\}.
\end{align}
The linearized tangent cone, with $\nabla g_1(x^{*}) = (1,1)^{T}$ and
$\nabla g_2(x^{*}) = (-1,-1)^{T}$, is
\begin{align}
	T_\text{lin} (x^{*}) &=
	\left\{ d \in \mathbb{R}^{2}:
	\begin{pmatrix}1\\1 \end{pmatrix}^{T}d \le 0,\;
	\begin{pmatrix}-1\\-1 \end{pmatrix}^{T}d \le 0
	\right\} \\
	&=
	\left\{
	\begin{pmatrix} t\\-t \end{pmatrix}
	: t \in \mathbb{R}
	\right\}.
\end{align}
Since $T_X(x^{*}) \neq T_\text{lin}(x^{*})$, ABADIE-CQ is \emph{not}
fulfilled. For MFCQ we would need a direction $d$ with $\nabla g_i
(x^{*})^{T}d < 0$ for $i \in \{1, 2\}$, which is impossible because $d_1 +
d_2 < 0$ and $-(d_1 + d_2) < 0$ contradict each other. For LICQ we would
need $\{\nabla g_i (x^{*})\}_{i\in \{1,2\}}$ to be linearly independent,
but
\begin{align}
	\begin{pmatrix} 1\\1 \end{pmatrix} ,\quad
	\begin{pmatrix} -1\\-1 \end{pmatrix}
\end{align}
are linearly dependent. Hence none of the three constraint qualifications
holds at $x^{*}$.
\subsection{Exercise 17}
Find out by using second order optimality conditions whether $x^{*} = (0,
0)^{T}$ is a local minimum of
\begin{align}
	\text{min}\quad & -x_1^{2} + x_2,\\
	\text{s.t.}\quad & g_1\left(x \right) = x_1^{3} - x_2 \le 0 \nonumber\\
	&g_2(x) = -mx_1+x_2 \le 0, \nonumber
\end{align}
where $m\ge0$.
We need to check whether the second order necessary condition
\begin{align}
	d^{T}\nabla^{2}_x L(x^{*}, \lambda^{*})d \ge 0 \qquad \forall d \in
	T_2(x^{*})
\end{align}
holds, where
\begin{align}
	T_2(x^{*}) = \{d\in \mathbb{R}^{2}: \;&\nabla g_i(x^{*})^{T}d = 0\;\;
	i \in \mathcal{A}_> (x^{*}), \nonumber\\
	&\nabla g_i(x^{*})^{T}d \le 0\;\; i \in \mathcal{A}_0 (x^{*}) \}
\end{align}
and
\begin{align}
	\mathcal{A}_0(x^{*}) = \{i \in \mathcal{A}(x^{*}): \lambda_i^{*} = 0\}, \\
	\mathcal{A}_>(x^{*}) = \{i \in \mathcal{A}(x^{*}): \lambda_i^{*} > 0\}.
\end{align}
Both constraints are active at $x^{*}$, i.e.\ $\mathcal{A}(x^{*}) =
\{1,2\}$, with gradients
\begin{align}
	\nabla g_1 (x)|_{x^{*}} = (0, -1)^{T},\\
	\nabla g_2 (x)|_{x^{*}} = (-m, 1)^{T}.
\end{align}
We first determine the KKT multipliers at $x^{*}$. With $\nabla f(x^{*}) =
(0,1)^{T}$, stationarity $\nabla f(x^{*}) + \lambda_1 \nabla g_1(x^{*}) +
\lambda_2 \nabla g_2(x^{*}) = 0$ reads
\begin{align}
	\begin{pmatrix}
		-m\lambda_2\\
		1 - \lambda_1 + \lambda_2
	\end{pmatrix}
	=
	\begin{pmatrix} 0\\0 \end{pmatrix}.
\end{align}
For $m > 0$ this forces $\lambda_2 = 0$ and $\lambda_1 = 1$ (for $m = 0$
the same choice $\lambda^{*} = (1, 0)^{T}$ is admissible). Hence
$\mathcal{A}_> = \{1\}$, $\mathcal{A}_0 = \{2\}$ and, for $m > 0$,
\begin{align}
	T_2(x^{*}) &=
	\left\{
	d \in \mathbb{R}^{2}:
	\begin{pmatrix}
		0\\
		-1
	\end{pmatrix}^{T}
	d = 0,\;
	\begin{pmatrix}
		-m\\
		1
	\end{pmatrix}^{T}
	d \le 0
	\right\} \\
	&=
	\left\{
	\begin{pmatrix}
		t\\
		0
	\end{pmatrix}
	:t \ge 0
	\right\}
\end{align}
(for $m = 0$ the cone contains the whole $x_1$-axis). Now we calculate
the Hessian of the Lagrangian. At $x^{*}$ we have $\nabla^{2} g_1(x^{*}) =
\operatorname{diag}(6x_1, 0)|_{x^{*}} = 0$ and $\nabla^{2} g_2 \equiv 0$,
so
\begin{align}
	\nabla^2_x L(x^{*}, \lambda^{*}) = \nabla^2 f(x^{*})
	=
	\begin{pmatrix}
		-2 & 0\\
		0 & 0
	\end{pmatrix}.
\end{align}
Then for $d = (t, 0)^{T} \in T_2(x^{*})$ with $t > 0$
\begin{align}
	\begin{pmatrix}
		t\\
		0
	\end{pmatrix}^{T}
	\begin{pmatrix}
		-2 & 0\\
		0 & 0
	\end{pmatrix}
	\begin{pmatrix}
		t\\
		0
	\end{pmatrix} = -2t^{2} < 0,
\end{align}
violating the second order necessary condition. We conclude that
$x^{*}=(0,0)^{T}$ is not a local minimum of the optimization problem.
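The conclusion can also be seen directly: along the feasible boundary
$x_2 = x_1^{3}$ the objective is negative arbitrarily close to the
origin. A minimal plain-Python illustration (with the arbitrary choice
$m = 1$):
\begin{verbatim}
# Exercise 17: feasible points near (0, 0) with f < 0 = f(0, 0),
# here for m = 1.
m = 1.0
f = lambda x1, x2: -x1**2 + x2

for t in [1e-1, 1e-2, 1e-3]:
    x1, x2 = t, t**3   # feasible: x1^3 - x2 = 0 and -m*x1 + x2 <= 0
    print(x1, f(x1, x2))   # f = -t**2 + t**3 < 0
\end{verbatim}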
\subsection{Exercise 18}
Let $(t_k)_{k\ge 0} \subseteq \mathbb{R}$ be a monotonically decreasing
sequence and $t^{*}$ an accumulation point of it. Show that the sequence
$(t_k)_{k\ge 0}$ converges to $t^{*}$.
\newline
Since $t^{*}$ is an accumulation point of $(t_k)_{k\ge 0}$, every
neighborhood $U_\varepsilon(t^{*}) = (t^{*}-\varepsilon,
t^{*}+\varepsilon)$, $\varepsilon>0$, contains infinitely many terms of
the sequence:
\begin{align}
	\forall \varepsilon > 0 \;\; \forall N\in \mathbb{N} \;\; \exists n \ge
	N: \; |t_n - t^{*}| < \varepsilon.
\end{align}
First we show that $t_k \ge t^{*}$ for all $k$. Suppose $t_K < t^{*}$ for
some $K$; by monotonicity $t_n \le t_K$ for all $n \ge K$, so with
$\varepsilon = t^{*} - t_K > 0$ at most the finitely many terms $t_0,
\ldots, t_{K-1}$ can lie in $U_\varepsilon(t^{*})$, contradicting that
$t^{*}$ is an accumulation point.

Now let $\varepsilon > 0$ be arbitrary. Since $t^{*}$ is an accumulation
point, there exists $N$ with $t_N < t^{*} + \varepsilon$. By monotonicity,
for all $n \ge N$,
\begin{align}
	t^{*} \le t_n \le t_N < t^{*} + \varepsilon,
	\quad\text{i.e.}\quad |t_n - t^{*}| < \varepsilon.
\end{align}
Hence $(t_k)_{k\ge 0}$ converges to $t^{*}$.
\end{document}