\include{./preamble.tex}

\begin{document}
\maketitle
\tableofcontents

\section{Sheet 7}
\subsection{Exercise 43}
Consider the optimization problem
\begin{align}
	\text{min}\quad &f(x) := (x_1+1)^{2}+(x_2+2)^2,\\
	\text{s.t.}\quad & g_1(x) := -x_1 \le 0 \nonumber\\
	& g_2(x) := -x_2 \le 0 \nonumber
\end{align}
with $x = (x_1,x_2)^{T}$. For $\alpha >0$, find the minimum $x^{*}(\alpha)$
of the penalty function
\begin{align}
	P(x;\alpha) := f(x) + \frac{\alpha}{2} \|g_+(x)\|^2
\end{align}
and the limit points $x^{*}=\lim_{\alpha \to +\infty} x^{*}(\alpha)$ and
$\lambda^{*} = \lim_{\alpha \to +\infty}\alpha g_+ (x^{*}(\alpha))$. Find out
if $(x^{*}, \lambda^{*})$ is a KKT point of the constrained optimization
problem.
\newline
First we find the minimum of $P(x;\alpha)$:
\begin{align}
	\nabla P(x;\alpha)
	= \nabla f(x) + \frac{\alpha}{2}\left(\nabla\left(\max(0,-x_1)^{2}\right)
	+\nabla \left( \max\left(0,-x_2\right)^{2} \right) \right).
\end{align}
Since $\frac{\partial }{\partial x_i} \max(0, -x_i)^2$ is $2x_i$ for $x_i < 0$
and $0$ otherwise ($i=1,2$), we have the equations
\begin{align}
	\nabla P(x;\alpha) =
	\begin{pmatrix}
		2(x_1+1)\\
		2(x_2+2)
	\end{pmatrix}
	+ \alpha
	\begin{pmatrix}
		x_1\\
		x_2
	\end{pmatrix}= 0
\end{align}
(valid for $x_1, x_2 < 0$; the solution below indeed lies in this region),
which gives
\begin{align}
	x^{*}(\alpha) =
	\begin{pmatrix}
		-2(2+\alpha)^{-1}\\
		-4(2+\alpha)^{-1}
	\end{pmatrix},\\
	x^{*} = \lim_{\alpha \to +\infty}x^{*}(\alpha)=
	\begin{pmatrix} 0\\0 \end{pmatrix}
\end{align}
and
\begin{align}
	\lambda^{*}
	&= \lim_{\alpha \to +\infty}\alpha g_+(x^{*}(\alpha))\\
	&= \lim_{\alpha \to +\infty}
	\begin{pmatrix}
		\max\left(0, \frac{2\alpha}{2+\alpha}\right)\\
		\max\left(0, \frac{4\alpha}{2+\alpha}\right)
	\end{pmatrix}\\
	&=
	\begin{pmatrix}2 \\ 4 \end{pmatrix}.
\end{align}
All that is left is to show that $(x^{*}, \lambda^{*})$ is a KKT point by
checking $\nabla_x L(x^{*},\lambda^{*}) =0$:
\begin{align}
	\nabla f(x^{*}) + \lambda_1^{*} \nabla g_1(x^{*}) + \lambda_2^{*}\nabla
	g_2(x^{*})
	&=
	\begin{pmatrix} 2\\ 4 \end{pmatrix}
	+ \begin{pmatrix} -2 \\ -4 \end{pmatrix}\\
	&=\begin{pmatrix} 0\\0 \end{pmatrix},
\end{align}
so we conclude that $\left(x^{*}=(0,0)^{T}, \lambda^{*} = (2, 4)^{T}\right)$ is a
KKT point.
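For completeness (the remaining KKT conditions are not spelled out above):
primal feasibility, dual feasibility and complementary slackness also hold at
the limit point,
\begin{align}
	g(x^{*}) = \begin{pmatrix} 0\\0 \end{pmatrix} \le 0, \qquad
	\lambda^{*} = \begin{pmatrix} 2\\4 \end{pmatrix} \ge 0, \qquad
	\lambda_i^{*}\, g_i(x^{*}) = 0 \quad \text{for } i=1,2.
\end{align}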
\subsection{Exercise 44}
Consider the optimization problem
\begin{align}
	\text{min}\quad &f(x) := x^{2},\\
	\text{s.t.}\quad & g(x) := 1-\ln(x)\le 0 \nonumber
\end{align}
and the penalized optimization problem
\begin{align}
	\min_{x\in\mathbb{R}} P(x;\alpha) = f(x) + \alpha \phi\left(
	\frac{g(x)}{\alpha} \right)
\end{align}
with $\phi(t) = e^{t} -1$ (\textit{exponential penalty function}). For
$\alpha >0$ find the optimal solution $x^{*}\left( \alpha \right)$ of the
penalized optimization problem and prove that $x^{*}$, the limit of
$x^{*}(\alpha)$ as $\alpha \downarrow 0$, is an optimal solution of the
constrained optimization problem.
\newline
To find the minimum we differentiate $P(x;\alpha)$ w.r.t.\ $x$,
\begin{align}
	\frac{d}{dx}P(x;\alpha) &= \frac{d}{dx}\left( x^{2} + \alpha\left(
	\exp\left( \frac{1-\ln(x)}{\alpha} \right) -1 \right) \right)\\
	&= 2x - e^{\frac{1}{\alpha}}x^{-\frac{\alpha+1}{\alpha}}.
\end{align}
Setting this to $0$ gives the equation
\begin{align}
	&x^{-\frac{\alpha+1}{\alpha}} = 2e^{-\frac{1}{\alpha}}x\\
	&x^{*}(\alpha) = \left( \frac{1}{2}e^{\frac{1}{\alpha}}
	\right)^{\frac{\alpha}{2\alpha+1} }.
\end{align}
Then
\begin{align}
	x^{*} = \lim_{\alpha \downarrow 0}x^{*}(\alpha)
	= \lim_{\alpha \downarrow 0}
	\left( \frac{1}{2} \right)^{\frac{\alpha}{2\alpha+1}}
	e^{\frac{1}{2\alpha+1}} = e.
\end{align}
The constraint $1-\ln(x)\le 0$ is equivalent to $x \ge e$, and on this feasible
set $f(x) = x^{2}$ is strictly increasing. So there is no feasible $y > e$ with
$f(y) = y^{2} < e^{2} = f(x^{*})$. We conclude that $x^{*}=e$ is the optimal
solution of the constrained optimization problem.
\subsection{Exercise 45}
Consider the optimization problem
\begin{align}
	\text{min}\quad &f(x) := x^{2},\\
	\text{s.t.}\quad & h(x) := x-1= 0 \nonumber
\end{align}
and its optimal solution $x^{*}=1$. Find $\overline{\alpha}>0$ such that
$x^{*}$ is a minimum of the $\ell_1$-penalty function $P_1(\cdot; \alpha)$
for all $\alpha \ge \overline{\alpha}$.
\newline
We have $P_1(x;\alpha) = f(x) + \alpha|h(x)| = x^{2} + \alpha |x-1|$, which is
differentiable for $x \neq 1$ with
\begin{align}
	\frac{d}{dx}P_1(x;\alpha) = 2x + \alpha \frac{x-1}{|x-1|}.
\end{align}
For $x > 1$ this derivative equals $2x + \alpha > 0$, so $P_1(\cdot;\alpha)$
increases to the right of $x^{*}=1$ for every $\alpha > 0$. For $x < 1$ the
derivative equals $2x - \alpha$; for $x^{*}=1$ to be a minimum it must be
nonpositive for $x$ just below $1$, which requires
\begin{align}
	\alpha \ge 2x \longrightarrow 2 \quad \text{as } x \uparrow 1.
\end{align}
Conversely, for $\alpha \ge 2$ we have $2x - \alpha \le 0$ for all $x \le 1$,
so $P_1(\cdot;\alpha)$ decreases on $(-\infty,1]$ and increases on
$[1,\infty)$. Hence $x^{*}=1$ is a (global) minimum of $P_1(\cdot;\alpha)$ for
all $\alpha \ge \overline{\alpha} := 2$.
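The same threshold can also be read off from convexity (a short alternative
check, not part of the derivation above): $P_1(\cdot;\alpha)$ is convex, so
$x^{*}=1$ is a global minimizer if and only if $0$ lies in the subdifferential
at $1$,
\begin{align}
	0 \in \partial P_1(1;\alpha) = 2 + \alpha\,[-1,1] = [\,2-\alpha,\ 2+\alpha\,]
	\quad\Longleftrightarrow\quad \alpha \ge 2.
\end{align}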
\subsection{Exercise 46}
Consider the optimization problem from Exercise 40,
\begin{align}
	\min \quad &f(x):=\gamma + c^{T}x + \frac{1}{2}x^{T}Qx, \label{eq: opp}\\
	\text{s.t}\quad &h(x) := b^{T}x = 0, \nonumber
\end{align}
the penalized optimization problem
\begin{align}
	P(x;\alpha) := f(x) + \frac{\alpha}{2}\left( h(x) \right)^{2}
\end{align}
with solution
\begin{align}
	x^{*}(\alpha) = \left( \frac{\alpha}{1+\alpha b^{T}Q^{-1}b}Q^{-1} bb^{T}
	-I\right) Q^{-1} c
\end{align}
and the solution of the constrained optimization problem
\begin{align}
	x^{*} &= \lim_{\alpha \to \infty}x^{*}(\alpha)\\
	&= \left(\frac{Q^{-1}bb^{T}}{b^{T}Q^{-1}b} -I \right) Q^{-1} c.
\end{align}
\subsubsection{Part a}
Prove that
\begin{align}
	\mu^{*} := \lim_{\alpha \to +\infty}\alpha h(x^{*}(\alpha))
\end{align}
is a Lagrange multiplier corresponding to the optimal solution $x^{*}$.
\begin{align}
	\alpha h(x^{*}(\alpha))
	&= \alpha b^{T}x^{*}(\alpha)\\
	&= \alpha \left( \frac{\alpha}{1+\alpha b^{T}Q^{-1}b}b^{T}Q^{-1}b\ b^{T}
	-b^{T}\right) Q^{-1}c\\
	&= \left( \frac{\alpha^{2}}{1+\alpha b^{T}Q^{-1}b}b^{T}Q^{-1}b
	- \alpha\right) b^{T}Q^{-1}c\\
	&= \left( \frac{\alpha^{2} b^{T}Q^{-1}b - \alpha -
	\alpha^{2}b^{T}Q^{-1}b}{1+\alpha b^{T}Q^{-1}b} \right) b^{T}Q^{-1}c\\
	&= \left(\frac{-\alpha}{1+\alpha b^{T}Q^{-1}b} \right) b^{T}Q^{-1}c.
\end{align}
Letting $\alpha \to +\infty$ we get
\begin{align}
	\mu^{*} = -\frac{b^{T}Q^{-1}c}{b^TQ^{-1}b}.
\end{align}
Now we check that $\mu^{*}$ is a Lagrange multiplier corresponding to $x^{*}$.
The Lagrangian is
\begin{align}
	L(x, \mu)
	&= f(x) + \mu h(x)\\
	&= \gamma + c^{T}x + \frac{1}{2}x^{T}Qx + \mu b^{T}x,
\end{align}
and we need the condition $\nabla_x L(x^{*},\mu^{*}) = 0$, i.e.
\begin{align}
	&\nabla_x L(x^{*}, \mu^{*}) = c + Qx^{*} + \mu^{*}b = 0 \qquad \Big|\;
	b^{T}Q^{-1}\cdot\\
	&-\mu^{*}b^{T}Q^{-1}b = b^{T}Q^{-1}c + b^{T}x^{*}.
\end{align}
Since $b^{T}x^{*}=0$ (feasibility of $x^{*}$), this requires
\begin{align}
	\mu^{*} = -\frac{b^{T}Q^{-1}c}{b^TQ^{-1}b},
\end{align}
which is exactly the value obtained from the limit.
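The projected equation only determines the value $\mu^{*}$ must take; that
$\nabla_x L(x^{*},\mu^{*})=0$ indeed holds with this value can be checked by
substituting $x^{*}$ directly (a short verification filling in this step):
\begin{align}
	c + Qx^{*} + \mu^{*}b
	&= c + \left( \frac{bb^{T}Q^{-1}}{b^{T}Q^{-1}b} - I \right)c
	- \frac{b^{T}Q^{-1}c}{b^{T}Q^{-1}b}\, b\\
	&= \frac{b^{T}Q^{-1}c}{b^{T}Q^{-1}b}\, b
	- \frac{b^{T}Q^{-1}c}{b^{T}Q^{-1}b}\, b = 0.
\end{align}
Together with $b^{T}x^{*}=0$, this shows that $\mu^{*}$ is a Lagrange
multiplier for $x^{*}$.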
\subsubsection{Part b}
A bit confused here.
\subsection{Exercise 47}
Prove that the following functions are NCP-functions.
\begin{enumerate}
	\item minimum function
		\begin{align}
			\varphi(a,b) = \min \{a, b\}
		\end{align}
	\item Fischer-Burmeister function
		\begin{align}
			\varphi(a,b) = \sqrt{a^{2}+b^{2}} - a - b
		\end{align}
	\item penalized minimum function
		\begin{align}
			\varphi(a,b) = 2\lambda \min \{a, b\} + (1-\lambda) a_+ b_+
		\end{align}
		where $a_+ = \max \{0, a\}$, $b_+ = \max \{0, b\} $ and $\lambda \in
		(0, 1)$.
\end{enumerate}
For 1.\ we have $\min \{a, b\} = 0$ if and only if one of the two arguments is
zero and the other one is nonnegative, i.e.
\begin{align}
	&a=0 \quad \text{and}\quad b\ge 0,\quad\text{or}\\
	&b=0 \quad \text{and}\quad a\ge 0,
\end{align}
which is equivalent to $a \ge 0$, $b \ge 0$ and $ab =0$. The minimum function
is therefore an NCP-function.
\newline
For 2.\ we have
\begin{align}
	\varphi(a,b) = \sqrt{a^{2}+b^{2}} - a -b =0
	\quad\Longleftrightarrow\quad
	\sqrt{a^{2}+b^{2}} = a+b,
\end{align}
which holds if and only if $a+b \ge 0$ (the left-hand side is nonnegative) and
\begin{align}
	a^{2} + b^{2} = (a+b)^{2},
\end{align}
i.e.\ $2ab = 0$. So one of $a, b$ is zero and, because of $a+b\ge 0$, the other
one is nonnegative; hence $a\ge 0$, $b\ge 0$ and $ab =0$. Conversely these
conditions clearly give $\varphi(a,b)=0$, so $\varphi$ is an NCP-function.
\newline
For 3.\ suppose
\begin{align}
	\varphi(a,b) = 2\lambda \min \{a,b\} + (1-\lambda) a_+ b_+ = 0.
\end{align}
Since $(1-\lambda) a_+ b_+ \ge 0$, we get $\min \{a,b\} \le 0$. If
$\min \{a,b\} < 0$, then one of $a, b$ is negative, so $a_+ b_+ = 0$ and
$\varphi(a,b) = 2\lambda \min \{a,b\} < 0$, a contradiction. Hence
$\min \{a,b\} = 0$, i.e.\ $a \ge 0$, $b \ge 0$ and $ab = 0$. Conversely, if
$a\ge 0$, $b\ge 0$ and $ab=0$, then $\min \{a,b\} = 0$ and $a_+ b_+ = ab = 0$,
so $\varphi(a,b)=0$, which means this is an NCP-function.
\subsection{Exercise 48}
Let $(x^{*}, \lambda^{*}, \mu^{*}) \in \mathbb{R}^{n + m + p}$ be a
KKT point of the optimization problem
\begin{align}
	\text{min}\quad &f(x),\\
	\text{s.t.}\quad & g_i(x) \le 0, \quad i=1,\ldots,m, \nonumber\\
	&h_j(x) =0, \quad j=1,\ldots,p, \nonumber
\end{align}
where all functions are considered to be twice continuously differentiable.
Additionally we have that
\begin{itemize}
	\item $g_i(x^{*}) + \lambda^{*}_i \neq 0$ for all $i=1,\ldots,m$ (strict
		complementarity),
	\item $\{\nabla g_i(x^{*})\}_{i\in \mathcal{A}(x^{*})}$ and $\{\nabla
		h_j(x^{*})\}_{j=1,\ldots,p}$ are linearly independent (LICQ),
	\item the second order sufficient optimality condition is satisfied.
\end{itemize}
Let $\Phi: \mathbb{R}^{n+m+p}\to \mathbb{R}^{n+m+p}$ be defined as
\begin{align}
	\Phi(x,\lambda,\mu) :=
	\begin{pmatrix}
		\nabla_{x} L(x,\lambda ,\mu)\\
		h(x)\\
		\phi(-g(x), \lambda)
	\end{pmatrix}
\end{align}
where
\begin{align}
	\phi(-g(x), \lambda) :=
	\begin{pmatrix}
		\varphi(-g_1(x), \lambda_1)\\
		\vdots\\
		\varphi(-g_m(x), \lambda_m)\\
	\end{pmatrix} \in \mathbb{R}^{m}
\end{align}
and $\varphi:\mathbb{R}^{2} \to \mathbb{R}$ with $\varphi(a,b) = \min \{a,
b\}$. Show that the matrix $\nabla \Phi$ is well defined and regular.
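As a remark (not part of the exercise statement, but it explains why
regularity of $\nabla \Phi$ is of interest, e.g.\ for Newton-type methods on
the KKT system): by Exercise 47, $\varphi(-g_i(x),\lambda_i)=0$ holds if and
only if $-g_i(x)\ge 0$, $\lambda_i \ge 0$ and $g_i(x)\lambda_i = 0$, so
\begin{align}
	\Phi(x,\lambda,\mu)=0
	\quad\Longleftrightarrow\quad
	\nabla_x L(x,\lambda,\mu) = 0,\quad h(x)=0,\quad g(x)\le 0,\quad
	\lambda \ge 0,\quad \lambda_i g_i(x) = 0 \ \ (i=1,\ldots,m);
\end{align}
in particular $\Phi(x^{*},\lambda^{*},\mu^{*}) = 0$.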
\newline
The matrix is well defined: first of all, the functions $f, g_i, h_j$
are $C^{2}$, and $\min \{-g_i(x), \lambda_i\}$ is differentiable at the KKT
point because of the strict complementarity condition. Indeed,
\begin{align}
	\nabla \varphi(a, b) =
	\begin{cases}
		(1, 0)^{T} & a<b\\
		(0, 1)^{T} & a>b
	\end{cases}
\end{align}
and $\varphi$ is not differentiable only for $a=b$, which never occurs here
since, per assumption, $g_i(x^{*}) + \lambda_i^{*} \neq 0$, i.e.\
$-g_i(x^{*}) \neq \lambda_i^{*}$, for all $i$.

Then we need to show that the matrix $\nabla \Phi(x^{*}, \lambda^{*},
\mu^{*})$ is regular. First of all, the matrix has the following form
\begin{align}
	\nabla \Phi =
	\begin{pmatrix}
		\nabla_{x x}^{2}L(x^{*},\lambda^{*}, \mu^{*}) & \nabla g(x^{*})^{T} & \nabla
		h(x^{*})^{T}\\
		\nabla h(x^{*}) & 0 & 0\\
		\nabla \phi_1(-g(x^{*}), \lambda^{*}) &\nabla \phi_2(-g(x^{*}), \lambda^{*}) & 0\\
	\end{pmatrix} \in \mathbb{R}^{(n+m+p) \times (n+m+p)}.
\end{align}
For convenience the Jacobian of $\phi(-g(x^{*}), \lambda^{*})$ was split into
two blocks. By the chain rule the block w.r.t.\ $x$ has the form
\begin{align}
	\nabla \phi_1(-g(x^{*}),\lambda^{*}) =
	\begin{pmatrix}
		-\partial_{a}\varphi(-g_1(x^{*}),
		\lambda_1^{*})\,\partial_{x_1}g_1(x^{*}) & \dots &
		-\partial_{a}\varphi(-g_1(x^{*}),
		\lambda_1^{*})\,\partial_{x_n}g_1(x^{*})\\
		\vdots & & \vdots \\
		-\partial_{a}\varphi(-g_m(x^{*}),
		\lambda_m^{*})\,\partial_{x_1}g_m(x^{*}) & \dots &
		-\partial_{a}\varphi(-g_m(x^{*}),
		\lambda_m^{*})\,\partial_{x_n}g_m(x^{*})
	\end{pmatrix} \quad \in \mathbb{R}^{m \times n}
\end{align}
and the block w.r.t.\ $\lambda$ is diagonal,
\begin{align}
	\nabla \phi_2(-g(x^{*}),\lambda^{*}) =
	\begin{pmatrix}
		\partial_{b}\varphi(-g_1(x^{*}), \lambda_1^{*}) & & \\
		& \ddots & \\
		& & \partial_{b}\varphi(-g_m(x^{*}), \lambda_m^{*})
	\end{pmatrix} \quad \in \mathbb{R}^{m \times m},
\end{align}
where $\partial_a \varphi$ and $\partial_b \varphi$ denote the partial
derivatives of $\varphi$ with respect to its first and second argument.
To show that $\nabla \Phi(x^{*}, \lambda^{*},
\mu^{*})$ is regular we show that $\text{ker}\left(\nabla \Phi(x^{*}, \lambda^{*},
\mu^{*}) \right) = \{0\}$.
\newline
Let $ q = (q^{(1)}, q^{(2)}, q^{(3)})^{T} \in
\mathbb{R}^{n+m+p}$; we need to find all solutions of
\begin{align}
	\nabla \Phi(x^{*}, \lambda^{*}, \mu^{*}) \begin{pmatrix}
	q^{(1)}\\q^{(2)}\\q^{(3)} \end{pmatrix} =0.
\end{align}
These are three equations
\begin{align}
	&\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})q^{(1)} + \nabla g(x^{*})^{T} q^{(2)}
	+ \nabla h(x^{*})^{T}q^{(3)} = 0 \label{eq: ex49.1}\\
	&\nabla h(x^{*}) q^{(1)} = 0 \label{eq: ex49.2}\\
	&\nabla \phi_1(-g(x^{*}), \lambda^{*}) q^{(1)} + \nabla
	\phi_2(-g(x^{*}),\lambda^{*}) q^{(2)} = 0 \label{eq: ex49.3}.
\end{align}
By multiplying \ref{eq: ex49.1} with $(q^{(1)})^{T}$ we get that
\begin{align}
	&(q^{(1)})^{T}\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})q^{(1)}\\
	&+ \sum_{i=1}^{m} \underbrace{q_i^{(2)}(q^{(1)})^{T}\nabla
	g_i(x^{*})}_{=0 \;\;\text{(see below)}} \label{eq: ex49.4} \\
	&+ \sum_{j=1}^{p} q_j^{(3)}\underbrace{(q^{(1)})^{T}\nabla h_j(x^{*})}_{=0
	\;\; (\ref{eq: ex49.2})}\\
	&= 0. \nonumber
\end{align}
It is not directly obvious why the term \ref{eq: ex49.4} in the above
equation vanishes. To see this we have to separate two cases in
\ref{eq: ex49.3}. In the first case $i \in \mathcal{A}(x^{*})$, i.e.\
$g_i(x^{*}) =0$ with $\lambda_i^{*}>0$ and thereby
$-g_i(x^{*}) - \lambda_i^{*} < 0$, so $\partial_{a}\varphi(-g_i(x^{*}),
\lambda_i^{*}) = 1$ and $\partial_{b}\varphi(-g_i(x^{*}), \lambda_i^{*}) = 0$,
and the corresponding rows are
\begin{align}
	&\left(\nabla \phi_1(-g(x^{*}),\lambda^{*})\right)_i = -\nabla g_i(x^{*})^{T}\\
	&\left( \nabla \phi_2(-g(x^{*}),\lambda^{*})\right)_i = 0;
\end{align}
evaluating the $i$-th equation in \ref{eq: ex49.3} we get
\begin{align}
	-\nabla g_i(x^{*})^{T}q^{(1)} = 0 \qquad \forall i\in\mathcal{A}(x^{*}).
\end{align}
In the other case $i \not\in \mathcal{A}(x^{*})$, $g_i(x^{*}) < 0$ with
$\lambda_i^{*} = 0$ and thereby $-g_i(x^{*}) - \lambda_i^{*} >0$, so the
minimum is attained in the second argument, $\partial_{a}\varphi = 0$ and
$\partial_{b}\varphi = 1$. The corresponding rows are
\begin{align}
	&\left(\nabla \phi_1(-g(x^{*}),\lambda^{*})\right)_i = 0\\
	&\left( \nabla \phi_2(-g(x^{*}),\lambda^{*})\right)_i =
	(0,\ldots,0,\underbrace{1}_{i},0,\ldots,0) = e_i^{T};
\end{align}
evaluating the $i$-th equation in \ref{eq: ex49.3} we get
\begin{align}
	q_i^{(2)} = 0 \qquad \forall i \not\in \mathcal{A}(x^{*}).
\end{align}
Both cases together imply that the sum in term \ref{eq: ex49.4} evaluates to
$0$. In summary we are left with
\begin{align}
	(q^{(1)})^{T}\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})q^{(1)} =0.
\end{align}
Since $q^{(1)}$ satisfies $\nabla g_i(x^{*})^{T}q^{(1)} = 0$ for all $i \in
\mathcal{A}(x^{*})$ and $\nabla h(x^{*})q^{(1)} = 0$, we have $q^{(1)} \in
T_2(x^{*})$, and by the second order sufficient optimality condition
$\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})$ is positive definite on this set,
so the only solution is $q^{(1)} = 0$. Equation \ref{eq: ex49.1} is then left
with
\begin{align}
	&\nabla g(x^{*})^{T}q^{(2)}+\nabla h(x^{*})^{T}q^{(3)} \\
	=& \sum_{i=1}^{m} q_i^{(2)}\nabla g_i(x^{*})
	+ \sum_{j=1}^{p} q_j^{(3)} \nabla h_j(x^{*}) \\
	=& \sum_{i \in \mathcal{A}(x^{*})} q_i^{(2)}\nabla g_i(x^{*})
	+ \sum_{j=1}^{p} q_j^{(3)} \nabla h_j(x^{*}) = 0,
\end{align}
where in the last equation we dropped all $q^{(2)}_i = 0$, which are exactly
those with $i \not\in \mathcal{A}(x^{*})$. Since LICQ is fulfilled these
vectors are linearly independent, and by the definition of linear independence
the only $q^{(2)}, q^{(3)}$ fulfilling the above condition are $q^{(2)} = 0$
and $q^{(3)} = 0$. Thereby $q = 0$ and
$\text{ker}(\nabla\Phi(x^{*},\lambda^{*},\mu^{*})) = \{0\}$, so the
matrix is regular.

\end{document}