notes

uni notes
git clone git://popovic.xyz/notes.git
Log | Files | Refs

sesh7.tex (15361B)


      1 \include{./preamble.tex}
      2 
      3 \begin{document}
      4 \maketitle
      5 \tableofcontents
      6 
      7 \section{Sheet 7}
      8 \subsection{Exercise 43}
      9 Consider the optimization problem
     10 \begin{align}
     11     \text{min}\quad &f(x) := (x_1+1)^{2}+(x_2+2)^2,\\
     12     \text{s.t.}\quad & g_1(x) := -x_1 \le 0 \nonumber\\
     13     & g_2(x) := -x_2 \le 0 \nonumber
     14 \end{align}
     15 with $x = (x_1,x_2)^{T}$. For $\alpha >0$, find the minimum $x^{*}(\alpha)$
     16 of the penalty function
     17 \begin{align}
     18     P(x;\alpha) := f(x) + \frac{\alpha}{2} \|g_+(x)\|^2
     19 \end{align}
     20 and the limit points $x^{*}=\lim_{\alpha \to +\infty} x^{*}(\alpha)$ and
     21 $\lambda^{*} = \lim_{\alpha \to +\infty}\alpha g_+ (x^{*}(\alpha))$. Find out
     22 if $(x^{*}, \lambda^{*})$ is a KKT point of the constrained optimization
     23 problem.
     24 \newline
     25 First we find the minimum of $P(x;\alpha)$.
     26 \begin{align}
     27     \nabla P(x;\alpha)
     28     = \nabla f(x) + \frac{\alpha}{2}\left(\nabla\left(\max(0,-x_1)\right)^{2}
     29     +\nabla \left( \max\left(0,-x_2\right) \right)^{2} \right)
     30 \end{align}
     31 since $\frac{\partial }{\partial x_i} \max(0, -x_i)^2$ is $2x_i$ for $x_i < 0$
     32 and $0$ otherwise for all $i=1,2$, so we have the equations
     33 \begin{align}
     34     \nabla P(x;\alpha) =
     35     \begin{pmatrix}
     36         2(x_1+1)\\
     37         2(x_2+2)
     38     \end{pmatrix}
     39     + \alpha
     40     \begin{pmatrix}
     41         x_1\\
     42         x_2
     43     \end{pmatrix}= 0
     44 \end{align}
     45 which gives
     46 \begin{align}
     47     x^{*}(\alpha) =
     48     \begin{pmatrix}
     49         -2(2+\alpha)^{-1}\\
     50         -4(2+\alpha)^{-1}
     51     \end{pmatrix}\\
     52     x^{*} = \lim_{\alpha \to +\infty}x^{*}(\alpha)=
     53     \begin{pmatrix} 0\\0 \end{pmatrix}
     54 \end{align}
     55 and
     56 \begin{align}
     57     \lambda^{*}
     58     &= \lim_{\alpha \to +\infty}\alpha g_+(x^{*}(\alpha))\\
     59     &= \lim_{\alpha \to +\infty}
     60     \begin{pmatrix}
     61         \max(0, \frac{2\alpha}{\left(2+\alpha\right) }\\
     62         \max(0, \frac{4\alpha}{\left(2+\alpha\right) }
     63     \end{pmatrix}\\
     64     &=
     65     \begin{pmatrix}2 \\ 4 \end{pmatrix}.
     66 \end{align}
     67 All that is left is to show that $(x^{*}, \lambda^{*})$ is a KKT point by
     68 $\nabla_x L(x^{*},\lambda^{*}) =0$
     69 \begin{align}
     70     &\nabla f(x^{*}) + \lambda_1^{*} \nabla g_1(x^{*}) + \lambda_2^{*}\nabla
     71     g_2(x^{*}) = \\
     72     &=
     73     \begin{pmatrix} 2\\ 4 \end{pmatrix}
     74     + \begin{pmatrix} -2 \\ -4 \end{pmatrix}\\
     75     &=\begin{pmatrix} 0\\0 \end{pmatrix},
     76 \end{align}
     77 we conclude that $\left(x^{*}=(0,0)^{T}, \lambda^{*} = (2, 4)^{T}\right)$ is a
     78 KKT point.
     79 \subsection{Exercise 44}
     80 Consider the optimization problem
     81 \begin{align}
     82     \text{min}\quad &f(x) := x^{2},\\
     83     \text{s.t.}\quad & g(x) := 1-\ln(x)\le 0 \nonumber
     84 \end{align}
     85 and the penalized optimization problem
     86 \begin{align}
     87     \min_{x\in\mathbb{R}} P(x;\alpha) = f(x) + \alpha \phi\left(
     88     \frac{g(x)}{\alpha} \right).
     89 \end{align}
     90 with $\phi(t) = e^{t} -1$ (\textit{exponential penalty function}). For
     91 $\alpha >0$ find the optimal solution $x^{*}\left( \alpha \right)$ of the
     92 penalized optimization problem and prove $ x^{*}$, the limit of
     93 $x^{*}(\alpha)$ as $\alpha \downarrow 0$ is an optimal solution of the
     94 constrained optimization problem.
     95 \newline
     96 To find the minimum we differentiate $P(x;\alpha)$ w.r.t $x$
     97 \begin{align}
     98     \frac{d}{dx}P(x;\alpha) = \frac{d}{dx}\left( x^{2} + \alpha\left(
     99     \exp\left( \frac{1-\ln(x)}{\alpha} \right)  -1 \right)  \right) =\\
    100     &= 2x - e^{\frac{1}{\alpha}}x^{-\frac{\alpha+1}{\alpha}}
    101 \end{align}
    102 setting to $0$ give the equation
    103 \begin{align}
    104     &x^{-\frac{\alpha+1}{\alpha}} = 2e^{-\frac{1}{\alpha}}x\\
    105     &x^{*}(\alpha) = \left( \frac{1}{2}e^{\frac{1}{\alpha}}
    106     \right)^{\frac{\alpha}{2\alpha+1} }.
    107 \end{align}
    108 Then
    109 \begin{align}
    110     x^{*} = \lim_{\alpha \downarrow 0}x^{*}(\alpha) = e.
    111 \end{align}
    112 First of all $f(x) = x^{2}$ is strictly convex and the condition $1-\ln(x)\le
    113 0$ is equivalent the condition $x \ge e$. So there is no $y \in \{x\in
    114 \mathbb{R}: x > e\}$ such that $f(y) = y^2 < e^2$. We conclude that $x^{*}=e$
    115 is the optimal solution for the constrained optimization problem.
    116 \subsection{Exercise 45}
    117 Consider the optimization problem
    118 \begin{align}
    119     \text{min}\quad &f(x) := x^{2},\\
    120     \text{s.t.}\quad & h(x) := x-1= 0 \nonumber
    121 \end{align}
    122 and its optimal solution $x^{*}=1$. For $\overline{\alpha}>0$ such that
    123 $x^{*}$ is a minimum of the $\ell_1$-penalty function $P_1(\cdot, \alpha)$
    124 for all $\alpha \ge \overline{\alpha}$.
    125 We have that for $\overline{\alpha}$ and $x^{*}$ and some $x \in \mathbb{R}$ that
    126 \begin{align}
    127    &P_1(x;\overline{\alpha}) < P_1(x^{*},\overline{\alpha})\\
    128    &x^{2} + \overline{\alpha} |x-1| < 1  \qquad \big|\frac{d}{dx}\cdot\\
    129    &2x + \overline{\alpha} \frac{x-1}{|x-1|} < 0\\
    130    &\alpha < -\frac{2x|x-1|}{x-1} \longrightarrow 2 \quad \text{as
    131    $x\downarrow 1$ }
    132 \end{align}
    133 so $\alpha \ge 2$.
    134 \subsection{Exercise 46}
    135 Consider the optimization problem in Exercise 40
    136 \begin{align}
    137     \min \quad &f(x):=\gamma + c^{T}x + \frac{1}{2}x^{T}Qx, \label{eq: opp}\\
    138     \text{s.t}\quad &h(x) := b^{T}x = 0, \nonumber
    139 \end{align}
    140 The penalized optimization problem
    141 \begin{align}
    142     P(x;\alpha) := f(x) + \frac{\alpha}{2}\left( h(x) \right)^{2}
    143 \end{align}
    144 with solution
    145 \begin{align}
    146     x^{*}(\alpha) = \left( \frac{\alpha}{1+\alpha b^{T}Q^{-1}b}Q^{-1} bb^{T}
    147     -I\right) Q^{-1} c
    148 \end{align}
    149 and solution to the constrained optimization problem
    150 \begin{align}
    151     x^{*} &= \lim_{\alpha \to \infty}x^{*}(\alpha)\\
    152           &= \left(\frac{Q^{-1}bb^{T}}{b^{T}Q^{-1}b} -I \right) Q^{-1} c.
    153 \end{align}
    154 \subsubsection{Part a}
    155 Prove that
    156 \begin{align}
    157     \mu^{*} := \lim_{\alpha \to +\infty}\alpha h(x^{*}(\alpha))
    158 \end{align}
    159 is a Lagrange multiplier corresponding to the optimal solution $x^{*}$.
    160 \begin{align}
    161     \alpha h(x^{*}(\alpha))
    162     &= \alpha b^{T}x^{*}(\alpha)\\
    163     &= \alpha \left( \frac{\alpha}{1+\alpha b^{T}Q^{-1}b}b^{T}Q^{-1}b\ b^{T}
    164     -b^{T}\right) Q^{-1}c\\
    165     &= \left( \frac{\alpha^{2}}{1+\alpha b^{T}Q^{-1}b}b^{T}Q^{-1}b
    166      - \alpha\right) b^{T}Q^{-1}c\\
    167     &= \left( \frac{\alpha^{2} b^{T}Q^{-1}b - \alpha -
    168     \alpha^{2}b^{T}Q^{-1}b}{1+\alpha b^{T}Q^{-1}b} \right) b^{T}Q^{-1}c\\
    169     &= \left(\frac{-\alpha}{1+\alpha b^{T}Q^{-1}b} \right) b^{T}Q^{-1}c\\
    170 \end{align}
    171 then we let the $\alpha \to +\infty$ and we get
    172 \begin{align}
    173     \mu^{*} = -\frac{b^{T}Q^{-1}c}{b^TQ^{-1}b}.
    174 \end{align}
    175 Now we check if $\mu^{*}$ is the Lagrange multiplier w.r.t $x^{*}$.
    176 \begin{align}
    177     L(x, \mu)
    178     &= f(x) + \mu h(x)\\
    179     &= \gamma + c^{T}x + \frac{1}{2}x^{T}Qx + \mu b^{T}x,
    180 \end{align}
    181 we need the condition $\nabla L(x^{*},\mu^{*}) = 0$, which is satisfied if
    182 \begin{align}
    183     &\nabla L(x^{*}, \mu^{*}) = c + Qx^{*} + \mu^{*}b = 0 \qquad \Big|
    184     b^{T}Q^{-1}\\
    185     &-\mu^{*}b^{T}Q^{-1}b = b^{T}Q^{-1}c + b^{T}x^{*}.\\
    186 \end{align}
    187 we know that $b^{T}x^{*}=0$ is satisfied then
    188 \begin{align}
    189     \mu^{*} = -\frac{b^{T}Q^{-1}c}{b^TQ^{-1}b}.
    190 \end{align}
    191 which is the same as taking the limit.
    192 \subsubsection{Part b}
    193 A bit confused here.
    194 \subsection{Exercise 47}
    195 Prove that the following functions are NCP-functions.
    196 \begin{enumerate}
    197     \item minimum function
    198         \begin{align}
    199             \varphi(a,b) = \min \{a, b\}
    200         \end{align}
    201     \item Fischer-Burgmeister function
    202         \begin{align}
    203             \varphi(a,b) = \sqrt{a^{2}+b^{2}} - a - b
    204         \end{align}
    205     \item penalized minimum function
    206         \begin{align}
    207             \varphi(a,b) = 2\lambda \min \{a, b\}  + (1-\lambda) a_+ b_+
    208         \end{align}
    209         where $a_+ = \max \{0, a\}$, $b_+ = \max \{0, b\} $ and $\lambda \in
    210         (0, 1)$
    211 \end{enumerate}
    212 For 1. we have that $\min \{a, b\}  =0$ if
    213 \begin{align}
    214     &\Leftrightarrow a=0 \quad \text{for}\quad b\ge 0\quad \text{then}\quad
    215     ab =0\\
    216     &\Leftrightarrow b=0 \quad \text{for}\quad a\ge 0\quad \text{then}\quad
    217     ab =0.
    218 \end{align}
    219 The minimum function is an NCP-function
    220 \newline
    221 For 2. we have
    222 \begin{align}
    223     \varphi(a,b) = \sqrt{a^{2}+b^{2}} - a -b =0
    224 \end{align}
    225 then
    226 \begin{align}
    227     a^{2} + b^{2} = (a+b)^{2},
    228 \end{align}
    229 here we need $a\ge 0$ and $b\ge 0$ to preserve the root. Solving the above we
    230 get $2ab =0$ or simply $ab =0$, which means $\varphi$ is an NCP-function
    231 \newline
    232 For 3. we have that
    233 \begin{align}
    234     &\varphi(a,b) = -2\lambda \min (a,b) + (1-\lambda) \max(0, a) \max(0,b) = 0\\
    235     &-2\lambda \min (a,b) = (1-\lambda) \max(0, a) \max(0, b) = 0.
    236 \end{align}
    237 The solution is either $a = 0$ with $b \ge 0$ or $b=0$ with $a\ge 0$ in the
    238 first case we get that $a\cdot b=0$, which means this is an NCP function.
    239 \subsection{Exercise 48}
    240 Let $(x^{*}, \lambda^{*}, \mu^{*}) \in \mathbb{R}^{n + m + p}$ be a
    241 KKT point of the optimization problem.
    242 \begin{align}
    243     \text{min}\quad &f(x),\\
    244     \text{s.t.}\quad & g_i(x) \le 0, i=1,\ldots,m \nonumber
    245                      &h_j(x) =0, j=1,\ldots,p \nonumber
    246 \end{align}
    247 all functions are considered to be twice continuously differentiable.
    248 Additionally we have that
    249 \begin{itemize}
    250     \item $g_i(x^{*}) + \lambda^{*}_i \neq 0$ for all $i=1,\ldots,p$
    251     \item $\{\nabla g_i(x^{*})\}_{i\in \mathcal{A}(x^{*})}$ and $\{\nabla
    252         h_j(x^{*})\}_{j=1,\ldots,p}$ are linearly independent (LICQ)
    253     \item second order sufficient optimality condition is satisfied
    254 \end{itemize}
    255 Let $\Phi: \mathbb{R}^{n+m+p}\to \mathbb{R}^{n+m+p}$ be defined as
    256 \begin{align}
    257    \Phi  :=
    258    \begin{pmatrix}
    259        \nabla_{x x} L(x,\lambda ,\mu)\\
    260        h(x)\\
    261        \phi(-g(x), \lambda)
    262    \end{pmatrix}
    263 \end{align}
    264 where
    265 \begin{align}
    266     \phi(-g(x), \lambda) :=
    267     \begin{pmatrix}
    268         \varphi(-g_1(x), \lambda_1)\\
    269         \vdots\\
    270         \varphi(-g_m(x), \lambda_1)\\
    271     \end{pmatrix} \in \mathbb{R}^{m}
    272 \end{align}
    273 and $\varphi:\mathbb{R}^{2} \to \mathbb{R}$ with $\varphi(a,b) = \min \{a,
    274 b\}$. Show that the matrix $\nabla \Phi$ is well defined and regular.
    275 \newline
    276 The matrix is well defined because first of all, the functions $f, g_i, h_j$
    277 are $C^{2}$ and $\min \{-g_i(x), \lambda_i\}$ is differentiable because of
    278 the strict complementarity condition, meaning that
    279 \begin{align}
    280     \nabla \varphi(a, b) =
    281     \begin{cases}
    282         (1, 0)^{T}\quad a<b\\
    283         (0, 1)^{T}\quad a>b
    284     \end{cases}
    285 \end{align}
    286 and not differentiable for $a=b$ which is never the case since, per condition
    287 $g_i(x^{*}) + \lambda_i^{*} \neq 0$ for all $i$.
    288 
    289 Then we need to show that he matrix $\nabla \Phi(x^{*}, \lambda^{*},
    290 \mu^{*})$ is regular, first of all the matrix has the following form
    291 \begin{align}
    292     \nabla \Phi =
    293     \begin{pmatrix}
    294         \nabla_{x x}^{2}L(x^{*},\lambda^{*}, \mu^{*}) & \nabla g(x^{*})^{T} & \nabla
    295         h(x^{*})^{T}\\
    296         \nabla h(x^{*}) & 0 & 0\\
    297           \nabla \phi_1(-g(x^{*}), \lambda^{*}) &\nabla \phi_2(-g(x^{*}), \lambda^{*})  & 0\\
    298     \end{pmatrix} \in \mathbb{R}^{(n+m+p) \times  (n+m+p)}.
    299 \end{align}
    300 For convinience the matrix $\phi(-g(x^{*}), \lambda^{*})$ was split into two
    301 parts because of the chain rule the that matrix has the following form
    302 \begin{align}
    303     \phi_1(-g(x^{*}),\lambda^{*}) =
    304     \begin{pmatrix}
    305         \partial_{g_1}\varphi(-g_1(x^{*}),
    306         \lambda_1)(\partial_{x_1}g_1(x^{*})) & \dots &
    307         \partial_{g_1}\varphi(-g_1(x^{*}),
    308         \lambda_1)(\partial_{x_n}g_1(x^{*}))\\
    309         \vdots & \vdots & \vdots \\
    310         \partial_{g_m}\varphi(-g_m(x^{*}),
    311         \lambda_m)(\partial_{x_1}g_m(x^{*})) & \dots &
    312         \partial_{g_m}\varphi(-g_m(x^{*}),
    313         \lambda_m)(\partial_{x_n}g_1(x^{*}))
    314     \end{pmatrix} \quad \in \mathbb{R}^{n \times m}
    315 \end{align}
    316 and
    317 \begin{align}
    318     \phi_2(-g(x^{*}),-\lambda^{*}) =
    319     \begin{pmatrix}
    320         \partial_{\lambda_1}\varphi(-g_1(x^{*}),
    321         \lambda_1^{*})&0&\ldots\\
    322         & \ddots& \\
    323         \ldots &0&
    324         \partial_{\lambda_m}\varphi(-g_m(x^{*}),
    325         \lambda_m^{*})
    326     \end{pmatrix} \quad \in \mathbb{R}^{m \times m}
    327 \end{align}
    328 To show that $\nabla \Phi(x^{*}, \lambda^{*},
    329 \mu^{*})$ is regular we show that $\text{ker}\left(\nabla \Phi(x^{*}, \lambda^{*},
    330 \mu^{*})  \right) = \emptyset$.
    331 \newline
    332 Let $ q = (q^{(1)}, q^{(2)}, q^{(3)})^{T} \in
    333 \mathbb{R}^{n+m+p}$ then we need to find the solution of
    334 \begin{align}
    335         \nabla \Phi(x^{*}, \lambda^{*}, \mu^{*}) \begin{pmatrix}
    336         q^{(1)}\\q^{(2)}\\q^{(3)} \end{pmatrix} =0.
    337 \end{align}
    338 These are three equations
    339 \begin{align}
    340     &\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})q^{(1)} + \nabla g(x^{*})^{T} q^{(2)}
    341     + \nabla h(x^{*})^{T}q^{(3)} = 0 \label{eq: ex49.1}\\
    342     &\nabla h(x^{*}) q^{(1)} = 0 \label{eq: ex49.2}\\
    343     &\nabla \phi_1(-g(x^{*}), \lambda^{*}) q^{(1)} + \nabla
    344     \phi_2(-g(^{*}),\lambda^{*}) q^{(2)} = 0 \label{eq: ex49.3}.
    345 \end{align}
    346 By multiplying \ref{eq: ex49.1} with $(q^{(1)})^{T}$ we get that
    347 \begin{align}
    348     &(q^{(1)})^{T}\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})q^{(1)}\\
    349     &+ \sum_{i=1}^{m} q_i^{(3)}\underbrace{(q^{(1)})^{T}\nabla
    350     g_i(x^{*})}_{=0 \;\; (\ref{eq: ex49.3})} \label{eq: ex49.4} \\
    351     &+ \sum_{j=1}^{p} q_j^{(2)}\underbrace{(q^{(1)})^{T}\nabla h_j(x^{*})}_{=0
    352     \;\; (\ref{eq: ex49.2})}\\
    353      &= 0. \nonumber
    354 \end{align}
    355 It is not directly obvious why the term \ref{eq: ex49.4} in the above
    356 equation is directly zero. To see this we have to separate two cases, the first
    357 addresses what happens in \ref{eq: ex49.3} in the case $i \in
    358 \mathcal{A}(x^{*})$. In this case $\nabla_{g_i}\varphi(-g_i(x^{*}),
    359 \lambda_i^{*}) = 1$ since $g_i(x^{*}) =0$ with $\lambda_i^{*}>0$ and thereby
    360 $-g_i(^{*}) - \lambda_i^{*} < 0$, so we have that the this specific entry is
    361 \begin{align}
    362     &\left(\nabla \phi_1(-g(x^{*}),\lambda^{*})\right)_i = -\nabla g_i(x^{*})\\
    363     &\left( \nabla \phi_2(-g(x^{*}),\lambda^{*})\right)_i = 0,
    364 \end{align}
    365 evaluating the equation in \ref{eq: ex49.3} we get
    366 \begin{align}
    367     -\nabla g_i(x^{*})^{T}q^{(1)} = 0 \qquad \forall i\in\mathcal{A}(x^{*}).
    368 \end{align}
    369 In the other case $i \not\in \mathcal{A}(x^{*})$, $g_i(x^{*}) < 0$ with
    370 $\lambda^{*} = 0$ and thereby $-g_i(^{*}) - \lambda_i^{*} >0$ so
    371 $\varphi(-g_i(x^{*}),\lambda^{*}) = \lambda_i^{*}$. The entries of the matrix
    372 are
    373 \begin{align}
    374     &\left(\nabla \phi_1(-g(x^{*}),\lambda^{*})\right)_i = 0\\
    375     &\left( \nabla \phi_2(-g(x^{*}),\lambda^{*})\right)_i =
    376     (0,\ldots,0,\underbrace{1}_{i},0,\ldots,0)^{T},
    377 \end{align}
    378 evaluating the equation in \ref{eq: ex49.3} we get
    379 \begin{align}
    380     q_i^{(2)} = 0 \qquad \forall i \not\in \mathcal{A}(x^{*}).
    381 \end{align}
    382 Both cases contribute to the fact that the sum evaluates to 0 in term
    383 \ref{eq: ex49.4}. In summary we are left with
    384 \begin{align}
    385     (q^{(1)})^{T}\nabla_x^{2}L(x^{*},\lambda^{*},\mu^{*})q^{(1)} =0.
    386 \end{align}
    387 Since second order sufficient optimality condition is satisfied then $q^{(1)}
    388 \in T_2(x^{*})$, and the only solution is $q^{(1)} = 0$. Equation \ref{eq:
    389 ex49.1} is left with
    390 \begin{align}
    391     &\nabla g(x^{*})^{T}q^{(2)}+\nabla h(x^{*})q^{(3)} =\\
    392     =& \sum_{i=1}^{m} q_i^{(2)}\nabla g_i(x^{*})
    393     + \sum_{j=1}^{p} q_j^{(3)}  \nabla h_j(x^{*}) =\\
    394 =& \sum_{i \in \mathcal{A}(x^{*})} q_i^{(2)}\nabla g_i(x^{*})
    395     + \sum_{j=1}^{p} q_j^{(3)}  \nabla h_j(x^{*}) = 0
    396 \end{align}
    397 in the last equation we remove all $q^{(2)}_i = 0$ which are exactly all $i
    398 \not\in \mathcal{A}(x^{*})$. Since LICQ is fulfilled these vectors are
    399 linearly independent and by definition of linear independence the only
    400 $q^{(2)}, q^{(3)}$ fulfilling the above condition are $q^{(2)} = 0$ and
    401 $q^{(3)} = 0$. Thereby $q = 0$ and
    402 $\text{ker}(\nabla\Phi(x^{*},\lambda^{*},\mu^{*})) = \emptyset$, so the
    403 matrix is regular.
    404 
    405 
    406 
    407 
    408 
    409 
    410 
    411 
    412 
    413 
    414 
    415 
    416 
    417 
    418 
    419 
    420 
    421 
    422 
    423 
    424 
    425 
    426 
    427 
    428 
    429 
    430 
    431 \end{document}