
\include{./preamble.tex}

\begin{document}
\maketitle
\tableofcontents
\section{Sheet 5}
\subsection{Problem 1}
Let $A \in \mathbb{R}^{n \times n}$ be an SPD matrix with the additive
decomposition $A = D + L + L^T$, where $D$ consists of the diagonal entries
of $A$, and $L$ and $L^T$ are the strictly lower-triangular and strictly
upper-triangular parts of $A$, respectively. For $\omega \in(0,2)$ the SSOR
preconditioner is defined as follows
\begin{align}
    C_\omega = \frac{1}{2-\omega}\left( \frac{1}{\omega} D+L\right)
    \left( \frac{1}{\omega} D \right)^{-1}\left( \frac{1}{\omega}D + L^T
    \right).
\end{align}
We can rewrite $C_\omega = KK^T$, where $K$ is an invertible lower-triangular
matrix, by a simple splitting of the diagonal entries $D =
D^{\frac{1}{2}}D^{\frac{1}{2}}$. We get
\begin{align}
    C_\omega &= \frac{1}{2-\omega}\left( \frac{1}{\omega}D +L \right)\omega
    D^{-1}\left( \frac{1}{\omega}D + L^T \right)  \\
    &= \frac{1}{\omega(2-\omega)}\left( D^{\frac{1}{2}} + \omega
    LD^{-\frac{1}{2}} \right)\left( D^{\frac{1}{2}}+\omega LD^{-\frac{1}{2}}
    \right)^T = KK^T,
\end{align}
where
\begin{align}
    K := \frac{1}{\sqrt{\omega(2-\omega)} }\left( D^{\frac{1}{2}} + \omega LD^{-
    \frac{1}{2}} \right).
\end{align}
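A short check that $K$ really is invertible and lower-triangular (not spelled
out above): $L$ is strictly lower-triangular, so $K$ is lower-triangular with
diagonal entries
\begin{align}
    K_{ii} = \frac{\sqrt{a_{ii}}}{\sqrt{\omega(2-\omega)}} > 0,
    \qquad \omega \in (0,2),
\end{align}
since the diagonal entries $a_{ii}$ of an SPD matrix are positive; hence $\det
K > 0$ and $K$ is invertible.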
The matrix $C_\omega$ is a good approximation of $A$ in the sense that, for a
suitable choice of $\omega$, the iteration matrix
\begin{align}
    H^{\text{SSOR}}_\omega = I - C^{-1}_\omega A
\end{align}
satisfies $\rho(H^{\text{SSOR}}_\omega) < 1$. Furthermore, the general idea of
preconditioning is to multiply the system $Ax=b$ by an invertible matrix $P \in
\text{GL}_n\left(\mathbb{R}\right)$, which yields the equivalent system
\begin{align}
    \underbrace{PA}_{\approx I}x&= Pb.
\end{align}
The point is that $\text{cond}_2(A)$ typically grows with the dimension $n$
(curse of dimensionality), whereas a good preconditioner achieves
$\text{cond}_2(PA) \le s$ with $s$ independent of $n$ and $s \ll
\text{cond}_2(A)$; in this sense $P$ would be an optimal preconditioner.
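As an illustration of how the factorization $C_\omega = KK^T$ is typically used
(a standard reformulation, sketched here for completeness rather than taken
from the sheet): instead of working with $C_\omega^{-1}A$ directly, one solves
the symmetrically preconditioned system
\begin{align}
    \tilde{A}\tilde{x} = \tilde{b}, \qquad
    \tilde{A} := K^{-1}AK^{-T}, \quad
    \tilde{x} := K^T x, \quad
    \tilde{b} := K^{-1} b,
\end{align}
where $\tilde{A}$ is again SPD and has the same spectrum as $C_\omega^{-1}A$,
since $K^T\left(C_\omega^{-1}A\right)K^{-T} = K^{-1}AK^{-T}$.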
\subsection{Exercise 2}
Let $m, n \in \mathbb{N}$, let $I \in \mathbb{R}^{m\times m}$ denote the
identity matrix, and let $Q \in \mathbb{R}^{m\times m}$ be the banded matrix
\begin{align}
    Q =
    \begin{pmatrix}
        4  & - 1 &&  &   \\
        -1 &  4& -1&  &   \\
           &\ddots& \ddots & \ddots & \\
           &   &   -1 & 4 & -1\\
           &   &    & -1 & 4 \\
    \end{pmatrix}
\end{align}
The eigenvalues of the matrix $Q$ lie in $\sigma(Q) \subset [4-2, 4+2] = [2,6]$
by the Gershgorin disk theorem. Since no eigenvalue is $0$, $Q$ is
invertible.
Now consider the matrix $A \in \mathbb{R}^{nm \times nm}$
\begin{align}
    A =
    \begin{pmatrix}
        Q  & - I &  &  & \\
        -I &  Q& -I&    & \\
           &\ddots& \ddots & \ddots & \\
           &     & -I & Q & -I\\
           &     &  & -I & Q \\
    \end{pmatrix}
\end{align}
We can consider the additive splitting $A = D + L + L^T$ (as in
Problem 1). The Jacobi iteration matrix is $J = - D^{-1}(L + L^T)$,
where $L$ is strictly lower-triangular with \textbf{$-1$ or $0$ entries}.
Furthermore, Gershgorin's theorem applied to $J$ gives $\sigma(J) \subset
[-1,1]$, and since $A$ is irreducibly diagonally dominant we in fact have
$\rho(J) < 1$. Hence the matrix $I - J$ is invertible and, by the geometric
(Neumann) series,
\begin{align}
    (I-J)^{-1} = \sum_{k=0}^{\infty}J^k,
\end{align}
and we have the identity
\begin{align}
    J = I - D^{-1} A \quad \Rightarrow \quad (I-J)^{-1} =
    \left(D^{-1}A\right)^{-1} = A^{-1}D.
\end{align}
Thereby the sum transforms to
\begin{align}
    A^{-1} = (I-J)^{-1}D^{-1} = \left(\sum_{k=0}^{\infty} J^k\right) D^{-1}.
\end{align}
The diagonal entries of $D$ are all $4$, so $D$ is invertible and $D^{-1} =
\frac{1}{4}I$ has non-negative entries. The matrix $L$ has only $-1$ or $0$
entries, which get compensated by the minus sign in $J = -D^{-1}(L +L^T) =
D^{-1}(-L - L^T)$, so all entries of $J^k$ are non-negative for every $k$.
Finally we arrive at the conclusion that all entries of $A^{-1}$ are
non-negative and $A$ is an $M$-matrix, or `(inverse) monotone' matrix.
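As a small sanity check (an illustrative example, not part of the sheet), take
$m = 2$; then
\begin{align}
    Q = \begin{pmatrix} 4 & -1 \\ -1 & 4 \end{pmatrix}, \qquad
    Q^{-1} = \frac{1}{15}\begin{pmatrix} 4 & 1 \\ 1 & 4 \end{pmatrix},
\end{align}
and indeed all entries of $Q^{-1}$ are non-negative, as the $M$-matrix property
predicts.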
\subsection{Exercise 3}
Let $A \in \mathbb{R}^{n \times n}$ be an SPD matrix and $b \in
\mathbb{R}^{n}$ be a right hand side of a linear system. Suppose we apply the
CG method for solving $Ax = b$. The $k$-th iterate $x_k$ of the CG method
then satisfies the $A$-norm optimality condition
\begin{align}
    \|x_k - x\|_A = \min_{y\in x_0 + B_k} \|y - x\|_A,
\end{align}
where
\begin{align}
    B_k = \text{span}\left\{ p_0,\ldots,p_{k-1} \right\} = \text{span}\left\{
    r_0, Ar_0, \ldots,A^{k-1}r_0\right\}
\end{align}
is the Krylov space. The search directions $p_k$ form an $A$-orthogonal
system.
\newline
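For reference (the standard CG recurrences that generate these quantities;
textbook material rather than part of the sheet), with $r_k = b - Ax_k$ and
$p_0 = r_0$:
\begin{align}
    \alpha_k &= \frac{r_k^T r_k}{p_k^T A p_k}, \qquad
    x_{k+1} = x_k + \alpha_k p_k, \qquad
    r_{k+1} = r_k - \alpha_k A p_k,\\
    \beta_k &= \frac{r_{k+1}^T r_{k+1}}{r_k^T r_k}, \qquad
    p_{k+1} = r_{k+1} + \beta_k p_k.
\end{align}
\newline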
Now if the spectrum of $A$ satisfies $\sigma(A) \subset [a, b] \subset (0,
\infty)$, then for any polynomial $p \in \mathbb{P}_k^{0, 1}:= \left\{p \in
\mathbb{P}_k: p(0) = 1\right\} $ we have that
\begin{align}
    \|x_k - x\|_A \le \left( \sup_{t\in[a,b]} \mid p(t) \mid \right)
    \|x_0 - x\|_A.
\end{align}
To show this, note that every $y \in x_0 + B_k$ has the representation
\begin{align}
    y = x_0 + \sum_{j=0}^{k-1} c_j A^j r_0 = x_0 + q_y(A)r_0
\end{align}
for suitable $c_j$'s and a polynomial $q_y \in \mathbb{P}_{k-1}$. Now
\begin{align}
    y - x &= x_0 - x + q_y(A) r_0 = x_0 - x + q_y(A) \left( b - Ax_0
    \right)\\
          &= x_0 - x + q_y(A) \left(Ax - Ax_0 \right)\\
          &=\underbrace{\left(I - q_y(A)A \right)}_{=: p_y(A),\ p_y \in
          \mathbb{P}_k^{0, 1}} (x_0-x).
\end{align}
By the optimality of $x_k$ we therefore have
\begin{align}
    \|x_k - x\|_A \leq \|p_y(A)(x- x_0)\|_A \qquad \forall y \in x_0+B_k,
\end{align}
and conversely every $p \in \mathbb{P}_k^{0,1}$ arises as $p_y$ for some $y
\in x_0 + B_k$, so in the following we simply write $p$ for $p_y$.
Now we use the fact that $A$ is SPD, so there is an orthogonal matrix $Q$
diagonalizing $A = Q^T \Lambda Q$, with $\Lambda = \text{diag}(\lambda_1, \ldots,
\lambda_n)$ consisting of the eigenvalues of $A$; then
\begin{align}
    A^k = Q^T\Lambda Q \cdots Q^T \Lambda Q = Q^T \Lambda^k Q.
\end{align}
With this, writing $p(t) = \sum_{j=0}^{k} c_j t^j$, the matrix polynomial
transforms to
\begin{align}
    p(A) = \sum_{j=0}^{k} c_j A^j = Q^T \left(\sum_{j=0}^{k} c_j
        \Lambda^j\right) Q = Q^T p(\Lambda) Q.
\end{align}
The norm then becomes
\begin{align}
    \|p(A) (x-x_0)\|^2_A
    &= \langle Ap(A) (x-x_0), p(A)(x-x_0)\rangle \\
    &= \langle Q^T\Lambda Q Q^Tp(\Lambda)Q (x-x_0),
    Q^Tp(\Lambda)Q(x-x_0)\rangle \\
    &= \langle Q^T\Lambda p(\Lambda)Q (x-x_0),
    Q^Tp(\Lambda)Q(x-x_0)\rangle \\
    &= \langle \Lambda p(\Lambda)Q (x-x_0),
    p(\Lambda)Q(x-x_0)\rangle \\
    &= \langle \Lambda^{\frac{1}{2}} p(\Lambda)Q (x-x_0),
    \Lambda^{\frac{1}{2}} p(\Lambda)Q(x-x_0)\rangle \\
    &= \|\Lambda^{\frac{1}{2}}p(\Lambda)Q(x-x_0)\|_2^2\\
    &= \|p(\Lambda)\Lambda^{\frac{1}{2}}Q(x-x_0)\|_2^2\\
    &\leq \|p(\Lambda)\|_2^2 \|\Lambda^{\frac{1}{2}}Q(x-x_0)\|_2^2.
\end{align}
Since $p(\Lambda)$ is diagonal, its spectral norm is the largest modulus of its
diagonal entries, so
\begin{align}
    \|p(\Lambda)\|_2 = \max_{\lambda \in \sigma(A)}  \mid p(\lambda) \mid \leq
    \sup_{t\in[a,b]}  \mid p(t)\mid,
\end{align}
where the bound by the supremum is possible because $\sigma(A) \subset [a, b]$.
As for the second part,
\begin{align}
    \|\Lambda^{\frac{1}{2}}Q(x-x_0)\|_2^2 &= (x-x_0)^T Q^T
    \Lambda^{\frac{1}{2}}\Lambda^{\frac{1}{2}}Q(x-x_0)\\
    &= (x-x_0)^T A (x-x_0) \\
    &= \|x-x_0\|_A^2.
\end{align}
And finally we get the result
\begin{align}
    \|x_k - x\|_A \le \left(\sup_{t\in[a,b]}  \mid p(t)  \mid\right) \|x-x_0\|_A.
\end{align}
Since this bound holds for \textbf{every} $p \in \mathbb{P}_k^{0,1}$, we may
take the infimum over all such polynomials and obtain
\begin{align}
    \|x_k - x\|_A \le \inf_{p \in \mathbb{P}_k^{0,1}}
    \|p\|_{C([a,b])} \, \|x_0 - x\|_A.
\end{align}
\subsection{Exercise 4}
We can proceed as in the last exercise, but with the GMRES method. So
we let $A \in \mathbb{R}^{n \times n}$ be an SPD matrix and $b \in
\mathbb{R}^{n}$ be the right hand side of the linear system. The iterates
$x_k$ of the CG method satisfy the $A^{-1}$-norm optimality
\begin{align}
    \|Ax_k - b\|_{A^{-1}} = \min_{y\in x_0 + C_k} \|Ay - b\|_{A^{-1}},
\end{align}
with $C_k = \text{span}\left\{ p_0, Ap_0, \ldots , A^{k-1}p_0 \right\}$. The
`generalized minimal residual' (GMRES) method instead formally
constructs a sequence of iterates $x_k^G$ by
\begin{align}
    \|Ax^G_k - b \|_2 = \min_{y \in x_0 + C_k}\|Ay - b\|_2.
\end{align}
The GMRES method allows for an error inequality similar to the one observed
in the CG method
\begin{align}
    \|Ax_k^G - b\|_2 \le \inf_{p \in \mathbb{P}_k^{0,1}} \|p(A)\|_2 \|Ax_0
    -b\|_2.
\end{align}
To show this we start off by minimizing over $z \in C_k$:
\begin{align}
    \|Ax_k^G - b\|_2 = \min_{y\in x_0 + C_k} \|Ay - b\|_2 = \min_{z \in
    C_k}\|Az + Ax_0 -b\|_2.
\end{align}
Then for all $z \in C_k$ there exists a $\pi_k \in \mathbb{P}_{k-1}$ such that
\begin{align}
    z = \pi_k(A) p_0 = \pi_k(A) r_0.
\end{align}
Then the minimization can be bounded
\begin{align}
    \min_{z \in C_k} \|Az + Ax_0 - b\|_2 &=
    \min_{\pi_k \in \mathbb{P}_{k-1}}\|A\pi_k(A)r_0 + Ax_0 -b\|_2\\
        &\le \|A\pi_k(A) (b-Ax_0) + Ax_0 -b\|_2\\
        &= \|\underbrace{\left(I-A\pi_k(A)\right)}_{=:p(A),\ p \in
        \mathbb{P}_k^{0,1}}(Ax_0 -b)\|_2\\
        &= \|p(A)(Ax_0-b)\|_2\\
        &\le \|p(A)\|_2 \|Ax_0 -b\|_2.
\end{align}
Following the same argumentation as in Exercise 3 (with $\sigma(A) \subset
[a,b] \subset (0,\infty)$), we get for the norm of the polynomial
\begin{align}
    \|p(A)\|_2  &= \max_{\lambda \in \sigma(A)}  \mid p(\lambda) \mid
    \le \sup_{t\in [a,b]}  \mid p(t) \mid = \|p\|_{C\left([a,b]\right)},
\end{align}
and since this holds for every $p \in \mathbb{P}_k^{0,1}$, taking the infimum
over all such polynomials yields the claimed estimate
\begin{align}
    \|Ax_k^G - b\|_2 \le \inf_{p \in \mathbb{P}_k^{0,1}}
    \|p\|_{C\left([a,b]\right)} \|Ax_0 -b\|_2.
\end{align}
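A remark going slightly beyond the sheet: the identity $\|p(A)\|_2 =
\max_{\lambda \in \sigma(A)} \mid p(\lambda)\mid$ uses that $A$ is SPD (more
generally, normal). For a merely diagonalizable $A = V\Lambda V^{-1}$ one only
obtains
\begin{align}
    \|p(A)\|_2 = \|V p(\Lambda) V^{-1}\|_2 \le \text{cond}_2(V)
    \max_{\lambda \in \sigma(A)}  \mid p(\lambda) \mid,
\end{align}
so the corresponding GMRES bound picks up the additional factor
$\text{cond}_2(V)$.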

\end{document}