main.tex (14814B)
1 \documentclass[a4paper]{article} 2 3 4 \usepackage[T1]{fontenc} 5 \usepackage[utf8]{inputenc} 6 \usepackage{mathptmx} 7 8 %\usepackage{ngerman} % Sprachanpassung Deutsch 9 10 \usepackage{graphicx} 11 \usepackage{geometry} 12 \geometry{a4paper, top=15mm} 13 14 \usepackage{subcaption} 15 \usepackage[shortlabels]{enumitem} 16 \usepackage{amssymb} 17 \usepackage{amsthm} 18 \usepackage{mathtools} 19 \usepackage{braket} 20 \usepackage{bbm} 21 \usepackage{graphicx} 22 \usepackage{float} 23 \usepackage{yhmath} 24 \usepackage{tikz} 25 \usetikzlibrary{patterns,decorations.pathmorphing,positioning} 26 \usetikzlibrary{calc,decorations.markings} 27 28 \usepackage[backend=biber, sorting=none]{biblatex} 29 \addbibresource{uni.bib} 30 31 \usepackage[framemethod=TikZ]{mdframed} 32 33 \tikzstyle{titlered} = 34 [draw=black, thick, fill=white,% 35 text=black, rectangle, 36 right, minimum height=.7cm] 37 38 39 \usepackage[colorlinks=true,naturalnames=true,plainpages=false,pdfpagelabels=true]{hyperref} 40 \usepackage[parfill]{parskip} 41 \usepackage{lipsum} 42 43 44 \usepackage{tcolorbox} 45 \tcbuselibrary{skins,breakable} 46 47 \pagestyle{myheadings} 48 49 \markright{Popović\hfill Tensor Methods \hfill} 50 51 52 \title{University of Vienna\\ Faculty of Mathematics\\ 53 \vspace{1cm}TENSOR METHODS FOR DATA SCIENCE AND SCIENTIFIC COMPUTING 54 } 55 \author{Milutin Popovic} 56 57 \begin{document} 58 \maketitle 59 \tableofcontents 60 \section{Assignment 2} 61 62 Let us introduce the \textit{column-major} convention for writing matrices in 63 the indices notation. For $A \; \in \; \mathbb{R}^{n\times n}$, which has 64 $n^2$ entries can be written down in the \textit{column-major} convention as 65 \begin{align} 66 A = [a_{i+n(j-1)}]_{i,j = 1,\dots,n}. 
67 \end{align}
68 \subsection{Matrix multiplication tensor}
69 The matrix multiplication $AB=C$, for $B, C \in \mathbb{R}^{n\times n}$ is a
70 bilinear operation, thereby there exists a tensor $ T = [t_{ijk}]\in
71 \mathbb{R}^{n^2\times n^2\times n^2}$ such that the matrix multiplication can be
72 represented as
73 \begin{align}
74 c_k = \sum_{i=1}^{n^2}\sum_{j=1}^{n^2}t_{ijk}a_i b_j.
75 \end{align}
76 The tensor $T$ is referred to as the matrix multiplication tensor of order $n$.
77
78 For $n = 2$ we can easily see the non-zero entries by writing out the matrix
79 multiplication in the column major convention
80 \begin{align}
81 c_1 &= a_1b_1 + a_3b_2,\\
82 c_2 &= a_2b_1 + a_4b_2,\\
83 c_3 &= a_1b_3 + a_3b_4,\\
84 c_4 &= a_2b_3 + a_4b_4.
85 \end{align}
86 So the non-zero entries in e.g. $k=1$ are $(1, 1)$ and $(3, 2)$, and so on.
87 Thus the matrix multiplication tensor of $n=2$ is
88 \begin{align}
89 t_{ij1} = \begin{pmatrix}
90 1 & 0 & 0 & 0\\
91 0 & 0 & 0 & 0\\
92 0 & 1 & 0 & 0\\
93 0 & 0 & 0 & 0
94 \end{pmatrix},\\
95 t_{ij2}=
96 \begin{pmatrix}
97 0 & 0 & 0 & 0\\
98 1 & 0 & 0 & 0\\
99 0 & 0 & 0 & 0\\
100 0 & 1 & 0 & 0
101 \end{pmatrix},\\
102 t_{ij3}=
103 \begin{pmatrix}
104 0 & 0 & 1 & 0\\
105 0 & 0 & 0 & 0\\
106 0 & 0 & 0 & 1\\
107 0 & 0 & 0 & 0
108 \end{pmatrix},\\
109 t_{ij4}=
110 \begin{pmatrix}
111 0 & 0 & 0 & 0\\
112 0 & 0 & 1 & 0\\
113 0 & 0 & 0 & 0\\
114 0 & 0 & 0 & 1
115 \end{pmatrix}.
116 \end{align}
117 The CPD (Canonical Polyadic Decomposition) of rank-$n^3$ of the multiplication tensor of
118 order $n = 2$, specified by matrices $U, V, W \;\in \; \mathbb{R}^{4\times 8}$
119 can be represented in such a way that the columns of these matrices satisfy
120 \begin{align}
121 T = \sum_{\alpha=1}^{8}u_\alpha \otimes v_\alpha \otimes w_\alpha.
122 \end{align}
123 Each nonzero entry in $u_\alpha$ represents the row of the nonzero entry in $T$,
124 each nonzero entry in $v_\alpha$ represents the column of the nonzero entry in
125 $T$ and each nonzero entry in $w_\alpha$ represents the location of the $k$-th
126 slice of the nonzero entry, so we have
127 \begin{align}
128 U &=
129 \begin{pmatrix}
130 1 & 0 & 0 & 0 & 1 & 0 & 0 & 0\\
131 0 & 0 & 1 & 0 & 0 & 0 & 1 & 0\\
132 0 & 1 & 0 & 0 & 0 & 1 & 0 & 0\\
133 0 & 0 & 0 & 1 & 0 & 0 & 0 & 1
134 \end{pmatrix},\\
135 V &=
136 \begin{pmatrix}
137 1 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\
138 0 & 1 & 0 & 1 & 0 & 0 & 0 & 0\\
139 0 & 0 & 0 & 0 & 1 & 0 & 1 & 0\\
140 0 & 0 & 0 & 0 & 0 & 1 & 0 & 1
141 \end{pmatrix},\\
142 W &=
143 \begin{pmatrix}
144 1 & 1 & 0 & 0 & 0 & 0 & 0 & 0\\
145 0 & 0 & 1 & 1 & 0 & 0 & 0 & 0\\
146 0 & 0 & 0 & 0 & 1 & 1 & 0 & 0\\
147 0 & 0 & 0 & 0 & 0 & 0 & 1 & 1
148 \end{pmatrix}.
149 \end{align}
150 \subsection{Mode Contraction}
151 Let us introduce a notion of multiplying a matrix with a tensor which is
152 called the mode-$k$ contraction. For $d \in \mathbb{N}, n_1,\dots, n_d \in
153 \mathbb{N}$ and $k\in\{1,\dots,d\}$ the mode-$k$ contraction of a
154 $d$-dimensional tensor $S\in\mathbb{R}^{n_1\times\cdots\times n_d}$ with a
155 matrix $Z \in \mathbb{R}^{r\times n_k}$ is an operation
156 \begin{align}
157 \times_k : [s_{i_1,\dots,i_d}]_{i_1\in I_1,\dots,i_d\in I_d}\mapsto
158 \left[\sum_{i_k=1}^{n_k}z_{\alpha i_k}s_{i_1,\dots,i_d}\right]_{
159 i_1\in I_1, \dots, i_{k-1}\in I_{k-1}, \alpha\in\{1,\dots,r\},
160 i_{k+1}\in I_{k+1}, \dots, i_d \in I_d},
161 \end{align}
162 where $I_l = \{1, \dots, n_l\}$ for $l \in \{1,\dots, d\}$. Now that we know
163 the definition we may write
164 \begin{align}
165 T = Z \times_k S,
166 \end{align}
167 and $T$ is in $\mathbb{R}^{n_1\times \cdots n_{k-1} \times r\times n_{k+1}
168 \times \cdots \times n_d}$.
The algorithm constructed for the mode-$k$
169 contraction is structured as follows
170 \begin{enumerate}
171 \item initialize an empty array $T$
172 \item iterate $\alpha$ from 1 to $r$
173 \item initialize a zero array $s$ of size $n_1\times\cdots
174 n_{k-1}\times n_{k+1} \times \cdots n_d$
175 \item for each $j \in \{1, \dots , n_k\}$ add $Z_{\alpha, j}S_{i_1,
176 \dots, i_{k-1}, j, i_{k+1}, \dots ,i_d}$ to $s$
177 \item append $s$ to $T$
178 \item end iteration
179 \item reshape $T$ to $n_1\times \cdots n_{k-1} \times r\times n_{k+1}
180 \times \cdots \times n_d$ and return it
181 \end{enumerate}
182 \subsection{Evaluating a CPD}
183 Let us now implement a function that will convert a rank-$r$ CPD of a
184 $d$-dimensional tensor $S$ of size $n_1 \times \cdots \times n_d$, given
185 $U^{(k)} \in \mathbb{R}^{n_k\times r}$ for $k\in \{1, \dots, d\}$, into the full tensor
186 \begin{align}
187 S = \sum_{\alpha=1}^r u_\alpha^{(1)} \otimes \cdots \otimes
188 u_\alpha^{(d)}.
189 \end{align}
190 The implementation of this is straightforward. While iterating over $\alpha$
191 all we have to do is call a function that will do a Kronecker product of the
192 $\alpha$-th column slice of a matrix $U^{(k)}$ with the same order column
193 slice of the matrix $U^{(k+1)}$, for each $k \in \{1,\dots, d\}$. In
194 ``julia'' the only thing we have to be aware of is that the \textit{kron}
195 function reverses the order for the Kronecker product, that is, it does the
196 following
197 \begin{align}
198 u \otimes v = \textit{kron}(v, u).
199 \end{align}
200 Meaning that if we pass a list of CPD matrices $[U^{(1)}, \dots, U^{(d)}]$ as
201 arguments, we need to reverse its order\cite{code}.
202 \subsection{Implementing the multiplication tensor and its CPD}
203 Once again to recapitulate the multiplication tensor is of the size $T \in
204 \mathbb{R}^{n^2\times n^2 \times n^2}$.
With some tinkering we see that the
205 nonzero entries in the $k$-th end-dimension correspond to the matrix
206 multiplication in the column major convention, and with some more tinkering
207 we found a way to construct a multiplication tensor for an arbitrary
208 dimension (finite) using three loops.
209 \begin{enumerate}
210 \item loop over the column indices in the first row in the column major
211 convention $m=1:n:n^2$
212 \item loop over the row indices in the first column in the column major
213 convention $l=1:n$
214 \item $I = l:n:n^2$, $J = m:(m+n-1)$, $K = (l-1)+m$
215 \item $T_{i, j, k} = 1$ for every $i\in I, j\in J,k \in K$
216 \end{enumerate}
217 Additionally we can even construct a CPD of this tensor in the same loop
218 since we know the indices $i, j, k$ of the nonzero entries, $U$ would be
219 filled with the $i$-th row entry of each of the $n^3$ columns, $V$ would be filled
220 in the $j$-th row entry of each of the $n^3$ columns and finally $W$ would be
221 filled in the corresponding $k$-th row entry of each of the $n^3$ columns
222 \cite{code}.
223
224 Furthermore we can evaluate the matrix multiplication only with the CPD of the
225 matrix multiplication tensor $T$, with $U, V, W$ without computing $T$. This
226 is done by rewriting the matrix multiplication in the column-major convention
227 with $U, V, W$ into
228 \begin{align}
229 c_k = \sum_{\alpha=1}^r \left(\sum_{i=1}^{n^2} a_i
230 u_{i\alpha}\right)\cdot
231 \left(\sum_{j = 1}^{n^2} b_j v_{j\alpha}\right) \cdot w_{k\alpha}.
232 \end{align}
233 The implementation is straightforward; it uses one loop and only a
234 summation function \cite{code}.
235 \subsection{Interpreting the Strassen algorithm in terms of CPD}
236 For $n=2$ we will write the Strassen algorithm and its CPD in the column
237 major convention. This algorithm allows matrix multiplication of square
238 $2\times2$ matrices (even of order $2^n\times 2^n$) in seven essential
239 multiplication steps instead of the stubborn eight.
The Strassen algorithm
240 defines seven new coefficients $M_l$, where $l\in \{1, \dots, 7\}$
241 \begin{align}
242 M_1 &:= (a_1 + a_4)(b_1 + b_4), \\
243 M_2 &:= (a_2 + a_4)b_1,\\
244 M_3 &:= a_1(b_3 - b_4),\\
245 M_4 &:= a_4(b_2 - b_1),\\
246 M_5 &:= (a_1 + a_3)b_4,\\
247 M_6 &:= (a_2 - a_1)(b_1 + b_3),\\
248 M_7 &:= (a_3 - a_4)(b_2 + b_4).
249 \end{align}
250 Then the coefficients $c_k$ can be calculated as
251 \begin{align}
252 c_1 &= M_1 + M_4 - M_5 + M_7,\\
253 c_2 &= M_2 + M_4,\\
254 c_3 &= M_3 + M_5,\\
255 c_4 &= M_1 - M_2 + M_3 + M_6.
256 \end{align}
257 This ultimately means that there exists a rank-$7$ CPD of the
258 matrix multiplication tensor $T$, with matrices $U, V, W \in
259 \mathbb{R}^{4\times 7}$. Here $U$ represents the placement of $a_i$ in the
260 definition of $M_l$ filled in the columns of $U$, $V$ represents the existence
261 of $b_j$ in the definition of $M_l$ filled in the columns $V$ and $W$
262 represents the $M_l$ placement in the coefficients $c_k$. Do note that these
263 matrices have entries $-1, 0, 1$ corresponding to the addition/subtraction as
264 defined in the $M_l$'s. The matrices $U, V, W$ are the following
265 \begin{align}
266 U &=
267 \begin{pmatrix}
268 1 & 0 & 1 & 0 & 1 & -1 & 0 \\
269 0 & 1 & 0 & 0 & 0 & 1 & 0 \\
270 0 & 0 & 0 & 0 & 1 & 0 & 1 \\
271 1 & 1 & 0 & 1 & 0 & 0 & -1
272 \end{pmatrix},\\
273 V &=
274 \begin{pmatrix}
275 1 & 1 & 0 & -1 & 0 & 1 & 0\\
276 0 & 0 & 0 & 1 & 0 & 0 & 1\\
277 0 & 0 & 1 & 0 & 0 & 1 & 0\\
278 1 & 0 & -1 & 0 & 1 & 0 & 1
279 \end{pmatrix},\\
280 W &=
281 \begin{pmatrix}
282 1 & 0 & 0 & 1 & -1 & 0 & 1\\
283 0 & 1 & 0 & 1 & 0 & 0 & 0\\
284 0 & 0 & 1 & 0 & 1 & 0 & 0\\
285 1 & -1 & 1 & 0 & 0 & 1 & 0
286 \end{pmatrix}.
287 \end{align}
288 \nocite{code}
289 \subsection{Tucker Structure and CP rank !!(NO real idea what should be
290 done here)!!}
291 Let $\hat{a} = \begin{pmatrix}1 & 0\end{pmatrix}^T$ and $\hat{b} =
292 \begin{pmatrix}0 & 1\end{pmatrix}^T$, we can define a Laplace-like tensor $T$
293 using the Kronecker product
294 \begin{align}\label{eq: cpd}
295 \hat{T} = \hat{b}\otimes \hat{a} \otimes \hat{a}+
296 \hat{a}\otimes \hat{b} \otimes \hat{a}+
297 \hat{a}\otimes \hat{a} \otimes \hat{b} \;\;\; \in
298 \mathbb{R}^{2\times2\times2},
299 \end{align}
300 which represents $T$ as a CPD of rank 3. The CPD is a special case of the
301 Tucker decomposition with a superdiagonal Tucker core, in this case the
302 Tucker core is
303 \begin{align}
304 S_{i_1i_2i_3} = \begin{cases}
305 1\;\;\;\; \text{for} \;\; i_1=i_2=i_3\\
306 0\;\;\;\; \text{else}
307 \end{cases}
308 \end{align}
309 for $i_1, i_2, i_3 = 1,2,3$. We may write the Tucker decomposition of $T$ in
310 terms of matrices $U_1, U_2, U_3 \in \mathbb{R}^{2\times 3}$ constructed by
311 the $\hat{a}$ and $\hat{b}$ as in \ref{eq: cpd} in each of the summation
312 steps. Then $U_1$ would have $\hat{b}, \hat{a}, \hat{a}$ in the columns and
313 so on. Now we may write the Tucker decomposition for $\hat{T}_{i_1i_2i_3}$ for $i_1,
314 i_2,i_3
315 = 1, 2$ as
316 \begin{align}
317 \hat{T}_{i_1i_2i_3} = \sum_{\alpha_1=1}^3\sum_{\alpha_2=1}^3\sum_{\alpha_3=1}^3
318 (U_1)_{i_1\alpha_1} (U_2)_{i_2\alpha_2} (U_3)_{i_3\alpha_3}
319 S_{\alpha_1\alpha_2\alpha_3}.
320 \end{align}
321 With this we can derive the first, second and third Tucker unfolding matrix
322 by rewriting the Tucker decomposition
323 \begin{align}
324 \hat{T}_{i_1i_2i_3} &=
325 \sum_{\alpha_k = 1}^3 (U_k)_{i_k\alpha_k} \sum_{\alpha_{k-1}=1}^3
326 \sum_{\alpha_{k+1}=1}^3(U_{k-1})_{i_{k-1}\alpha_{k-1}}
327 (U_{k+1})_{i_{k+1}\alpha_{k+1}}S_{\alpha_1 \alpha_2 \alpha_3}\\
328 &=
329 \sum_{\alpha_k = 1}^3 (U_k)_{i_k\alpha_k} (Z_k)_{i_{k-1}i_{k+1}\alpha_k},
330 \end{align}
331 for $k=1, 2, 3$ cyclic.
We call
332 \begin{align}
333 \mathcal{U}^T_k := U_k Z_k^T
334 \end{align}
335 the $k$-th Tucker unfolding. It turns out in our case all of them are unique
336 and thereby the CPD is unique; hence the CPD rank of
337 $\hat{T}$ is three.
338
339 Let us consider a richer structure, for $2<d \in \mathbb{N}$ and $n_1, \dots
340 ,n_d\in \mathbb{N}$ all greater than one. For $k \in \{1, 2, 3\}$ consider
341 $a_k, b_k \in \mathbb{R}^{n_k}$ linearly independent and for $\mathbb{N} \ni
342 k \geq 4$ consider $c_k\in \mathbb{R}^{n_k}$ nonzero. Then we define a
343 Laplace-like tensor
344 \begin{align}
345 T = b_1 \otimes a_2 \otimes a_3 \otimes c_4\otimes \cdots \otimes c_d
346 +a_1 \otimes b_2 \otimes a_3 \otimes c_4\otimes \cdots \otimes c_d
347 +a_1 \otimes a_2 \otimes b_3 \otimes c_4\otimes \cdots \otimes c_d.
348 \end{align}
349 The matrices $U^{(k)} \in \mathbb{R}^{n_k \times 3}$ for $k= 1, 2 ,3$ are the
350 following
351 \begin{align}
352 U^{(1)} &= (b_1\; a_1\; a_1),\\
353 U^{(2)} &= (a_2\; b_2\; a_2),\\
354 U^{(3)} &= (a_3\; a_3\; b_3).
355 \end{align}
356 The matrices $U^{(k)} \in \mathbb{R}^{n_k \times 3}$ for $k = 4, \dots, d$
357 are
358 \begin{align}
359 U^{(4)} &= (c_4\; c_4\; c_4),\\
360 &\vdots\nonumber\\
361 U^{(d)} &= (c_d\; c_d\; c_d).
362 \end{align}
363 The Tucker core $S \in \mathbb{R}^{3\times 3\times3}$ is a superdiagonal
364 tensor of order three. We can write the Tucker decomposition of $T$ as
365 \begin{align}
366 T_{i_1\dots i_d} &=
367 \sum_{\alpha_k = 1}^3 (U^{(k)})_{i_k\alpha_k}\cdot\nonumber
368 \\&\cdot\sum_{\alpha_1,\dots,\alpha_{k-1},\alpha_{k+1},\dots, \alpha_d=1}^3
369 (U^{(1)})_{i_1\alpha_1}\cdots(U^{(k-1)})_{i_{k-1}\alpha_{k-1}}
370 (U^{(k+1)})_{i_{k+1}\alpha_{k+1}}\cdots(U^{(d)})_{i_d\alpha_d}
371 S_{\alpha_1 \dots \alpha_d}\\
372 &= \sum_{\alpha_k = 1}^3 (U^{(k)})_{i_k\alpha_k} (Z_{k})_{i_1,\dots
373 i_{k-1}i_{k+1}\dots i_d \alpha_k}.
374 \end{align}
375 All the Tucker unfoldings $\mathcal{U}_{k}$ for $k\geq 4$ are non-unique,
376 because the matrices $U^{(k)}$ for $k \geq 4$ are constructed with the same
377 column vectors. On the other hand we may write the Tucker decomposition
378 with the Kronecker product like this
379 \begin{align}
380 T_{(k)} =U^{(k)} \cdot S_k \cdot (U^{(1)}\otimes\cdots\otimes
381 U^{(k-1)}\otimes U^{(k+1)}\otimes \cdots \otimes U^{(d)})^T.
382 \end{align}
383
384 \printbibliography
385 \end{document}