tensor_methods

Tensor Methods for DS and SC
git clone git://popovic.xyz/tensor_methods.git
Log | Files | Refs

main.tex (14814B)


      1 \documentclass[a4paper]{article}
      2 
      3 
      4 \usepackage[T1]{fontenc}
      5 \usepackage[utf8]{inputenc}
      6 \usepackage{mathptmx}
      7 
      8 %\usepackage{ngerman}	% Sprachanpassung Deutsch
      9 
     10 \usepackage{graphicx}
     11 \usepackage{geometry}
     12 \geometry{a4paper, top=15mm}
     13 
     14 \usepackage{subcaption}
     15 \usepackage[shortlabels]{enumitem}
     16 \usepackage{amssymb}
     17 \usepackage{amsthm}
     18 \usepackage{mathtools}
     19 \usepackage{braket}
     20 \usepackage{bbm}
     21 \usepackage{graphicx}
     22 \usepackage{float}
     23 \usepackage{yhmath}
     24 \usepackage{tikz}
     25 \usetikzlibrary{patterns,decorations.pathmorphing,positioning}
     26 \usetikzlibrary{calc,decorations.markings}
     27 
     28 \usepackage[backend=biber, sorting=none]{biblatex}
     29 \addbibresource{uni.bib}
     30 
     31 \usepackage[framemethod=TikZ]{mdframed}
     32 
     33 \tikzstyle{titlered} =
     34     [draw=black, thick, fill=white,%
     35         text=black, rectangle,
     36         right, minimum height=.7cm]
     37 
     38 
     39 \usepackage[colorlinks=true,naturalnames=true,plainpages=false,pdfpagelabels=true]{hyperref}
     40 \usepackage[parfill]{parskip}
     41 \usepackage{lipsum}
     42 
     43 
     44 \usepackage{tcolorbox}
     45 \tcbuselibrary{skins,breakable}
     46 
     47 \pagestyle{myheadings}
     48 
     49 \markright{Popović\hfill Tensor Methods \hfill}
     50 
     51 
     52 \title{University of Vienna\\ Faculty of Mathematics\\
     53 \vspace{1cm}TENSOR METHODS FOR DATA SCIENCE AND SCIENTIFIC COMPUTING
     54 }
     55 \author{Milutin Popovic}
     56 
     57 \begin{document}
     58 \maketitle
     59 \tableofcontents
     60 \section{Assignment 2}
     61 
     62 Let us introduce the \textit{column-major} convention for writing matrices in
     63 the indices notation. For $A \; \in \; \mathbb{R}^{n\times n}$, which has
     64 $n^2$ entries can be written down in the \textit{column-major} convention as
     65 \begin{align}
     66     A = [a_{i+n(j-1)}]_{i,j = 1,\dots,n}.
     67 \end{align}
     68 \subsection{Matrix multiplication tensor}
     69 The matrix multiplication $AB=C$, for $B, C \in \mathbb{R}^{n\times n}$ is a
     70 bilinear operation, thereby there exists a tensor $ T = [t_{ijk}]\in
     71 \mathbb{R}^{n^2\times n^2\times n^2}$ such that the matrix multiplication can be
     72 represented as
     73 \begin{align}
     74     c_k = \sum_{i=1}^{n^2}\sum_{j=1}^{n^2}t_{ijk}a_i b_j.
     75 \end{align}
     76 The tensor $T$ is referred to as the matrix multiplication tensor of order $n$.
     77 
     78 For $n = 2$ we can easily see the non-zero entries by writing out the matrix
     79 multiplication in the column major convention
     80 \begin{align}
     81     c_1 &= a_1b_1 + a_3b_2,\\
     82     c_2 &= a_2b_1 + a_4b_2,\\
     83     c_3 &= a_1b_3 + a_3b_4,\\
     84     c_4 &= a_2b_3 + a_4b_4.
     85 \end{align}
     86 So the non-zero entries in e.g. $k=1$ are $(1, 1)$ and $(3, 2)$, and so on.
     87 Thus the matrix multiplication Tensor of $n=2$ is
     88 \begin{align}
     89 t_{ij1} = \begin{pmatrix}
     90     1 & 0 & 0 & 0\\
     91     0 & 0 & 0 & 0\\
     92     0 & 1 & 0 & 0\\
     93     0 & 0 & 0 & 0
     94     \end{pmatrix},\\
     95 t_{ij2}=
     96 \begin{pmatrix}
     97     0 & 0 & 0 & 0\\
     98     1 & 0 & 0 & 0\\
     99     0 & 0 & 0 & 0\\
    100     0 & 1 & 0 & 0
    101 \end{pmatrix},\\
    102 t_{ij3}=
    103 \begin{pmatrix}
    104     0 & 0 & 1 & 0\\
    105     0 & 0 & 0 & 0\\
    106     0 & 0 & 0 & 1\\
    107     0 & 0 & 0 & 0
    108 \end{pmatrix},\\
    109 t_{ij4}=
    110 \begin{pmatrix}
    111     0 & 0 & 0 & 0\\
    112     0 & 0 & 1 & 0\\
    113     0 & 0 & 0 & 0\\
    114     0 & 0 & 0 & 1
    115 \end{pmatrix}.
    116 \end{align}
    117 The $CPD$ (Canonical Polyadic Decomposition) of rank-$n^3$ of the multiplication tensor of
    118 order $n = 2$, specified by matrices $U, V, W \;\in \; \mathbb{R}^{4\times 8}$
    119 can be represented in such a way that the columns of these matrices satisfy
    120 \begin{align}
    121     T = \sum_{\alpha=1}^{n^3}u_\alpha \otimes v_\alpha \otimes w_\alpha.
    122 \end{align}
    123 Each nonzero entry in $u_\alpha$ represents the row of the nonzero entry in $T$,
    124 each nonzero entry in $v_\alpha$ represents the column of the nonzero entry in
    125 $T$ and each nonzero entry in $w_\alpha$ represents the location of the $k$-th
    126 slice of the nonzero entry, so we have
    127 \begin{align}
    128     U &=
    129     \begin{pmatrix}
    130         1 & 0 & 0 & 0 & 1 & 0 & 0 & 0\\
    131         0 & 0 & 1 & 0 & 0 & 0 & 1 & 0\\
    132         0 & 1 & 0 & 0 & 0 & 1 & 0 & 0\\
    133         0 & 0 & 0 & 1 & 0 & 0 & 0 & 1
    134     \end{pmatrix},\\
    135     V &=
    136     \begin{pmatrix}
    137         1 & 0 & 1 & 0 & 0 & 0 & 0 & 0\\
    138         0 & 1 & 0 & 1 & 0 & 0 & 0 & 0\\
    139         0 & 0 & 0 & 0 & 1 & 0 & 1 & 0\\
    140         0 & 0 & 0 & 0 & 0 & 1 & 0 & 1
    141     \end{pmatrix},\\
    142     W &=
    143     \begin{pmatrix}
    144         1 & 1 & 0 & 0 & 0 & 0 & 0 & 0\\
    145         0 & 0 & 1 & 1 & 0 & 0 & 0 & 0\\
    146         0 & 0 & 0 & 0 & 1 & 1 & 0 & 0\\
    147         0 & 0 & 0 & 0 & 0 & 0 & 1 & 1
    148     \end{pmatrix}.
    149 \end{align}
    150 \subsection{Mode Contraction}
    151 Let us introduce a notion of multiplying a matrix with a tensor which is
    152 called the mode-$k$ contraction. For $d \in \mathbb{N}, n_1,\dots, n_d \in
    153 \mathbb{N}$ and $k\in\{1,\dots,d\}$ the mode-$k$ contraction of a
    154 $d$-dimensional tensor $S\in\mathbb{R}^{n_1\times\cdots\times n_d}$ with a
    155 matrix $Z \in \mathbb{R}^{r\times n_k}$ is an operation
    156 \begin{align}
    157     \times_k : [s_{i_1,\dots,i_d}]_{i_1\in I_1,\dots,i_d\in I_d}\mapsto
    158     \left[\sum_{i_k=1}^{n_k}z_{\alpha i_k}s_{i_1,\dots,i_d}\right]_{
    159     i_1\in I_1, \dots, i_{k-1}\in I_{k-1}, \alpha\in\{1,\dots,r\},
    160 i_{k+1}\in I_{k+1}, \dots, i_d \in I_d},
    161 \end{align}
    162 where $I_l = \{1, \dots, n_l\}$ for $l \in \{1,\dots, d\}$. Now that we know
    163 the definition we may write
    164 \begin{align}
    165         T = Z \times_k S,
    166 \end{align}
    167 and $T$ is in $\mathbb{R}^{n_1\times \cdots \times n_{k-1} \times r\times n_{k+1}
    168 \times \cdots \times n_d}$. The algorithm constructed for the mode-$k$
    169 contraction is structured as follows
    170 \begin{enumerate}
    171     \item initialize an empty array $T$
    172     \item iterate $\alpha$ from 1 to $r$
    173     \item initialize a zero array $s$ of size $n_1\times\cdots\times
    174         n_{k-1}\times n_{k+1} \times \cdots \times n_d$
    175     \item for each $j \in \{1, \dots , n_k\}$ add $Z_{\alpha, j}S_{i_1,
    176         \dots, i_{k-1}, j, i_{k+1}, \dots ,i_d}$ to $s$
    177     \item append $s$ to $T$
    178         \item end iteration
    179     \item reshape $T$ to $n_1\times \cdots \times n_{k-1} \times r\times n_{k+1}
    180         \times \cdots \times n_d$ and return it
    181 \end{enumerate}
    182 \subsection{Evaluating a CPD}
    183 Let us now implement a function that will convert a rank-$r$ CPD of a
    184 $d$-dimensional tensor $S$ of size $n_1 \times \cdots \times n_d$ into the
    185 full tensor, given $U^{(k)} \in \mathbb{R}^{n_k\times r}$ for $k\in \{1, \dots, d\}$.
    186 \begin{align}
    187     S = \sum_{\alpha=1}^r u_\alpha^{(1)} \otimes \cdots \otimes
    188     u_\alpha^{(d)}.
    189 \end{align}
    190 The implementation of this is straight forward. While iterating over $\alpha$
    191 all we have to do is call a function that will do a Kronecker product of the
    192 $\alpha$-th column slice of a matrix $U^{(k)}$ with  the same order column
    193 slice of the matrix $U^{(k+1)}$, for each $k \in \{1,\dots, d\}$. In
    194 ``julia'' the only thing we have to be aware of is that the \textit{kron}
    195 function reverses the order for the Kronecker product that is it does the
    196 following
    197 \begin{align}
    198         u \otimes v = \textit{kron}(v, u).
    199 \end{align}
    200 Meaning that if we pass a list of CPD matrices $[U^{(1)}, \dots, U^{(d)}]$ as
    201 arguments, we need to reverse its order\cite{code}.
    202 \subsection{Implementing the multiplication tensor and its CPD}
    203 Once again to recapitulate the multiplication tensor is of the size $T \in
    204 \mathbb{R}^{n^2\times n^2 \times n^2}$. With some tinkering we see that the
    205 nonzero entries in the $k$-th slice of the last dimension correspond to the matrix
    206 multiplication in the column major convention, and with some more tinkering
    207 we found a way to construct a multiplication tensor for an arbitrary
    208 dimension (finite) using three loops.
    209 \begin{enumerate}
    210     \item loop over the column indices in the first row in the column major
    211         convention $m=1:n:n^2$
    212     \item loop over the row indices in the first column in the column major
    213         convention $l=1:n$
    214     \item $I = l:n:n^2$, $J = m:(m+n-1)$, $K = (l-1)+m$
    215     \item $T_{i, j, k} = 1$ for every $i\in I, j\in J,k \in K$
    216 \end{enumerate}
    217 Additionally we can even construct a CPD of this tensor in the same loop
    218 since we know the indices $i, j, k$ of the nonzero entries, $U$ would be
    219 filled with the $i$-th row entry of each $n^3$ columns, $V$ would be filled
    220 in the $j$-th row entry of the each $n^3$ columns and finally $W$ would be
    221 filled in the corresponding $k$-th row entry of the each $n^3$ columns
    222 \cite{code}.
    223 
    224 Furthermore we can evaluate the matrix multiplication only with the CPD of the
    225 matrix multiplication tensor $T$, with $U, V, W$ without computing $T$. This
    226 is done by rewriting the matrix multiplication in the column-major convention
    227 with $U, V, W$ into
    228 \begin{align}
    229     c_k = \sum_{\alpha=1}^r \left(\sum_{i=1}^{n^2} a_i
    230     u_{i\alpha}\right)\cdot
    231         \left(\sum_{j = 1}^{n^2} b_j v_{j\alpha}\right) \cdot w_{k\alpha}.
    232 \end{align}
    233 The implementation is straight forward it uses one loop and only a
    234 summation function \cite{code}.
    235 \subsection{Interpreting the Strassen algorithm in terms of CPD}
    236 For $n=2$ we will write the Strassen algorithm and its CPD in the column
    237 major convention. This algorithm allows matrix multiplication of square
    238 $2\times2$ matrices (even of order $2^n\times 2^n$) in seven essential
    239 multiplication steps instead of the stubborn eight. The Strassen algorithm
    240 defines seven new coefficients $M_l$, where $l\in \{1, \dots, 7\}$
    241 \begin{align}
    242     M_1 &:= (a_1 + a_4)(b_1 + b_4), \\
    243     M_2 &:= (a_2 + a_4)b_1,\\
    244     M_3 &:= a_1(b_3 - b_4),\\
    245     M_4 &:= a_4(b_2 - b_1),\\
    246     M_5 &:= (a_1 + a_3)b_4,\\
    247     M_6 &:= (a_2 - a_1)(b_1 + b_3),\\
    248     M_7 &:= (a_3 - a_4)(b_2 + b_4).
    249 \end{align}
    250 Then the coefficients $c_k$ can be calculated as
    251 \begin{align}
    252     c_1 &= M_1 + M_4 - M_5 + M_7,\\
    253     c_2 &= M_2 + M_4,\\
    254     c_3 &= M_3 + M_5,\\
    255     c_4 &= M_1 - M_2 + M_3 + M_6.
    256 \end{align}
    257 This ultimately means that there exists an rank-$7$ CPD decomposition of the
    258 matrix multiplication tensor $T$, with matrices $U, V, W \in
    259 \mathbb{R}^{4\times 7}$. By that $U$ represents the placement of $a_i$ in the
    260 definition of $M_l$ filled in the columns of $U$, $V$ represents the existence
    261 of $b_j$ in the definition of $M_l$ filled in the columns $V$ and $W$
    262 represents the $M_l$ placement in the coefficients $c_k$. Do note that these
    263 matrices have entries $-1, 0, 1$ corresponding to the addition/subtraction as
    264 defined in the $M_l$'s. The matrices $U, V, W$ are the following
    265 \begin{align}
    266     U &=
    267     \begin{pmatrix}
    268         1 & 0 & 1 & 0 & 1 & -1 & 0 \\
    269         0 & 1 & 0 & 0 & 0 & 1 & 0 \\
    270         0 & 0 & 0 & 0 & 1 & 0 & 1 \\
    271         1 & 1 & 0 & 1 & 0 & 0 & -1
    272     \end{pmatrix},\\
    273     V &=
    274     \begin{pmatrix}
    275         1 & 1 & 0 & -1 & 0 & 1 & 0\\
    276         0 & 0 & 0 & 1 & 0 & 0 & 1\\
    277         0 & 0 & 1 & 0 & 0 & 1 & 0\\
    278         1 & 0 & -1 & 0 & 1 & 0 & 1
    279     \end{pmatrix},\\
    280     W &=
    281     \begin{pmatrix}
    282         1 & 0 & 0 & 1 & -1 & 0 & 1\\
    283         0 & 1 & 0 & 1 & 0 & 0 & 0\\
    284         0 & 0 & 1 & 0 & 1 & 0 & 0\\
    285         1 & -1 & 1 & 0 & 0 & 1 & 0
    286     \end{pmatrix}.
    287 \end{align}
    288 \nocite{code}
    289 \subsection{Tucker Structure and CP rank !!(NO real idea what  should be
    290 done here)!!}
    291 Let $\hat{a} = \begin{pmatrix}1 & 0\end{pmatrix}^T$ and $\hat{b} =
    292 \begin{pmatrix}0 & 1\end{pmatrix}^T$, we can define a Laplace-like tensor $T$
    293 using the Kronecker product
    294 \begin{align}\label{eq: cpd}
    295     \hat{T} = \hat{b}\otimes \hat{a} \otimes \hat{a}+
    296         \hat{a}\otimes \hat{b} \otimes \hat{a}+
    297         \hat{a}\otimes \hat{a} \otimes \hat{b} \;\;\; \in
    298         \mathbb{R}^{2\times2\times2},
    299 \end{align}
    300 which represents $T$ as a CPD of rank 3. The CPD is a special case of the
    301 Tucker decomposition with a super diagonal tucker core, in this case the
    302 tucker core is
    303 \begin{align}
    304     S_{i_1i_2i_3} = \begin{cases}
    305                     1 & \text{for } i_1=i_2=i_3\\
    306                     0 & \text{else}
    307                 \end{cases}
    308 \end{align}
    309 for $i_1, i_2, i_3 = 1,2,3$. We may write the tucker decomposition of $T$ in
    310 terms of matrices $U_1, U_2, U_3 \in \mathbb{R}^{2\times 3}$ constructed by
    311 the $\hat{a}$ and $\hat{b}$ as in \ref{eq: cpd} in each of the summation
    312 steps.  Then $U_1$ would have $\hat{b}, \hat{a}, \hat{a}$ in the columns and
    313 so on.  Now we may write the Tucker decomposition for $\hat{T}_{i_1i_2i_3}$
    314 for $i_1, i_2, i_3
    315 = 1, 2$ as
    316 \begin{align}
    317     \hat{T}_{i_1i_2i_3} = \sum_{\alpha_1=1}^3\sum_{\alpha_2=1}^3\sum_{\alpha_3=1}^3
    318     (U_1)_{i_1\alpha_1} (U_2)_{i_2\alpha_2}  (U_3)_{i_3\alpha_3}
    319     S_{\alpha_1\alpha_2\alpha_3}.
    320 \end{align}
    321 With this we can derive the first, second and third Tucker unfolding matrix
    322 by rewriting the Tucker decomposition
    323 \begin{align}
    324     \hat{T}_{i_1i_2i_3} &=
    325     \sum_{\alpha_k = 1}^3 (U_k)_{i_k\alpha_k} \sum_{\alpha_{k-1}=1}^3
    326     \sum_{\alpha_{k+1}=1}^3(U_{k-1})_{i_{k-1}\alpha_{k-1}}
    327     (U_{k+1})_{i_{k+1}\alpha_{k+1}}S_{\alpha_1 \alpha_2 \alpha_3}\\
    328                   &=
    329     \sum_{\alpha_k = 1}^3 (U_k)_{i_k\alpha_k} (Z_k)_{i_{k-1}i_{k+1}\alpha_k},
    330 \end{align}
    331 for $k=1, 2, 3$ cyclic. We call
    332 \begin{align}
    333     \mathcal{U}^T_k := U_k Z_k^T
    334 \end{align}
    335 the $k$-th Tucker unfolding. It turns out in our case all of them are unique
    336 and thereby the CPD decomposition is unique, thereby the CPD rank of
    337 $\hat{T}$ is three.
    338 
    339 Let us consider a richer structure, for $2<d \in \mathbb{N}$ and $n_1, \dots,
    340 n_d\in \mathbb{N}$ all greater than one. For $k \in \{1, 2, 3\}$ consider
    341 $a_k, b_k \in \mathbb{R}^{n_k}$ linearly independent and for $\mathbb{N} \ni
    342 k \geq 4$ consider $c_k\in \mathbb{R}^{n_k}$ nonzero. Then we define a
    343 Laplace-like tensor
    344 \begin{align}
    345     T = b_1 \otimes a_2 \otimes a_3 \otimes c_4\otimes \cdots \otimes c_d
    346 +a_1 \otimes b_2 \otimes a_3 \otimes c_4\otimes \cdots \otimes c_d
    347 +a_1 \otimes a_2 \otimes b_3 \otimes c_4\otimes \cdots \otimes c_d.
    348 \end{align}
    349 The matrices $U^{(k)} \in \mathbb{R}^{n_k \times 3}$ for $k= 1, 2 ,3$ are the
    350 following
    351 \begin{align}
    352     U^{(1)} &= (b_1\; a_1\; a_1),\\
    353     U^{(2)} &= (a_2\; b_2\; a_2),\\
    354     U^{(3)} &= (a_3\; a_3\; b_3).
    355 \end{align}
    356 The matrices $U^{(k)} \in \mathbb{R}^{n_k \times 3}$ for $k = 4, \dots, d$
    357 are
    358 \begin{align}
    359     U^{(4)} &= (c_4\; c_4\; c_4).\\
    360             &\vdots\nonumber\\
    361     U^{(d)} &= (c_d\; c_d\; c_d).
    362 \end{align}
    363 The Tucker core $S \in \mathbb{R}^{3\times 3\times3}$ is a superdiagonal
    364 tensor of order three. We can write the Tucker decomposition of $T$ as
    365 \begin{align}
    366     \hat{T}_{i_1\dots i_d} &=
    367     \sum_{\alpha_k = 1}^3 (U^{(k)})_{i_k\alpha_k}\cdot\nonumber
    368 \\&\cdot\sum_{\alpha_1,\dots,\alpha_{k-1},\alpha_{k+1},\dots, \alpha_d}^3
    369        (U^{(1)})_{i_1\alpha_1}\cdots(U^{(k-1)})_{i_{k-1}\alpha_{k-1}}
    370     (U^{(k+1)})_{i_{k+1}\alpha_{k+1}}\cdots(U^{(d)})_{i_d\alpha_d}
    371     S_{\alpha_1 \dots \alpha_d}\\
    372        &= \sum_{\alpha_k = 1}^3 (U^{(k)})_{i_k\alpha_k} (Z_{k})_{i_1,\dots
    373        i_{k-1}i_{k+1}\dots i_d \alpha_k}.
    374 \end{align}
    375 All the Tucker unfoldings $\mathcal{U}_{k}$ for $k\geq 4$ are non-unique,
    376 because the matrices $U^{(k)}$ for $k \geq 4$ are constructed with the same
    377 column vectors. On the other hand we may write the Tucker decomposition in
    378 with the Kronecker product like this
    379 \begin{align}
    380     T_{(k)} =U^{(k)} \cdot S_k \cdot (U^{(1)}\otimes\cdots\otimes
    381     U^{(k-1)}\otimes U^{(k+1)}\otimes \cdots \otimes U^{(d)})^T.
    382 \end{align}
    383 
    384 \printbibliography
    385 \end{document}