tensor_methods

Tensor Methods for DS and SC
git clone git://popovic.xyz/tensor_methods.git
Log | Files | Refs

commit 29d9d951e8347f500087fe53a3184bfbef1146a9
parent ba0d807d22b0ff5d200e489d03f06aa60e8dc2ff
Author: miksa234 <milutin@popovic.xyz>
Date:   Sun, 14 Nov 2021 11:46:19 +0100

done sesh3 without last exercise

Diffstat:
Aassingments/TensorReview.pdf | 0
Aassingments/hw03_published_2021-11-08.pdf | 0
Msesh2/src/functions.jl | 6+++---
Msesh2/tex/main.pdf | 0
Msesh2/tex/main.tex | 8++++----
Asesh3/src/functions.jl | 46++++++++++++++++++++++++++++++++++++++++++++++
Asesh3/src/main.jl | 38++++++++++++++++++++++++++++++++++++++
Asesh3/src/plots/err_2.png | 0
Asesh3/src/plots/err_3.png | 0
Asesh3/src/plots/err_4.png | 0
Asesh3/tex/main.pdf | 0
Asesh3/tex/main.tex | 156+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asesh3/tex/uni.bib | 6++++++
13 files changed, 253 insertions(+), 7 deletions(-)

diff --git a/assingments/TensorReview.pdf b/assingments/TensorReview.pdf Binary files differ. diff --git a/assingments/hw03_published_2021-11-08.pdf b/assingments/hw03_published_2021-11-08.pdf Binary files differ. diff --git a/sesh2/src/functions.jl b/sesh2/src/functions.jl @@ -43,7 +43,7 @@ of size \alpha $n × n$, satisfying the column major convention $C_k = ∑_{i, j}^{n^2} T_{ijk} ⋅ A_i ⋅ B_k$ """ function multiplication_tensor(n) - T = zeros(n^2, n^2, n^2) + T = zeros(Int, n^2, n^2, n^2) U = zeros(n^2, n^3) V = zeros(n^2, n^3) W = zeros(n^2, n^3) @@ -59,7 +59,7 @@ function multiplication_tensor(n) count += 1 end end - return T, (U, V, W) + return T, [U, V, W] end @doc raw""" @@ -76,6 +76,7 @@ function cpd_eval(U_i) r = size(U_i[1])[2] n_d = [size(U_i[i])[1] for i in 1:d] s = zeros(n_d...) + for alpha in 1:r k = kron(U_i[1][:, alpha], U_i[2][:, alpha]) for i in 2:(d-1) @@ -129,7 +130,6 @@ function strassen_alg(A, B) C[4] = M_1 - M_2 + M_3 + M_6 return C end - @doc raw""" rank_7_CPD() diff --git a/sesh2/tex/main.pdf b/sesh2/tex/main.pdf Binary files differ. diff --git a/sesh2/tex/main.tex b/sesh2/tex/main.tex @@ -80,8 +80,8 @@ multiplication in the column major convention \begin{align} c_1 &= a_1b_1 + a_3b_2,\\ c_2 &= a_2b_1 + a_4b_2,\\ - c_1 &= a_1b_3 + a_3b_4,\\ - c_1 &= a_2b_3 + a_4b_4. + c_3 &= a_1b_3 + a_3b_4,\\ + c_4 &= a_2b_3 + a_4b_4. \end{align} So the non-zero entries in e.g. $k=1$ are $(1, 1)$ and $(3, 2)$, and so on. Thus the matrix multiplication Tensor of $n=2$ is @@ -239,7 +239,7 @@ $2\times2$ matrices (even of order $2^n\times 2^n$) in seven essential multiplication steps instead of the stubborn eight. 
The Strassen algorithm defines seven new coefficients $M_l$, where $l\in {1, \cdots, 7}$ \begin{align} - M_1 &:= (a_1 + a_4)(a_1 + a_4), \\ + M_1 &:= (a_1 + a_4)(b_1 + b_4), \\ M_2 &:= (a_2 + a_4)b_1,\\ M_3 &:= a_1(b_3 - b_4),\\ M_4 &:= a_4(b_2 - b_1),\\ @@ -249,7 +249,7 @@ defines seven new coefficients $M_l$, where $l\in {1, \cdots, 7}$ \end{align} Then the coefficients $c_k$ can be calculated as \begin{align} - c_1 &= M_1 + M_4 - M_6 + M_7,\\ + c_1 &= M_1 + M_4 - M_5 + M_7,\\ c_2 &= M_2 + M_4,\\ c_3 &= M_3 + M_5,\\ c_4 &= M_1 - M_2 + M_3 + M_6.\\ diff --git a/sesh3/src/functions.jl b/sesh3/src/functions.jl @@ -0,0 +1,46 @@ +#/usr/bin/julia + +using LinearAlgebra +using LaTeXStrings +using Distributions +using Plots + +include("../../sesh2/src/functions.jl") + +function cp_als(U, V, iter) + d = length(U) + R = size(U[1])[2] + r = size(V[1])[2] + dims = [size(U[l])[1] for l=1:d] + Vk = copy(V) + + Fs = [eleprod([(Vk[l]' * U[l]) for l=1:d if l!=j]) for j=1:d] + Gs = [eleprod([(Vk[l]' * Vk[l]) for l=1:d if l!=j]) for j=1:d] + + phi = [] + d_phi_2 = [] + for count=1:iter + for k in [collect(2:d)..., reverse(collect(1:(d-1)))...] 
+ Fs[k] = eleprod([(Vk[l]' * U[l]) for l=1:d if l!=k]) + Gs[k] = eleprod([(Vk[l]' * Vk[l]) for l=1:d if l!=k]) + + VV = kron(Gs[k], Matrix{Float64}(I, dims[k], dims[k])) + VU = kron(Fs[k], Matrix{Float64}(I, dims[k], dims[k])) + + Vk[k] = reshape(VV\(VU * vec(U[k])), (dims[k], r)) + end + append!(phi, norm(cpd_eval(Vk) - cpd_eval(U))) + append!(d_phi_2, norm(diff(1/2*phi.^2))) + end + return Vk, phi, d_phi_2 +end + + +function eleprod(A) + d = length(A) + Z = copy(A[1]) + for i=2:d + Z = Z .* A[i] + end + return Z +end diff --git a/sesh3/src/main.jl b/sesh3/src/main.jl @@ -0,0 +1,38 @@ +using LinearAlgebra +using LaTeXStrings +using Distributions +using Plots + +include("../../sesh2/src/functions.jl") +include("../../sesh3/src/functions.jl") + +function errplot(x, phi, d_phi_2, n) + p = plot(x, phi, + lw=3, + titlefontsize=20, + xlabelfontsize=14, + ylabelfontsize=14, + dpi=300, + grid=false, + size=(500, 400)) + plot!(p, x, d_phi_2, + lw=3, + title="n=$n", + label=L"\nabla \frac{1}{2} \phi^2") + savefig(p, "./plots/err_$n.png") +end + +function main() + nr = [(2, 7), (3, 23), (4, 49)] + + for (n, r) in nr + V = [reshape(vcat([normalize(rand(-1:1, n^2)) for i=1:r]...), (n^2, r)) for _=1:3] # guess + T, U = multiplication_tensor(n) # given n^3 CPD + V_hat, phi, d_phi_2 = cp_als(U, V, 10000) # CP-ALS + err + x = collect(1:length(phi)) # for plot + + errplot(x, phi, d_phi_2, n) + end +end + +main() diff --git a/sesh3/src/plots/err_2.png b/sesh3/src/plots/err_2.png Binary files differ. diff --git a/sesh3/src/plots/err_3.png b/sesh3/src/plots/err_3.png Binary files differ. diff --git a/sesh3/src/plots/err_4.png b/sesh3/src/plots/err_4.png Binary files differ. diff --git a/sesh3/tex/main.pdf b/sesh3/tex/main.pdf Binary files differ. 
diff --git a/sesh3/tex/main.tex b/sesh3/tex/main.tex @@ -0,0 +1,156 @@ +\documentclass[a4paper]{article} + + +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage{mathptmx} + +%\usepackage{ngerman} % Sprachanpassung Deutsch + +\usepackage{graphicx} +\usepackage{geometry} +\geometry{a4paper, top=15mm} + +\usepackage{subcaption} +\usepackage[shortlabels]{enumitem} +\usepackage{amssymb} +\usepackage{amsthm} +\usepackage{mathtools} +\usepackage{braket} +\usepackage{bbm} +\usepackage{graphicx} +\usepackage{float} +\usepackage{yhmath} +\usepackage{tikz} +\usetikzlibrary{patterns,decorations.pathmorphing,positioning} +\usetikzlibrary{calc,decorations.markings} + +\usepackage[backend=biber, sorting=none]{biblatex} +\addbibresource{uni.bib} + +\usepackage[framemethod=TikZ]{mdframed} + +\tikzstyle{titlered} = + [draw=black, thick, fill=white,% + text=black, rectangle, + right, minimum height=.7cm] + + +\usepackage[colorlinks=true,naturalnames=true,plainpages=false,pdfpagelabels=true]{hyperref} +\usepackage[parfill]{parskip} +\usepackage{lipsum} + + +\usepackage{tcolorbox} +\tcbuselibrary{skins,breakable} + +\pagestyle{myheadings} + +\markright{Popović\hfill Tensor Methods \hfill} + + +\title{University of Vienna\\ Faculty of Mathematics\\ +\vspace{1cm}TENSOR METHODS FOR DATA SCIENCE AND SCIENTIFIC COMPUTING +} +\author{Milutin Popovic} + +\begin{document} +\maketitle +\tableofcontents +\section{Assignment 3} +\subsection{Implementing the CP-ALS Algorithm} +The main idea of the algorithm is that we have a rank $R\in \mathbb{N}$ CPD +of a tensor $A \in \mathbb{R}^{n_1 \times \cdots \times n_d}$ given in terms of +$U_k \in \mathbb{R}^{n_k \times R}$, for $\mathbb{N}\ni d \ge 2$, $k \in +\{1,\dots,d\}$ and for $n_1, \dots, n_d\; \in \mathbb{N}$. 
We want to find a +rank $r\in \mathbb{N}$ CPD of the tensor $A$, by taking an initial guess +for some $V_k \in \mathbb{R}^{n_k \times r}$ and updating it for each $k$ by +optimizing the problem +\begin{align} + \phi(V_1, \dots, V_d) \rightarrow \min_{V_k \in \mathbb{R}^{n_k\times + r}}, +\end{align} +where $\phi(V_1, \dots, V_d)$ is the error function, determined by +\begin{align} + \phi(V_1, \dots, V_d) = \left\|\Psi_r(V_1,\dots,V_d) - \Psi_R(U_1, + \dots,U_d) \right\|_F . +\end{align} +Here $\Psi_r$ and $\Psi_R$ denote the CPD multilinear representation maps +transforming the CP decomposition into the tensor +\begin{align} + U &= \Psi_R(U_1, \dots, U_d)\\ + V &= \Psi_r(V_1, \dots, V_d)\\ + \end{align} +for $k = 2, \dots, d, d-1, \dots, 1$ sequentially by updating $V_k$ at each +step of the optimization. \textbf{This is one iteration step}. + +For $\Psi$ we will use the implementation constructed in the last exercise, +which consists of applying the Kronecker product of the rows and then +summing over them. Once we define the following matrices we may rewrite the +optimality condition as a linear system which is solvable since it is a least +squares problem. We define matrices $\mathcal{V}_k \in \mathbb{R}^{n_1\cdots n_d +\times n_k\cdot r}$ and $\mathcal{U}_k \in \mathbb{R}^{n_1\cdots n_d \times +n_k\cdot R}$ for $k \in \{1, \dots,d\}$ by the following +\begin{align} + \mathcal{U}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} U_l \otimes + \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}},\\ + \mathcal{V}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} V_l \otimes + \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}}, +\end{align} +note that $\prod$ represents the Hadamard product (elementwise multiplication) and +$\otimes$ the Kronecker product. 
The product of these two newly defined +matrices is then simply +\begin{align} + \mathcal{V}_k^*\mathcal{V}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} + (V_l^*V_l) \otimes \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}} + \label{eq: VV} \\ + \mathcal{V}_k^*\mathcal{U}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} + (V_l^*U_l) \otimes \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}}.\label{eq: + VU} +\end{align} +Do note that $\mathcal{V}_k^*\mathcal{V}_k \in \mathbb{R}^{r\cdot n_k \times +r\cdot n_k}$ and $\mathcal{V}_k^*\mathcal{U}_k \in \mathbb{R}^{r\cdot n_k +\times R\cdot n_k}$. For convenience let us define $F_k := V_k^* U_k$ and +$G_k := V_k^* V_k$, which will become useful when talking about storing the +computation. Then we can solve for $V_k$ with the following least squares +problem +\begin{align} + (\mathcal{V}_k^*\mathcal{V}_k)\hat{v}_k = (\mathcal{V}_k^*\mathcal{U}_k)u_k, +\end{align} +where $\hat{v}_k = \text{vec}(\hat{V}_k)$ and $u_k = \text{vec}(U_k)$. + +To make the computation cost linear with respect to $d$, we can compute +$\mathcal{V}_k^*\mathcal{V}_k$ and $\mathcal{V}_k^*\mathcal{U}_k$ for +each $k$ and update them in the iteration over $k$ with the new $\hat{V}_k$, by +computing the product in equations \ref{eq: VV} and \ref{eq: VU} again. +Additionally we compute the error $\phi$ and $\|\nabla \frac{1}{2} +\phi^2\|_2$ after each iteration (after going through +$k=2,\dots,d,d-1,\dots,1$). The implementation is in \cite{code}. + +\subsection{CP-ALS for the matrix multiplication tensor} +In this section we will use the matrix multiplication tensor to play around with +the CP-ALS algorithm we implemented in the last section. We consider $n = 2$ +and $r = 7$, $n = 3$ and $r = 23$, $n = 4$ and $r = 49$ for the +multiplication tensor and its CP decomposition. The implementation of these we +have done in the last exercise together with their rank $R = n^3$ CP +decomposition. 
We test our implementation of the CP-ALS algorithm on +these three multiplication tensors for three random initial guesses $V_1, V_2, +V_3$, where each matrix has normalized columns (i.e. of norm one). + +Our procedure will be that we make seven different guesses and check if they are good +enough, i.e. if they do not produce a \textit{SingularValueError} after 10 +iterations we go for 10000 iterations and plot both $\phi$ and $\|\nabla +\frac{1}{2} \phi^2\|_2$ with respect to the number of iterations for each +multiplication tensor. We get the following curves +\begin{figure}[H] + \centering \includegraphics[width=0.33\textwidth]{./../src/plots/err_2.png} + \includegraphics[width=0.33\textwidth]{./../src/plots/err_3.png} + \includegraphics[width=0.32\textwidth]{./../src/plots/err_4.png} + \caption{From left to right, rank $r\in\{7, 23, 49\}$ CP-ALS error for the + $n\in \{2, 3, 4\}$ multiplication tensors at 10000 iteration steps} +\end{figure} + + + +\printbibliography +\end{document} diff --git a/sesh3/tex/uni.bib b/sesh3/tex/uni.bib @@ -0,0 +1,6 @@ +@online{code, + author = {Popovic Milutin}, + title = {Git Instance, Tensor Methods for Data Science and Scientific Computing}, + urldate = {2021-10-24}, + url = {git://popovic.xyz/tensor_methods.git}, +}