commit 29d9d951e8347f500087fe53a3184bfbef1146a9
parent ba0d807d22b0ff5d200e489d03f06aa60e8dc2ff
Author: miksa234 <milutin@popovic.xyz>
Date: Sun, 14 Nov 2021 11:46:19 +0100
done sesh3 without last exercise
Diffstat:
13 files changed, 253 insertions(+), 7 deletions(-)
diff --git a/assingments/TensorReview.pdf b/assingments/TensorReview.pdf
Binary files differ.
diff --git a/assingments/hw03_published_2021-11-08.pdf b/assingments/hw03_published_2021-11-08.pdf
Binary files differ.
diff --git a/sesh2/src/functions.jl b/sesh2/src/functions.jl
@@ -43,7 +43,7 @@ of size \alpha $n × n$, satisfying the column major convention
$C_k = ∑_{i, j}^{n^2} T_{ijk} ⋅ A_i ⋅ B_k$
"""
function multiplication_tensor(n)
- T = zeros(n^2, n^2, n^2)
+ T = zeros(Int, n^2, n^2, n^2)
U = zeros(n^2, n^3)
V = zeros(n^2, n^3)
W = zeros(n^2, n^3)
@@ -59,7 +59,7 @@ function multiplication_tensor(n)
count += 1
end
end
- return T, (U, V, W)
+ return T, [U, V, W]
end
@doc raw"""
@@ -76,6 +76,7 @@ function cpd_eval(U_i)
r = size(U_i[1])[2]
n_d = [size(U_i[i])[1] for i in 1:d]
s = zeros(n_d...)
+
for alpha in 1:r
k = kron(U_i[1][:, alpha], U_i[2][:, alpha])
for i in 2:(d-1)
@@ -129,7 +130,6 @@ function strassen_alg(A, B)
C[4] = M_1 - M_2 + M_3 + M_6
return C
end
-
@doc raw"""
rank_7_CPD()
diff --git a/sesh2/tex/main.pdf b/sesh2/tex/main.pdf
Binary files differ.
diff --git a/sesh2/tex/main.tex b/sesh2/tex/main.tex
@@ -80,8 +80,8 @@ multiplication in the column major convention
\begin{align}
c_1 &= a_1b_1 + a_3b_2,\\
c_2 &= a_2b_1 + a_4b_2,\\
- c_1 &= a_1b_3 + a_3b_4,\\
- c_1 &= a_2b_3 + a_4b_4.
+ c_3 &= a_1b_3 + a_3b_4,\\
+ c_4 &= a_2b_3 + a_4b_4.
\end{align}
So the non-zero entries in e.g. $k=1$ are $(1, 1)$ and $(3, 2)$, and so on.
Thus the matrix multiplication Tensor of $n=2$ is
@@ -239,7 +239,7 @@ $2\times2$ matrices (even of order $2^n\times 2^n$) in seven essential
multiplication steps instead of the stubborn eight. The Strassen algorithm
defines seven new coefficients $M_l$, where $l\in {1, \cdots, 7}$
\begin{align}
- M_1 &:= (a_1 + a_4)(a_1 + a_4), \\
+ M_1 &:= (a_1 + a_4)(b_1 + b_4), \\
M_2 &:= (a_2 + a_4)b_1,\\
M_3 &:= a_1(b_3 - b_4),\\
M_4 &:= a_4(b_2 - b_1),\\
@@ -249,7 +249,7 @@ defines seven new coefficients $M_l$, where $l\in {1, \cdots, 7}$
\end{align}
Then the coefficients $c_k$ can be calculated as
\begin{align}
- c_1 &= M_1 + M_4 - M_6 + M_7,\\
+ c_1 &= M_1 + M_4 - M_5 + M_7,\\
c_2 &= M_2 + M_4,\\
c_3 &= M_3 + M_5,\\
c_4 &= M_1 - M_2 + M_3 + M_6.\\
diff --git a/sesh3/src/functions.jl b/sesh3/src/functions.jl
@@ -0,0 +1,46 @@
+#/usr/bin/julia
+
+using LinearAlgebra
+using LaTeXStrings
+using Distributions
+using Plots
+
+include("../../sesh2/src/functions.jl")
+
+function cp_als(U, V, iter)
+ d = length(U)
+ R = size(U[1])[2]
+ r = size(V[1])[2]
+ dims = [size(U[l])[1] for l=1:d]
+ Vk = copy(V)
+
+ Fs = [eleprod([(Vk[l]' * U[l]) for l=1:d if l!=j]) for j=1:d]
+ Gs = [eleprod([(Vk[l]' * Vk[l]) for l=1:d if l!=j]) for j=1:d]
+
+ phi = []
+ d_phi_2 = []
+ for count=1:iter
+ for k in [collect(2:d)..., reverse(collect(1:(d-1)))...]
+ Fs[k] = eleprod([(Vk[l]' * U[l]) for l=1:d if l!=k])
+ Gs[k] = eleprod([(Vk[l]' * Vk[l]) for l=1:d if l!=k])
+
+ VV = kron(Gs[k], Matrix{Float64}(I, dims[k], dims[k]))
+ VU = kron(Fs[k], Matrix{Float64}(I, dims[k], dims[k]))
+
+ Vk[k] = reshape(VV\(VU * vec(U[k])), (dims[k], r))
+ end
+ append!(phi, norm(cpd_eval(Vk) - cpd_eval(U)))
+ append!(d_phi_2, norm(diff(1/2*phi.^2)))
+ end
+ return Vk, phi, d_phi_2
+end
+
+
+function eleprod(A)
+ d = length(A)
+ Z = copy(A[1])
+ for i=2:d
+ Z = Z .* A[i]
+ end
+ return Z
+end
diff --git a/sesh3/src/main.jl b/sesh3/src/main.jl
@@ -0,0 +1,38 @@
+using LinearAlgebra
+using LaTeXStrings
+using Distributions
+using Plots
+
+include("../../sesh2/src/functions.jl")
+include("../../sesh3/src/functions.jl")
+
+function errplot(x, phi, d_phi_2, n)
+ p = plot(x, phi,
+ lw=3,
+ titlefontsize=20,
+ xlabelfontsize=14,
+ ylabelfontsize=14,
+ dpi=300,
+ grid=false,
+ size=(500, 400))
+ plot!(p, x, d_phi_2,
+ lw=3,
+ title="n=$n",
+ label=L"\nabla \frac{1}{2} \phi^2")
+ savefig(p, "./plots/err_$n.png")
+end
+
+function main()
+ nr = [(2, 7), (3, 23), (4, 49)]
+
+ for (n, r) in nr
+ V = [reshape(vcat([normalize(rand(-1:1, n^2)) for i=1:r]...), (n^2, r)) for _=1:3] # guess
+ T, U = multiplication_tensor(n) # given n^3 CPD
+ V_hat, phi, d_phi_2 = cp_als(U, V, 10000) # CP-ALS + err
+ x = collect(1:length(phi)) # for plot
+
+ errplot(x, phi, d_phi_2, n)
+ end
+end
+
+main()
diff --git a/sesh3/src/plots/err_2.png b/sesh3/src/plots/err_2.png
Binary files differ.
diff --git a/sesh3/src/plots/err_3.png b/sesh3/src/plots/err_3.png
Binary files differ.
diff --git a/sesh3/src/plots/err_4.png b/sesh3/src/plots/err_4.png
Binary files differ.
diff --git a/sesh3/tex/main.pdf b/sesh3/tex/main.pdf
Binary files differ.
diff --git a/sesh3/tex/main.tex b/sesh3/tex/main.tex
@@ -0,0 +1,156 @@
+\documentclass[a4paper]{article}
+
+
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage{mathptmx}
+
+%\usepackage{ngerman} % Sprachanpassung Deutsch
+
+\usepackage{graphicx}
+\usepackage{geometry}
+\geometry{a4paper, top=15mm}
+
+\usepackage{subcaption}
+\usepackage[shortlabels]{enumitem}
+\usepackage{amssymb}
+\usepackage{amsthm}
+\usepackage{mathtools}
+\usepackage{braket}
+\usepackage{bbm}
+\usepackage{graphicx}
+\usepackage{float}
+\usepackage{yhmath}
+\usepackage{tikz}
+\usetikzlibrary{patterns,decorations.pathmorphing,positioning}
+\usetikzlibrary{calc,decorations.markings}
+
+\usepackage[backend=biber, sorting=none]{biblatex}
+\addbibresource{uni.bib}
+
+\usepackage[framemethod=TikZ]{mdframed}
+
+\tikzstyle{titlered} =
+ [draw=black, thick, fill=white,%
+ text=black, rectangle,
+ right, minimum height=.7cm]
+
+
+\usepackage[colorlinks=true,naturalnames=true,plainpages=false,pdfpagelabels=true]{hyperref}
+\usepackage[parfill]{parskip}
+\usepackage{lipsum}
+
+
+\usepackage{tcolorbox}
+\tcbuselibrary{skins,breakable}
+
+\pagestyle{myheadings}
+
+\markright{Popović\hfill Tensor Methods \hfill}
+
+
+\title{University of Vienna\\ Faculty of Mathematics\\
+\vspace{1cm}TENSOR METHODS FOR DATA SCIENCE AND SCIENTIFIC COMPUTING
+}
+\author{Milutin Popovic}
+
+\begin{document}
+\maketitle
+\tableofcontents
+\section{Assignment 3}
+\subsection{Implementing the CP-ALS Algorithm}
+The main idea of the algorithm is that we have a rank $R\in \mathbb{N}$ CPD
+of a tensor $A \in \mathbb{R}^{n_1 \times \cdots \times n_d}$ given in terms of
+$U_k \in \mathbb{R}^{n_k \times R}$, for $k =
+\{1,\dots,d\}$ with $\mathbb{N}\ni d\ge 2$, and for $n_1, \dots, n_d \in \mathbb{N}$. We want to find a
+rank $r\in \mathbb{N}$ CPD of the tensor $A$, by taking an initial guess
+for some $V_k \in \mathbb{R}^{n_k \times r}$ and updating it for each $k$ by
+optimizing the problem
+\begin{align}
+ \phi(V_1, \dots, V_d) \rightarrow \min_{V_k \in \mathbb{R}^{n_k\times
+ r}},
+\end{align}
+where $\phi(V_1, \dots, V_d)$ is the error function, determined by
+\begin{align}
+ \phi(V_1, \dots, V_d) = \left\|\Psi_r(V_1,\dots,V_d) - \Psi_R(U_1,
+ \dots,U_d) \right\|_F .
+\end{align}
+Here $\Psi_r$ and $\Psi_R$ denote the CPD multilinear representation maps
+transforming the CP decomposition into the tensor
+\begin{align}
+ U &= \Psi_R(U_1, \dots, U_d)\\
+ V &= \Psi_r(V_1, \dots, V_d)\\
+ \end{align}
+for $k = 1, \dots d, d-1, \dots 2$ sequentially by updating $V_k$ at each
+step of the optimization. \textbf{This is one iteration step}.
+
+For $\Psi$ we will use the implementation constructed in the last exercise,
+which consists of applying the Kronecker product of the rows and then
+summing over them. Once we define the following matrices we may rewrite the
+optimality condition in a linear system which is solvable since it is a least
+square problem. We define matrices $\mathcal{V}_k \in \mathbb{R}^{n_1\cdots n_d
+\times n_k\cdot r}$ and $\mathcal{U}_k \in \mathbb{R}^{n_1\cdots n_d \times
+n_k\cdot R}$ for $k \in \{1, \dots,d\}$ by the following
+\begin{align}
+    \mathcal{U}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} U_l \otimes
+ \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}},\\
+    \mathcal{V}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} V_l \otimes
+ \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}},
+\end{align}
+note that $\prod$ represents the Hadamard product (elementwise multiplication) and
+$\otimes$ the Kronecker product. The product of these two new defined
+matrices is then simply
+\begin{align}
+    \mathcal{V}_k^*\mathcal{V}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d}
+ (V_l^*V_l) \otimes \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}}
+ \label{eq: VV} \\
+    \mathcal{V}_k^*\mathcal{U}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d}
+ (V_l^*U_l) \otimes \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}}.\label{eq:
+ VU}
+\end{align}
+Do note that $\mathcal{V}_k^*\mathcal{V}_k \in \mathbb{R}^{r\cdot n_k \times
+r\cdot n_k}$ and $\mathcal{V}_k^*\mathcal{U}_k \in \mathbb{R}^{r\cdot n_k
+\times R\cdot n_k}$. For convenience let us define $F_k := V_k^* U_k$ and
+$G_k := V_k^* V_k$, which will become useful when talking about storing the
+computation. Then we can solve for $V_k$ with the following least-squares
+problem
+\begin{align}
+ (\mathcal{V}_k^*\mathcal{V}_k)\hat{v}_k = (\mathcal{V}_k^*\mathcal{U}_k)u_k,
+\end{align}
+where $\hat{v}_k = \text{vec}(\hat{V}_k)$ and $u_k = \text{vec}(U_k)$.
+
+To make the computation cost linear with respect to $d$, we can compute
+$\mathcal{V}_k^*\mathcal{V}_k$ and $\mathcal{V}_k^*\mathcal{U}_k$ for
+each $k$ and update them in the iteration in $k$ with the new $\hat{V}_k$, by
+computing the product in equations \ref{eq: VV} and \ref{eq: VU} again.
+Additionally we compute the error $\phi$ and $\|\nabla \frac{1}{2}
+\phi^2\|_2$ after each iteration (after going through
+$k=2,\dots,d,d-1,\dots,1$). The implementation is in \cite{code}.
+
+\subsection{CP-ALS for the matrix multiplication tensor}
+In this section we will use the matrix multiplication tensor to play around
+with the CP-ALS algorithm we implemented in the last section. We consider $n = 2$
+and $r = 7$, $n = 3$ and $r = 23$, $n = 4$ and $r = 49$ for the
+multiplication tensor and its CP decomposition. The implementation of them we
+have done in the last exercise together with their rank $R = n^3$ CP
+decomposition. We test our implementation of the CP-ALS algorithm on
+these three multiplication tensors for three random initial guesses $V_1, V_2,
+V_3$, where each matrix has unitary columns (i.e. of norm one).
+
+Our procedure is to make seven different initial guesses and check whether
+they are good enough, i.e. whether they avoid producing a
+\textit{SingularValueError} within the first 10 iterations; if so, we run
+10000 iterations and plot both $\phi$ and $\|\nabla
+\frac{1}{2} \phi^2\|_2$ with respect to the number of iterations for each
+multiplication tensor. We get the following curves
+\begin{figure}[H]
+ \centering \includegraphics[width=0.33\textwidth]{./../src/plots/err_2.png}
+ \includegraphics[width=0.33\textwidth]{./../src/plots/err_3.png}
+ \includegraphics[width=0.32\textwidth]{./../src/plots/err_4.png}
+ \caption{From left to right, rank $r\in\{7, 23, 49\}$ CP-ALS error for the
+ $n\in \{2, 3, 4\}$ multiplication tensor at 10000 iteration steps}
+\end{figure}
+
+
+
+\printbibliography
+\end{document}
diff --git a/sesh3/tex/uni.bib b/sesh3/tex/uni.bib
@@ -0,0 +1,6 @@
+@online{code,
+ author = {Popovic Milutin},
+ title = {Git Instance, Tensor Methods for Data Science and Scientific Computing},
+ urldate = {2021-10-24},
+ url = {git://popovic.xyz/tensor_methods.git},
+}