tensor_methods

Tensor Methods for DS and SC
git clone git://popovic.xyz/tensor_methods.git
Log | Files | Refs

commit 29d9d951e8347f500087fe53a3184bfbef1146a9
parent ba0d807d22b0ff5d200e489d03f06aa60e8dc2ff
Author: miksa234 <milutin@popovic.xyz>
Date:   Sun, 14 Nov 2021 11:46:19 +0100

done sesh3 without last exercise

Diffstat:
Aassingments/TensorReview.pdf | 0
Aassingments/hw03_published_2021-11-08.pdf | 0
Msesh2/src/functions.jl | 6+++---
Msesh2/tex/main.pdf | 0
Msesh2/tex/main.tex | 8++++----
Asesh3/src/functions.jl | 46++++++++++++++++++++++++++++++++++++++++++++++
Asesh3/src/main.jl | 38++++++++++++++++++++++++++++++++++++++
Asesh3/src/plots/err_2.png | 0
Asesh3/src/plots/err_3.png | 0
Asesh3/src/plots/err_4.png | 0
Asesh3/tex/main.pdf | 0
Asesh3/tex/main.tex | 156+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asesh3/tex/uni.bib | 6++++++
13 files changed, 253 insertions(+), 7 deletions(-)

diff --git a/assingments/TensorReview.pdf b/assingments/TensorReview.pdf Binary files differ. diff --git a/assingments/hw03_published_2021-11-08.pdf b/assingments/hw03_published_2021-11-08.pdf Binary files differ. diff --git a/sesh2/src/functions.jl b/sesh2/src/functions.jl @@ -43,7 +43,7 @@ of size \alpha $n × n$, satisfying the column major convention $C_k = ∑_{i, j}^{n^2} T_{ijk} ⋅ A_i ⋅ B_k$ """ function multiplication_tensor(n) - T = zeros(n^2, n^2, n^2) + T = zeros(Int, n^2, n^2, n^2) U = zeros(n^2, n^3) V = zeros(n^2, n^3) W = zeros(n^2, n^3) @@ -59,7 +59,7 @@ function multiplication_tensor(n) count += 1 end end - return T, (U, V, W) + return T, [U, V, W] end @doc raw""" @@ -76,6 +76,7 @@ function cpd_eval(U_i) r = size(U_i[1])[2] n_d = [size(U_i[i])[1] for i in 1:d] s = zeros(n_d...) + for alpha in 1:r k = kron(U_i[1][:, alpha], U_i[2][:, alpha]) for i in 2:(d-1) @@ -129,7 +130,6 @@ function strassen_alg(A, B) C[4] = M_1 - M_2 + M_3 + M_6 return C end - @doc raw""" rank_7_CPD() diff --git a/sesh2/tex/main.pdf b/sesh2/tex/main.pdf Binary files differ. diff --git a/sesh2/tex/main.tex b/sesh2/tex/main.tex @@ -80,8 +80,8 @@ multiplication in the column major convention \begin{align} c_1 &= a_1b_1 + a_3b_2,\\ c_2 &= a_2b_1 + a_4b_2,\\ - c_1 &= a_1b_3 + a_3b_4,\\ - c_1 &= a_2b_3 + a_4b_4. + c_3 &= a_1b_3 + a_3b_4,\\ + c_4 &= a_2b_3 + a_4b_4. \end{align} So the non-zero entries in e.g. $k=1$ are $(1, 1)$ and $(3, 2)$, and so on. Thus the matrix multiplication Tensor of $n=2$ is @@ -239,7 +239,7 @@ $2\times2$ matrices (even of order $2^n\times 2^n$) in seven essential multiplication steps instead of the stubborn eight. 
The Strassen algorithm defines seven new coefficients $M_l$, where $l\in {1, \cdots, 7}$ \begin{align} - M_1 &:= (a_1 + a_4)(a_1 + a_4), \\ + M_1 &:= (a_1 + a_4)(b_1 + b_4), \\ M_2 &:= (a_2 + a_4)b_1,\\ M_3 &:= a_1(b_3 - b_4),\\ M_4 &:= a_4(b_2 - b_1),\\ @@ -249,7 +249,7 @@ defines seven new coefficients $M_l$, where $l\in {1, \cdots, 7}$ \end{align} Then the coefficients $c_k$ can be calculated as \begin{align} - c_1 &= M_1 + M_4 - M_6 + M_7,\\ + c_1 &= M_1 + M_4 - M_5 + M_7,\\ c_2 &= M_2 + M_4,\\ c_3 &= M_3 + M_5,\\ c_4 &= M_1 - M_2 + M_3 + M_6.\\ diff --git a/sesh3/src/functions.jl b/sesh3/src/functions.jl @@ -0,0 +1,46 @@ +#/usr/bin/julia + +using LinearAlgebra +using LaTeXStrings +using Distributions +using Plots + +include("../../sesh2/src/functions.jl") + +function cp_als(U, V, iter) + d = length(U) + R = size(U[1])[2] + r = size(V[1])[2] + dims = [size(U[l])[1] for l=1:d] + Vk = copy(V) + + Fs = [eleprod([(Vk[l]' * U[l]) for l=1:d if l!=j]) for j=1:d] + Gs = [eleprod([(Vk[l]' * Vk[l]) for l=1:d if l!=j]) for j=1:d] + + phi = [] + d_phi_2 = [] + for count=1:iter + for k in [collect(2:d)..., reverse(collect(1:(d-1)))...] 
+ Fs[k] = eleprod([(Vk[l]' * U[l]) for l=1:d if l!=k]) + Gs[k] = eleprod([(Vk[l]' * Vk[l]) for l=1:d if l!=k]) + + VV = kron(Gs[k], Matrix{Float64}(I, dims[k], dims[k])) + VU = kron(Fs[k], Matrix{Float64}(I, dims[k], dims[k])) + + Vk[k] = reshape(VV\(VU * vec(U[k])), (dims[k], r)) + end + append!(phi, norm(cpd_eval(Vk) - cpd_eval(U))) + append!(d_phi_2, norm(diff(1/2*phi.^2))) + end + return Vk, phi, d_phi_2 +end + + +function eleprod(A) + d = length(A) + Z = copy(A[1]) + for i=2:d + Z = Z .* A[i] + end + return Z +end diff --git a/sesh3/src/main.jl b/sesh3/src/main.jl @@ -0,0 +1,38 @@ +using LinearAlgebra +using LaTeXStrings +using Distributions +using Plots + +include("../../sesh2/src/functions.jl") +include("../../sesh3/src/functions.jl") + +function errplot(x, phi, d_phi_2, n) + p = plot(x, phi, + lw=3, + titlefontsize=20, + xlabelfontsize=14, + ylabelfontsize=14, + dpi=300, + grid=false, + size=(500, 400)) + plot!(p, x, d_phi_2, + lw=3, + title="n=$n", + label=L"\nabla \frac{1}{2} \phi^2") + savefig(p, "./plots/err_$n.png") +end + +function main() + nr = [(2, 7), (3, 23), (4, 49)] + + for (n, r) in nr + V = [reshape(vcat([normalize(rand(-1:1, n^2)) for i=1:r]...), (n^2, r)) for _=1:3] # guess + T, U = multiplication_tensor(n) # given n^3 CPD + V_hat, phi, d_phi_2 = cp_als(U, V, 10000) # CP-ALS + err + x = collect(1:length(phi)) # for plot + + errplot(x, phi, d_phi_2, n) + end +end + +main() diff --git a/sesh3/src/plots/err_2.png b/sesh3/src/plots/err_2.png Binary files differ. diff --git a/sesh3/src/plots/err_3.png b/sesh3/src/plots/err_3.png Binary files differ. diff --git a/sesh3/src/plots/err_4.png b/sesh3/src/plots/err_4.png Binary files differ. diff --git a/sesh3/tex/main.pdf b/sesh3/tex/main.pdf Binary files differ. 
diff --git a/sesh3/tex/main.tex b/sesh3/tex/main.tex @@ -0,0 +1,156 @@ +\documentclass[a4paper]{article} + + +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage{mathptmx} + +%\usepackage{ngerman} % Sprachanpassung Deutsch + +\usepackage{graphicx} +\usepackage{geometry} +\geometry{a4paper, top=15mm} + +\usepackage{subcaption} +\usepackage[shortlabels]{enumitem} +\usepackage{amssymb} +\usepackage{amsthm} +\usepackage{mathtools} +\usepackage{braket} +\usepackage{bbm} +\usepackage{graphicx} +\usepackage{float} +\usepackage{yhmath} +\usepackage{tikz} +\usetikzlibrary{patterns,decorations.pathmorphing,positioning} +\usetikzlibrary{calc,decorations.markings} + +\usepackage[backend=biber, sorting=none]{biblatex} +\addbibresource{uni.bib} + +\usepackage[framemethod=TikZ]{mdframed} + +\tikzstyle{titlered} = + [draw=black, thick, fill=white,% + text=black, rectangle, + right, minimum height=.7cm] + + +\usepackage[colorlinks=true,naturalnames=true,plainpages=false,pdfpagelabels=true]{hyperref} +\usepackage[parfill]{parskip} +\usepackage{lipsum} + + +\usepackage{tcolorbox} +\tcbuselibrary{skins,breakable} + +\pagestyle{myheadings} + +\markright{Popović\hfill Tensor Methods \hfill} + + +\title{University of Vienna\\ Faculty of Mathematics\\ +\vspace{1cm}TENSOR METHODS FOR DATA SCIENCE AND SCIENTIFIC COMPUTING +} +\author{Milutin Popovic} + +\begin{document} +\maketitle +\tableofcontents +\section{Assignment 3} +\subsection{Implementing the CP-ALS Algorithm} +The main idea of the algorithm is that we have a rank $R\in \mathbb{N}$ CPD +of a tensor $A \in \mathbb{R}^{n_1 \times \cdots \times n_d}$ given in terms of +$U_k \in \mathbb{R}^{n_k \times R}$, for $\mathbb{N}\ni d \ge 2$, $k \in +\{1,\dots,d\}$ and for $n_1, \dots, n_d\; \in \mathbb{N}$. 
We want to find a +rank $r\in \mathbb{N}$ CPD of the tensor $A$, by taking an initial guess +for some $V_k \in \mathbb{R}^{n_k \times r}$ and updating it for each $k$ by +optimizing the problem +\begin{align} + \phi(V_1, \dots, V_d) \rightarrow \min_{V_k \in \mathbb{R}^{n_k\times + r}}, +\end{align} +where $\phi(V_1, \dots, V_d)$ is the error function, determined by +\begin{align} + \phi(V_1, \dots, V_d) = \left\|\Psi_r(V_1,\dots,V_d) - \Psi_R(U_1, + \dots,U_d) \right\|_F . +\end{align} +Here $\Psi_r$ and $\Psi_R$ denote the CPD multilinear representation maps +transforming the CP decomposition into the tensor +\begin{align} + U &= \Psi_R(U_1, \dots, U_d)\\ + V &= \Psi_r(V_1, \dots, V_d)\\ + \end{align} +for $k = 2, \dots, d, d-1, \dots, 1$ sequentially by updating $V_k$ at each +step of the optimization. \textbf{This is one iteration step}. + +For $\Psi$ we will use the implementation constructed in the last exercise, +which consists of applying the Kronecker product of the rows and then +summing over them. Once we define the following matrices we may rewrite the +optimality condition as a linear system which is solvable since it is a least +squares problem. We define matrices $\mathcal{V}_k \in \mathbb{R}^{n_1\cdots n_d +\times n_k\cdot r}$ and $\mathcal{U}_k \in \mathbb{R}^{n_1\cdots n_d \times +n_k\cdot R}$ for $k \in \{1, \dots,d\}$ by the following +\begin{align} + \mathcal{U}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} U_l \otimes + \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}},\\ + \mathcal{V}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} V_l \otimes + \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}}, +\end{align} +note that $\prod$ represents the Hadamard product (elementwise multiplication) and +$\otimes$ the Kronecker product. 
The product of these two newly defined +matrices is then simply +\begin{align} + \mathcal{V}_k^*\mathcal{V}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} + (V_l^*V_l) \otimes \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}} + \label{eq: VV} \\ + \mathcal{V}_k^*\mathcal{U}_k &= \prod_{\substack{l=1 \\ l\neq k}}^{d} + (V_l^*U_l) \otimes \mathbbm{1}_{\mathbb{R}^{n_k\times n_k}}.\label{eq: + VU} +\end{align} +Do note that $\mathcal{V}_k^*\mathcal{V}_k \in \mathbb{R}^{r\cdot n_k \times +r\cdot n_k}$ and $\mathcal{V}_k^*\mathcal{U}_k \in \mathbb{R}^{r\cdot n_k +\times R\cdot n_k}$. For convenience let us define $F_k := V_k^* U_k$ and +$G_k := V_k^* V_k$, which will become useful when talking about storing the +computation. Then we can solve for $V_k$ with the following least squares +problem +\begin{align} + (\mathcal{V}_k^*\mathcal{V}_k)\hat{v}_k = (\mathcal{V}_k^*\mathcal{U}_k)u_k, +\end{align} +where $\hat{v}_k = \text{vec}(\hat{V}_k)$ and $u_k = \text{vec}(U_k)$. + +To make the computation cost linear with respect to $d$, we can compute +$\mathcal{V}_k^*\mathcal{V}_k$ and $\mathcal{V}_k^*\mathcal{U}_k$ for +each $k$ and update them in the iteration over $k$ with the new $\hat{V}_k$, by +computing the product in equations \ref{eq: VV} and \ref{eq: VU} again. +Additionally we compute the error $\phi$ and $\|\nabla \frac{1}{2} +\phi^2\|_2$ after each iteration (after going through +$k=2,\dots,d,d-1,\dots,1$). The implementation is in \cite{code}. + +\subsection{CP-ALS for the matrix multiplication tensor} +In this section we will use the matrix multiplication tensor to play around with +the CP-ALS algorithm we implemented in the last section. We consider $n = 2$ +and $r = 7$, $n = 3$ and $r = 23$, $n = 4$ and $r = 49$ for the +multiplication tensor and its CP decomposition. The implementation of these we +have done in the last exercise together with their rank $R = n^3$ CP +decomposition. 
We test our implementation of the CP-ALS algorithm on +these three multiplication tensors for three random initial guesses $V_1, V_2, +V_3$, where each matrix has normalized columns (i.e. of norm one). + +Our procedure will be that we make seven different guesses and check if they are good +enough, i.e. if they do not produce a \textit{SingularValueError} after 10 +iterations we go for 10000 iterations and plot both $\phi$ and $\|\nabla +\frac{1}{2} \phi^2\|_2$ with respect to the number of iterations for each +multiplication tensor. We get the following curves +\begin{figure}[H] + \centering \includegraphics[width=0.33\textwidth]{./../src/plots/err_2.png} + \includegraphics[width=0.33\textwidth]{./../src/plots/err_3.png} + \includegraphics[width=0.32\textwidth]{./../src/plots/err_4.png} + \caption{From left to right, rank $r\in\{7, 23, 49\}$ CP-ALS error for the + $n\in \{2, 3, 4\}$ multiplication tensors at 10000 iteration steps} +\end{figure} + + + +\printbibliography +\end{document} diff --git a/sesh3/tex/uni.bib b/sesh3/tex/uni.bib @@ -0,0 +1,6 @@ +@online{code, + author = {Popovic Milutin}, + title = {Git Instance, Tensor Methods for Data Science and Scientific Computing}, + urldate = {2021-10-24}, + url = {git://popovic.xyz/tensor_methods.git}, +}