Added Project and Report
This commit is contained in:
186
Report/(3) - algorithms/algorithms.aux
Normal file
186
Report/(3) - algorithms/algorithms.aux
Normal file
@ -0,0 +1,186 @@
|
||||
\relax
|
||||
\providecommand\hyper@newdestlabel[2]{}
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Algorithms}{6}{chapter.3}\protected@file@percent }
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\@writefile{loa}{\addvspace {10\p@ }}
|
||||
\newlabel{ch: algorithms}{{3}{6}{Algorithms}{chapter.3}{}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.1}QR}{6}{section.3.1}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Thin QR}}{7}{algocf.1}\protected@file@percent }
|
||||
\newlabel{algo: thinQR}{{1}{7}{QR}{algocf.1}{}}
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces householder\_vector}}{7}{algocf.2}\protected@file@percent }
|
||||
\newlabel{algo: householder_vector}{{2}{7}{QR}{algocf.2}{}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.2}L-BFGS}{8}{section.3.2}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Limited Memory BFGS}}{8}{algocf.3}\protected@file@percent }
|
||||
\newlabel{algo: L-BFGS}{{3}{8}{L-BFGS}{algocf.3}{}}
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Limited Memory BFGS {-} Two-Loop Recursion}}{9}{algocf.4}\protected@file@percent }
|
||||
\newlabel{algo: L-BFGS Two-Loop Recursion}{{4}{9}{L-BFGS}{algocf.4}{}}
|
||||
\newlabel{algo: convergence1}{{1}{9}{}{Item.1}{}}
|
||||
\newlabel{algo: convergence2}{{2}{9}{}{Item.2}{}}
|
||||
\newlabel{algo: convergence3}{{3}{9}{}{Item.3}{}}
|
||||
\newlabel{eq:6}{{3}{9}{}{Item.3}{}}
|
||||
\newlabel{algo: definition y_k}{{3.1}{10}{}{equation.3.2.1}{}}
|
||||
\@writefile{loe}{\addvspace {10\p@ }}
|
||||
\@writefile{loe}{\contentsline {mtheo}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Theorem}{10}{thmt@dummyctr.dummy.1}\protected@file@percent }
|
||||
\@writefile{loe}{\contentsline {mproof}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Proof}{10}{thmt@dummyctr.dummy.2}\protected@file@percent }
|
||||
\@setckpt{(3) - algorithms/algorithms}{
|
||||
\setcounter{page}{12}
|
||||
\setcounter{equation}{1}
|
||||
\setcounter{enumi}{3}
|
||||
\setcounter{enumii}{0}
|
||||
\setcounter{enumiii}{0}
|
||||
\setcounter{enumiv}{0}
|
||||
\setcounter{footnote}{0}
|
||||
\setcounter{mpfootnote}{0}
|
||||
\setcounter{part}{0}
|
||||
\setcounter{chapter}{3}
|
||||
\setcounter{section}{2}
|
||||
\setcounter{subsection}{0}
|
||||
\setcounter{subsubsection}{0}
|
||||
\setcounter{paragraph}{0}
|
||||
\setcounter{subparagraph}{0}
|
||||
\setcounter{figure}{0}
|
||||
\setcounter{table}{0}
|
||||
\setcounter{tabx@nest}{0}
|
||||
\setcounter{listtotal}{0}
|
||||
\setcounter{listcount}{0}
|
||||
\setcounter{liststart}{0}
|
||||
\setcounter{liststop}{0}
|
||||
\setcounter{citecount}{0}
|
||||
\setcounter{citetotal}{0}
|
||||
\setcounter{multicitecount}{0}
|
||||
\setcounter{multicitetotal}{0}
|
||||
\setcounter{instcount}{2}
|
||||
\setcounter{maxnames}{3}
|
||||
\setcounter{minnames}{3}
|
||||
\setcounter{maxitems}{3}
|
||||
\setcounter{minitems}{1}
|
||||
\setcounter{citecounter}{0}
|
||||
\setcounter{maxcitecounter}{0}
|
||||
\setcounter{savedcitecounter}{0}
|
||||
\setcounter{uniquelist}{0}
|
||||
\setcounter{uniquename}{0}
|
||||
\setcounter{refsection}{0}
|
||||
\setcounter{refsegment}{0}
|
||||
\setcounter{maxextratitle}{0}
|
||||
\setcounter{maxextratitleyear}{0}
|
||||
\setcounter{maxextraname}{0}
|
||||
\setcounter{maxextradate}{0}
|
||||
\setcounter{maxextraalpha}{0}
|
||||
\setcounter{abbrvpenalty}{50}
|
||||
\setcounter{highnamepenalty}{50}
|
||||
\setcounter{lownamepenalty}{25}
|
||||
\setcounter{maxparens}{3}
|
||||
\setcounter{parenlevel}{0}
|
||||
\setcounter{blx@maxsection}{0}
|
||||
\setcounter{mincomprange}{10}
|
||||
\setcounter{maxcomprange}{100000}
|
||||
\setcounter{mincompwidth}{1}
|
||||
\setcounter{afterword}{0}
|
||||
\setcounter{savedafterword}{0}
|
||||
\setcounter{annotator}{0}
|
||||
\setcounter{savedannotator}{0}
|
||||
\setcounter{author}{0}
|
||||
\setcounter{savedauthor}{0}
|
||||
\setcounter{bookauthor}{0}
|
||||
\setcounter{savedbookauthor}{0}
|
||||
\setcounter{commentator}{0}
|
||||
\setcounter{savedcommentator}{0}
|
||||
\setcounter{editor}{0}
|
||||
\setcounter{savededitor}{0}
|
||||
\setcounter{editora}{0}
|
||||
\setcounter{savededitora}{0}
|
||||
\setcounter{editorb}{0}
|
||||
\setcounter{savededitorb}{0}
|
||||
\setcounter{editorc}{0}
|
||||
\setcounter{savededitorc}{0}
|
||||
\setcounter{foreword}{0}
|
||||
\setcounter{savedforeword}{0}
|
||||
\setcounter{holder}{0}
|
||||
\setcounter{savedholder}{0}
|
||||
\setcounter{introduction}{0}
|
||||
\setcounter{savedintroduction}{0}
|
||||
\setcounter{namea}{0}
|
||||
\setcounter{savednamea}{0}
|
||||
\setcounter{nameb}{0}
|
||||
\setcounter{savednameb}{0}
|
||||
\setcounter{namec}{0}
|
||||
\setcounter{savednamec}{0}
|
||||
\setcounter{translator}{0}
|
||||
\setcounter{savedtranslator}{0}
|
||||
\setcounter{shortauthor}{0}
|
||||
\setcounter{savedshortauthor}{0}
|
||||
\setcounter{shorteditor}{0}
|
||||
\setcounter{savedshorteditor}{0}
|
||||
\setcounter{labelname}{0}
|
||||
\setcounter{savedlabelname}{0}
|
||||
\setcounter{institution}{0}
|
||||
\setcounter{savedinstitution}{0}
|
||||
\setcounter{lista}{0}
|
||||
\setcounter{savedlista}{0}
|
||||
\setcounter{listb}{0}
|
||||
\setcounter{savedlistb}{0}
|
||||
\setcounter{listc}{0}
|
||||
\setcounter{savedlistc}{0}
|
||||
\setcounter{listd}{0}
|
||||
\setcounter{savedlistd}{0}
|
||||
\setcounter{liste}{0}
|
||||
\setcounter{savedliste}{0}
|
||||
\setcounter{listf}{0}
|
||||
\setcounter{savedlistf}{0}
|
||||
\setcounter{location}{0}
|
||||
\setcounter{savedlocation}{0}
|
||||
\setcounter{organization}{0}
|
||||
\setcounter{savedorganization}{0}
|
||||
\setcounter{origlocation}{0}
|
||||
\setcounter{savedoriglocation}{0}
|
||||
\setcounter{origpublisher}{0}
|
||||
\setcounter{savedorigpublisher}{0}
|
||||
\setcounter{publisher}{0}
|
||||
\setcounter{savedpublisher}{0}
|
||||
\setcounter{language}{0}
|
||||
\setcounter{savedlanguage}{0}
|
||||
\setcounter{origlanguage}{0}
|
||||
\setcounter{savedoriglanguage}{0}
|
||||
\setcounter{pageref}{0}
|
||||
\setcounter{savedpageref}{0}
|
||||
\setcounter{textcitecount}{0}
|
||||
\setcounter{textcitetotal}{0}
|
||||
\setcounter{textcitemaxnames}{0}
|
||||
\setcounter{biburlbigbreakpenalty}{100}
|
||||
\setcounter{biburlbreakpenalty}{200}
|
||||
\setcounter{biburlnumpenalty}{0}
|
||||
\setcounter{biburlucpenalty}{0}
|
||||
\setcounter{biburllcpenalty}{0}
|
||||
\setcounter{smartand}{1}
|
||||
\setcounter{bbx:relatedcount}{0}
|
||||
\setcounter{bbx:relatedtotal}{0}
|
||||
\setcounter{cbx@tempcnta}{0}
|
||||
\setcounter{cbx@tempcntb}{1}
|
||||
\setcounter{cbx@tempcntc}{0}
|
||||
\setcounter{cbx@tempcntd}{-1}
|
||||
\setcounter{float@type}{4}
|
||||
\setcounter{parentequation}{0}
|
||||
\setcounter{ALG@line}{0}
|
||||
\setcounter{ALG@rem}{0}
|
||||
\setcounter{ALG@nested}{0}
|
||||
\setcounter{ALG@Lnr}{2}
|
||||
\setcounter{ALG@blocknr}{10}
|
||||
\setcounter{ALG@storecount}{0}
|
||||
\setcounter{ALG@tmpcounter}{0}
|
||||
\setcounter{thmt@dummyctr}{2}
|
||||
\setcounter{nlinenum}{0}
|
||||
\setcounter{caption@flags}{2}
|
||||
\setcounter{continuedfloat}{0}
|
||||
\setcounter{subfigure}{0}
|
||||
\setcounter{subtable}{0}
|
||||
\setcounter{section@level}{0}
|
||||
\setcounter{Item}{3}
|
||||
\setcounter{Hfootnote}{0}
|
||||
\setcounter{bookmark@seq@number}{0}
|
||||
\setcounter{AlgoLine}{11}
|
||||
\setcounter{algocfline}{4}
|
||||
\setcounter{algocfproc}{4}
|
||||
\setcounter{algocf}{4}
|
||||
\setcounter{mlemma}{0}
|
||||
}
|
||||
218
Report/(3) - algorithms/algorithms.tex
Normal file
218
Report/(3) - algorithms/algorithms.tex
Normal file
@ -0,0 +1,218 @@
|
||||
% chktex-file 9 chktex-file 17
|
||||
\chapter{Algorithms}\label{ch: algorithms}
|
||||
|
||||
\section{QR}
|
||||
The algorithm has been implemented considering that the input matrix $A \in \mathbb{R}^{m \times n}$, where $m$ may be different from $n$, namely it can be rectangular \textit{horizontally} or \textit{vertically}. In this version we store in a proper data structure a matrix $\Upsilon \in \mathbb{R}^{m \times n}$ of the following form ($m > n$ in this example):
|
||||
|
||||
\begin{equation*}
|
||||
\Upsilon = {(\upsilon_{i,j})}_{i,j} = \begin{tikzpicture}[baseline=-1ex]
|
||||
\matrix[%
|
||||
matrix of math nodes,
|
||||
nodes in empty cells,
|
||||
left delimiter={[},right delimiter={]},
|
||||
inner xsep=2pt, column sep=6pt,
|
||||
] (m)
|
||||
{%
|
||||
\vphantom{1} & * & \cdots & * \\
|
||||
& \vphantom{1} & \ddots & \vdots \\
|
||||
& & \vphantom{1} & * \\
|
||||
& & & \vphantom{1} \\
|
||||
u_1 & u_2 & \cdots & u_n \\
|
||||
\vphantom{1} & \vphantom{1} & \vphantom{1} & \vphantom{1} \\
|
||||
};
|
||||
\node[rectangle, draw, fit={(m-1-1) (m-6-1)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\node[rectangle, draw, fit={(m-2-2) (m-6-2)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\node[rectangle, draw, fit={(m-3-3) (m-6-3)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\node[rectangle, draw, fit={(m-4-4) (m-6-4)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\end{tikzpicture}
|
||||
\end{equation*}
|
||||
|
||||
\begin{center}
|
||||
$u_k \in \mathbb{R}^{m - k + 1},\ 1 \leq k \leq n$
|
||||
\end{center}
|
||||
and the values of the diagonal of $R$ in a vector $d \in \mathbb{R}^{n}$. The $*$ entries are elements computed in the QR factorization belonging to the upper triangular matrix, yielded by line 6 of \hyperref[algo: thinQR]{Algorithm 1}. In this way we are allowed to lazily perform the products $Qy$ and $Q^T y$ by means of the Householder vectors $u_1, \dots, u_n$ that we stored. On the other hand, to compute a product between the upper part of $\Upsilon$ and an input vector we reconstruct the upper triangular matrix by taking the elements $\upsilon_{i,j}$ such that $j > i$ and attach the vector $d$ as the diagonal of the resulting matrix.
|
||||
The zeros of the matrix $R$ are ignored.
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{Thin QR}\label{algo: thinQR}
|
||||
\SetKwInOut{Input}{Input}
|
||||
\SetKwInOut{Output}{Output}
|
||||
|
||||
\BlankLine%
|
||||
\Input{$A \in \mathbb{R}^{m \times n}$}
|
||||
\Output{$Q \in \mathbb{R}^{m \times m},\ R \in \mathbb{R}^{m \times n}$ implicit $QR$ factorization of $A$}
|
||||
|
||||
\BlankLine%
|
||||
$\Upsilon = copy(A)$ \\
|
||||
$d = zeros(\min(m, n))$ \\
|
||||
|
||||
\For{
|
||||
$k \in 1 \dots \min(m, n)$
|
||||
}{
|
||||
$u_k, s_k = householder\_vector(\Upsilon[k:m, k])$\\
|
||||
$d_k = s_k$ \\
|
||||
$\Upsilon[k:m, k+1:n] = \Upsilon[k:m, k+1:n] - 2u_k(u_k^T \Upsilon[k:m, k+1:n])$\\
|
||||
$\Upsilon[k:m, k] = u_k$
|
||||
}
|
||||
|
||||
\Return$\Upsilon, d$
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{householder\_vector}\label{algo: householder_vector}
|
||||
\SetKwInOut{Input}{Input}
|
||||
\SetKwInOut{Output}{Output}
|
||||
|
||||
\BlankLine%
|
||||
\Input{$x \in \mathbb{R}^d$}
|
||||
\Output{$u \in \mathbb{R}^{d},\ s \in \mathbb{R}$ householder vector of $x$}
|
||||
|
||||
\BlankLine%
|
||||
$s = \norm{x}$ \\
|
||||
\If{$x_1 \geq 0$}{
|
||||
$s = -s$
|
||||
}
|
||||
$u = copy(x)$ \\
|
||||
$u_1 = u_1 - s$ \\
|
||||
$u = u\ / \norm{u}$ \\
|
||||
|
||||
\Return$u, s$
|
||||
\end{algorithm}
|
||||
|
||||
We assume $m > n$ as the case $n > m$ is similar for the complexity analysis. The time complexity of this algorithm is $\Theta\bigl(mn^2 \bigr) \approx \Theta\bigl(n^3 \bigr)$, because $m \approx n$ in (P). We will see in the \hyperref[ch: experiments]{Experiments chapter} that the running time scales linearly with $m$ as expected, where $m$ is the size of $\hat{X}$.
|
||||
|
||||
\newpage
|
||||
\section{L-BFGS}
|
||||
|
||||
We follow the syntax from \textit{Numerical Optimization}~\cite{Numerical-Optimization-2006} and define $f_k = f(x_k)$.
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{Limited Memory BFGS}\label{algo: L-BFGS}
|
||||
\SetKwInOut{Input}{Input}
|
||||
\SetKwInOut{Output}{Output}
|
||||
|
||||
\BlankLine%
|
||||
\Input{$\textbf{f}: \mathbb{R}^n \longrightarrow \mathbb{R},\ \textbf{x} \in \mathbb{R}^n,\ m \text{ memory, } \epsilon \text{ tolerance}$}
|
||||
\Output{${\bf x^*}\ \text{ending point},\ {\bf f(x^*)},\ {\bf \nabla f(x^*)}$}
|
||||
|
||||
\BlankLine%
|
||||
$k = 0$ \\
|
||||
\While{$\norm{\nabla f_k} \geq \epsilon \norm{\nabla f_0}$} {
|
||||
\uIf{storage is empty}{
|
||||
$H_k^0 = I$
|
||||
}\uElse{
|
||||
$H_k^0 = \frac{\langle y_{k-1}, s_{k-1} \rangle}{\norm{y_{k-1}}^2} \cdot I$
|
||||
}
|
||||
Calculate $p_k = -H_k \nabla{f_k}$ with \hyperref[algo: L-BFGS Two-Loop Recursion]{\textbf{Algorithm 4}} \\
|
||||
Choose $\alpha_k$ satisfying the Armijo-Wolfe conditions or with exact line search \\
|
||||
$x_{k+1} = x_k + \alpha_k p_k$ \\
|
||||
$s_k = x_{k+1} - x_k$ \\
|
||||
$y_k = \nabla f_{k+1} - \nabla f_k$ \\
|
||||
$curvature = \langle y_k, s_k \rangle$ \\
|
||||
$\rho_k = curvature^{-1}$ \\
|
||||
\uIf{$curvature \leq 10^{-16}$}{
|
||||
free the storage and start again from gradient descent
|
||||
}\uElse{
|
||||
Discard the triplet $\{s_{k-m}, y_{k-m}, \rho_{k-m}\}$ from storage \\
|
||||
Save $s_k, y_k, \rho_k$
|
||||
}
|
||||
|
||||
$k = k + 1$
|
||||
}
|
||||
\Return$x_k$, $f_k$, $\nabla f_k$
|
||||
\end{algorithm}
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{Limited Memory BFGS {-} Two-Loop Recursion}\label{algo: L-BFGS Two-Loop Recursion}
|
||||
|
||||
$q = \nabla f_k$ \\
|
||||
\For{$i = (k - 1), \dots, (k - m)$}{
|
||||
$\alpha_i = \rho_i s_i^T q$ \\
|
||||
$q = q - \alpha_i y_i$ \\
|
||||
}
|
||||
|
||||
$r = H_k^0 q$ \\
|
||||
\For{$i = (k - m), \dots, (k - 1)$}{
|
||||
$\beta = \rho_i y_i^T r$ \\
|
||||
$r = r + s_i\bigl(\alpha_i - \beta\bigr)$ \\
|
||||
}
|
||||
|
||||
\Return$-r$
|
||||
|
||||
\end{algorithm}
|
||||
In our implementation we keep the triplets $(s_k, y_k, \rho_k)$ in a circular buffer with capacity $m$ and the values of $\alpha_i$ in \hyperref[algo: L-BFGS Two-Loop Recursion]{Algorithm 4} in a stack such that no explicit indices are needed.
|
||||
|
||||
In case the curvature of the function is too small, we free the storage and restart with a gradient step.
|
||||
|
||||
We prefer using an exact line search to compute the step size over an inexact line search since the computational cost for our problem is lower.
|
||||
|
||||
\subsection*{Convergence}
|
||||
To prove that the implemented method converges to the global minimum of the function we have to optimize, we follow~\cite{convergence_lbfgs} and state the following assumptions about our problem:
|
||||
\begin{enumerate}
|
||||
\item\label{algo: convergence1} $f \in C^2$
|
||||
\item\label{algo: convergence2} The level set $\mathcal{L} = \{ x \in \mathbb{R}^n\ |\ f(x) \leq f(x_0) \} $ is convex
|
||||
\item\label{algo: convergence3} $\exists\ M_1, M_2 \in \mathbb{R}^+$ such that
|
||||
\begin{equation*}
|
||||
M_1\norm{z}^2 \leq z^T G(x) z \leq M_2\norm{z}^2\label{eq:6}
|
||||
\end{equation*}
|
||||
$\forall z \in \mathbb{R}^n$ and $\forall x \in \mathcal{L}$
|
||||
\end{enumerate}
|
||||
|
||||
We follow the publication's notation and define:
|
||||
|
||||
\[ G(x) \coloneqq \nabla^{2}f(x) \]
|
||||
\[ \bar{G}_k(x) \coloneqq \int_0^1 G(x_k + \tau \alpha_k p_k) d\tau \]
|
||||
|
||||
From Taylor's theorem:
|
||||
|
||||
\begin{equation}\label{algo: definition y_k}
|
||||
y_k = \bar{G}_k \alpha_k p_k = \bar{G}_k s_k
|
||||
\end{equation}
|
||||
|
||||
The first assumption for our problem follows from the definition. The second assumption is proved by \autoref{definitions: hessian tomography}. The third assumption is also a consequence of the fact that the Hessian of $f$ is constant.
|
||||
|
||||
% \[ z_k \coloneqq {\bar{G}_k}^{1/2} s_k \]
|
||||
|
||||
\begin{mtheo}
|
||||
Let $B_0$ be any symmetric positive definite initial matrix, and let $x_0$ be a starting point for which the Assumptions~\ref{algo: convergence1},~\ref{algo: convergence2} and~\ref{algo: convergence3} hold, then the sequence ${x_k}$ generated by the L-BFGS algorithm converges to the minimizer $x^*$ of $f$ linearly.
|
||||
\end{mtheo}
|
||||
|
||||
\begin{mproof}
|
||||
Using \autoref{algo: definition y_k} and Assumption~\ref{algo: convergence3}:
|
||||
\[ M_1 \norm{s_k}^2 \leq y_k^T s_k \leq M_2 \norm{s_k}^2 \]
|
||||
and:
|
||||
\[ \frac{\norm{y_k}^2}{y_k^T s_k} = \frac{s_k^T \bar{G}_k^2 s_k}{s_k^T \bar{G}_k s_k} \]
|
||||
Both trace and determinant can be expressed in terms of the trace and determinant of the starting matrix from which the approximate Hessian is constructed:
|
||||
\begin{align*}
|
||||
\Tr(B_{k+1}) &\leq \Tr(B_k^{(0)}) + \Tilde{m} M_2 \leq M_3 \\
|
||||
\det(B_{k+1}) &= \det(B_k^{(0)}) \cdot \prod_{l=0}^{\Tilde{m}-1} \frac{y_l^T s_l}{s_l^T B_k^{(l)} s_l} \geq \det\left(B_k^{(0)} {\left(\frac{M_1}{M_3}\right)}^{\Tilde{m}}\right) \geq M_4
|
||||
\end{align*}
|
||||
where $\Tilde{m}$ is the memory size and $M_3$ and $M_4$ are chosen appropriately in $\mathbb{R}^+$.
|
||||
|
||||
From these two bounds we have that for some constant $\delta > 0$:
|
||||
|
||||
\[ \cos(\theta_k) = \frac{s_k^T B_k s_k}{\norm{s_k} \norm{B_k s_k}} \geq \delta \]
|
||||
Since with exact line search the Armijo condition $f(x_k + \alpha_k p_k) \leq f(x_k) + m_1 \alpha_k \nabla f(x_k)$ is always satisfied if the constant $m_1$ does not exclude the minimum $x_*$, and since the strong Wolfe condition $\norm{\nabla f(x_k + \alpha_k p_k)} \leq m_3 \norm{\nabla f(x_k)}$ is also always satisfied because $\norm{\nabla f(x_k + \alpha_k p_k)} = O(u)$, it follows from the two conditions and Assumptions~\ref{algo: convergence1} and~\ref{algo: convergence2} that:
|
||||
\begin{align*}
|
||||
& f(x_{k+1}) - f(x_*) \leq (1 - c \cos^2(\theta_k)) (f(x_k) - f(x_*)) \\
|
||||
\implies& f(x_k) - f(x_*) \leq {(1 - c \cdot \delta^2)}^k (f(x_0) - f(x_*)) \\
|
||||
\implies& f(x_k) - f(x_*) \leq r^k (f(x_0) - f(x_*))
|
||||
\end{align*}
|
||||
for some $r \in [0, 1)$. Using Assumption~\ref{algo: convergence3}:
|
||||
\begin{gather*}
|
||||
\frac{1}{2} M_1 \norm{x_k - x_*}^2 \leq f(x_k) - f(x_*) \\
|
||||
\implies \norm{x_k - x_*} \leq r^{k/2} {\left( 2 \frac{f(x_0) - f(x_*)}{M_1} \right)}^{(1/2)}
|
||||
\end{gather*}
|
||||
so the sequence $\{x_k\}$ is linearly convergent.
|
||||
\end{mproof}
|
||||
|
||||
The implementation of L-BFGS that uses Armijo-Wolfe line search also satisfies the assumptions so it also converges linearly to $x_*$.
|
||||
|
||||
%%% Local Variables:
|
||||
%%% mode: latex
|
||||
%%% TeX-master: "../main"
|
||||
%%% TeX-command-extra-options: "-shell-escape"
|
||||
%%% End:
|
||||
Reference in New Issue
Block a user