Added Project and Report
168
Report/(1) - introduction/introduction.aux
Normal file
@ -0,0 +1,168 @@
33
Report/(1) - introduction/introduction.tex
Normal file
@ -0,0 +1,33 @@
\chapter{Introduction}\label{ch: introduction}

(P) is the linear least squares problem
\[\min_{w}\ \left\lVert \hat{X}w-\hat{y} \right\rVert\]
where
\[
    \hat{X} =
    \begin{bmatrix}
        X^T \\
        \lambda I_m
    \end{bmatrix},
    \ \
    \hat{y} =
    \begin{bmatrix}
        y \\
        0
    \end{bmatrix},
\]
with $X$ the (tall and thin) matrix from the ML-cup dataset by prof.\ Micheli, $\lambda > 0$ a regularization parameter, and $y$ a random vector.
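As a concrete illustration, the augmented matrix and vector can be assembled in a few lines of Julia (a minimal sketch under our naming; \texttt{augment} is not part of the project code):
\begin{verbatim}
# Hypothetical sketch: assembling the augmented system of (P).
using LinearAlgebra

function augment(X::AbstractMatrix, y::AbstractVector, lambda::Real)
    m = size(X, 1)                                  # X is m x n
    Xhat = [transpose(X); lambda * Matrix(I, m, m)] # (m+n) x m
    yhat = [y; zeros(m)]                            # pad y with m zeros
    return Xhat, yhat
end
\end{verbatim}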
\begin{itemize}
    \item[--] (A1) is an algorithm of the class of limited-memory quasi-Newton methods.
    \item[--] (A2) is a thin QR factorization with Householder reflectors, in the variant where one does not form the matrix $Q$, but stores the Householder vectors $u_k$ and uses them to perform (implicitly) products with $Q$ and $Q^T$.
\end{itemize}
No off-the-shelf solvers are allowed. In particular, you must implement the thin QR factorization yourself, and the computational cost of your implementation should be at most quadratic in $m$.

\subsection*{Outline}
This report is organized as follows:
\begin{description}
    \item[\autoref{ch: problem definition},] in which the problem is given its precise mathematical formulation;
    \item[\autoref{ch: algorithms},] where we present the implemented algorithms, together with their convergence and complexity analysis;
    \item[\autoref{ch: experiments},] where we evaluate and compare (A1) and (A2) on this task, and provide further tests that examine the algorithms in more depth;
    \item[\autoref{ch: conclusion},] in which conclusions are drawn, offering a critical analysis of the results obtained.
\end{description}
BIN
Report/(2) - problem definition/images/conditioning.png
Normal file
After Width: | Height: | Size: 44 KiB |
177
Report/(2) - problem definition/problem definition.aux
Normal file
@ -0,0 +1,177 @@
147
Report/(2) - problem definition/problem definition.tex
Normal file
@ -0,0 +1,147 @@
\chapter{Problem Definition}\label{ch: problem definition}
Henceforth, we denote the 2-norm $\norm{\,\cdot\,}_2$ simply by the generic norm symbol $\norm{\,\cdot\,}$.\newline
Given $\hat{X} \in \mathbb{R}^{(m + n) \times m},\ \hat{y} \in \mathbb{R}^{m + n},\ $ we want to find
\[\min_{w}\ \norm{\hat{X}w-\hat{y}}\]


\section{QR}
By performing a QR factorization on $\hat{X}$ we can reformulate the problem as follows:
\[
    \min_{w}\ \norm{\hat{X}w - \hat{y}} = \min_{w}\ \norm{\vphantom{\hat{X}}QRw - \hat{y}}
\]
with $Q \in \mathbb{R}^{(m + n) \times (m + n)}$ an orthogonal matrix and $R \in \mathbb{R}^{(m + n) \times m}$ an upper triangular matrix. Knowing that $R_{ij} = 0,\ \ \forall i > j,\ i = 1, \ldots, m + n,\ j = 1, \ldots, m,\ $ we can write
\begin{equation*}
    \begin{aligned}
        &R =
        \begin{bmatrix}
            R_0 \\
            0
        \end{bmatrix},\
        &R_0 \in \mathbb{R}^{m \times m} \\
        &Q =
        \begin{bmatrix}
            Q_0\ Q_c
        \end{bmatrix},\
        &Q_0 \in \mathbb{R}^{(m+n) \times m},\ &Q_c \in \mathbb{R}^{(m+n) \times n}
    \end{aligned}
\end{equation*}
Since orthogonal matrices preserve the 2-norm, we have:
\begin{equation*}
    \begin{aligned}
        &\min_{w}\ \norm{QRw - \hat{y}} = \min_{w}\ \norm{Q^T(QRw - \hat{y})} = \\
        &\min_{w}\ \norm{Q^{T}QRw - Q^T\hat{y}} = \\
        &\min_{w}\ \norm{Rw - Q^T\hat{y}} = \\
        &\min_{w}\ \norm{
        \begin{bmatrix}
            R_0 \\
            0
        \end{bmatrix}
        w -
        \begin{bmatrix}
            Q^T_0 \\
            Q^T_c
        \end{bmatrix}\hat{y}} = \\
        & \min_{w}\ \norm{
        \begin{bmatrix}
            R_0w - Q^T_0\hat{y} \\
            - Q^T_c\hat{y}
        \end{bmatrix}}
    \end{aligned}
\end{equation*}
The entries of the second block $- Q^T_c\hat{y}$ do not depend on $w$, so they contribute a constant term to the norm. Thus, we can reduce the problem to the triangular system
\begin{equation*}
    R_0w - Q^T_0\hat{y} = 0 \iff R_0w = Q^T_0\hat{y}
\end{equation*}
provided that $R_0$ is invertible. Indeed,
\begin{center}
    $R_0$ is invertible $\iff \hat{X}$ has full column rank $\iff \hat{X}^T\hat{X} \succ 0$.
\end{center}
In our setting $R_0$ is invertible, so the triangular system can be solved via back-substitution; this claim is proved in \hyperref[proofs: fullcolumn]{the last section}.
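The back-substitution step admits a direct implementation; a minimal Julia sketch (our naming, not the project's exact code):
\begin{verbatim}
# Solve R0 * w = b for upper triangular, invertible R0.
function backsub(R0::AbstractMatrix, b::AbstractVector)
    m = length(b)
    w = zeros(float(eltype(b)), m)
    for i in m:-1:1
        s = b[i]
        for j in i+1:m
            s -= R0[i, j] * w[j]   # subtract already-known terms
        end
        w[i] = s / R0[i, i]        # diagonal entry is nonzero
    end
    return w
end
\end{verbatim}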


\section{L-BFGS}\label{ch: L-BFGS}


With $f(w) = \norm{\hat{X}w-\hat{y}}$, we can define
\begin{equation}
    \begin{aligned}
        g(w) = {f(w)}^2 = \norm{\hat{X}w-\hat{y}}^2
    \end{aligned}
\end{equation}
and reformulate the problem equivalently in terms of $g(w)$, since squaring is monotone increasing on the nonnegative reals.
\begin{equation*}
    \begin{aligned}
        \min_{w}\ g(w) = \min_{w}\ \norm{\hat{X}w-\hat{y}}^2 = \min_{w}\ {\bigl(\hat{X}w - \hat{y}\bigr)}^T\bigl(\hat{X}w - \hat{y}\bigr)
    \end{aligned}
\end{equation*}
The gradient of $g$ with respect to $w$ is
\begin{equation*}
    \begin{aligned}
        \nabla g(w) = 2\hat{X}^T\bigl(\hat{X}w - \hat{y}\bigr)
    \end{aligned}
\end{equation*}

Likewise, the gradient of $f(w)$ is
\begin{equation*}
    \nabla f(w) = \frac{1}{\norm{\hat{X} w - \hat{y}}} \hat{X}^T\bigl(\hat{X}w - \hat{y}\bigr)
\end{equation*}
but minimizing $f$ directly gives much worse performance, since $f$ is no longer quadratic.
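As a sanity check, both the objective and its gradient are one-liners in Julia (a sketch; the names are ours):
\begin{verbatim}
using LinearAlgebra

# g(w) = ||Xhat*w - yhat||^2 and its gradient.
g(w, Xhat, yhat)     = (r = Xhat*w - yhat; dot(r, r))
gradg(w, Xhat, yhat) = 2 .* (transpose(Xhat) * (Xhat*w - yhat))
\end{verbatim}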

The function $g$ is L-smooth since $\forall w, w' \in \mathbb{R}^m,\ \text{with } w \neq w'$:


\vspace{6pt}

\begin{tblr}{colspec={crl}, colsep={0pt}}
    & \(\norm{\nabla g(w) - \nabla g(w')}\) &\(\ \leq L \norm{w - w'}\)\\
    \(\iff\) & \(\norm{2\hat{X}^T(\hat{X}w - \hat{y}) - 2\hat{X}^T (\hat{X} w' -\hat{y})}\) & \(\ \leq L \norm{w - w'}\) \\
    \(\iff\) & \(2\norm{\hat{X}^T \hat{X} (w-w')}\) & \(\ \leq L \norm{w - w'}\) \\
    \(\Longleftarrow\) & \(2\norm{\hat{X}^T \hat{X}} \norm{w-w'}\) & \(\ \leq L \norm{w - w'}\) \\
    \(\iff\) & \(2\norm{\hat{X}^T \hat{X}}\) & \(\ \leq L\ \)
\end{tblr}

\vspace{6pt}

The function $g$ is also strongly convex since \( \nabla^2g(w) = 2\hat{X}^T \hat{X} \succ 0\).

The tomography of $g(w)$ along the direction $p$ is:
\begin{align}
    \phi(\alpha)&={(\hat{X}(w+\alpha p) - \hat{y})}^T \cdot (\hat{X}(w+\alpha p) - \hat{y}) \notag\\
    \frac{d \phi(\alpha)}{d \alpha} &= 2 w^T \hat{X}^T \hat{X} p - 2 \hat{y}^T \hat{X} p + 2 \alpha p^T \hat{X}^T \hat{X} p \notag\\
    \frac{d^2 \phi(\alpha)}{d \alpha^2} &= 2 p^T \hat{X}^T \hat{X} p \label{definitions: hessian tomography}
\end{align}

Since $\frac{d^2 \phi(\alpha)}{d \alpha^2}$ is constant, the tomography is simply a parabola; and since $\hat{X}^T \hat{X}$ is positive definite, the inner product $\langle p, p \rangle_{\hat{X}^T \hat{X}}$ is always positive, so the parabola always has a minimum. The minimum is found by setting $\frac{d \phi(\alpha)}{d \alpha} = 0$:

\[ \alpha_{\min} = \frac{\hat{y}^T \hat{X} p - w^T \hat{X}^T \hat{X} p}{p^T \hat{X}^T \hat{X} p} \]
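This closed form is what an exact line search evaluates; in Julia it costs one matrix-vector product with $\hat{X}$ per direction (a sketch; the names are ours):
\begin{verbatim}
using LinearAlgebra

# Exact step length along direction p, from the formula above.
function exact_step(Xhat, yhat, w, p)
    Xp = Xhat * p                      # single mat-vec with Xhat
    r  = Xhat * w - yhat               # current residual
    return -dot(r, Xp) / dot(Xp, Xp)   # alpha_min
end
\end{verbatim}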

\section{Conditioning}\label{subsec:conditioning}


We check how the condition number $\kappa(\hat{X})$ behaves as the regularization term $\lambda > 0$ varies.
\[
    \kappa(\hat{X}) = \norm{\hat{X}} \norm{\hat{X}^{+}} = \frac{\sigma_1}{\sigma_m} = \sqrt{\frac{\lambda_{\max}}{\lambda_{\min}}}
\]
with $\hat{X}^{+}$ the pseudo-inverse of $\hat{X}$, $\sigma_1, \sigma_m$ respectively the largest and smallest singular values of $\hat{X}$, and $\lambda_{\max}, \lambda_{\min}$ the largest and smallest eigenvalues of $\hat{X}^T\hat{X}$.\\
Knowing that $\hat{X}^T\hat{X} = XX^T + \lambda^2I_m$, we have that
\begin{center}
    \begin{tblr}{colspec={c}, colsep={0pt}, column{1} = {mode = math}}
        \lambda_{\max} = \lambda_1 + \lambda^2 \\
        \lambda_{\min} = \lambda_m + \lambda^2 \\
    \end{tblr}
\end{center}
with $\lambda_1, \lambda_m$ the largest and smallest eigenvalues of $XX^T$, which are translated by $\lambda^2$ as a result of adding $\lambda^2I_m$ (\autoref{proof:eigenvalues_translation}).\\
In \autoref{proofs: eigenvalues} we show that $\lambda_m = 0$ and conclude that $\kappa(\hat{X})$ scales linearly with $\frac{1}{\lambda}$:
\[
    \kappa(\hat{X}) = \sqrt{\frac{\lambda_{\max}}{\lambda_{\min}}} = \sqrt{\frac{\lambda_{1} + \lambda^2}{\lambda_{m} + \lambda^2}} = \frac{\sqrt{\lambda_{1} + \lambda^2}}{{\sqrt{\lambda^2}}} = \frac{\sqrt{\lambda_{1} + \lambda^2}}{\lambda}
\]
if $\lambda_1 > 0$.
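The scaling can be verified numerically (a sketch under our naming; the dimensions follow the experiments chapter):
\begin{verbatim}
using LinearAlgebra

kappa(A) = (s = svdvals(A); s[1] / s[end])

X = 2 .* rand(1000, 20) .- 1     # entries in [-1, 1]
for lambda in (1e-12, 1e-8, 1e-4, 1.0)
    Xhat = [transpose(X); lambda * Matrix(I, 1000, 1000)]
    println(lambda, " -> ", kappa(Xhat))
end
\end{verbatim}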

For $\lambda$ close to zero we have $\frac{\sqrt{\lambda_{1} + \lambda^2}}{\lambda} = O\left(\frac{1}{\lambda}\right)$.
This behaviour is witnessed in \autoref{fig:condition}, which is in logarithmic scale:
\begin{figure}[htbp]
    \centering
    \includegraphics[width=0.7\linewidth]{(2) - problem definition/images/conditioning.png} % chktex 8
    \caption{$\kappa(\hat{X})$ \textit{for different values of} $\lambda$}\label{fig:condition}
\end{figure}


%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% TeX-command-extra-options: "-shell-escape"
%%% End:
186
Report/(3) - algorithms/algorithms.aux
Normal file
@ -0,0 +1,186 @@
218
Report/(3) - algorithms/algorithms.tex
Normal file
@ -0,0 +1,218 @@
% chktex-file 9 chktex-file 17
\chapter{Algorithms}\label{ch: algorithms}


\section{QR}
The algorithm has been implemented for a general input matrix $A \in \mathbb{R}^{m \times n}$, where $m$ may differ from $n$; that is, $A$ can be rectangular \textit{horizontally} or \textit{vertically}. In this version we store in a proper data structure a matrix $\Upsilon \in \mathbb{R}^{m \times n}$ of the following form ($m > n$ in this example):

\begin{equation*}
    \Upsilon = {(\upsilon_{i,j})}_{i,j} = \begin{tikzpicture}[baseline=-1ex]
        \matrix[%
            matrix of math nodes,
            nodes in empty cells,
            left delimiter={[},right delimiter={]},
            inner xsep=2pt, column sep=6pt,
        ] (m)
        {%
            \vphantom{1} & * & \cdots & * \\
            & \vphantom{1} & \ddots & \vdots \\
            & & \vphantom{1} & * \\
            & & & \vphantom{1} \\
            u_1 & u_2 & \cdots & u_n \\
            \vphantom{1} & \vphantom{1} & \vphantom{1} & \vphantom{1} \\
        };
        \node[rectangle, draw, fit={(m-1-1) (m-6-1)}, inner sep=-1.5pt, text width=22pt] {};
        \node[rectangle, draw, fit={(m-2-2) (m-6-2)}, inner sep=-1.5pt, text width=22pt] {};
        \node[rectangle, draw, fit={(m-3-3) (m-6-3)}, inner sep=-1.5pt, text width=22pt] {};
        \node[rectangle, draw, fit={(m-4-4) (m-6-4)}, inner sep=-1.5pt, text width=22pt] {};
    \end{tikzpicture}
\end{equation*}

\begin{center}
    $u_k \in \mathbb{R}^{m - k + 1},\ 1 \leq k \leq n$
\end{center}
together with the values of the diagonal of $R$, stored in a vector $d \in \mathbb{R}^{n}$. The $*$ entries are the elements of the upper triangular factor computed during the QR factorization, yielded by line 6 of \hyperref[algo: thinQR]{Algorithm 1}. In this way we can lazily perform the products $Qy$ and $Q^T y$ by means of the stored Householder vectors $u_1, \dots, u_n$. On the other hand, to compute a product between the upper part of $\Upsilon$ and an input vector, we reconstruct the upper triangular matrix by taking the elements $\upsilon_{ij}$ such that $j > i$ and attach the vector $d$ as the diagonal of the resulting matrix.
The zeros of the matrix $R$ are ignored.
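For instance, the implicit product with $Q^T$ amounts to applying the reflectors in order (a Julia sketch under our naming, not the project's exact code):
\begin{verbatim}
using LinearAlgebra

# Apply Q^T to z in place, using the Householder vectors stored
# below the diagonal of Upsilon (columns 1..n).
function apply_Qt!(z::AbstractVector, Upsilon::AbstractMatrix)
    m, n = size(Upsilon)
    for k in 1:min(m, n)
        u  = @view Upsilon[k:m, k]     # k-th Householder vector
        zk = @view z[k:m]
        zk .-= (2 * dot(u, zk)) .* u   # z <- (I - 2uu^T) z on the tail
    end
    return z
end
\end{verbatim}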

\begin{algorithm}[H]
    \SetAlgoLined%
    \caption{Thin QR}\label{algo: thinQR}
    \SetKwInOut{Input}{Input}
    \SetKwInOut{Output}{Output}

    \BlankLine%
    \Input{$A \in \mathbb{R}^{m \times n}$}
    \Output{$Q \in \mathbb{R}^{m \times m},\ R \in \mathbb{R}^{m \times n}$ implicit $QR$ factorization of $A$}

    \BlankLine%
    $\Upsilon = copy(A)$ \\
    $d = zeros(\min(m, n))$ \\

    \For{
        $k \in 1 \dots \min(m, n)$
    }{
        $u_k, s_k = householder\_vector(\Upsilon[k:m, k])$\\
        $d_k = s_k$ \\
        $\Upsilon[k:m, k+1:n] = \Upsilon[k:m, k+1:n] - 2u_k(u_k^T \Upsilon[k:m, k+1:n])$\\
        $\Upsilon[k:m, k] = u_k$
    }

    \Return$\Upsilon, d$
\end{algorithm}

\begin{algorithm}[H]
    \SetAlgoLined%
    \caption{householder\_vector}\label{algo: householder_vector}
    \SetKwInOut{Input}{Input}
    \SetKwInOut{Output}{Output}

    \BlankLine%
    \Input{$x \in \mathbb{R}^d$}
    \Output{$u \in \mathbb{R}^{d},\ s \in \mathbb{R}$ householder vector of $x$}

    \BlankLine%
    $s = \norm{x}$ \\
    \If{$x_1 \geq 0$}{
        $s = -s$
    }
    $u = copy(x)$ \\
    $u_1 = u_1 - s$ \\
    $u = u\ / \norm{u}$ \\

    \Return$u, s$
\end{algorithm}
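A line-by-line Julia transcription of \hyperref[algo: householder_vector]{Algorithm 2} (sketch):
\begin{verbatim}
using LinearAlgebra

function householder_vector(x::AbstractVector)
    s = norm(x)
    if x[1] >= 0      # sign chosen to avoid cancellation in u[1]
        s = -s
    end
    u = float.(x)     # copy of x, promoted to floating point
    u[1] -= s
    u ./= norm(u)
    return u, s
end
\end{verbatim}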

We assume $m > n$, as the case $n > m$ is analogous for the complexity analysis. The time complexity of this algorithm is $\Theta\bigl(mn^2 \bigr) \approx \Theta\bigl(n^3 \bigr)$, because $m \approx n$ in (P). We will see in the \hyperref[ch: experiments]{Experiments chapter} that the running time scales linearly with $m$, the number of rows of $\hat{X}$, as expected.

\newpage
\section{L-BFGS}


We follow the notation from \textit{Numerical Optimization}\cite{Numerical-Optimization-2006} and define $f_k = f(x_k)$.

\begin{algorithm}[H]
    \SetAlgoLined%
    \caption{Limited Memory BFGS}\label{algo: L-BFGS}
    \SetKwInOut{Input}{Input}
    \SetKwInOut{Output}{Output}

    \BlankLine%
    \Input{$\textbf{f}: \mathbb{R}^n \longrightarrow \mathbb{R},\ \textbf{x} \in \mathbb{R}^n,\ m \text{ memory, } \epsilon \text{ tolerance}$}
    \Output{${\bf x^*}\ \text{ending point},\ {\bf f(x^*)},\ {\bf \nabla f(x^*)}$}

    \BlankLine%
    $k = 0$ \\
    \While{$\norm{\nabla f_k} \geq \epsilon \norm{\nabla f_0}$} {
        \uIf{storage is empty}{
            $H_k^0 = I$
        }\uElse{
            $H_k^0 = \frac{\langle y_{k-1}, s_{k-1} \rangle}{\norm{y_{k-1}}^2} \cdot I$
        }
        Calculate $p_k = -H_k \nabla{f_k}$ with \hyperref[algo: L-BFGS Two-Loop Recursion]{\textbf{Algorithm 4}} \\
        Choose $\alpha_k$ satisfying the Armijo-Wolfe conditions or with exact line search \\
        $x_{k+1} = x_k + \alpha_k p_k$ \\
        $s_k = x_{k+1} - x_k$ \\
        $y_k = \nabla f_{k+1} - \nabla f_k$ \\
        $curvature = \langle y_k, s_k \rangle$ \\
        $\rho_k = curvature^{-1}$ \\
        \uIf{$curvature \leq 10^{-16}$}{
            free the storage and start again from gradient descent
        }\uElse{
            Discard the oldest triplet $\{s_{k-m}, y_{k-m}, \rho_{k-m}\}$ from storage \\
            Save $s_k, y_k, \rho_k$
        }

        $k = k + 1$
    }
    \Return$x_k$, $f_k$, $\nabla f_k$
\end{algorithm}
\begin{algorithm}[H]
    \SetAlgoLined%
    \caption{Limited Memory BFGS {-} Two-Loop Recursion}\label{algo: L-BFGS Two-Loop Recursion}

    $q = \nabla f_k$ \\
    \For{$i = (k - 1), \dots, (k - m)$}{
        $\alpha_i = \rho_i s_i^T q$ \\
        $q = q - \alpha_i y_i$ \\
    }

    $r = H_k^0 q$ \\
    \For{$i = (k - m), \dots, (k - 1)$}{
        $\beta = \rho_i y_i^T r$ \\
        $r = r + s_i\bigl(\alpha_i - \beta\bigr)$ \\
    }

    \Return$-r$

\end{algorithm}
In our implementation we keep the triplets $(s_k, y_k, \rho_k)$ in a circular buffer with capacity $m$, and the values of $\alpha_i$ in \hyperref[algo: L-BFGS Two-Loop Recursion]{Algorithm 4} in a stack, so that no explicit indices are needed.
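A sketch of the stack-based two-loop recursion in Julia (our naming; \texttt{mem} holds the triplets oldest-to-newest, and \texttt{gamma} is the scaling $\frac{\langle y_{k-1}, s_{k-1} \rangle}{\norm{y_{k-1}}^2}$ of $H_k^0$):
\begin{verbatim}
using LinearAlgebra

function two_loop(gradfk, mem, gamma)   # mem: vector of (s, y, rho)
    q  = copy(gradfk)
    as = Float64[]                      # stack of the alpha values
    for (s, y, rho) in Iterators.reverse(mem)   # newest to oldest
        a = rho * dot(s, q)
        push!(as, a)
        q .-= a .* y
    end
    r = gamma .* q                      # r = H_k^0 q, H_k^0 = gamma*I
    for (s, y, rho) in mem              # oldest to newest
        b = rho * dot(y, r)
        r .+= s .* (pop!(as) - b)       # pop matches the right alpha
    end
    return -r                           # the search direction p_k
end
\end{verbatim}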

In case the curvature of the function is too small, we free the storage and restart with a gradient step.

We prefer an exact line search for computing the step size over an inexact one, since for our problem its computational cost is lower.

\subsection*{Convergence}
To prove that the implemented method converges to the global minimum of the objective, we follow~\cite{convergence_lbfgs} and state the following assumptions about our problem:
\begin{enumerate}
    \item\label{algo: convergence1} $f \in C^2$
    \item\label{algo: convergence2} The level set $\mathcal{L} = \{ x \in \mathbb{R}^n\ |\ f(x) \leq f(x_0) \} $ is convex
    \item\label{algo: convergence3} $\exists\ M_1, M_2 \in \mathbb{R}^+$ such that
          \begin{equation*}
              M_1\norm{z}^2 \leq z^T G(x) z \leq M_2\norm{z}^2\label{eq:6}
          \end{equation*}
          $\forall z \in \mathbb{R}^n$ and $\forall x \in \mathcal{L}$
\end{enumerate}

We follow the publication's notation and define:

\[ G(x) \coloneqq \nabla^{2}f(x) \]
\[ \bar{G}_k(x) \coloneqq \int_0^1 G(x_k + \tau \alpha_k p_k)\, d\tau \]

From Taylor's theorem:

\begin{equation}\label{algo: definition y_k}
    y_k = \bar{G}_k \alpha_k p_k = \bar{G}_k s_k
\end{equation}

The first assumption holds for our problem by definition. The second assumption follows from \autoref{definitions: hessian tomography}. The third assumption is also a consequence of the fact that the Hessian of our objective is constant.

% \[ z_k \coloneqq {\bar{G}_k}^{1/2} s_k \]

\begin{mtheo}
    Let $B_0$ be any symmetric positive definite initial matrix, and let $x_0$ be a starting point for which Assumptions~\ref{algo: convergence1},~\ref{algo: convergence2} and~\ref{algo: convergence3} hold; then the sequence ${x_k}$ generated by the L-BFGS algorithm converges to the minimizer $x^*$ of $f$ linearly.
\end{mtheo}

\begin{mproof}
    Using \autoref{algo: definition y_k} and Assumption~\ref{algo: convergence3}:
    \[ M_1 \norm{s_k}^2 \leq y_k^T s_k \leq M_2 \norm{s_k}^2 \]
    and:
    \[ \frac{\norm{y_k}^2}{y_k^T s_k} = \frac{s_k^T \bar{G}_k^2 s_k}{s_k^T \bar{G}_k s_k} \]
    Both trace and determinant can be expressed in terms of the trace and determinant of the starting matrix from which the approximate Hessian is constructed:
    \begin{align*}
        \Tr(B_{k+1}) &\leq \Tr(B_k^{(0)}) + \Tilde{m} M_2 \leq M_3 \\
        \det(B_{k+1}) &= \det(B_k^{(0)}) \cdot \prod_{l=0}^{\Tilde{m}-1} \frac{y_l^T s_l}{s_l^T B_k^{(l)} s_l} \geq \det\left(B_k^{(0)} {\left(\frac{M_1}{M_3}\right)}^{\Tilde{m}}\right) \geq M_4
    \end{align*}
    where $\Tilde{m}$ is the memory size and $M_3$ and $M_4$ are chosen appropriately in $\mathbb{R}^+$.

    From these two bounds we have that, for some constant $\delta > 0$:

    \[ \cos(\theta_k) = \frac{s_k^T B_k s_k}{\norm{s_k} \norm{B_k s_k}} \geq \delta \]
    With exact line search, the Armijo condition $f(x_k + \alpha_k p_k) \leq f(x_k) + m_1 \alpha_k \nabla f(x_k)^T p_k$ is always satisfied, provided that the constant $m_1$ does not exclude the minimum $x_*$; the strong Wolfe condition $\norm{\nabla f(x_k + \alpha_k p_k)} \leq m_3 \norm{\nabla f(x_k)}$ is also always satisfied, since $\norm{\nabla f(x_k + \alpha_k p_k)} = O(u)$. From these two conditions and Assumptions~\ref{algo: convergence1} and~\ref{algo: convergence2} it follows that:
    \begin{align*}
        & f(x_{k+1}) - f(x_*) \leq (1 - c \cos^2(\theta_k)) (f(x_k) - f(x_*)) \\
        \implies& f(x_k) - f(x_*) \leq {(1 - c \cdot \delta^2)}^k (f(x_0) - f(x_*)) \\
        \implies& f(x_k) - f(x_*) \leq r^k (f(x_0) - f(x_*))
    \end{align*}
    for some $r \in [0, 1)$. Using Assumption~\ref{algo: convergence3}:
    \begin{gather*}
        \frac{1}{2} M_1 \norm{x_k - x_*}^2 \leq f(x_k) - f(x_*) \\
        \implies \norm{x_k - x_*} \leq r^{k/2} {\left( 2 \frac{f(x_0) - f(x_*)}{M_1} \right)}^{1/2}
    \end{gather*}
    so the sequence $\{x_k\}$ converges linearly.
\end{mproof}

The implementation of L-BFGS that uses the Armijo-Wolfe line search also satisfies the assumptions, so it too converges linearly to $x_*$.

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% TeX-command-extra-options: "-shell-escape"
%%% End:
221
Report/(4) - experiments/experiments.aux
Normal file
@ -0,0 +1,221 @@
220
Report/(4) - experiments/experiments.tex
Normal file
@ -0,0 +1,220 @@
|
||||
\chapter{Experiments}\label{ch: experiments}
|
||||
In this chapter we present the results of the experiments run on both algorithms as well as a comparison of the two methods in terms of accuracy and time scalability.
|
||||
|
||||
To test the behaviour of the two methods we handle both cases in which the matrix $\hat{X}$ is well-conditioned, with $\kappa(\hat{X}) \approx 5$, and ill-conditioned, with $\kappa(\hat{X}) \approx 5 \times 10^5$. To accomplish so, we randomly generated the matrix $X$ forcing its values to be in the range $[-1, 1]$, the dimensions $m = 1000$ and $n = 20$ (except for time and memory scalability tests), and, as we have seen in \autoref{subsec:conditioning}, since we can control the conditioning directly with the \textit{hyperparameter} $\lambda$, we choose for the first case $\lambda = 10^{-4}$ and for the latter $\lambda = 10^{-12}$.
|
||||
|
||||
For the QR factorization we check how the relative error and residual change with respect to different values of $\lambda$. Then, we confirm the backward stability of the decomposition over different values of $\lambda$ and check its forward stability as well.
|
||||
|
||||
For what concerns L-BFGS we fix the relative tolerance $\epsilon = 10^{-14}$, the memory size $k = 7$ and the maximum number of function evaluations to $200$, knowing that the function we have to optimize can be easily optimized by the method.
|
||||
|
||||
The last kind of test we present concerns the scalability of the methods in terms of time and memory, which we compared by generating random matrices $\hat{X} \in \mathbb{R}^{(m+n) \times m}$ with increasing $m$ and $n$ separately. As mentioned before, we ran this experiment both when $\hat{X}$ is ill-conditioned and when it is well-conditioned. For the thin-QR factorization (a Householder thin-QR of a $p \times q$ matrix costs $O(pq^2)$ floating point operations) we expect a linear dependency between the number of rows and the time needed to compute the solution, assuming a fixed number of columns. If instead we vary the number of columns, we expect a quadratic dependency.

In \autoref{sec:other_experiments} we first explore in more depth the effect of the memory size for L-BFGS, and then we provide a broader comparison with other Quasi-Newton methods we implemented by hand (even though this was not strictly required by the project instructions).

All tests have been executed with the benchmarking library \texttt{BenchmarkTools.jl}~\cite{BenchmarkTools}, which ignores startup and compilation time, and each measurement has been repeated 10 times in order to get accurate estimates.

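As an illustration, a single measurement of this kind can be set up as follows (a minimal sketch; \texttt{solve\_qr} is a placeholder for one of our solvers):

\begin{minted}{julia}
using BenchmarkTools

# solve_qr is a placeholder for the solver under test.
# @benchmark interpolates ($) its arguments so that setup cost is excluded;
# compilation time is ignored and statistics are taken over the samples.
b = @benchmark solve_qr($X̂, $ŷ) samples=10
display(b)   # reports minimum/median/mean time and allocated memory
\end{minted}
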
\section{QR}
Since we know from theory that the QR decomposition is backward stable, we expect that $\frac{\norm{\hat{X} - Q R}}{\norm{\hat{X}}} \approx u$, or, more explicitly, that for $QR = \hat{X} + \delta \hat{X}$ we have $\frac{\norm{\delta \hat{X}}}{\norm{\hat{X}}} = O(u)$. The results in \autoref{fig:QR-error-lambda} show a decreasing trend for relative error and residual when increasing $\lambda$, and hence decreasing the condition number $\kappa(\hat{X})$. The errors are acceptable even for the smallest lambda, $\lambda = 10^{-16}$, for which $\kappa(\hat{X}) \approx 5 \times 10^{15}$. The algorithm is backward stable as well, as can be noticed from the green part of the plot.\\
To check the forward error we QR-decomposed the original matrix $\hat{X}$ to get $Q$ and $R$, and then we perturbed $\hat{X}$ with a random matrix scaled by a factor $\delta = 10^{-10}$. We then ran another QR decomposition on the perturbed matrix to get the factors $\tilde{Q}$ and $\tilde{R}$. Finally, we evaluated $\norm{Q - \tilde{Q}}$ and $\frac{\norm{R - \tilde{R}}}{\norm{R}}$, which are both much larger than the backward error, as reported in \autoref{fig:QR-forward}. For a fixed $\kappa(\hat{X})$, the forward error on $Q$ is slightly worse than on $R$, due to the orthogonality property that must be maintained in the factorization. However, we can see a generally decreasing trend of the forward error as $\lambda$ grows and the condition number of $\hat{X}$ decreases.

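The measurement itself is direct; a minimal sketch (shown here with Julia's built-in \texttt{qr} from \texttt{LinearAlgebra} for brevity, while the experiments use our own thin-QR implementation):

\begin{minted}{julia}
using LinearAlgebra

F = qr(X̂); Q, R = Matrix(F.Q), F.R        # thin factors of the original matrix
backward = norm(X̂ - Q * R) / norm(X̂)      # backward error, expected O(u)

δ = 1e-10
F̃ = qr(X̂ + δ * randn(size(X̂)))            # factorize the perturbed matrix
Q̃, R̃ = Matrix(F̃.Q), F̃.R
forward_Q = norm(Q - Q̃)                   # forward error on Q
forward_R = norm(R - R̃) / norm(R)         # relative forward error on R
\end{minted}
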
\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/QR-lambda-error.png} % chktex 8
\caption{\textit{QR decomposition errors and backward stability for different} $\lambda$}\label{fig:QR-error-lambda}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/QR-forward_error.png} % chktex 8
\caption{\textit{QR factorization forward stability on $Q$ and $R$ for different} $\lambda$}\label{fig:QR-forward}
\end{subfigure}
\caption{\textit{Errors and stability of the QR decomposition for different values of} $\lambda$}\label{fig:qrtests}
\end{figure}

\vspace{1em}%% insert computations (square matrices)

% The least squares problem solved by QR factorization is also stable and we expect that the relative error $\frac{\norm{w - w^*}}{\norm{w^*}} = O(\kappa(\hat{X}) u)$ where $w$ is the solution found by the algorithm, $w^*$ is the optimal solution and $\kappa(\hat{X})$ is the condition number of the matrix $\hat{X}$.

% \textbf{HERE IS} $O(u)$ \textbf{AND NOT} $O(\kappa(\hat{X}u))$

% \vspace{1em}%% insert computations

\newpage
\section{L-BFGS}
For the first experiment regarding this algorithm we compute the relative gap, the residual and the number of iterations employed by the algorithm to converge. The relative gap is defined as
\[
\frac{\norm{w- w^*}}{\norm{w^*}}
\]
where $w$ is the solution found by our algorithm and $w^*$ is the \textit{ground truth} computed by Julia's standard linear system solver.\\
The residual, instead, is defined as
\[
\frac{\norm{\hat{X}w - \hat{y}}}{\norm{\hat{y}}}
\]
The results are shown in \autoref{fig:gradnorm-res-rel} for both choices of $\kappa(\hat{X})$. It is evident from the plots that the convergence of the method is linear and that it is able to compute a relatively good solution in a small number of iterations.

\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/LBFGS-iterations-gradient-ill.png} % chktex 8
\caption{\textit{Ill-conditioned matrix}}\label{fig:gradnorm-res-rel-ill}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/LBFGS-iterations-gradient-well.png} % chktex 8
\caption{\textit{Well-conditioned matrix}}\label{fig:gradnorm-res-rel-well}
\end{subfigure}
\caption{$\norm{\nabla f}$\textit{, residual and relative error of L-BFGS execution on ill- and well-conditioned matrices}}\label{fig:gradnorm-res-rel}
\end{figure}

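Concretely, both metrics are computed against Julia's reference solution (a minimal sketch; \texttt{lbfgs} is a placeholder for our solver):

\begin{minted}{julia}
using LinearAlgebra

w_star = X̂ \ ŷ        # Julia's reference ("ground truth") solution
w = lbfgs(X̂, ŷ)       # placeholder call to our L-BFGS implementation

rel_gap  = norm(w - w_star) / norm(w_star)
residual = norm(X̂ * w - ŷ) / norm(ŷ)
\end{minted}
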
The other test we propose concerns the convergence of the method when different line search algorithms are used. We checked how the gradient norm changes when using Exact Line Search and Armijo--Wolfe Line Search (AWLS), on the well-conditioned matrix only.

\begin{figure}[htbp]
\centering
\includegraphics[width=0.75\linewidth]{(4) - experiments/images/LBFGS-LS-gradient-comparison.png} % chktex 8
\caption{\textit{Line Search algorithms comparison}}\label{fig:LS-comparison}
\end{figure}

From \autoref{fig:LS-comparison} we can notice that the exact line search behaves better than the inexact one because of the nature of the function we are optimizing: since the objective is quadratic, the exact minimizer along the search direction is available in closed form. AWLS computes a step size which may lead to some instability, but it does converge.

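The closed-form step is cheap to evaluate; a minimal sketch of that computation (illustrative, matching the formula rather than our exact code):

\begin{minted}{julia}
using LinearAlgebra

# Exact line search for f(w) = ‖X̂w - ŷ‖²: minimize φ(α) = f(w + αd).
# Setting φ'(α) = 0 gives α = -(gᵀd) / (2‖X̂d‖²), with g = ∇f(w) = 2X̂ᵀ(X̂w - ŷ).
function exact_step(X̂, ŷ, w, d)
    g = 2 * X̂' * (X̂ * w - ŷ)
    return -dot(g, d) / (2 * norm(X̂ * d)^2)
end
\end{minted}
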
\section{Comparison between QR and L-BFGS}
The tests have been performed by fixing one of the two dimensions, either $m = 200$ or $n = 50$, and varying the other from an initial value of $500$ to a value of $5500$, at intervals of $500$. The results of fixing $m$ and varying $n$ are summarized in \autoref{fig:QRvsLBFGS-time-comparison-n}, which shows a linear growth of the running time with increasing $n$ for the QR decomposition and a better performance for L-BFGS, in both the ill-conditioned and the well-conditioned case.
The allocated memory follows the same trend as the running time, as expected.

\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/QRvsLBFGS-scalability-time-illcond-n.png} % chktex 8
\caption{\textit{Ill-conditioned matrix}}\label{fig:time-comparison-illcond-n}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/QRvsLBFGS-scalability-time-wellcond-n.png} % chktex 8
\caption{\textit{Well-conditioned matrix}}\label{fig:time-comparison-wellcond-n}
\end{subfigure}
\caption{\textit{Time and Memory scalability comparison of QR and L-BFGS on ill and well-conditioned matrices, varying \textbf{n}}}\label{fig:QRvsLBFGS-time-comparison-n}
\end{figure}

Instead, if we fix $n$ and let $m$ vary, we get the curves shown in \autoref{fig:QRvsLBFGS-time-comparison-m}. Both the running time and the allocated memory of QR grow more or less quadratically with $m$, which is the number of columns of $\hat{X} \in \mathbb{R}^{(m+n) \times m}$, confirming what the theory suggests.

\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/QRvsLBFGS-scalability-time-illcond-m.png} % chktex 8
\caption{\textit{Ill-conditioned matrix}}\label{fig:time-comparison-illcond-m}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/QRvsLBFGS-scalability-time-wellcond-m.png} % chktex 8
\caption{\textit{Well-conditioned matrix}}\label{fig:time-comparison-wellcond-m}
\end{subfigure}
\caption{\textit{Time and Memory scalability comparison of QR and L-BFGS on ill and well-conditioned matrices, varying \textbf{m}}}\label{fig:QRvsLBFGS-time-comparison-m}
\end{figure}

For QR the allocated memory is in the order of MiB even in the worst case, while L-BFGS allocates much less memory, in the order of KiB.

The conditioning of the matrix has no impact on the time taken to compute a solution for either of the two algorithms; rather, for L-BFGS it affects the quality of the solution when dealing with a very flat function (small $\lambda$). When the function is flat its curvature is low and the gradients change slowly, so the algorithm struggles to descend rapidly towards the minimum and to reach a reasonable relative error.

\section{Other Experiments}
\label{sec:other_experiments}
\subsection{The Effect of the Memory Size}
It is interesting to check the behaviour of L-BFGS when the memory size changes. We compare the decrease of the relative error at each iteration for memory sizes varying from $1$ to $11$, as shown in \autoref{fig:error-memory-size}:
\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/LBFGS-iterations-memory-ill.png} % chktex 8
\caption{\textit{Ill-conditioned matrix}}\label{fig:error-memory-size-illcond}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/LBFGS-iterations-memory-well.png} % chktex 8
\caption{\textit{Well-conditioned matrix}}\label{fig:error-memory-size-wellcond}
\end{subfigure}
\caption{\textit{The effect of the memory size}}\label{fig:error-memory-size}
\end{figure}

In accordance with the suggestions provided by \cite{Numerical-Optimization-2006}, the memory size $k$ should be chosen such that $3 \leq k \leq 20$, as this is empirically a good trade-off between the number of function evaluations and the number of additional operations required to reconstruct the Hessian approximation with the two-loop recursion (\autoref{algo: L-BFGS Two-Loop Recursion}). However, since the function to be optimized is quadratic, the algorithm finds the optimal solution quickly more or less independently of the memory size, although its convergence still depends on the conditioning $\kappa(\hat{X})$.

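For reference, a compact sketch of the two-loop recursion, which implicitly applies the inverse Hessian approximation $H_k$ to the gradient; it follows \cite{Numerical-Optimization-2006}, while our actual implementation differs in bookkeeping details:

\begin{minted}{julia}
# Two-loop recursion: returns H_k * g from the stored pairs, never forming H_k.
# S, Y hold the past steps s_i = x_{i+1} - x_i and gradient differences
# y_i = ∇f_{i+1} - ∇f_i, ordered oldest to newest (at least one pair assumed);
# g is the current gradient.
function two_loop(g, S, Y)
    q = copy(g)
    ρ = [1 / dot(y, s) for (s, y) in zip(S, Y)]
    α = similar(ρ)
    for i in length(S):-1:1            # first loop: newest to oldest
        α[i] = ρ[i] * dot(S[i], q)
        q  .-= α[i] .* Y[i]
    end
    γ = dot(S[end], Y[end]) / dot(Y[end], Y[end])   # initial scaling H⁰ = γI
    r = γ .* q
    for i in 1:length(S)               # second loop: oldest to newest
        β  = ρ[i] * dot(Y[i], r)
        r .+= (α[i] - β) .* S[i]
    end
    return r                           # r ≈ H_k * g; the search direction is -r
end
\end{minted}
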
When the memory size is $1$, the algorithm essentially behaves like a (scaled) gradient descent, and it still reaches a convergence rate similar to the one obtained with higher memory sizes. For higher memory sizes the convergence rate is almost indistinguishable in the well-conditioned case (\autoref{fig:error-memory-size-wellcond}). In the ill-conditioned case, shown in \autoref{fig:error-memory-size-illcond}, the algorithm still converges in $16$ iterations regardless of the memory size $k$, but the final relative error is the same, and rather poor, in every setting. This is a consequence of the fact that the algorithm terminates in a flat region, where the curvature is so low that the stopping criterion imposed on the gradient is satisfied even though the iterate is still a bad approximation of the optimum.

\subsection{A Comparison of Quasi-Newton Methods}
To further check the behaviour of our implementation of L-BFGS, we implemented and tested a version of BFGS. At the beginning of this section we provide two additional tests, performed only on well-conditioned matrices, in which we compare the two solvers. As far as the setup is concerned, we stick to the default setup stated at the beginning of \autoref{ch: experiments}, and for BFGS we set the tolerance to the same value used for L-BFGS. The first test, shown in \autoref{fig:BFGSvsLBFGS-rate}, shows that on the least squares problem the two algorithms are almost identical in terms of convergence rate: in the plot, the relative error, the residual and the gradient norm are almost equal for the two algorithms. To understand the differences between the implementations more deeply, we also checked their time and memory scalability.
It is not surprising that, as \autoref{fig:BFGSvsLBFGS-scalability} suggests, BFGS is much slower than its limited-memory version in finding the optimum, even for this small optimization problem.
This reflects the theory and confirms that BFGS is more expensive in terms of time and memory with respect to L-BFGS.

\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/BFGS-LBFGS-gradient-comparison.png} % chktex 8
\caption{\textit{Convergence rate}}\label{fig:BFGSvsLBFGS-rate}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.46\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/BFGSvsLBFGS-time-m.png} % chktex 8
\caption{\textit{Time and memory scalability}}\label{fig:BFGSvsLBFGS-scalability}
\end{subfigure}
\caption{\textit{BFGS vs L-BFGS}}\label{fig:BFGSvsLBFGS-comparison}
\end{figure}

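The dense update at the core of BFGS explains this gap: it maintains a full $n \times n$ inverse Hessian approximation. A minimal sketch of the standard update step (illustrative; the surrounding solver loop is omitted):

\begin{minted}{julia}
using LinearAlgebra

# Standard BFGS update of the dense inverse Hessian approximation H, given
# the step s = x⁺ - x and gradient difference y = ∇f⁺ - ∇f (requires yᵀs > 0).
function bfgs_update(H, s, y)
    ρ = 1 / dot(y, s)
    V = I - ρ * y * s'       # dense n×n intermediate: O(n²) time and memory
    return V' * H * V + ρ * s * s'
end
\end{minted}

L-BFGS avoids exactly this cost by storing only the last $k$ pairs $(s_i, y_i)$ and applying the two-loop recursion shown earlier.
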
To further highlight the effectiveness of the implementation, we analyze other Quasi-Newton methods and provide a final comparison between a relevant subset of them: L-BFGS, BFGS, DFP and SR1. Both DFP and BFGS are also tested in their variants that use the Dogleg (trust region) alternative to line search~\cite{Dogleg}.
These algorithms have been implemented and then optimized for efficiency in memory allocation, since they are prone to allocating large amounts of memory.

The plot in \autoref{fig:Quasi-newton-time} shows the running time of the algorithms on well-conditioned matrices of growing size. In \autoref{fig:Quasi-newton-dogleg-time} we can see that combining the update formula of BFGS or DFP with the Dogleg (trust region) strategy introduces considerable inefficiency, since the Dogleg method only approximates the minimizer inside the trust region, whereas with line search the exact step is available.
The plot in \autoref{fig:Quasi-newton-no-dogleg-time} gives a clearer visualization of the difference in efficiency between the methods, since the much higher running times of the Dogleg variants compress the rest of the plot. In particular, the results agree with the theory: we expect exact line search to outperform the Dogleg method at finding appropriate steps, and we expect L-BFGS to be the fastest method, followed by BFGS and SR1, which are almost equally efficient on average. The slowest is DFP, which is more than twice as slow as the two previously mentioned algorithms.

\begin{figure}[H]
\centering
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/Quasi-Newton-Comparison-time-wellcond-Dogleg.png}
\caption{\textit{With Dogleg}}\label{fig:Quasi-newton-dogleg-time}
\end{subfigure}
\hspace{0cm}
\begin{subfigure}{0.48\linewidth}
\centering
\includegraphics[width=\linewidth]{(4) - experiments/images/Quasi-Newton-Comparison-time-wellcond.png}
\caption{\textit{Without Dogleg}}\label{fig:Quasi-newton-no-dogleg-time}
\end{subfigure}
\caption{\textit{Quasi-Newton methods running time comparison}}\label{fig:Quasi-newton-time}
\end{figure}

The last test concerns the memory allocated by the algorithms. It is the equivalent of the time scalability test, but the metric is the average number of bytes allocated by each algorithm.
Our implementation has been optimized as much as possible, for instance by using Julia's in-place operators in order to minimize the number of allocations; a sketch of this style is shown after \autoref{fig:Quasi newton comparison memory}.
The last plot we display, \autoref{fig:Quasi newton comparison memory}, shows the trend of the bytes allocated by the algorithms as the problem size grows.
Methods that converge more slowly, or that use more memory per iteration because of more complex update rules, perform worse.

\begin{figure}[htbp]
\centering
\includegraphics[width=0.65\linewidth]{(4) - experiments/images/Quasi-Newton-Comparison-memory-wellcond.png}
\caption{\textit{Memory allocation of the different Quasi-Newton methods}}\label{fig:Quasi newton comparison memory}
\end{figure}
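
As an example of the in-place style mentioned above, the gradient of the least squares objective can be evaluated without temporaries by reusing preallocated buffers (a minimal sketch; \texttt{mul!} is Julia's in-place matrix multiplication):

\begin{minted}{julia}
using LinearAlgebra

# Allocating version: every operation creates a new temporary array.
g = 2 .* (X̂' * (X̂ * w .- ŷ))

# In-place version: the buffers are allocated once, outside the solver loop.
w = randn(m)       # current iterate (length m)
r = similar(ŷ)     # residual buffer (length n + m)
g = similar(w)     # gradient buffer (length m)
mul!(r, X̂, w)      # r = X̂w
r .-= ŷ            # r = X̂w - ŷ
mul!(g, X̂', r)     # g = X̂ᵀ(X̂w - ŷ)
g .*= 2            # g = ∇f(w)
\end{minted}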
BIN
Report/(4) - experiments/images/BFGSvsLBFGS-time-m.png
Normal file
BIN
Report/(4) - experiments/images/LBFGS-LS-gradient-comparison.png
Normal file
BIN
Report/(4) - experiments/images/LBFGS-iterations-memory-ill.png
Normal file
BIN
Report/(4) - experiments/images/LBFGS-iterations-memory-well.png
Normal file
BIN
Report/(4) - experiments/images/QR-forward_error.png
Normal file
BIN
Report/(4) - experiments/images/QR-lambda-error.png
Normal file
168
Report/(6) - conclusion/conclusion.aux
Normal file
5
Report/(6) - conclusion/conclusion.tex
Normal file
@ -0,0 +1,5 @@
\chapter{Concluding Remarks}\label{ch: conclusion}

An implementation of the thin-QR factorization and of limited-memory BFGS has been presented, the latter paired with exact line search in order to solve the least squares problem more efficiently. Convergence of both methods has been proven and tested.
Implementations of BFGS and DFP, with both exact line search and a trust-region approach based on the dogleg method, and of SR1 have also been presented and tested.
From the experiments it is clear that L-BFGS outperforms all the other Quasi-Newton methods on the least squares problem, even when a small memory size is used. The QR method, instead, performs better on ill-conditioned matrices, at the cost of a higher memory usage.
179
Report/(7) - proofs/proofs.aux
Normal file
52
Report/(7) - proofs/proofs.tex
Normal file
@ -0,0 +1,52 @@
\chapter{Proofs}\label{ch: proofs}

\begin{mlemma}\label{proofs: fullcolumn}\label{proof:triangularsystem}
$\hat{X} \text{ has full column rank } \iff \hat{X}^T\hat{X} \succ 0$
\end{mlemma}

\begin{mproof}
To show that $\hat{X}^T\hat{X} = XX^T + \lambda^2 I_m \succ 0$ for $\lambda > 0$, we can consider the quadratic form $x^T (XX^T + \lambda^2 I_m) x$. Let $B = XX^T \succeq 0$.
\begin{align*}
x^T (B + \lambda^2 I_m) x &= x^{T}Bx + \lambda^2 x^{T}I_{m}x \\
&= x^{T}Bx + \lambda^2 \norm{x}^2
\end{align*}

Since $B$ is positive semidefinite, we have $x^{T}Bx \geq 0\ \ \forall x \in \mathbb{R}^m$. Additionally, $\lambda^2 \norm{x}^2 > 0\ \ \forall x \neq 0$. Therefore, $x^T (B + \lambda^2 I_m) x > 0$ for all non-zero vectors $x$, meaning that $\hat{X}^T\hat{X} \succ 0$. The equivalence with full column rank follows from $x^T \hat{X}^T \hat{X} x = \norm{\hat{X}x}^2$: positive definiteness holds if and only if $\hat{X}x \neq 0$ for every $x \neq 0$, i.e.\ if and only if the columns of $\hat{X}$ are linearly independent.
\end{mproof}

\begin{mlemma}\label{proof:eigenvalues_translation}
$\alpha \in Sp(A) \iff (\alpha + \lambda) \in Sp(A + \lambda I)$
\end{mlemma}

\begin{mproof}
$Av = \alpha v \iff (A + \lambda I)v = Av + \lambda v = \alpha v + \lambda v = (\alpha + \lambda)v$
\end{mproof}

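For instance, with $A = \diag(2, 3)$ and $\lambda = 1$ we have $Sp(A) = \{2, 3\}$ and $Sp(A + I) = \{3, 4\}$: the whole spectrum is translated by $\lambda$. This is precisely how the term $\lambda^2 I_m$ in $\hat{X}^T\hat{X} = XX^T + \lambda^2 I_m$ shifts the eigenvalues of $XX^T$ away from zero.
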
\begin{mlemma}\label{proofs: eigenvalues}
The singular values of the matrix $XX^T$, $X \in \mathbb{R}^{m \times n}$,\newline are $\{\sigma_1^2 \dots \sigma_n^2, 0 \dots 0\}$, with $\sigma_1 \dots \sigma_n$ being the singular values of $X$.
\end{mlemma}

\begin{mproof}
Consider the Singular Value Decomposition of the rank-$n$ matrix $X$
\[
X = U \Sigma V^T
\]
with $\Sigma = \diag(\sigma_1, \dots, \sigma_n) \in \mathbb{R}^{m \times n}$.
Then
\[
XX^T = U \Sigma V^T V \Sigma^T U^T = U \Sigma \Sigma^T U^T
\]
with
\[
\Sigma \Sigma^T = \diag(\sigma_1^2, \dots, \sigma_n^2, 0, \dots, 0) \in \mathbb{R}^{m \times m}
\]
Hence, $XX^T$ has exactly $m$ singular values, of which $m-n$ are zeros.
\end{mproof}

%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../main"
%%% TeX-command-extra-options: "-shell-escape"
%%% End:
102
Report/auto/main.el
Normal file
51
Report/document.bib
Normal file
@ -0,0 +1,51 @@
@book{Numerical-Optimization-2006,
  author    = {Jorge Nocedal and Stephen J. Wright},
  title     = {Numerical Optimization},
  publisher = {Springer},
  address   = {New York, NY, USA},
  edition   = {2},
  year      = {2006},
}

@article{convergence_lbfgs,
  author  = {Liu, Dong C. and Nocedal, Jorge},
  title   = {On the limited memory BFGS method for large scale optimization},
  journal = {Mathematical Programming},
  volume  = {45},
  number  = {1--3},
  pages   = {503--528},
  month   = {8},
  year    = {1989},
  doi     = {10.1007/bf01589116},
}

@article{BenchmarkTools,
  author        = {{Chen}, Jiahao and {Revels}, Jarrett},
  title         = {{Robust benchmarking in noisy environments}},
  journal       = {arXiv e-prints},
  keywords      = {Computer Science - Performance, 68N30, B.8.1, D.2.5},
  year          = {2016},
  month         = {8},
  eid           = {arXiv:1608.04295},
  archiveprefix = {arXiv},
  eprint        = {1608.04295},
  primaryclass  = {cs.PF},
  adsurl        = {https://ui.adsabs.harvard.edu/abs/2016arXiv160804295C},
  adsnote       = {Provided by the SAO/NASA Astrophysics Data System},
}

@inproceedings{Dogleg,
  author    = {N. Ampazis and S. Spirou and S. Perantonis},
  title     = {Training Feedforward Neural Networks with the Dogleg Method and BFGS Hessian Updates},
  booktitle = {Neural Networks, IEEE - INNS - ENNS International Joint Conference on},
  volume    = {2},
  pages     = {1138},
  issn      = {1098-7576},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  month     = {7},
  year      = {2000},
  doi       = {10.1109/IJCNN.2000.857827},
  url       = {https://doi.ieeecomputersociety.org/10.1109/IJCNN.2000.857827},
  abstract  = {In this paper, we introduce an advanced optimization algorithm for training feedforward neural networks. The algorithm combines the BFGS Hessian update formula with a special case of trust region techniques, called the Dogleg method, as an alternative technique to line search methods. Simulations regarding classification and function approximation problems are presented which reveal a clear improvement both in convergence and success rates over standard BFGS implementations.},
}
BIN
Report/images/UniPi Logo.png
Normal file
49
Report/main.aux
Normal file
189
Report/main.bbl
Normal file
2422
Report/main.bcf
Normal file
15
Report/main.blg
Normal file
2099
Report/main.log
Normal file
BIN
Report/main.pdf
Normal file
88
Report/main.run.xml
Normal file
BIN
Report/main.synctex.gz
Normal file
292
Report/main.tex
Normal file
@ -0,0 +1,292 @@
\documentclass[12pt]{report}
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%% Load Packages %%
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
\usepackage[
%top=2cm,
%bottom=2cm,
%left=2cm,
%right=2cm,
headheight=20pt,
centering
]{geometry}
\geometry{a4paper}


\usepackage[utf8]{inputenc} %% use UTF-8, maybe not needed since 2018
\usepackage[english]{babel} %% language

\pagestyle{headings}

\usepackage{scrlayer-scrpage}

\ifoot[]{}
\cfoot[]{}
\ofoot[\pagemark]{\pagemark}
\pagestyle{scrplain}

\usepackage[
backend=biber,
style=ieee,
sorting=ynt
]{biblatex} %% for citations
\addbibresource{document.bib}
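%% note: with backend=biber, sorting=ynt orders entries by year, then name,
%% then title (one of biblatex's built-in sorting templates).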

\usepackage{import} %% specify path for import

%% math packages
\usepackage{graphicx} %% for pictures
\usepackage{float}
\usepackage{amssymb} %% math symbols
\usepackage{amsmath} %% math matrix etc
\usepackage{tabularray} %% better tables
\usepackage{booktabs} %% rules for tables
\usepackage{mathrsfs}
\usepackage{mathtools}
\usepackage{algpseudocode} %% loads algorithmicx
\usepackage{amsthm}
\usepackage{thmtools} %% theorems

%% plot packages
\usepackage{pgfplots} %% plots used with \begin{tikzpicture}
\usepackage{tikz} %% for pictures
\usetikzlibrary{trees}
\usetikzlibrary{matrix, positioning, fit}
\pgfplotsset{width=10cm,compat=newest}
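%% usage sketch (illustrative, not part of the build) for pgfplots:
% \begin{tikzpicture}
%   \begin{axis}[xlabel={iterations}, ylabel={residual}, ymode=log]
%     \addplot coordinates {(1,1e-1) (2,1e-3) (3,1e-6)};
%   \end{axis}
% \end{tikzpicture}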

%% design packages
\usepackage{enumitem} %% for lists and enumerating
\usepackage{color}
\usepackage{xcolor,colortbl} % xcolor for defining colors, colortbl for table colors
\usepackage{makecell} %% for multiple lines in cell of table
\usepackage{cancel}
\usepackage{pgfornament} %% ornaments
\usepackage{multicol}
\usepackage{subcaption}
\usepackage{bold-extra}

%% load last
\usepackage{csquotes} %% correct language also for citations
\usepackage[hidelinks]{hyperref} %% links for table of contents, load last
\usepackage{bookmark} %% for better table of contents


%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
%% Configuration of the packages %%
%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

\linespread{1}
\raggedbottom %% spaces if page is empty % chktex 1

%% set max table of contents depth (3 -> subsubsection)
\setcounter{tocdepth}{3}
\setcounter{secnumdepth}{3}

%% use bar instead of arrow for vectors
\renewcommand{\vec}[1]{\bar{#1}}
%% easy norm
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}

\DeclareMathOperator{\Tr}{tr}

% argmin and argmax
\DeclareMathOperator*{\argmax}{argmax}
\DeclareMathOperator*{\argmin}{argmin}
\DeclareMathOperator{\diag}{diag}
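%% usage sketch (illustrative): in display math,
%%   $\argmin_{w} \norm{Xw - y}^2$  %% starred operator puts the subscript below
%%   $\Tr(\diag(d))$                %% upright tr and diag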

%% itemize with less vertical space (use olditemize for default behaviour)
\let\olditemize=\itemize%% old itemize
\let\endolditemize=\enditemize%% old end itemize
\renewenvironment{itemize}{\olditemize\itemsep-0.2em}{\endolditemize}

%% items in itemize emph+box
%% usage: \ieb{Class:} for simple item
%% \ieb[4cm]{Class:} for specific size of box
\newcommand{\ieb}[2][2cm]{
\makebox[#1][l]{\emph{#2}}
} %% TODO: replace with description environment (? maybe)

% less vertical space around align & align*
\newcommand{\zerodisplayskips}{
\setlength{\abovedisplayskip}{0pt}
\setlength{\belowdisplayskip}{0pt}
\setlength{\abovedisplayshortskip}{0pt}
\setlength{\belowdisplayshortskip}{0pt}
}
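%% note: \zerodisplayskips only assigns the lengths when it is called; a
%% common way to apply it at every font-size switch (a sketch, assuming
%% etoolbox is loaded) is:
% \usepackage{etoolbox}
% \appto{\normalsize}{\zerodisplayskips}
% \appto{\small}{\zerodisplayskips}
% \appto{\footnotesize}{\zerodisplayskips}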

% make dotfill use all the space available
\renewcommand{\dotfill}{
\leavevmode\cleaders\hbox to 1.00em{\hss .\hss }\hfill\kern0pt } % chktex 1 chktex 26

\setlength{\fboxsep}{-\fboxrule} % for debugging


%% PACKAGE algorithm
\usepackage[ruled,vlined,linesnumbered]{algorithm2e}
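%% usage sketch (illustrative) for algorithm2e:
% \begin{algorithm}
%   \caption{Example iteration}
%   \KwIn{starting point $x_0$}
%   \While{not converged}{
%     update $x$\;
%   }
% \end{algorithm}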


%% PACKAGE tabularray
\UseTblrLibrary{amsmath}


%% PACKAGE color
\definecolor{red}{rgb}{1, 0.1, 0.1}
\definecolor{lightgreen}{rgb}{0.55, 0.87, 0.47}
\definecolor{gray}{rgb}{0.3, 0.3, 0.3}
\newcommand{\lgt}{\cellcolor{lightgreen}} %% light green in tables
\newcommand{\gry}{\textcolor{gray}} %% gray text
\newcommand{\rd}{\textcolor{red}} %% red text
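%% usage sketch (illustrative): \rd{flagged value} for red text,
%% \gry{secondary note} for gray text, and \lgt at the start of a
%% table cell (via colortbl) to shade it light green.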

%% PACKAGE minipage
\newcommand{\thend}[1]{\begin{center}
\begin{minipage}[c][1em][c]{#1}
\dotfill{}
\end{minipage}
\end{center}}
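%% usage sketch (illustrative): \thend{0.4\textwidth} draws a centered
%% dotted rule of the given width, e.g. to mark the end of a worked example.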


%% PACKAGE thmtools
\declaretheoremstyle[
headfont=\normalfont\bfseries,
notefont=\mdseries,
bodyfont=\normalfont,
qed=\qedsymbol{}
]{steo}
\declaretheorem[name={Theorem}, numbered=no, style=steo]{mtheo}

\declaretheoremstyle[
headfont=\normalfont\bfseries,
notefont=\mdseries,
bodyfont=\normalfont,
qed=\qedsymbol{}
]{slemma}
\declaretheorem[name={Lemma}, numbered=yes, style=slemma]{mlemma}

\declaretheoremstyle[
headfont=\normalfont\bfseries,
notefont=\mdseries,
bodyfont=\normalfont,
]{sdef}
\declaretheorem[numbered=no, style=sdef]{mdef}

\declaretheoremstyle[
spaceabove=-6pt,
spacebelow=6pt,
headfont=\normalfont\bfseries,
bodyfont=\normalfont,
postheadspace=1em,
qed=$\blacksquare$,
headpunct={:}
]{sprf}
\declaretheorem[name={Proof}, style=sprf, numbered=no]{mproof}
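%% usage sketch (illustrative) for the environments declared above:
% \begin{mtheo}[Convergence]
%   Statement of the theorem.
% \end{mtheo}
% \begin{mproof}
%   Argument; the sprf style closes with a black square.
% \end{mproof}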

% -------------------- Fancy style --------------------

% \usepackage{fancyhdr}
% \pagestyle{fancy}
% \renewcommand{\chaptermark}[1]{\markboth{#1}{}}
% \setlength{\headheight}{22pt}
% \fancyhead[R]{\leftmark}
% \fancyhead[L]{\smallLogoA}
% \setlength{\parskip}{0.5em}


\usepackage{titlesec}
\titlespacing*{\chapter}{0pt}{-20pt}{20pt}
\titleformat{\chapter}[display]{\normalfont\bfseries}{}{0pt}{\Huge}


%% ......................................................................... %%
%% local changes
% \setcounter{secnumdepth}{0}


%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

\title{Document}
\date{\today}

%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%
\newcommand{\logoA}{\includegraphics[height=4cm]{images/UniPi Logo.png}}
\newcommand{\smallLogoA}{\includegraphics[height=0.6cm]{images/UniPi Logo.png}}
\newcommand{\course}{Computational Mathematics for Learning and Data Analysis}
\newcommand{\assignment}{Project Track 19 --- Non-ML}
\newcommand{\authors}{
\begin{tblr}{colspec={c}, cells={cmd={\bf\textit}}}
De Castelli Fabrizio \\
Rossi Elvis
\end{tblr}
}

\newcommand{\emails}{
\begin{tblr}{colspec = {rr}, colsep={0pt}, cells={cmd={\textit}}, rowsep={4pt}}
f.decastelli&\gry{@studenti.unipi.it} \\
e.rossi46&\gry{@studenti.unipi.it}
\end{tblr}
}
\newcommand{\universityA}{Università di Pisa}

\newcommand{\academicYear}{Academic Year 2023--2024}
\newcommand{\department}{Department of Computer Science}

% Line spacing
\usepackage{setspace}
\setstretch{1.25}

\begin{document}

\begin{titlepage}

\centering
\logoA\par
\vspace{0.7cm}
{\scshape\LARGE \universityA\par}

\vspace{2cm}
{\scshape\Huge \textbf{\course} \par}

\vspace{1cm}
{\scshape\LARGE \assignment\par}

\vfill

{\scshape\LARGE \textbf{Team 16} \par}
\vspace{0.5cm}
\begin{tblr}{colspec = {Q[m,c]Q[m,c]}, colsep={6pt}}
{\Large \authors} & {\large \emails}
\end{tblr} \\
\vspace{1cm}
{\large \academicYear\par}

\end{titlepage}

\pagenumbering{roman}
\setcounter{page}{1}

% Table of contents
\tableofcontents

\clearpage
\pagenumbering{arabic}
\setcounter{page}{1}


\begin{sloppypar}
% Chapters
\include{(1) - introduction/introduction}
\include{(2) - problem definition/problem definition}
\include{(3) - algorithms/algorithms}
\include{(4) - experiments/experiments}
\include{(6) - conclusion/conclusion}
\include{(7) - proofs/proofs}
\end{sloppypar}

\printbibliography{}

\end{document}

%% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - %%

%%% Local Variables:
%%% TeX-command-extra-options: "-shell-escape"
%%% End:
18
Report/main.toc
Normal file
@ -0,0 +1,18 @@
\babel@toc {english}{}\relax
\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}%
\contentsline {chapter}{\numberline {2}Problem Definition}{2}{chapter.2}%
\contentsline {section}{\numberline {2.1}QR}{2}{section.2.1}%
\contentsline {section}{\numberline {2.2}L-BFGS}{3}{section.2.2}%
\contentsline {section}{\numberline {2.3}Conditioning}{4}{section.2.3}%
\contentsline {chapter}{\numberline {3}Algorithms}{6}{chapter.3}%
\contentsline {section}{\numberline {3.1}QR}{6}{section.3.1}%
\contentsline {section}{\numberline {3.2}L-BFGS}{8}{section.3.2}%
\contentsline {chapter}{\numberline {4}Experiments}{12}{chapter.4}%
\contentsline {section}{\numberline {4.1}QR}{13}{section.4.1}%
\contentsline {section}{\numberline {4.2}L-BFGS}{14}{section.4.2}%
\contentsline {section}{\numberline {4.3}Comparison between QR and L-BFGS}{15}{section.4.3}%
\contentsline {section}{\numberline {4.4}Other Experiments}{17}{section.4.4}%
\contentsline {subsection}{\numberline {4.4.1}The Effect of the Memory Size}{17}{subsection.4.4.1}%
\contentsline {subsection}{\numberline {4.4.2}A Comparison of Quasi-Newton Methods}{18}{subsection.4.4.2}%
\contentsline {chapter}{\numberline {5}Concluding Remarks}{21}{chapter.5}%
\contentsline {chapter}{\numberline {6}Proofs}{22}{chapter.6}%