Added Project and Report
This commit is contained in:
186
Report/(3) - algorithms/algorithms.aux
Normal file
186
Report/(3) - algorithms/algorithms.aux
Normal file
@ -0,0 +1,186 @@
|
||||
\relax
|
||||
\providecommand\hyper@newdestlabel[2]{}
|
||||
\@writefile{toc}{\contentsline {chapter}{\numberline {3}Algorithms}{6}{chapter.3}\protected@file@percent }
|
||||
\@writefile{lof}{\addvspace {10\p@ }}
|
||||
\@writefile{lot}{\addvspace {10\p@ }}
|
||||
\@writefile{loa}{\addvspace {10\p@ }}
|
||||
\newlabel{ch: algorithms}{{3}{6}{Algorithms}{chapter.3}{}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.1}QR}{6}{section.3.1}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Thin QR}}{7}{algocf.1}\protected@file@percent }
|
||||
\newlabel{algo: thinQR}{{1}{7}{QR}{algocf.1}{}}
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {2}{\ignorespaces householder\_vector}}{7}{algocf.2}\protected@file@percent }
|
||||
\newlabel{algo: householder_vector}{{2}{7}{QR}{algocf.2}{}}
|
||||
\@writefile{toc}{\contentsline {section}{\numberline {3.2}L-BFGS}{8}{section.3.2}\protected@file@percent }
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {3}{\ignorespaces Limited Memory BFGS}}{8}{algocf.3}\protected@file@percent }
|
||||
\newlabel{algo: L-BFGS}{{3}{8}{L-BFGS}{algocf.3}{}}
|
||||
\@writefile{loa}{\contentsline {algocf}{\numberline {4}{\ignorespaces Limited Memory BFGS {-} Two-Loop Recursion}}{9}{algocf.4}\protected@file@percent }
|
||||
\newlabel{algo: L-BFGS Two-Loop Recursion}{{4}{9}{L-BFGS}{algocf.4}{}}
|
||||
\newlabel{algo: convergence1}{{1}{9}{}{Item.1}{}}
|
||||
\newlabel{algo: convergence2}{{2}{9}{}{Item.2}{}}
|
||||
\newlabel{algo: convergence3}{{3}{9}{}{Item.3}{}}
|
||||
\newlabel{eq:6}{{3}{9}{}{Item.3}{}}
|
||||
\newlabel{algo: definition y_k}{{3.1}{10}{}{equation.3.2.1}{}}
|
||||
\@writefile{loe}{\addvspace {10\p@ }}
|
||||
\@writefile{loe}{\contentsline {mtheo}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Theorem}{10}{thmt@dummyctr.dummy.1}\protected@file@percent }
|
||||
\@writefile{loe}{\contentsline {mproof}{\ifthmt@listswap \else \numberline {\let \autodot \@empty }\fi Proof}{10}{thmt@dummyctr.dummy.2}\protected@file@percent }
|
||||
\@setckpt{(3) - algorithms/algorithms}{
|
||||
\setcounter{page}{12}
|
||||
\setcounter{equation}{1}
|
||||
\setcounter{enumi}{3}
|
||||
\setcounter{enumii}{0}
|
||||
\setcounter{enumiii}{0}
|
||||
\setcounter{enumiv}{0}
|
||||
\setcounter{footnote}{0}
|
||||
\setcounter{mpfootnote}{0}
|
||||
\setcounter{part}{0}
|
||||
\setcounter{chapter}{3}
|
||||
\setcounter{section}{2}
|
||||
\setcounter{subsection}{0}
|
||||
\setcounter{subsubsection}{0}
|
||||
\setcounter{paragraph}{0}
|
||||
\setcounter{subparagraph}{0}
|
||||
\setcounter{figure}{0}
|
||||
\setcounter{table}{0}
|
||||
\setcounter{tabx@nest}{0}
|
||||
\setcounter{listtotal}{0}
|
||||
\setcounter{listcount}{0}
|
||||
\setcounter{liststart}{0}
|
||||
\setcounter{liststop}{0}
|
||||
\setcounter{citecount}{0}
|
||||
\setcounter{citetotal}{0}
|
||||
\setcounter{multicitecount}{0}
|
||||
\setcounter{multicitetotal}{0}
|
||||
\setcounter{instcount}{2}
|
||||
\setcounter{maxnames}{3}
|
||||
\setcounter{minnames}{3}
|
||||
\setcounter{maxitems}{3}
|
||||
\setcounter{minitems}{1}
|
||||
\setcounter{citecounter}{0}
|
||||
\setcounter{maxcitecounter}{0}
|
||||
\setcounter{savedcitecounter}{0}
|
||||
\setcounter{uniquelist}{0}
|
||||
\setcounter{uniquename}{0}
|
||||
\setcounter{refsection}{0}
|
||||
\setcounter{refsegment}{0}
|
||||
\setcounter{maxextratitle}{0}
|
||||
\setcounter{maxextratitleyear}{0}
|
||||
\setcounter{maxextraname}{0}
|
||||
\setcounter{maxextradate}{0}
|
||||
\setcounter{maxextraalpha}{0}
|
||||
\setcounter{abbrvpenalty}{50}
|
||||
\setcounter{highnamepenalty}{50}
|
||||
\setcounter{lownamepenalty}{25}
|
||||
\setcounter{maxparens}{3}
|
||||
\setcounter{parenlevel}{0}
|
||||
\setcounter{blx@maxsection}{0}
|
||||
\setcounter{mincomprange}{10}
|
||||
\setcounter{maxcomprange}{100000}
|
||||
\setcounter{mincompwidth}{1}
|
||||
\setcounter{afterword}{0}
|
||||
\setcounter{savedafterword}{0}
|
||||
\setcounter{annotator}{0}
|
||||
\setcounter{savedannotator}{0}
|
||||
\setcounter{author}{0}
|
||||
\setcounter{savedauthor}{0}
|
||||
\setcounter{bookauthor}{0}
|
||||
\setcounter{savedbookauthor}{0}
|
||||
\setcounter{commentator}{0}
|
||||
\setcounter{savedcommentator}{0}
|
||||
\setcounter{editor}{0}
|
||||
\setcounter{savededitor}{0}
|
||||
\setcounter{editora}{0}
|
||||
\setcounter{savededitora}{0}
|
||||
\setcounter{editorb}{0}
|
||||
\setcounter{savededitorb}{0}
|
||||
\setcounter{editorc}{0}
|
||||
\setcounter{savededitorc}{0}
|
||||
\setcounter{foreword}{0}
|
||||
\setcounter{savedforeword}{0}
|
||||
\setcounter{holder}{0}
|
||||
\setcounter{savedholder}{0}
|
||||
\setcounter{introduction}{0}
|
||||
\setcounter{savedintroduction}{0}
|
||||
\setcounter{namea}{0}
|
||||
\setcounter{savednamea}{0}
|
||||
\setcounter{nameb}{0}
|
||||
\setcounter{savednameb}{0}
|
||||
\setcounter{namec}{0}
|
||||
\setcounter{savednamec}{0}
|
||||
\setcounter{translator}{0}
|
||||
\setcounter{savedtranslator}{0}
|
||||
\setcounter{shortauthor}{0}
|
||||
\setcounter{savedshortauthor}{0}
|
||||
\setcounter{shorteditor}{0}
|
||||
\setcounter{savedshorteditor}{0}
|
||||
\setcounter{labelname}{0}
|
||||
\setcounter{savedlabelname}{0}
|
||||
\setcounter{institution}{0}
|
||||
\setcounter{savedinstitution}{0}
|
||||
\setcounter{lista}{0}
|
||||
\setcounter{savedlista}{0}
|
||||
\setcounter{listb}{0}
|
||||
\setcounter{savedlistb}{0}
|
||||
\setcounter{listc}{0}
|
||||
\setcounter{savedlistc}{0}
|
||||
\setcounter{listd}{0}
|
||||
\setcounter{savedlistd}{0}
|
||||
\setcounter{liste}{0}
|
||||
\setcounter{savedliste}{0}
|
||||
\setcounter{listf}{0}
|
||||
\setcounter{savedlistf}{0}
|
||||
\setcounter{location}{0}
|
||||
\setcounter{savedlocation}{0}
|
||||
\setcounter{organization}{0}
|
||||
\setcounter{savedorganization}{0}
|
||||
\setcounter{origlocation}{0}
|
||||
\setcounter{savedoriglocation}{0}
|
||||
\setcounter{origpublisher}{0}
|
||||
\setcounter{savedorigpublisher}{0}
|
||||
\setcounter{publisher}{0}
|
||||
\setcounter{savedpublisher}{0}
|
||||
\setcounter{language}{0}
|
||||
\setcounter{savedlanguage}{0}
|
||||
\setcounter{origlanguage}{0}
|
||||
\setcounter{savedoriglanguage}{0}
|
||||
\setcounter{pageref}{0}
|
||||
\setcounter{savedpageref}{0}
|
||||
\setcounter{textcitecount}{0}
|
||||
\setcounter{textcitetotal}{0}
|
||||
\setcounter{textcitemaxnames}{0}
|
||||
\setcounter{biburlbigbreakpenalty}{100}
|
||||
\setcounter{biburlbreakpenalty}{200}
|
||||
\setcounter{biburlnumpenalty}{0}
|
||||
\setcounter{biburlucpenalty}{0}
|
||||
\setcounter{biburllcpenalty}{0}
|
||||
\setcounter{smartand}{1}
|
||||
\setcounter{bbx:relatedcount}{0}
|
||||
\setcounter{bbx:relatedtotal}{0}
|
||||
\setcounter{cbx@tempcnta}{0}
|
||||
\setcounter{cbx@tempcntb}{1}
|
||||
\setcounter{cbx@tempcntc}{0}
|
||||
\setcounter{cbx@tempcntd}{-1}
|
||||
\setcounter{float@type}{4}
|
||||
\setcounter{parentequation}{0}
|
||||
\setcounter{ALG@line}{0}
|
||||
\setcounter{ALG@rem}{0}
|
||||
\setcounter{ALG@nested}{0}
|
||||
\setcounter{ALG@Lnr}{2}
|
||||
\setcounter{ALG@blocknr}{10}
|
||||
\setcounter{ALG@storecount}{0}
|
||||
\setcounter{ALG@tmpcounter}{0}
|
||||
\setcounter{thmt@dummyctr}{2}
|
||||
\setcounter{nlinenum}{0}
|
||||
\setcounter{caption@flags}{2}
|
||||
\setcounter{continuedfloat}{0}
|
||||
\setcounter{subfigure}{0}
|
||||
\setcounter{subtable}{0}
|
||||
\setcounter{section@level}{0}
|
||||
\setcounter{Item}{3}
|
||||
\setcounter{Hfootnote}{0}
|
||||
\setcounter{bookmark@seq@number}{0}
|
||||
\setcounter{AlgoLine}{11}
|
||||
\setcounter{algocfline}{4}
|
||||
\setcounter{algocfproc}{4}
|
||||
\setcounter{algocf}{4}
|
||||
\setcounter{mlemma}{0}
|
||||
}
|
||||
218
Report/(3) - algorithms/algorithms.tex
Normal file
218
Report/(3) - algorithms/algorithms.tex
Normal file
@ -0,0 +1,218 @@
|
||||
% chktex-file 9 chktex-file 17
|
||||
\chapter{Algorithms}\label{ch: algorithms}
|
||||
|
||||
\section{QR}
|
||||
The algorithm has been implemented considering that the input matrix $A \in \mathbb{R}^{m \times n}$, where $m$ may be different from $n$, namely it can be rectangular \textit{horizontally} or \textit{vertically}. In this version we store in a proper data structure a matrix $\Upsilon \in \mathbb{R}^{m \times n}$ of the following form ($m > n$ in this example):
|
||||
|
||||
\begin{equation*}
|
||||
\Upsilon = {(\upsilon_{i,j})}_{i,j} = \begin{tikzpicture}[baseline=-1ex]
|
||||
\matrix[%
|
||||
matrix of math nodes,
|
||||
nodes in empty cells,
|
||||
left delimiter={[},right delimiter={]},
|
||||
inner xsep=2pt, column sep=6pt,
|
||||
] (m)
|
||||
{%
|
||||
\vphantom{1} & * & \cdots & * \\
|
||||
& \vphantom{1} & \ddots & \vdots \\
|
||||
& & \vphantom{1} & * \\
|
||||
& & & \vphantom{1} \\
|
||||
u_1 & u_2 & \cdots & u_n \\
|
||||
\vphantom{1} & \vphantom{1} & \vphantom{1} & \vphantom{1} \\
|
||||
};
|
||||
\node[rectangle, draw, fit={(m-1-1) (m-6-1)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\node[rectangle, draw, fit={(m-2-2) (m-6-2)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\node[rectangle, draw, fit={(m-3-3) (m-6-3)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\node[rectangle, draw, fit={(m-4-4) (m-6-4)}, inner sep=-1.5pt, text width=22pt] {};
|
||||
\end{tikzpicture}
|
||||
\end{equation*}
|
||||
|
||||
\begin{center}
|
||||
$u_k \in \mathbb{R}^{m - k + 1},\ 1 \leq k \leq n$
|
||||
\end{center}
|
||||
and the values of the diagonal of $R$ in a vector $d \in \mathbb{R}^{n}$. The $*$ entries are elements computed in the QR factorization belonging to the upper triangular matrix, yielded by line 6 of \hyperref[algo: thinQR]{Algorithm 1}. In this way we are allowed to lazily perform the products $Qy$ and $Q^T y$ by means of the Householder vectors $u_1, \dots, u_n$ that we stored. On the other hand, to compute a product between the upper part of $\Upsilon$ and an input vector we reconstruct the upper triangular matrix by taking the elements $\upsilon_{i,j}$ such that $j > i$ and attach the vector $d$ as the diagonal of the resulting matrix.
|
||||
The zeros of the matrix $R$ are ignored.
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{Thin QR}\label{algo: thinQR}
|
||||
\SetKwInOut{Input}{Input}
|
||||
\SetKwInOut{Output}{Output}
|
||||
|
||||
\BlankLine%
|
||||
\Input{$A \in \mathbb{R}^{m \times n}$}
|
||||
\Output{$Q \in \mathbb{R}^{m \times m},\ R \in \mathbb{R}^{m \times n}$ implicit $QR$ factorization of $A$}
|
||||
|
||||
\BlankLine%
|
||||
$\Upsilon = copy(A)$ \\
|
||||
$d = zeros(\min(m, n))$ \\
|
||||
|
||||
\For{
|
||||
$k \in 1 \dots \min(m, n)$
|
||||
}{
|
||||
$u_k, s_k = householder\_vector(\Upsilon[k:m, k])$\\
|
||||
$d_k = s_k$ \\
|
||||
$\Upsilon[k:m, k+1:n] = \Upsilon[k:m, k+1:n] - 2u_k(u_k^T \Upsilon[k:m, k+1:n])$\\
|
||||
$\Upsilon[k:m, k] = u_k$
|
||||
}
|
||||
|
||||
\Return$\Upsilon, d$
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{householder\_vector}\label{algo: householder_vector}
|
||||
\SetKwInOut{Input}{Input}
|
||||
\SetKwInOut{Output}{Output}
|
||||
|
||||
\BlankLine%
|
||||
\Input{$x \in \mathbb{R}^d$}
|
||||
\Output{$u \in \mathbb{R}^{d},\ s \in \mathbb{R}$ householder vector of $x$}
|
||||
|
||||
\BlankLine%
|
||||
$s = \norm{x}$ \\
|
||||
\If{$x_1 \geq 0$}{
|
||||
$s = -s$
|
||||
}
|
||||
$u = copy(x)$ \\
|
||||
$u_1 = u_1 - s$ \\
|
||||
$u = u\ / \norm{u}$ \\
|
||||
|
||||
\Return$u, s$
|
||||
\end{algorithm}
|
||||
|
||||
We assume $m > n$ as the case $n > m$ is similar for the complexity analysis. The time complexity of this algorithm is $\Theta\bigl(mn^2 \bigr) \approx \Theta\bigl(n^3 \bigr)$, because $m \approx n$ in (P). We will see in the \hyperref[ch: experiments]{Experiments chapter} that the running time scales linearly with $m$ as expected, where $m$ is the size of $\hat{X}$.
|
||||
|
||||
\newpage
|
||||
\section{L-BFGS}
|
||||
|
||||
We follow the syntax from \textit{Numerical Optimization}~\cite{Numerical-Optimization-2006} and define $f_k = f(x_k)$.
|
||||
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{Limited Memory BFGS}\label{algo: L-BFGS}
|
||||
\SetKwInOut{Input}{Input}
|
||||
\SetKwInOut{Output}{Output}
|
||||
|
||||
\BlankLine%
|
||||
\Input{$\textbf{f}: \mathbb{R}^n \longrightarrow \mathbb{R},\ \textbf{x} \in \mathbb{R}^n,\ m \text{ memory, } \epsilon \text{ tolerance}$}
|
||||
\Output{${\bf x^*}\ \text{ending point},\ {\bf f(x^*)},\ {\bf \nabla f(x^*)}$}
|
||||
|
||||
\BlankLine%
|
||||
$k = 0$ \\
|
||||
\While{$\norm{\nabla f_k} \geq \epsilon \norm{\nabla f_0}$} {
|
||||
\uIf{storage is empty}{
|
||||
$H_k^0 = I$
|
||||
}\uElse{
|
||||
$H_k^0 = \frac{\langle y_{k-1}, s_{k-1} \rangle}{\norm{y_{k-1}}^2} \cdot I$
|
||||
}
|
||||
Calculate $p_k = -H_k \nabla{f_k}$ with \hyperref[algo: L-BFGS Two-Loop Recursion]{\textbf{Algorithm 4}} \\
|
||||
Choose $\alpha_k$ satisfying the Armijo-Wolfe conditions or with exact line search \\
|
||||
$x_{k+1} = x_k + \alpha_k p_k$ \\
|
||||
$s_k = x_{k+1} - x_k$ \\
|
||||
$y_k = \nabla f_{k+1} - \nabla f_k$ \\
|
||||
$curvature = \langle y_k, s_k \rangle$ \\
|
||||
$\rho_k = curvature^{-1}$ \\
|
||||
\uIf{$curvature \leq 10^{-16}$}{
|
||||
free the storage and start again from gradient descent
|
||||
}\uElse{
|
||||
Discard the triplet $\{s_{k-m}, y_{k-m}, \rho_{k-m}\}$ from storage \\
|
||||
Save $s_k, y_k, \rho_k$
|
||||
}
|
||||
|
||||
$k = k + 1$
|
||||
}
|
||||
\Return$x_k$, $f_k$, $\nabla f_k$
|
||||
\end{algorithm}
|
||||
\begin{algorithm}[H]
|
||||
\SetAlgoLined%
|
||||
\caption{Limited Memory BFGS {-} Two-Loop Recursion}\label{algo: L-BFGS Two-Loop Recursion}
|
||||
|
||||
$q = \nabla f_k$ \\
|
||||
\For{$i = (k - 1), \dots, (k - m)$}{
|
||||
$\alpha_i = \rho_i s_i^T q$ \\
|
||||
$q = q - \alpha_i y_i$ \\
|
||||
}
|
||||
|
||||
$r = H_k^0 q$ \\
|
||||
\For{$i = (k - m), \dots, (k - 1)$}{
|
||||
$\beta = \rho_i y_i^T r$ \\
|
||||
$r = r + s_i\bigl(\alpha_i - \beta\bigr)$ \\
|
||||
}
|
||||
|
||||
\Return$-r$
|
||||
|
||||
\end{algorithm}
|
||||
In our implementation we keep the triplets $(s_k, y_k, \rho_k)$ in a circular buffer with capacity $m$ and the values of $\alpha_i$ in \hyperref[algo: L-BFGS Two-Loop Recursion]{Algorithm 4} in a stack such that no explicit indices are needed.
|
||||
|
||||
In case the curvature of the function is too small, we free the storage and restart with a gradient step.
|
||||
|
||||
We prefer using an exact line search to compute the step size over an inexact line search since the computational cost for our problem is lower.
|
||||
|
||||
\subsection*{Convergence}
|
||||
To prove that the implemented method converges to the global minimum of the function we have to optimize, we follow~\cite{convergence_lbfgs} and state the following assumptions about our problem:
|
||||
\begin{enumerate}
|
||||
\item\label{algo: convergence1} $f \in C^2$
|
||||
\item\label{algo: convergence2} The level set $\mathcal{L} = \{ x \in \mathbb{R}^n\ |\ f(x) \leq f(x_0) \} $ is convex
|
||||
\item\label{algo: convergence3} $\exists\ M_1, M_2 \in \mathbb{R}^+$ such that
|
||||
\begin{equation*}
|
||||
M_1\norm{z}^2 \leq z^T G(x) z \leq M_2\norm{z}^2\label{eq:6}
|
||||
\end{equation*}
|
||||
$\forall z \in \mathbb{R}^n$ and $\forall x \in \mathcal{L}$
|
||||
\end{enumerate}
|
||||
|
||||
We follow the publication's notation and define:
|
||||
|
||||
\[ G(x) \coloneqq \nabla^{2}f(x) \]
|
||||
\[ \bar{G}_k(x) \coloneqq \int_0^1 G(x_k + \tau \alpha_k p_k) d\tau \]
|
||||
|
||||
From Taylor's theorem:
|
||||
|
||||
\begin{equation}\label{algo: definition y_k}
|
||||
y_k = \bar{G}_k \alpha_k p_k = \bar{G}_k s_k
|
||||
\end{equation}
|
||||
|
||||
The first assumption for our problem follows from the definition. The second assumption is proved by \autoref{definitions: hessian tomography}. The third assumption is also a consequence of the fact that the Hessian of $f$ is constant.
|
||||
|
||||
% \[ z_k \coloneqq {\bar{G}_k}^{1/2} s_k \]
|
||||
|
||||
\begin{mtheo}
|
||||
Let $B_0$ be any symmetric positive definite initial matrix, and let $x_0$ be a starting point for which the Assumptions~\ref{algo: convergence1},~\ref{algo: convergence2} and~\ref{algo: convergence3} hold, then the sequence ${x_k}$ generated by the L-BFGS algorithm converges to the minimizer $x^*$ of $f$ linearly.
|
||||
\end{mtheo}
|
||||
|
||||
\begin{mproof}
|
||||
Using \autoref{algo: definition y_k} and Assumption~\ref{algo: convergence3}:
|
||||
\[ M_1 \norm{s_k}^2 \leq y_k^T s_k \leq M_2 \norm{s_k}^2 \]
|
||||
and:
|
||||
\[ \frac{\norm{y_k}^2}{y_k^T s_k} = \frac{s_k^T \bar{G}_k^2 s_k}{s_k^T \bar{G}_k s_k} \]
|
||||
Both trace and determinant can be expressed in terms of the trace and determinant of the starting matrix from which the approximate Hessian is constructed:
|
||||
\begin{align*}
|
||||
\Tr(B_{k+1}) &\leq \Tr(B_k^{(0)}) + \Tilde{m} M_2 \leq M_3 \\
|
||||
\det(B_{k+1}) &= \det(B_k^{(0)}) \cdot \prod_{l=0}^{\Tilde{m}-1} \frac{y_l^T s_l}{s_l^T B_k^{(l)} s_l} \geq \det\left(B_k^{(0)} {\left(\frac{M_1}{M_3}\right)}^{\Tilde{m}}\right) \geq M_4
|
||||
\end{align*}
|
||||
where $\Tilde{m}$ is the memory size and $M_3$ and $M_4$ are chosen appropriately in $\mathbb{R}^+$.
|
||||
|
||||
From these two bounds we have that for some constant $\delta > 0$:
|
||||
|
||||
\[ \cos(\theta_k) = \frac{s_k^T B_k s_k}{\norm{s_k} \norm{B_k s_k}} \geq \delta \]
|
||||
Since with exact line search the Armijo condition $f(x_k + \alpha_k p_k) \leq f(x_k) + m_1 \alpha_k \nabla f(x_k)$ is always satisfied if the constant $m_1$ does not exclude the minimum $x_*$, and since the strong Wolfe condition $\norm{\nabla f(x_k + \alpha_k p_k)} \leq m_3 \norm{\nabla f(x_k)}$ is also always satisfied because $\norm{\nabla f(x_k + \alpha_k p_k)} = O(u)$, it follows from the two conditions and Assumptions~\ref{algo: convergence1} and~\ref{algo: convergence2} that:
|
||||
\begin{align*}
|
||||
& f(x_{k+1}) - f(x_*) \leq (1 - c \cos^2(\theta_k)) (f(x_k) - f(x_*)) \\
|
||||
\implies& f(x_k) - f(x_*) \leq {(1 - c \cdot \delta^2)}^k (f(x_0) - f(x_*)) \\
|
||||
\implies& f(x_k) - f(x_*) \leq r^k (f(x_0) - f(x_*))
|
||||
\end{align*}
|
||||
for some $r \in [0, 1)$. Using Assumption~\ref{algo: convergence3}:
|
||||
\begin{gather*}
|
||||
\frac{1}{2} M_1 \norm{x_k - x_*}^2 \leq f(x_k) - f(x_*) \\
|
||||
\implies \norm{x_k - x_*} \leq r^{k/2} {\left( 2 \frac{f(x_0) - f(x_*)}{M_1} \right)}^{(1/2)}
|
||||
\end{gather*}
|
||||
so the sequence $\{x_k\}$ is linearly convergent.
|
||||
\end{mproof}
|
||||
|
||||
The implementation of L-BFGS that uses Armijo-Wolfe line search also satisfies the assumptions so it also converges linearly to $x_*$.
|
||||
|
||||
%%% Local Variables:
|
||||
%%% mode: latex
|
||||
%%% TeX-master: "../main"
|
||||
%%% TeX-command-extra-options: "-shell-escape"
|
||||
%%% End:
|
||||
Reference in New Issue
Block a user