lesson 9/11

This commit is contained in:
elvis
2023-11-17 12:42:12 +01:00
parent 9e7f427faa
commit 7afb1372f8
23 changed files with 7604 additions and 0 deletions

387
11-09/NWTN.jl Normal file

@@ -0,0 +1,387 @@
using LinearAlgebra, Printf, Plots
function NWTN(f;
x::Union{Nothing, Vector}=nothing,
eps::Real=1e-6,
MaxFeval::Integer=1000,
m1::Real=1e-4,
m2::Real=0.9,
delta::Real=1e-6,
tau::Real=0.9,
sfgrd::Real=0.2,
MInf::Real=-Inf,
mina::Real=1e-16,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function f2phi(alpha, derivate=false)
# computes and returns the value of the tomography at alpha
#
# phi( alpha ) = f( x + alpha * d )
#
# if Plotf > 2 saves the data in gap() for plotting
#
# if the second output parameter is required, put there the derivative
# of the tomography in alpha
#
# phi'( alpha ) = < \nabla f( x + alpha * d ) , d >
#
# saves the point in lastx, the gradient in lastg, the Hessian in lasth,
# and increases feval
lastx = x + alpha * d
phi, lastg, lastH = f(lastx)
if Plotf > 2
if fStar > - Inf
push!(gap, (phi - fStar) / max(abs(fStar), 1))
else
push!(gap, phi)
end
end
feval += 1
if derivate
return (phi, dot(d, lastg))
end
return (phi, nothing)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau)
# performs an Armijo-Wolfe Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested: if phi'( as ) < 0 then as is
# divided by tau < 1 (hence it is increased) until this does not happen
# any longer
#
# m1 and m2 are the standard Armijo-Wolfe parameters; note that the strong
# Wolfe condition is used
#
# returns the optimal step and the optimal f-value
lsiter = 1 # count iterations of first phase
local phips, phia
while feval ≤ MaxFeval
phia, phips = f2phi(as, true)
if (phia ≤ phi0 + m1 * as * phip0) && (abs(phips) ≤ -m2 * phip0)
if printing
@printf(" %2d", lsiter)
end
a = as;
return (a, phia) # Armijo + strong Wolfe satisfied, we are done
end
if phips ≥ 0
break
end
as = as / tau
lsiter += 1
end
if printing
@printf(" %2d ", lsiter)
end
lsiter = 1 # count iterations of second phase
am = 0
a = as
phipm = phip0
while (feval ≤ MaxFeval) && ((as - am) > mina) && (phips > 1e-12)
# compute the new value by safeguarded quadratic interpolation
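# (a is the zero of the linear interpolant of phi' on [am, as]: plugging
# it into the interpolant gives 0; the max/min below then clips it into
# [am + (as - am) * sfgrd, as - (as - am) * sfgrd], so that it cannot
# land arbitrarily close to either endpoint of the interval)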
a = (am * phips - as * phipm) / (phips - phipm)
a = max(am + ( as - am ) * sfgrd, min(as - ( as - am ) * sfgrd, a))
# compute phi(a)
phia, phip = f2phi(a, true)
if (phia ≤ phi0 + m1 * a * phip0) && (abs(phip) ≤ -m2 * phip0)
break # Armijo + strong Wolfe satisfied, we are done
end
# restrict the interval based on sign of the derivative in a
if phip < 0
am = a
phipm = phip
else
as = a
if as ≤ mina
break
end
phips = phip
end
lsiter += 1
end
if printing
@printf("%2d", lsiter)
end
return (a, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function BacktrackingLS(phi0, phip0, as, m1, tau)
# performs a Backtracking Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested, which is decreased by
# multiplying it by tau < 1 until the Armijo condition with parameter
# m1 is satisfied
#
# returns the optimal step and the optimal f-value
local phia
lsiter = 1 # count ls iterations
while feval ≤ MaxFeval && as > mina
phia, _ = f2phi(as)
if phia ≤ phi0 + m1 * as * phip0 # Armijo satisfied
break # we are done
end
as *= tau
lsiter += 1
end
if printing
@printf(" %2d", lsiter)
end
return (as, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#Plotf = 2
# 0 = nothing is plotted
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted, iteration-wise
# 3 = the function value / gap are plotted, function-evaluation-wise
Interactive = false
PXY = Matrix{Real}(undef, 2, 0)
local gap
if Plotf > 1
if Plotf == 2
MaxIter = 50 # expected number of iterations for the gap plot
else
MaxIter = 70 # expected number of iterations for the gap plot
end
gap = []
end
if Plotf == 2 && isnothing(plt)
plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+1))
end
if Plotf > 1 && isnothing(plt)
plt = plot(xlims=(0, MaxIter))
end
if isnothing(plt)
plt = plot()
end
local fStar
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if m1 ≤ 0 || m1 ≥ 1
throw(ArgumentError("m1 is not in (0 ,1)"))
end
AWLS = (m2 > 0 && m2 < 1)
if delta < 0
throw(ArgumentError("delta must be ≥ 0"))
end
if tau ≤ 0 || tau ≥ 1
throw(ArgumentError("tau is not in (0 ,1)"))
end
if sfgrd ≤ 0 || sfgrd ≥ 1
throw(ArgumentError("sfgrd is not in (0, 1)"))
end
if mina < 0
throw(ArgumentError("mina must be ≥ 0"))
end
# "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lastx = zeros(n) # last point visited in the line search
lastg = zeros(n) # gradient of lastx
lastH = zeros(n, n) # Hessian of lastx
d = zeros(n) # Newton's direction
feval = 0 # f() evaluations count ("common" with LSs)
status = "error"
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
prevv = Inf
end
if printing
@printf("Newton's method\n")
if fStar > -Inf
@printf("feval\trel gap\t\t|| g(x) ||\trate\t\tdelta")
else
@printf("feval\tf(x)\t\t\t|| g(x) ||\tdelta")
end
@printf("\t\tls it\ta*")
@printf("\n\n")
end
v, _ = f2phi(0)
ng = norm(lastg)
if eps < 0
ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1 # un-scaled stopping criterion
end
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = ( v - fStar ) / max(abs( fStar ), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
if prevv < Inf
@printf("\t%1.4e", ( v - fStar ) / ( prevv - fStar ))
else
@printf("\t\t")
end
end
prevv = v
if Plotf > 1
if Plotf ≥ 2
push!(gap, gapk)
end
end
else
if printing
@printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng)
end
if Plotf > 1
if Plotf ≥ 2
push!(gap, v)
end
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng ≤ eps * ng0
status = "optimal"
break
end
if feval > MaxFeval
status = "stopped"
break
end
# compute Newton's direction- - - - - - - - - - - - - - - - - - - - - -
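# (the direction solves lastH * d = -lastg; for d to be a descent
# direction lastH must be positive definite, so if its smallest
# eigenvalue lambdan is below delta the whole spectrum is shifted by
# adding (delta - lambdan) * I, which makes the smallest eigenvalue of
# the modified Hessian exactly delta)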
lambdan = eigmin(lastH) # smallest eigenvalue
if lambdan < delta
if printing
@printf("\t%1.4e", delta - lambdan)
end
lastH = lastH + (delta - lambdan) * I
else
if printing
@printf("\t0.00e+00")
end
end
d = -lastH \ lastg
phip0 = lastg' * d
# compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
# in Newton's method, the default initial stepsize is 1
if AWLS
a, v = ArmijoWolfeLS(v, phip0, 1, m1, m2, tau)
else
a, v = BacktrackingLS(v, phip0, 1, m1, tau)
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("\t%1.2e", a)
@printf("\n")
end
if a ≤ mina
status = "error"
break
end
if v ≤ MInf
status = "unbounded"
break
end
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, lastx))
end
x = lastx
ng = norm(lastg)
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Interactive
readline()
end
end
if plotatend
if Plotf ≥ 2
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
display(plt)
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
return (x, status)
end
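As a usage sketch (not part of the commit, and assuming NWTN.jl has been included): a 2x2 strictly convex quadratic, analogous to TF{ 1 } in TestFunctions.m further below, wrapped in the interface NWTN() expects, i.e., f(nothing) -> (fStar, x0, nothing) and f(x) -> (v, g, H); the name quad is purely illustrative.

using LinearAlgebra
Q = [6.0 -2.0; -2.0 6.0] # eigenvalues 4 and 8
q = [10.0, 5.0]
function quad(x)
    if isnothing(x) # informative call: optimal value and starting point
        xStar = -(Q \ q)
        return (0.5 * dot(xStar, Q * xStar) + dot(q, xStar), [0.0, 0.0], nothing)
    end
    return (0.5 * dot(x, Q * x) + dot(q, x), Q * x + q, Q) # value, gradient, Hessian
end
x, status = NWTN(quad; eps=1e-8, plotatend=false)

On a strictly convex quadratic the (unmodified) Newton direction points straight at the minimizer, so a single unit step should be accepted and status should come back as "optimal".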

553
11-09/NWTN.m Normal file

@@ -0,0 +1,553 @@
function [ x , status ] = NWTN( f , varargin )
%function [ x , status ] = NWTN( f , x , eps , MaxFeval , m1 , m2 , delta ,
% tau , sfgrd , MInf , mina )
%
% Apply a classical Newton's method for the minimization of the provided
% function f, which must have the following interface:
%
% [ v , g , H ] = f( x )
%
% Input:
%
% - x is either a [ n x 1 ] real (column) vector denoting the input of
% f(), or [] (empty).
%
% Output:
%
% - v (real, scalar): if x == [] this is the best known lower bound on
% the unconstrained global optimum of f(); it can be -Inf if either f()
% is not bounded below, or no such information is available. If x ~= []
% then v = f(x).
%
% - g (real, [ n x 1 ] real vector): this also depends on x. if x == []
% this is the standard starting point from which the algorithm should
% start, otherwise it is the gradient of f() at x (or a subgradient if
% f() is not differentiable at x, which it should not be if you are
% applying the gradient method to it).
%
% - H (real, [ n x n ] real matrix) must only be specified if x ~= [],
% and it is the Hessian of f() at x. If no such information is
% available, the function throws an error.
%
% The other [optional] input parameters are:
%
% - x (either [ n x 1 ] real vector or [], default []): starting point.
% If x == [], the default starting point provided by f() is used.
%
% - eps (real scalar, optional, default value 1e-6): the accuracy in the
% stopping criterion: the algorithm is stopped when the norm of the
% gradient is less than or equal to eps. If a negative value is provided,
% this is used in a *relative* stopping criterion: the algorithm is
% stopped when the norm of the gradient is less than or equal to
% (- eps) * || norm of the first gradient ||.
%
% - MaxFeval (integer scalar, optional, default value 1000): the maximum
% number of function evaluations (hence, iterations will be not more than
% MaxFeval because at each iteration at least a function evaluation is
% performed, possibly more due to the line search).
%
% - m1 (real scalar, optional, default value 1e-4): first parameter of the
% Armijo-Wolfe-type line search (sufficient decrease). Has to be in (0,1)
%
% - m2 (real scalar, optional, default value 0.9): typically the second
% parameter of the Armijo-Wolfe-type line search (strong curvature
% condition). It should be in (0,1); if not, it is taken to mean that
% the simpler Backtracking line search should be used instead
%
% - delta (real scalar, optional, default value 1e-6): minimum positive
% value for the eigenvalues of the modified Hessian used to compute the
% Newton direction
%
% - tau (real scalar, optional, default value 0.9): scaling parameter for
% the line search. In the Armijo-Wolfe line search it is used in the
% first phase: if the derivative is not positive, then the step is
% divided by tau (which is < 1, hence it is increased). In the
% Backtracking line search, each time the step is multiplied by tau
% (hence it is decreased).
%
% - sfgrd (real scalar, optional, default value 0.2): safeguard parameter
% for the line search. to avoid numerical problems that can occur with
% the quadratic interpolation if the derivative at one endpoint is too
% large w.r.t. the one at the other (which leads to choosing a point
% extremely near to the other endpoint), a *safeguarded* version of
% interpolation is used whereby the new point is chosen in the interval
% [ am + ( as - am ) * sfgrd , as - ( as - am ) * sfgrd ], being [ am , as ] the
% current interval, whatever quadratic interpolation says. If you
% experience problems with the line search taking too many iterations to
% converge at "nasty" points, try to increase this
%
% - MInf (real scalar, optional, default value -Inf): if the algorithm
% determines a value for f() <= MInf this is taken as an indication that
% the problem is unbounded below and computation is stopped
% (a "finite -Inf").
%
% - mina (real scalar, optional, default value 1e-16): if the algorithm
% determines a stepsize value <= mina, this is taken as an indication
% that something has gone wrong (the gradient is not a direction of
% descent, so maybe the function is not differentiable) and computation
% is stopped. It is legal to take mina = 0, thereby in fact skipping this
% test.
%
% Output:
%
% - x ([ n x 1 ] real column vector): the best solution found so far.
%
% - status (string): a string describing the status of the algorithm at
% termination
%
% = 'optimal': the algorithm terminated having proven that x is a(n
% approximately) optimal solution, i.e., the norm of the gradient at x
% is less than the required threshold
%
% = 'unbounded': the algorithm has determined an extremely large negative
% value for f() that is taken as an indication that the problem is
% unbounded below (a "finite -Inf", see MInf above)
%
% = 'stopped': the algorithm terminated having exhausted the maximum
% number of iterations: x is the best solution found so far, but not
% necessarily the optimal one
%
% = 'error': the algorithm found a numerical error that prevents it from
% continuing optimization (see mina above)
%
%{
=======================================
Author: Antonio Frangioni
Date: 29-10-21
Version 1.21
Copyright Antonio Frangioni
=======================================
%}
Plotf = 2;
% 0 = nothing is plotted
% 1 = the level sets of f and the trajectory are plotted (when n = 2)
% 2 = the function value / gap are plotted, iteration-wise
% 3 = the function value / gap are plotted, function-evaluation-wise
Interactive = false; % if we pause at every iteration
if Plotf > 1
if Plotf == 2
MaxIter = 50; % expected number of iterations for the gap plot
else
MaxIter = 70; % expected number of iterations for the gap plot
end
gap = [];
xlim( [ 0 MaxIter ] );
ax = gca;
ax.FontSize = 16;
ax.Position = [ 0.03 0.07 0.95 0.92 ];
ax.Toolbar.Visible = 'off';
end
% reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if ~ isa( f , 'function_handle' )
error( 'f not a function' );
end
if isempty( varargin ) || isempty( varargin{ 1 } )
[ fStar , x ] = f( [] );
else
x = varargin{ 1 };
if ~ isreal( x )
error( 'x not a real vector' );
end
if size( x , 2 ) ~= 1
error( 'x is not a (column) vector' );
end
fStar = f( [] );
end
n = size( x , 1 );
if length( varargin ) > 1
eps = varargin{ 2 };
if ~ isreal( eps ) || ~ isscalar( eps )
error( 'eps is not a real scalar' );
end
else
eps = 1e-6;
end
if length( varargin ) > 2
MaxFeval = round( varargin{ 3 } );
if ~ isscalar( MaxFeval )
error( 'MaxFeval is not an integer scalar' );
end
else
MaxFeval = 1000;
end
if length( varargin ) > 3
m1 = varargin{ 4 };
if ~ isscalar( m1 )
error( 'm1 is not a real scalar' );
end
if m1 <= 0 || m1 >= 1
error( 'm1 is not in (0 ,1)' );
end
else
m1 = 1e-4;
end
if length( varargin ) > 4
m2 = varargin{ 5 };
if ~ isscalar( m2 )
error( 'm2 is not a real scalar' );
end
else
m2 = 0.9;
end
AWLS = ( m2 > 0 && m2 < 1 );
if length( varargin ) > 5
delta = varargin{ 6 };
if ~ isscalar( delta )
error( 'delta is not a real scalar' );
end
if delta < 0
error( 'delta must be >= 0' );
end
else
delta = 1e-6;
end
if length( varargin ) > 6
tau = varargin{ 7 };
if ~ isscalar( tau )
error( 'tau is not a real scalar' );
end
if tau <= 0 || tau >= 1
error( 'tau is not in (0 ,1)' );
end
else
tau = 0.9;
end
if length( varargin ) > 7
sfgrd = varargin{ 8 };
if ~ isscalar( sfgrd )
error( 'sfgrd is not a real scalar' );
end
if sfgrd <= 0 || sfgrd >= 1
error( 'sfgrd is not in (0, 1)' );
end
else
sfgrd = 0.2;
end
if length( varargin ) > 8
MInf = varargin{ 9 };
if ~ isscalar( MInf )
error( 'MInf is not a real scalar' );
end
else
MInf = - Inf;
end
if length( varargin ) > 9
mina = varargin{ 10 };
if ~ isscalar( mina )
error( 'mina is not a real scalar' );
end
if mina < 0
error( 'mina is < 0' );
end
else
mina = 1e-16;
end
% "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lastx = zeros( n , 1 ); % last point visited in the line search
lastg = zeros( n , 1 ); % gradient of lastx
lastH = zeros( n , n ); % Hessian of lastx
d = zeros( n , 1 ); % Newton's direction
feval = 0; % f() evaluations count ("common" with LSs)
% initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fprintf( 'Newton''s method\n');
if fStar > - Inf
fprintf( 'feval\trel gap\t\t|| g(x) ||\trate\t\tdelta');
prevv = Inf;
else
fprintf( 'feval\tf(x)\t\t\t|| g(x) ||\tdelta');
end
fprintf( '\t\tls it\ta*');
fprintf( '\n\n' );
v = f2phi( 0 );
ng = norm( lastg );
if eps < 0
ng0 = - ng; % norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1; % un-scaled stopping criterion
end
% main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
% output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > - Inf
gapk = ( v - fStar ) / max( [ abs( fStar ) 1 ] );
fprintf( '%4d\t%1.4e\t%1.4e' , feval , gapk , ng );
if prevv < Inf
fprintf( '\t%1.4e' , ( v - fStar ) / ( prevv - fStar ) );
else
fprintf( '\t\t' );
end
prevv = v;
if Plotf > 1
if Plotf >= 2
gap( end + 1 ) = gapk;
end
semilogy( gap , 'Color' , 'k' , 'LineWidth' , 2 );
if Plotf == 2
ylim( [ 1e-15 1e+1 ] );
else
ylim( [ 1e-15 1e+4 ] );
end
drawnow;
end
else
fprintf( '%4d\t%1.8e\t\t%1.4e' , feval , v , ng );
if Plotf > 1
if Plotf >= 2
gap( end + 1 ) = v;
end
plot( gap , 'Color' , 'k' , 'LineWidth' , 2 );
drawnow;
end
end
% stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng <= eps * ng0
status = 'optimal';
break;
end
if feval > MaxFeval
status = 'stopped';
break;
end
% compute Newton's direction- - - - - - - - - - - - - - - - - - - - - -
lambdan = eigs( lastH , 1 , 'sa' ); % smallest eigenvalue
if lambdan < delta
fprintf( '\t%1.4e' , delta - lambdan );
lastH = lastH + ( delta - lambdan ) * eye( n );
else
fprintf( '\t0.00e+00' );
end
d = - lastH \ lastg;
phip0 = lastg' * d;
% compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
% in Newton's method, the default initial stepsize is 1
if AWLS
[ a , v ] = ArmijoWolfeLS( v , phip0 , 1 , m1 , m2 , tau );
else
[ a , v ] = BacktrackingLS( v , phip0 , 1 , m1 , tau );
end
% output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
fprintf( '\t%1.2e' , a );
fprintf( '\n' );
if a <= mina
status = 'error';
break;
end
if v <= MInf
status = 'unbounded';
break;
end
% compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
% possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = [ x , lastx ];
line( 'XData' , PXY( 1 , : ) , 'YData' , PXY( 2 , : ) , ...
'LineStyle' , '-' , 'LineWidth' , 2 , 'Marker' , 'o' , ...
'Color' , [ 0 0 0 ] );
end
x = lastx;
ng = norm( lastg );
% iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Interactive
pause;
end
end
% end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ phi , varargout ] = f2phi( alpha )
%
% computes and returns the value of the tomography at alpha
%
% phi( alpha ) = f( x + alpha * d )
%
% if Plotf > 2 saves the data in gap() for plotting
%
% if the second output parameter is required, put there the derivative
% of the tomography in alpha
%
% phi'( alpha ) = < \nabla f( x + alpha * d ) , d >
%
% saves the point in lastx, the gradient in lastg, the Hessian in lasth,
% and increases feval
lastx = x + alpha * d;
[ phi , lastg , lastH ] = f( lastx );
if Plotf > 2
if fStar > - Inf
gap( end + 1 ) = ( phi - fStar ) / max( [ abs( fStar ) 1 ] );
else
gap( end + 1 ) = phi;
end
end
if nargout > 1
varargout{ 1 } = d' * lastg;
end
feval = feval + 1;
end
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ a , phia ] = ArmijoWolfeLS( phi0 , phip0 , as , m1 , m2 , tau )
% performs an Armijo-Wolfe Line Search.
%
% phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
%
% as > 0 is the first value to be tested: if phi'( as ) < 0 then as is
% divided by tau < 1 (hence it is increased) until this does not happen
% any longer
%
% m1 and m2 are the standard Armijo-Wolfe parameters; note that the strong
% Wolfe condition is used
%
% returns the optimal step and the optimal f-value
lsiter = 1; % count iterations of first phase
while feval <= MaxFeval
[ phia , phips ] = f2phi( as );
if ( phia <= phi0 + m1 * as * phip0 ) && ( abs( phips ) <= - m2 * phip0 )
fprintf( ' %2d' , lsiter );
a = as;
return; % Armijo + strong Wolfe satisfied, we are done
end
if phips >= 0
break;
end
as = as / tau;
lsiter = lsiter + 1;
end
fprintf( ' %2d ' , lsiter );
lsiter = 1; % count iterations of second phase
am = 0;
a = as;
phipm = phip0;
while ( feval <= MaxFeval ) && ( ( as - am ) ) > mina && ( phips > 1e-12 )
% compute the new value by safeguarded quadratic interpolation
a = ( am * phips - as * phipm ) / ( phips - phipm );
a = max( [ am + ( as - am ) * sfgrd ...
min( [ as - ( as - am ) * sfgrd a ] ) ] );
% compute phi( a )
[ phia , phip ] = f2phi( a );
if ( phia <= phi0 + m1 * a * phip0 ) && ( abs( phip ) <= - m2 * phip0 )
break; % Armijo + strong Wolfe satisfied, we are done
end
% restrict the interval based on sign of the derivative in a
if phip < 0
am = a;
phipm = phip;
else
as = a;
if as <= mina
break;
end
phips = phip;
end
lsiter = lsiter + 1;
end
fprintf( '%2d' , lsiter );
end
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ as , phia ] = BacktrackingLS( phi0 , phip0 , as , m1 , tau )
% performs a Backtracking Line Search.
%
% phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
%
% as > 0 is the first value to be tested, which is decreased by
% multiplying it by tau < 1 until the Armijo condition with parameter
% m1 is satisfied
%
% returns the optimal step and the optimal f-value
lsiter = 1; % count ls iterations
while feval <= MaxFeval && as > mina
[ phia , ~ ] = f2phi( as );
if phia <= phi0 + m1 * as * phip0 % Armijo satisfied
break; % we are done
end
as = as * tau;
lsiter = lsiter + 1;
end
fprintf( ' %2d' , lsiter );
end
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end % the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

423
11-09/SDG.jl Normal file

@@ -0,0 +1,423 @@
using LinearAlgebra, Printf, Plots
function SDG(f;
x::Union{Nothing, Vector}=nothing,
astart::Real=1,
eps::Real=1e-6,
MaxFeval::Integer=1000,
m1::Real=1e-3,
m2::Real=0.9,
tau::Real=0.9,
sfgrd::Real=0.01,
MInf::Real=-Inf,
mina::Real=1e-16,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# local functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function f2phi(alpha, derivate=false)
lastx = x .- alpha .* g
(phi, lastg, _) = f(lastx)
if (Plotf > 2)
if fStar > -Inf
push!(gap, (phi - fStar) / max(abs(fStar), 1))
else
push!(gap, phi)
end
end
feval += 1
if derivate
return phi, dot(-g, lastg)
end
return phi, nothing
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau)
# performs an Armijo-Wolfe Line Search.
#
# Inputs:
#
# - phi0 = phi( 0 )
#
# - phip0 = phi'( 0 ) (< 0)
#
# - as (> 0) is the first value to be tested: if the Armijo condition
#
# phi( as ) <= phi0 + m1 * as * phip0
#
# is satisfied but the Wolfe condition is not (which means that the
# derivative in as is still negative, and hence that longer steps
# might be possible), then as is divided by tau < 1 (hence it is
# increased) until this does not happen any longer
#
# - m1 (> 0 and < 1, typically small, like 0.01) is the parameter of
# the Armijo condition
#
# - m2 (> m1 > 0, typically large, like 0.9) is the parameter of the
# Wolfe condition
#
# - tau (> 0 and < 1) is the increasing coefficient for the first phase
# (extrapolation)
#
# Outputs:
#
# - a is the "optimal" step
#
# - phia = phi( a ) (the "optimal" f-value)
lsiter = 1 # count iterations of first phase
local phips, phia
while feval ≤ MaxFeval
(phia, phips) = f2phi(as, true) # compute phi( a ) and phi'( a )
if phia > phi0 + m1 * as * phip0 # Armijo not satisfied
break
end
if phips ≥ m2 * phip0 # Wolfe satisfied
if printing
@printf("%2d ", lsiter)
end
a = as
return (a, phia) # Armijo + Wolfe satisfied, done
end
if phips ≥ 0 # derivative is positive, break
break
end
as = as / tau
lsiter += 1
end
if printing
@printf("%2d ", lsiter)
end
lsiter = 1 # count iterations of second phase
am = 0
a = as
phipm = phip0
while (feval ≤ MaxFeval) && ((as - am) > abs(mina)) && (abs(phips) > 1e-12)
if (phipm < 0) && (phips > 0)
# if the derivative in as is positive and that in am is negative,
# then compute the new step by safeguarded quadratic interpolation
a = (am * phips - as * phipm) / (phips - phipm)
a = max(am + ( as - am ) * sfgrd, min(as - ( as - am ) * sfgrd, a))
else
a = (am + as) / 2 # else just use dumb binary search (interval midpoint)
end
phia, phipa = f2phi(a, true) # compute phi( a ) and phi'( a )
if phia ≤ phi0 + m1 * as * phip0 # Armijo satisfied
if phipa ≥ m2 * phip0 # Wolfe satisfied
break # Armijo + Wolfe satisfied, done
end
am = a # Armijo is satisfied but Wolfe is not, i.e., the
phipm = phipa # derivative is still negative: move the left
# endpoint of the interval to a
else # Armijo not satisfied
as = a # move the right endpoint of the interval to a
phips = phipa
end
lsiter += 1
end
if printing
@printf("%2d", lsiter)
end
return (a, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function BacktrackingLS(phi0, phip0, as, m1, tau)
# performs a Backtracking Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested, which is decreased by
# multiplying it by tau < 1 until the Armijo condition with parameter
# m1 is satisfied
#
# returns the optimal step and the optimal f-value
local phia
lsiter = 1 # count ls iterations
while feval ≤ MaxFeval && as > mina
(phia, _) = f2phi(as)
if phia ≤ phi0 + m1 * as * phip0 # Armijo satisfied
break # we are done
end
as = as * tau
lsiter += 1
end
if printing
@printf("\t%2d", lsiter)
end
return (as, phia)
end
# Plotf = 1
# 0 = nothing is plotted
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted, iteration-wise
# 3 = the function value / gap are plotted, function-evaluation-wise
Interactive = false
local gap
PXY = Matrix{Real}(undef, 2, 0)
status = "error"
if Plotf > 1
if Plotf == 2
MaxIter = 200 # expected number of iterations for the gap plot
else
MaxIter = 1000 # expected number of iterations for the gap plot
end
gap = []
end
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if astart == 0
throw(ArgumentError("astart must be ≠ 0"))
end
if m1 ≤ 0 || m1 ≥ 1
throw(ArgumentError("m1: ($m1) is not in (0, 1)"))
end
AWLS = (m2 > 0 && m2 < 1)
if tau ≤ 0 || tau ≥ 1
throw(ArgumentError("tau: ($tau) is not in (0, 1)"))
end
if sfgrd ≤ 0 || sfgrd ≥ 1
throw(ArgumentError("sfgrd: ($sfgrd) is not in (0, 1)"))
end
if mina < 0
throw(ArgumentError("mina: ($mina) must be ≥ 0"))
end
if Plotf > 1 && isnothing(plt)
plt = plot(xlims=(0, MaxIter))
elseif isnothing(plt)
plt = plot()
end
# "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lastx = zeros(n) # last point visited in the line search
lastg = zeros(n) # gradient of lastx
feval = 1 # f() evaluations count ("common" with LSs)
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
println("Gradient method")
end
if fStar > -Inf
if printing
print("feval\trel gap\t\t|| g(x) ||\trate\t")
end
prevv = Inf
else
if printing
print("feval\tf(x)\t\t\t|| g(x) ||")
end
end
if astart > 0
if printing
print("\tls feval\ta*")
end
end
if printing
print("\n\n")
end
# compute first f-value and gradient in x^0 - - - - - - - - - - - - - - - -
g = zeros(n)
v, _ = f2phi(0)
g = lastg
# compute norm of the (first) gradient- - - - - - - - - - - - - - - - - - -
ng = norm(g)
if eps < 0
ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1 # un-scaled stopping criterion
end
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
# output statistics & plot gap/f-values - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = (v .- fStar) / max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
end
if prevv < Inf
if printing
@printf("\t%1.4e", (v .- fStar) / (prevv - fStar))
end
else
if printing
print(" \t ")
end
end
prevv = v
if Plotf > 1
if Plotf ≥ 2
push!(gap, gapk)
end
plot!(plt, yscale=:log10)
if Plotf == 2
plot!(plt, ylims=(1e-15, 1e+1))
else
plot!(plt, ylims=(1e-15, 1e+4))
end
end
else
if printing
@printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng)
end
if Plotf ≥ 2
push!(gap, v)
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng ≤ (eps * ng0)
status = "optimal"
if printing
print("\n")
end
break
end
if feval > MaxFeval
status = "stopped"
if printing
print("\n")
end
break
end
# compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
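# (the steepest-descent direction is d = -g, hence
# phi'(0) = < \nabla f(x) , -g > = -||g||^2)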
phip0 = -ng * ng
if astart < 0
# fixed-step approach
lastx = x .+ astart .* g
(v, lastg, _) = f(lastx)
feval = feval + 1
else
# line-search approach, either Armijo-Wolfe or Backtracking
if AWLS
a, v = ArmijoWolfeLS(v, phip0, astart, m1, m2, tau)
else
a, v = BacktrackingLS(v, phip0, astart, m1, tau)
end
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if astart > 0
if printing
@printf("\t%1.4e\n", a)
end
if a ≤ mina
status = "error"
if printing
print("\n")
end
break
end
else
if printing
print("\n")
end
end
if v ≤ MInf
status = "unbounded"
if printing
print("\n")
end
break
end
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, lastx))
end
x = lastx
# update gradient - - - - - - - - - - - - - - - - - - - - - - - - - - -
g = lastg
ng = norm(g)
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Interactive
readline()
end
end
if plotatend
if Plotf ≥ 2
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
display(plt)
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
return (x, status)
end
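A matching sketch for SDG() (again not part of the commit): the gradient method never asks for a Hessian, so the wrapper can return nothing in the third slot; quad_g is an illustrative name.

using LinearAlgebra
function quad_g(x)
    Q = [6.0 -2.0; -2.0 6.0]
    q = [10.0, 5.0]
    if isnothing(x) # informative call: optimal value and starting point
        xStar = -(Q \ q)
        return (0.5 * dot(xStar, Q * xStar) + dot(q, xStar), [0.0, 0.0], nothing)
    end
    return (0.5 * dot(x, Q * x) + dot(q, x), Q * x + q, nothing) # value, gradient
end
x, status = SDG(quad_g; astart=1, eps=1e-6, plotatend=false)

With the Armijo-Wolfe line search enabled (m2 = 0.9 by default) steepest descent converges linearly here, at a rate governed by the Hessian's condition number (8/4 = 2).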

602
11-09/SDG.m Normal file

@@ -0,0 +1,602 @@
function [ x , status ] = SDG( f , varargin )
%function [ x , status ] = SDG( f , x , astart , eps , MaxFeval , m1 , ...
% m2 , tau , sfgrd , MInf , mina )
%
% Apply the classical Steepest Descent algorithm for the minimization of
% the provided function f, which must have the following interface:
%
% [ v , g ] = f( x )
%
% Input:
%
% - x is either a [ n x 1 ] real (column) vector denoting the input of
% f(), or [] (empty).
%
% Output:
%
% - v (real, scalar): if x == [] this is the best known lower bound on
% the unconstrained global optimum of f(); it can be -Inf if either f()
% is not bounded below, or no such information is available. If x ~= []
% then v = f( x ).
%
% - g (real, [ n x 1 ] real vector): this also depends on x. if x == []
% this is the standard starting point from which the algorithm should
% start, otherwise it is the gradient of f() at x (or a subgradient if
% f() is not differentiable at x, which it should not be if you are
% applying the gradient method to it).
%
% The other [optional] input parameters are:
%
% - x (either [ n x 1 ] real vector or [], default []): starting point.
% If x == [], the default starting point provided by f() is used.
%
% - astart (real scalar, optional, default value 1): if it is > 0, then it
% is used as the starting value of alpha in the line search, be it the
% Armijo-Wolfe or the Backtracking one. Otherwise, it is taken to mean
% that no line search is to be performed, i.e., a fixed step approach
% has to be used with step = - astart (hence, astart == 0 is an invalid
% setting in either case).
%
% - eps (real scalar, optional, default value 1e-6): the accuracy in the
% stopping criterion: the algorithm is stopped when the norm of the
% gradient is less than or equal to eps. If a negative value is provided,
% this is used in a *relative* stopping criterion: the algorithm is
% stopped when the norm of the gradient is less than or equal to
% (- eps) * || norm of the first gradient ||.
%
% - MaxFeval (integer scalar, optional, default value 1000): the maximum
% number of function evaluations (hence, iterations will be not more than
% MaxFeval because at each iteration at least a function evaluation is
% performed, possibly more due to the line search).
%
% - m1 (real scalar, optional, must be in ( 0 , 1 ), default value 1e-3):
% parameter of the Armijo condition (sufficient decrease) in the line
% search
%
% - m2 (real scalar, optional, default value 0.9): typically the parameter
% of the Wolfe condition (sufficient derivative increase) in the line
% search. It should be in ( 0 , 1 ); if not, it is taken to mean that
% the simpler Backtracking line search should be used instead
%
% - tau (real scalar, optional, default value 0.9): scaling parameter for
% the line search. In the Armijo-Wolfe line search it is used in the
% first phase to identify a point where the Armijo condition is not
% satisfied or the derivative is positive by dividing the current
% value (starting with astart, see above) by tau (which is < 1, hence it
% is increased). In the Backtracking line search, each time the step is
% multiplied by tau (hence it is decreased).
%
% - sfgrd (real scalar, optional, default value 0.01): safeguard parameter
% for the line search. to avoid numerical problems that can occur with
% the quadratic interpolation if the derivative at one endpoint is too
% large w.r.t. the one at the other (which leads to choosing a point
% extremely near to the other endpoint), a *safeguarded* version of
% interpolation is used whereby the new point is chosen in the interval
% [ am + ( as - am ) * sfgrd , as - ( as - am ) * sfgrd ], being [ am , as ] the
% current interval, whatever quadratic interpolation says. If you
% experience problems with the line search taking too many iterations to
% converge at "nasty" points, try to increase this
%
% - MInf (real scalar, optional, default value -Inf): if the algorithm
% determines a value for f() <= MInf this is taken as an indication that
% the problem is unbounded below and computation is stopped
% (a "finite -Inf").
%
% - mina (real scalar, optional, default value 1e-16): if the algorithm
% determines a stepsize value <= mina, this is taken as an indication
% that something has gone wrong (the gradient is not a direction of
% descent, so maybe the function is not differentiable) and computation
% is stopped. It is legal to take mina = 0, thereby in fact skipping this
% test.
%
% Output:
%
% - x ([ n x 1 ] real column vector): the best solution found so far
%
% - status (string): a string describing the status of the algorithm at
% termination
%
% = 'optimal': the algorithm terminated having proven that x is a(n
% approximately) optimal solution, i.e., the norm of the gradient at x
% is less than the required threshold
%
% = 'unbounded': the algorithm has determined an extremely large negative
% value for f() that is taken as an indication that the problem is
% unbounded below (a "finite -Inf", see MInf above)
%
% = 'stopped': the algorithm terminated having exhausted the maximum
% number of iterations: x is the best solution found so far, but not
% necessarily the optimal one
%
% = 'error': the algorithm found a numerical error that prevents it from
% continuing optimization (see mina above)
%
%{
=======================================
Author: Antonio Frangioni
Date: 27-04-23
Version 1.30
Copyright Antonio Frangioni
=======================================
%}
Plotf = 1;
% 0 = nothing is plotted
% 1 = the level sets of f and the trajectory are plotted (when n = 2)
% 2 = the function value / gap are plotted, iteration-wise
% 3 = the function value / gap are plotted, function-evaluation-wise
Interactive = true; % if we pause at every iteration
if Plotf > 1
if Plotf == 2
MaxIter = 200; % expected number of iterations for the gap plot
else
MaxIter = 1000; % expected number of iterations for the gap plot
end
gap = [];
end
% reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if ~ isa( f , 'function_handle' )
error( 'f not a function' );
end
if isempty( varargin ) || isempty( varargin{ 1 } )
[ fStar , x ] = f( [] );
else
x = varargin{ 1 };
if ~ isreal( x )
error( 'x not a real vector' );
end
if size( x , 2 ) ~= 1
error( 'x is not a (column) vector' );
end
fStar = f( [] );
end
n = size( x , 1 );
if length( varargin ) > 1
astart = varargin{ 2 };
if ~ isscalar( astart )
error( 'astart is not a real scalar' );
end
if astart == 0
error( 'astart must be != 0' );
end
else
astart = 1;
end
if length( varargin ) > 2
eps = varargin{ 3 };
if ~ isreal( eps ) || ~ isscalar( eps )
error( 'eps is not a real scalar' );
end
else
eps = 1e-6;
end
if length( varargin ) > 3
MaxFeval = round( varargin{ 4 } );
if ~ isscalar( MaxFeval )
error( 'MaxFeval is not an integer scalar' );
end
else
MaxFeval = 1000;
end
if length( varargin ) > 4
m1 = varargin{ 5 };
if ~ isscalar( m1 )
error( 'm1 is not a real scalar' );
end
if m1 <= 0 || m1 >= 1
error( 'm1 is not in ( 0 , 1 )' );
end
else
m1 = 1e-3;
end
if length( varargin ) > 5
m2 = varargin{ 6 };
if ~ isscalar( m2 )
error( 'm2 is not a real scalar' );
end
else
m2 = 0.9;
end
AWLS = ( m2 > 0 && m2 < 1 );
if length( varargin ) > 6
tau = varargin{ 7 };
if ~ isscalar( tau )
error( 'tau is not a real scalar' );
end
if tau <= 0 || tau >= 1
error( 'tau is not in ( 0 , 1 )' );
end
else
tau = 0.9;
end
if length( varargin ) > 7
sfgrd = varargin{ 8 };
if ~ isscalar( sfgrd )
error( 'sfgrd is not a real scalar' );
end
if sfgrd <= 0 || sfgrd >= 1
error( 'sfgrd is not in ( 0 , 1 )' );
end
else
sfgrd = 0.01;
end
if length( varargin ) > 8
MInf = varargin{ 9 };
if ~ isscalar( MInf )
error( 'MInf is not a real scalar' );
end
else
MInf = - Inf;
end
if length( varargin ) > 9
mina = varargin{ 10 };
if ~ isscalar( mina )
error( 'mina is not a real scalar' );
end
if mina < 0
error( 'mina is < 0' );
end
else
mina = 1e-16;
end
if Plotf > 1
xlim( [ 0 MaxIter ] );
ax = gca;
ax.FontSize = 16;
ax.Position = [ 0.03 0.07 0.95 0.92 ];
ax.Toolbar.Visible = 'off';
end
% "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lastx = zeros( n , 1 ); % last point visited in the line search
lastg = zeros( n , 1 ); % gradient of lastx
feval = 1; % f() evaluations count ("common" with LSs)
% initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
fprintf( 'Gradient method\n');
if fStar > - Inf
fprintf( 'feval\trel gap\t\t|| g(x) ||\trate\t');
prevv = Inf;
else
fprintf( 'feval\tf(x)\t\t\t|| g(x) ||');
end
if astart > 0
fprintf( '\tls feval\ta*' );
end
fprintf( '\n\n' );
% compute first f-value and gradient in x^0 - - - - - - - - - - - - - - - -
g = 0;
v = f2phi( 0 );
g = lastg;
% compute norm of the (first) gradient- - - - - - - - - - - - - - - - - - -
ng = norm( g );
if eps < 0
ng0 = - ng; % norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1; % un-scaled stopping criterion
end
% main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
% output statistics & plot gap/f-values - - - - - - - - - - - - - - - -
if fStar > - Inf
gapk = ( v - fStar ) / max( [ abs( fStar ) 1 ] );
fprintf( '%4d\t%1.4e\t%1.4e' , feval , gapk , ng );
if prevv < Inf
fprintf( '\t%1.4e' , ( v - fStar ) / ( prevv - fStar ) );
else
fprintf( ' \t ' );
end
prevv = v;
if Plotf > 1
if Plotf >= 2
gap( end + 1 ) = gapk;
end
semilogy( gap , 'Color' , 'k' , 'LineWidth' , 2 );
if Plotf == 2
ylim( [ 1e-15 1e+1 ] );
else
ylim( [ 1e-15 1e+4 ] );
end
%drawnow;
end
else
fprintf( '%4d\t%1.8e\t\t%1.4e' , feval , v , ng );
if Plotf > 1
if Plotf >= 2
gap( end + 1 ) = v;
end
plot( gap , 'Color' , 'k' , 'LineWidth' , 2 );
%drawnow;
end
end
% stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng <= eps * ng0
status = 'optimal';
fprintf( '\n' );
break;
end
if feval > MaxFeval
status = 'stopped';
fprintf( '\n' );
break;
end
% compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
phip0 = - ng * ng;
if astart < 0
% fixed-step approach
lastx = x + astart * g;
[ v , lastg ] = f( lastx );
feval = feval + 1;
else
% line-search approach, either Armijo-Wolfe or Backtracking
if AWLS
[ a , v ] = ArmijoWolfeLS( v , phip0 , astart , m1 , m2 , tau );
else
[ a , v ] = BacktrackingLS( v , phip0 , astart , m1 , tau );
end
end
% output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if astart > 0
fprintf( '\t%1.4e\n' , a );
if a <= mina
status = 'error';
fprintf( '\n' );
break;
end
else
fprintf( '\n' );
end
if v <= MInf
status = 'unbounded';
fprintf( '\n' );
break;
end
% compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
% possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = [ x , lastx ];
line( 'XData' , PXY( 1 , : ) , 'YData' , PXY( 2 , : ) , ...
'LineStyle' , '-' , 'LineWidth' , 2 , 'Marker' , 'o' , ...
'Color' , [ 0 0 0 ] );
end
x = lastx;
% update gradient - - - - - - - - - - - - - - - - - - - - - - - - - - -
g = lastg;
ng = norm( g );
% iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Interactive
pause;
end
end
% end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ phi , varargout ] = f2phi( alpha )
%
% computes and returns the value of the tomography at alpha
%
% phi( alpha ) = f( x - alpha * g )
%
% if Plotf > 2 saves the data in gap() for plotting
%
% if the second output parameter is required, put there the derivative
% of the tomography in alpha
%
% phi'( alpha ) = < \nabla f( x - alpha * g ) , - g >
%
% saves the point in lastx, the gradient in lastg and increases feval
lastx = x - alpha * g;
[ phi , lastg ] = f( lastx );
if Plotf > 2
if fStar > - Inf
gap( end + 1 ) = ( phi - fStar ) / max( [ abs( fStar ) 1 ] );
else
gap( end + 1 ) = phi;
end
end
if nargout > 1
varargout{ 1 } = - g' * lastg;
end
feval = feval + 1;
end
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ a , phia ] = ArmijoWolfeLS( phi0 , phip0 , as , m1 , m2 , tau )
% performs an Armijo-Wolfe Line Search.
%
% Inputs:
%
% - phi0 = phi( 0 )
%
% - phip0 = phi'( 0 ) (< 0)
%
% - as (> 0) is the first value to be tested: if the Armijo condition
%
% phi( as ) <= phi0 + m1 * as * phip0
%
% is satisfied but the Wolfe condition is not (which means that the
% derivative in as is still negative, and hence that longer steps
% might be possible), then as is divided by tau < 1 (hence it is
% increased) until this does not happen any longer
%
% - m1 (> 0 and < 1, typically small, like 0.01) is the parameter of
% the Armijo condition
%
% - m2 (> m1 > 0, typically large, like 0.9) is the parameter of the
% Wolfe condition
%
% - tau (> 0 and < 1) is the increasing coefficient for the first phase
% (extrapolation)
%
% Outputs:
%
% - a is the "optimal" step
%
% - phia = phi( a ) (the "optimal" f-value)
lsiter = 1; % count iterations of first phase
while feval <= MaxFeval
[ phia , phips ] = f2phi( as ); % compute phi( a ) and phi'( a )
if phia > phi0 + m1 * as * phip0 % Armijo not satisfied
break;
end
if phips >= m2 * phip0 % Wolfe satisfied
fprintf( ' %2d ' , lsiter );
a = as;
return; % Armijo + Wolfe satisfied, done
end
if phips >= 0 % derivative is positive, break
break;
end
as = as / tau;
lsiter = lsiter + 1;
end
fprintf( ' %2d ' , lsiter );
lsiter = 1; % count iterations of second phase
am = 0;
a = as;
phipm = phip0;
while ( feval <= MaxFeval ) && ( ( as - am ) ) > abs( mina ) && ...
( abs( phips ) > 1e-12 )
if ( phipm < 0 ) && ( phips > 0 )
% if the derivative in as is positive and that in am is negative,
% then compute the new step by safeguarded quadratic interpolation
a = ( am * phips - as * phipm ) / ( phips - phipm );
a = max( [ am + ( as - am ) * sfgrd ...
min( [ as - ( as - am ) * sfgrd a ] ) ] );
else
a = ( am + as ) / 2; % else just use dumb binary search (interval midpoint)
end
[ phia , phipa ] = f2phi( a ); % compute phi( a ) and phi'( a )
if phia <= phi0 + m1 * as * phip0 % Armijo satisfied
if phipa >= m2 * phip0 % Wolfe satisfied
break; % Armijo + Wolfe satisfied, done
end
am = a; % Armijo is satisfied but Wolfe is not, i.e., the
phipm = phipa; % derivative is still negative: move the left
% endpoint of the interval to a
else % Armijo not satisfied
as = a; % move the right endpoint of the interval to a
phips = phipa;
end
lsiter = lsiter + 1;
end
fprintf( '%2d' , lsiter );
end
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ as , phia ] = BacktrackingLS( phi0 , phip0 , as , m1 , tau )
% performs a Backtracking Line Search.
%
% phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
%
% as > 0 is the first value to be tested, which is decreased by
% multiplying it by tau < 1 until the Armijo condition with parameter
% m1 is satisfied
%
% returns the optimal step and the optimal f-value
lsiter = 1; % count ls iterations
while feval <= MaxFeval && as > mina
[ phia , ~ ] = f2phi( as );
if phia <= phi0 + m1 * as * phip0 % Armijo satisfied
break; % we are done
end
as = as * tau;
lsiter = lsiter + 1;
end
fprintf( '\t%2d' , lsiter );
end
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end % the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

415
11-09/TestFunctions.m Normal file

@@ -0,0 +1,415 @@
function TF = TestFunctions()
%function TF = TestFunctions()
%
% Produces a cell array of function handlers, useful to test unconstrained
% optimization algorithms.
%
% Each function in the array has the following interface:
%
% [ v , varargout ] = f( x )
%
% Input:
%
% - x is either a [ n x 1 ] real (column) vector denoting the input of
% f(), or [] (empty).
%
% Output:
%
% - v (real, scalar): if x == [] this is the best known lower bound on
% the unconstrained global optimum of f(); it can be -Inf if either f()
% is not bounded below, or no such information is available. If x ~= []
% then v = f(x).
%
% - g (real, [ n x 1 ] real vector) is the first optional argument. This
% also depends on x. if x == [] this is the standard starting point of an
% optimization algorithm, otherwise it is the gradient of f() at x, or a
% subgradient if f() is not differentiable at x.
%
% - H (real, [ n x n ] real matrix) is the second optional argument. This
% must only be specified if x ~= [], and it is the Hessian of f() at x.
% If no such information is available, the function throws an error.
%
% The current list of functions is the following:
%
% 1 Standard 2x2 PSD quadratic function with nicely conditioned Hessian.
%
% 2 Standard 2x2 PSD quadratic function with less nicely conditioned
% Hessian.
%
% 3 Standard 2x2 PSD quadratic function with Hessian having one zero
% eigenvalue.
%
% 4 Standard 2x2 quadratic function with indefinite Hessian (one positive
% and one negative eigenvalue)
%
% 5 Standard 2x2 quadratic function with "very elongated" Hessian (a
% very small positive minimum eigenvalue, the other much larger)
%
% 6 the 2-dim Rosenbrock function
%
% 7 the "six-hump camel" function
%
% 8 the Ackley function
%
% 9 a 2-dim nondifferentiable function coming from Lasso regularization
%
% 10 a 76-dim (nonconvex, differentiable) function coming from a fitting
% problem with ( X , y ) both [ 288 , 1 ] (i.e., a fitting with only
% one feature) using a "rough" NN with 1 input, 1 output, 3 hidden
% layers of 5 nodes each, and tanh activation function
%
% 11 same as 10 plus a 1e-4 || x ||^2 / 2 ridge stabilising term
%
%{
=======================================
Author: Antonio Frangioni
Date: 08-11-18
Version 1.01
Copyright Antonio Frangioni
=======================================
%}
TF = cell( 11 , 1 );
TF{ 1 } = @(x) genericquad( [ 6 -2 ; -2 6 ] , [ 10 ; 5 ] , x );
% eigenvalues: 4, 8
TF{ 2 } = @(x) genericquad( [ 5 -3 ; -3 5 ] , [ 10 ; 5 ] , x );
% eigenvalues: 2, 8
TF{ 3 } = @(x) genericquad( [ 4 -4 ; -4 4 ] , [ 10 ; 5 ] , x );
% eigenvalues: 0, 8
TF{ 4 } = @(x) genericquad( [ 3 -5 ; -5 3 ] , [ 10 ; 5 ] , x );
% eigenvalues: -2, 8
TF{ 5 } = @(x) genericquad( [ 101 -99 ; -99 101 ] , [ 10 ; 5 ] , x );
% eigenvalues: 2, 200
% HBG: alpha = 0.0165 , beta = 0.678
TF{ 6 } = @rosenbrock;
TF{ 7 } = @sixhumpcamel;
TF{ 8 } = @ackley;
TF{ 9 } = @lasso;
TF{ 10 } = @myNN;
TF{ 11 } = @myNN2;
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = genericquad( Q , q , x )
% generic quadratic function f(x) = x' * Q * x / 2 + q' * x
if isempty( x ) % informative call
if min( eig( Q ) ) > 1e-14
xStar = Q \ -q;
v = 0.5 * xStar' * Q * xStar + q' * xStar;
else
v = - Inf;
end
if nargout > 1
varargout{ 1 } = [ 0 ; 0 ];
end
else
if ~ isequal( size( x ) , [ 2 1 ] )
error( 'genericquad: x is of wrong size' );
end
v = 0.5 * x' * Q * x + q' * x; % f(x)
if nargout > 1
varargout{ 1 } = Q * x + q; % \nabla f(x)
if nargout > 2
varargout{ 2 } = Q; % \nabla^2 f(x)
end
end
end
end % genericquad
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = rosenbrock( x )
% rosenbrock's valley-shaped function
% syms x y
% f = @(x, y) 100 * ( y - x^2 )^2 + ( x - 1 )^2
%
% diff( f , x )
% 2 * x - 400 * x * ( - x^2 + y ) - 2
%
% diff( f , y )
% - 200 * x^2 + 200 * y
%
% diff( f , x , 2 )
% 1200 * x^2 - 400 * y + 2
%
% diff( f , y , 2 )
% 200
%
% diff( f , x , y )
% -400 * x
if isempty( x ) % informative call
v = 0;
if nargout > 1
varargout{ 1 } = [ -1 ; 1 ];
end
else
v = 100 * ( x( 2 ) - x( 1 )^2 )^2 + ( x( 1 ) - 1 )^2; % f(x)
if nargout > 1
g = zeros( 2 , 1 );
g( 1 ) = 2 * x( 1 ) - 400* x( 1 ) * ( x( 2 ) - x( 1 )^2 ) - 2;
g( 2 ) = - 200 * x( 1 )^2 + 200 * x( 2 );
varargout{ 1 } = g; % \nabla f(x)
if nargout > 2
H = zeros( 2 , 2 );
H( 1 , 1 ) = 1200 * x( 1 )^2 - 400 * x( 2 ) + 2;
H( 2 , 2 ) = 200;
H( 2 , 1 ) = -400 * x( 1 );
H( 1 , 2 ) = H( 2 , 1 );
varargout{ 2 } = H; % \nabla^2 f(x)
end
end
end
end % rosenbrock
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = sixhumpcamel( x )
% six-hump-camel valley-shaped function
% syms x y
% f = @(x, y) ( 4 - 2.1 * x^2 + x^4 / 3 ) * x^2 + x * y + 4 * ( y^2 - 1 ) *
% y^2
%
% diff( f , x )
% 2 * x^5 - ( 42 * x^3 ) / 5 + 8 * x + y
%
% diff( f , y )
% 16 * y^3 - 8 * y + x
%
% diff( f , x , 2 )
% 10 * x^4 - ( 126 * x^2 ) / 5 + 8
%
% diff( f , y , 2 )
% 48 * y^2 - 8
%
% diff( f , x , y )
% 1
if isempty( x ) % informative call
v = -1.03162845349;
if nargout > 1
varargout{ 1 } = [ 0 ; 0 ];
end
else
v = ( 4 - 2.1 * x( 1 )^2 + x( 1 )^4 / 3 ) * x( 1 )^2 + ...
x( 1 ) * x( 2 ) + 4 * ( x( 2 )^2 - 1 ) * x( 2 )^2; % f(x)
if nargout > 1
g = zeros( 2 , 1 );
g( 1 ) = 2 * x( 1 )^5 - ( 42 * x( 1 )^3 ) / 5 + 8 * x( 1 ) + x( 2 );
g( 2 ) = 16 * x( 2 )^3 - 8 * x( 2 ) + x( 1 );
varargout{ 1 } = g; % \nabla f(x)
if nargout > 2
H = zeros( 2 , 2 );
H( 1 , 1 ) = 10 * x( 1 )^4 - ( 126 * x( 1 )^2 ) / 5 + 8;
H( 2 , 2 ) = 48 * x( 2 )^2 - 8;
H( 2 , 1 ) = 1;
H( 1 , 2 ) = H( 2 , 1 );
varargout{ 2 } = H; % \nabla^2 f(x)
end
end
end
end % sixhumpcamel
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = ackley( xx )
% syms x y
% f = @(x, y) - 20 * exp( - 0.2 * sqrt( ( x^2 + y^2 ) / 2 ) ) ...
% - exp( ( cos( 2 * pi * x ) + cos( 2 * pi * y ) ) / 2 ) ...
% + 20 + exp(1)
%
ManuallyComputedfGH = 0;
if isempty( xx ) % informative call
v = 0;
if nargout > 1
varargout{ 1 } = [ 2 ; 2 ];
end
else
if ~ isequal( size( xx ) , [ 2 1 ] )
error( 'ackley: x is of wrong size' );
end
if ManuallyComputedfGH
% diff( f , x )
% pi*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x) +
% (2*x*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2)
%
% diff( f , y )
% pi*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*y) +
% (2*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2)
%
% diff( f , x , 2 )
%
% (2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2) +
% 2*pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*cos(2*pi*x) -
% (x^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
% (x^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
% pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x)^2
%
% diff( f , y , 2 )
% (2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2) +
% 2*pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*cos(2*pi*y) -
% (y^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
% (y^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
% pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*y)^2
%
% diff( f , x , y)
% - (x*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
% (x*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
% pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x)*sin(2*pi*y)
x = xx( 1 );
y = xx( 2 );
sqn2 = ( x^2 + y^2 ) / 2;
cosx = cos( 2 * pi * x );
cosy = cos( 2 * pi * y );
comp1 = exp( - (sqn2)^(1/2) / 5 );
comp2 = exp( ( cosx + cosy ) / 2 );
v = - 20 * comp1 - comp2 + 20 + exp( 1 );
if nargout > 1
sinx = sin( 2 * pi * x );
siny = sin( 2 * pi * y );
g = zeros( 2 , 1 );
g( 1 ) = pi * comp2 * sinx + 2 * x * comp1 / (sqn2)^(1/2);
g( 2 ) = pi * comp2 * siny + 2 * y * comp1 / (sqn2)^(1/2);
varargout{ 1 } = g; % \nabla f(x)
if nargout > 2
H = zeros( 2 , 2 );
H( 1 , 1 ) = (2*comp1)/(sqn2)^(1/2) + 2*pi^2*comp2*cosx ...
- (x^2*comp1)/(5*sqn2) - (x^2*comp1)/(sqn2)^(3/2)...
- pi^2*comp2*sinx^2;
H( 2 , 2 ) = (2*comp1)/(sqn2)^(1/2) + 2*pi^2*comp2*cosy ...
- (y^2*comp1)/(5*sqn2) - (y^2*comp1)/(sqn2)^(3/2)...
- pi^2*comp2*siny^2;
H( 1 , 2 ) = - (x*y*comp1)/(5*(sqn2)) ...
- (x*y*comp1)/(sqn2)^(3/2) ...
- pi^2*comp2*sinx*siny;
H( 2 , 1 ) = H( 1 , 2 );
varargout{ 2 } = H; % \nabla^2 f(x)
end
end
else
if nargout > 2
[ H , g , v ] = ackley_Hes( xx );
varargout{ 2 } = H;
varargout{ 1 } = g';
elseif nargout > 1
[ g , v ] = ackley_Grd( xx );
varargout{ 1 } = g';
else
v = - 20 * exp( - ( ( xx( 1 )^2 + xx( 2 )^2 ) / 2 )^(1/2) / 5 )...
- exp( cos( 2 * pi * xx( 1 ) ) / 2 + ...
cos( 2 * pi * xx( 2 ) ) / 2 ) + 20 + exp( 1 );
end
end
end
end % ackley
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = lasso( x )
% nondifferentiable lasso example:
%
% f( x , y ) = || 3 * x + 2 * y - 2 ||_2^2 + 10 ( | x | + | y | )
if isempty( x ) % informative call
v = ( 2 - 1/3 )^2 + 10/9; % optimal solution [ 1/9 , 0 ]
if nargout > 1
varargout{ 1 } = [ 0 ; 0 ];
end
else
v = ( 3 * x( 1 ) + 2 * x( 2 ) - 2 )^2 + ...
10 * ( abs( x( 1 ) ) + abs( x( 2 ) ) ); % f(x)
if nargout > 1
g = zeros( 2 , 1 );
g( 1 ) = 18 * x( 1 ) + 12 * x( 2 ) - 12 + 10 * sign( x( 1 ) );
g( 2 ) = 12 * x( 1 ) + 8 * x( 2 ) - 8 + 10 * sign( x( 2 ) );
varargout{ 1 } = g; % \nabla f(x)
if nargout > 2
error( 'lasso: Hessian not available' );
end
end
end
end % lasso
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = myNN( x )
% 1 x 5 x 5 x 5 x 1 = 76 w NN for solving a 1D fitting problem
if isempty( x ) % informative call
v = - Inf; % optimal value unknown (although 0 may perhaps be good)
if nargout > 1
      % Xavier initialization: uniform random in [ - A , A ] with
      % A = \sqrt{6} / \sqrt{n + m}, with n and m the sizes of the input
      % and output layers. in our case n + m is either 6 or 10, so we
      % take A = 1
      %
      % note that the starting point is random, so each run will be
      % different (unless an explicit starting point is provided); if
      % stability is needed, the seed of the generator has to be set
      % externally
varargout{ 1 } = 2 * rand( 76 , 1 ) - 1;
end
else
v = testNN( x ); % f(x)
if nargout > 1
varargout{ 1 } = testNN_Jac( x )'; % \nabla f( x )
if nargout > 2
varargout{ 2 } = testNN_Hes( x )'; % \nabla^2 f( x )
end
end
end
end % myNN
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function [ v , varargout ] = myNN2( x )
% 1 x 5 x 5 x 5 x 1 = 76 w NN for solving a 1D fitting problem
% plus ridge stabilization \lambda || x ||^2 / 2
lambda = 1e+2;
if isempty( x ) % informative call
v = - Inf; % optimal value unknown (although 0 may perhaps be good)
if nargout > 1
      % Xavier initialization: uniform random in [ - A , A ] with
      % A = \sqrt{6} / \sqrt{n + m}, with n and m the sizes of the input
      % and output layers. in our case n + m is either 6 or 10, so we
      % take A = 1
      %
      % note that the starting point is random, so each run will be
      % different (unless an explicit starting point is provided); if
      % stability is needed, the seed of the generator has to be set
      % externally
varargout{ 1 } = 2 * rand( 76 , 1 ) - 1;
end
else
v = testNN( x ) + lambda * x' * x / 2; % f(x)
if nargout > 1
varargout{ 1 } = testNN_Jac( x )' + lambda * x; % \nabla f( x )
if nargout > 2
varargout{ 2 } = testNN_Hes( x )' + lambda * eye( 76 );
% \nabla^2 f( x )
end
end
end
end % myNN2
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end

View File

@ -0,0 +1,42 @@
function v = roughNN( w , x )
%
% v = roughNN( w , x )
%
% returns the value of the function v = f( x ) as currently estimated by
% a small NN with 1 input, 1 output, 3 hidden layers of 5 nodes each, and
% tanh activation function.
%
% Input:
%
% - w is the [ 76 x 1 ] real vector containing the weights of the NN,
% i.e., w is made as follows:
%   [ 1 .. 5 ] are the [ 5 x 1 ] weights of the first layer
%   [ 6 .. 10 ] are the [ 5 x 1 ] biases of the first layer
%   [ 11 .. 35 ] are the [ 5 x 5 ] weights of the second layer
%   [ 36 .. 40 ] are the [ 5 x 1 ] biases of the second layer
%   [ 41 .. 65 ] are the [ 5 x 5 ] weights of the third layer
%   [ 66 .. 70 ] are the [ 5 x 1 ] biases of the third layer
%   [ 71 .. 75 ] are the [ 5 x 1 ] weights of the fourth (output) layer
%   [ 76 ] is the [ 1 x 1 ] bias of the fourth (output) layer
%
% - x is the real scalar containing the input of f()
%
% Output:
%
% - v (real, scalar): v = f( x ) as estimated by the NN with weights w
%
%{
% =======================================
% Author: Antonio Frangioni
% Date: 28-08-22
% Version 1.00
% Copyright Antonio Frangioni
% =======================================
%}
g = tanh( ( ones( 5 , 1 ) * x ) .* w( 1 : 5 ) + w( 6 : 10 ) );
g = tanh( reshape( w( 11 : 35 ) , [ 5 5 ] ) * g + w( 36 : 40 ) );
g = tanh( reshape( w( 41 : 65 ) , [ 5 5 ] ) * g + w( 66 : 70 ) );
v = g' * w( 71 : 75 ) + w( 76 );
end

View File

@ -0,0 +1,627 @@
function v = testNN( w )
%
% v = testNN( w )
%
% returns the value of the empirical error of the NN (or, in fact,
% whatever function is encoded in 'roughNN()') with the weights contained
% in w.
%
% The empirical error is estimated over a set of 288 input/output pairs
% ( X , y ), with X containing only one feature; the data is hard-coded
% into the function so that its gradient can be easily computed by ADiGator.
%
% Input:
%
% - w is the real vector containing the weights of the NN, see roughNN
% for details
%
% Output:
%
% - half the total squared error made by roughNN() on the given test set
%
%{
% =======================================
% Author: Antonio Frangioni
% Date: 28-08-22
% Version 1.00
% Copyright Antonio Frangioni
% =======================================
%}
N = 288; % size
% inputs
X = [
0.0000000000000000
0.0034843205574913
0.0069686411149826
0.0104529616724739
0.0139372822299652
0.0174216027874564
0.0209059233449477
0.0243902439024390
0.0278745644599303
0.0313588850174216
0.0348432055749129
0.0383275261324042
0.0418118466898955
0.0452961672473868
0.0487804878048781
0.0522648083623693
0.0557491289198606
0.0592334494773519
0.0627177700348432
0.0662020905923345
0.0696864111498258
0.0731707317073171
0.0766550522648084
0.0801393728222996
0.0836236933797909
0.0871080139372822
0.0905923344947735
0.0940766550522648
0.0975609756097561
0.1010452961672474
0.1045296167247387
0.1080139372822300
0.1114982578397213
0.1149825783972125
0.1184668989547038
0.1219512195121951
0.1254355400696864
0.1289198606271777
0.1324041811846690
0.1358885017421603
0.1393728222996516
0.1428571428571428
0.1463414634146341
0.1498257839721254
0.1533101045296167
0.1567944250871080
0.1602787456445993
0.1637630662020906
0.1672473867595819
0.1707317073170732
0.1742160278745645
0.1777003484320558
0.1811846689895470
0.1846689895470383
0.1881533101045296
0.1916376306620209
0.1951219512195122
0.1986062717770035
0.2020905923344948
0.2055749128919861
0.2090592334494774
0.2125435540069686
0.2160278745644599
0.2195121951219512
0.2229965156794425
0.2264808362369338
0.2299651567944251
0.2334494773519164
0.2369337979094077
0.2404181184668990
0.2439024390243902
0.2473867595818815
0.2508710801393728
0.2543554006968641
0.2578397212543554
0.2613240418118467
0.2648083623693380
0.2682926829268293
0.2717770034843205
0.2752613240418119
0.2787456445993031
0.2822299651567944
0.2857142857142857
0.2891986062717770
0.2926829268292683
0.2961672473867596
0.2996515679442509
0.3031358885017422
0.3066202090592334
0.3101045296167247
0.3135888501742160
0.3170731707317073
0.3205574912891986
0.3240418118466899
0.3275261324041812
0.3310104529616725
0.3344947735191638
0.3379790940766551
0.3414634146341464
0.3449477351916376
0.3484320557491289
0.3519163763066202
0.3554006968641115
0.3588850174216028
0.3623693379790941
0.3658536585365854
0.3693379790940767
0.3728222996515679
0.3763066202090593
0.3797909407665505
0.3832752613240418
0.3867595818815331
0.3902439024390244
0.3937282229965157
0.3972125435540070
0.4006968641114982
0.4041811846689896
0.4076655052264808
0.4111498257839721
0.4146341463414634
0.4181184668989547
0.4216027874564460
0.4250871080139373
0.4285714285714285
0.4320557491289199
0.4355400696864111
0.4390243902439024
0.4425087108013937
0.4459930313588850
0.4494773519163763
0.4529616724738676
0.4564459930313589
0.4599303135888502
0.4634146341463415
0.4668989547038327
0.4703832752613241
0.4738675958188153
0.4773519163763066
0.4808362369337979
0.4843205574912892
0.4878048780487805
0.4912891986062718
0.4947735191637631
0.4982578397212544
0.5017421602787456
0.5052264808362370
0.5087108013937283
0.5121951219512195
0.5156794425087108
0.5191637630662020
0.5226480836236933
0.5261324041811847
0.5296167247386759
0.5331010452961673
0.5365853658536586
0.5400696864111498
0.5435540069686411
0.5470383275261324
0.5505226480836236
0.5540069686411150
0.5574912891986064
0.5609756097560976
0.5644599303135889
0.5679442508710801
0.5714285714285714
0.5749128919860627
0.5783972125435540
0.5818815331010453
0.5853658536585367
0.5888501742160279
0.5923344947735192
0.5958188153310104
0.5993031358885017
0.6027874564459930
0.6062717770034843
0.6097560975609756
0.6132404181184670
0.6167247386759582
0.6202090592334495
0.6236933797909407
0.6271777003484320
0.6306620209059233
0.6341463414634146
0.6376306620209059
0.6411149825783973
0.6445993031358885
0.6480836236933798
0.6515679442508711
0.6550522648083623
0.6585365853658536
0.6620209059233449
0.6655052264808362
0.6689895470383276
0.6724738675958188
0.6759581881533101
0.6794425087108014
0.6829268292682926
0.6864111498257840
0.6898954703832753
0.6933797909407666
0.6968641114982579
0.7003484320557491
0.7038327526132404
0.7073170731707317
0.7108013937282229
0.7142857142857143
0.7177700348432056
0.7212543554006969
0.7247386759581882
0.7282229965156795
0.7317073170731707
0.7351916376306620
0.7386759581881532
0.7421602787456446
0.7456445993031359
0.7491289198606272
0.7526132404181185
0.7560975609756098
0.7595818815331010
0.7630662020905923
0.7665505226480837
0.7700348432055749
0.7735191637630662
0.7770034843205575
0.7804878048780488
0.7839721254355401
0.7874564459930313
0.7909407665505226
0.7944250871080140
0.7979094076655052
0.8013937282229965
0.8048780487804879
0.8083623693379791
0.8118466898954704
0.8153310104529616
0.8188153310104529
0.8222996515679443
0.8257839721254355
0.8292682926829268
0.8327526132404182
0.8362369337979094
0.8397212543554007
0.8432055749128919
0.8466898954703833
0.8501742160278746
0.8536585365853658
0.8571428571428572
0.8606271777003485
0.8641114982578397
0.8675958188153310
0.8710801393728222
0.8745644599303136
0.8780487804878049
0.8815331010452961
0.8850174216027875
0.8885017421602788
0.8919860627177700
0.8954703832752613
0.8989547038327526
0.9024390243902439
0.9059233449477352
0.9094076655052264
0.9128919860627178
0.9163763066202091
0.9198606271777003
0.9233449477351916
0.9268292682926830
0.9303135888501742
0.9337979094076655
0.9372822299651568
0.9407665505226481
0.9442508710801394
0.9477351916376306
0.9512195121951219
0.9547038327526133
0.9581881533101045
0.9616724738675958
0.9651567944250871
0.9686411149825784
0.9721254355400697
0.9756097560975610
0.9790940766550522
0.9825783972125436
0.9860627177700348
0.9895470383275261
0.9930313588850174
0.9965156794425087
1.0000000000000000 ];
% outputs
y = [
0.096798166000
0.143459740000
0.208317990000
-0.038018393000
0.148793230000
0.512799550000
-0.120798510000
0.177158750000
0.083816932000
0.000756494710
0.006887211700
0.213572840000
0.493783350000
0.035274935000
0.243769090000
0.087417919000
0.476797600000
0.271438160000
0.178877000000
0.302770820000
0.219586200000
0.397548740000
0.215089090000
0.086588415000
0.304056660000
0.513946170000
0.113409000000
0.270068060000
0.471061630000
0.046628439000
0.443157150000
0.477349380000
0.411852220000
0.280063680000
0.410626170000
0.442082230000
0.585090200000
0.561297160000
0.426446760000
0.739395540000
0.506414480000
0.409925250000
0.483992110000
0.696575460000
0.615166110000
0.737349800000
0.632542540000
1.013287300000
0.408451860000
0.613835270000
0.681370910000
0.724988310000
0.947395900000
0.779004190000
0.745667780000
0.789666080000
0.908202240000
0.707755840000
0.894037990000
0.606428220000
0.843615470000
0.727874550000
0.784348430000
0.937189250000
0.737952220000
0.769620390000
0.701166820000
0.604155740000
0.924881630000
1.130475900000
0.936493470000
0.935667120000
0.819976810000
1.219958800000
0.949769640000
1.185254200000
1.048672000000
0.957402250000
1.160938800000
1.147023700000
0.983283410000
1.194051400000
1.265849000000
0.987167510000
0.956395550000
1.052589900000
1.041239900000
1.105649800000
0.941725790000
1.082398200000
1.127045200000
0.990602660000
0.980803460000
0.763155870000
0.768571290000
0.718186990000
0.743430540000
0.899271220000
0.672586160000
1.243876900000
1.009891400000
0.580803050000
0.709665650000
0.858643730000
0.609667610000
0.789520360000
1.014111700000
0.817911210000
0.824534040000
0.676622590000
0.735885580000
0.609022520000
0.859070820000
0.729465540000
0.907844320000
0.969161960000
0.938595000000
0.765435590000
0.688922170000
0.574990840000
0.770659830000
0.891310740000
0.690971710000
0.711048000000
0.824634750000
0.857126400000
0.510549630000
0.748820900000
0.744129450000
0.688191070000
0.841053850000
0.648943870000
0.576231820000
0.738291460000
0.762720980000
0.658108930000
0.807248650000
0.457323660000
0.521077750000
0.218860160000
0.755337450000
0.525976310000
0.634217410000
0.821176590000
0.675074910000
0.599022390000
0.535501720000
0.624415250000
0.748616920000
0.428448630000
0.643341520000
0.768654000000
0.435878620000
0.747073780000
0.746823840000
0.509674810000
0.413964070000
0.702246380000
0.756141550000
0.719368010000
0.744580020000
0.450466060000
0.713008860000
0.536099090000
0.536595750000
0.385158420000
0.781369420000
0.640457830000
0.762680940000
0.836824400000
0.437730550000
0.703038130000
0.603083350000
0.740709380000
0.768477480000
0.724346000000
0.477804350000
0.580883120000
0.639146320000
1.073252500000
0.783713950000
0.948384040000
0.663369380000
0.634232460000
0.696070360000
0.526957260000
0.794798220000
0.587766610000
0.408654360000
0.749043110000
0.387306230000
0.350567280000
0.675537030000
0.495158740000
0.507149810000
0.625867220000
0.583647850000
0.630796900000
0.712643020000
0.504536230000
0.504499780000
0.381836730000
0.647114640000
0.814415180000
0.618741310000
0.808727320000
0.824111580000
0.901249190000
0.910594790000
0.668334220000
0.652467030000
0.797380800000
0.699257390000
1.025428600000
1.022629700000
0.837597600000
0.766407010000
0.913657810000
0.744506570000
0.829397600000
0.773018020000
0.872046570000
1.028215500000
0.972177970000
1.033239200000
0.724398150000
0.887466840000
0.710846670000
0.912868530000
0.899725750000
1.039970600000
1.003988400000
0.929601600000
0.747319110000
0.742110530000
0.495198080000
0.724133980000
0.546209190000
0.904975290000
0.886555800000
0.756973180000
0.663691170000
0.725449860000
0.927661000000
0.871628610000
0.583857660000
0.657822350000
0.445564610000
0.654537190000
0.685853290000
0.690412010000
0.306045040000
0.591718740000
0.366728870000
0.420310670000
0.575582700000
0.482907520000
0.394669790000
0.491601190000
0.627475460000
0.270874460000
0.144405290000
0.155561360000
0.171715630000
0.196642150000
0.368318080000
-0.046015957000
0.287831380000
0.121822920000
0.390236930000
0.084253654000
0.201575720000
0.048222309000
0.075602342000
0.128340910000
0.123106810000
0.069294711000
0.308367180000
0.213239800000
0.401070710000
0.073746174000
0.268322470000
-0.213145400000
0.191332180000
0.145485930000
0.028213679000
0.183566020000
0.206160990000 ];
% compute half the total squared error of the prediction on all ( X( i ) , y( i ) )
v = 0; % return value
for i = 1 : N % for all input / output pairs
v = v + ( y( i ) - roughNN( w , X( i ) ) )^2;
end
v = v / 2;
end

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -0,0 +1,21 @@
% function [Grd,Fun] = testNN_Grd(w)
%
% Gradient wrapper file generated by ADiGator
% ©2010-2014 Matthew J. Weinstein and Anil V. Rao
% ADiGator may be obtained at https://sourceforge.net/projects/adigator/
% Contact: mweinstein@ufl.edu
% Bugs/suggestions may be reported to the sourceforge forums
% DISCLAIMER
% ADiGator is a general-purpose software distributed under the GNU General
% Public License version 3.0. While the software is distributed with the
% hope that it will be useful, both the software and generated code are
% provided 'AS IS' with NO WARRANTIES OF ANY KIND and no merchantability
% or fitness for any purpose or application.
function [Grd,Fun] = testNN_Grd(w)
gator_w.f = w;
gator_w.dw = ones(76,1);
v = testNN_ADiGatorGrd(gator_w);
Grd = reshape(v.dw,[1 76]);
Fun = v.f;
end

View File

@ -0,0 +1,25 @@
% function [Hes,Grd,Fun] = testNN_Hes(w)
%
% Hessian wrapper file generated by ADiGator
% ©2010-2014 Matthew J. Weinstein and Anil V. Rao
% ADiGator may be obtained at https://sourceforge.net/projects/adigator/
% Contact: mweinstein@ufl.edu
% Bugs/suggestions may be reported to the sourceforge forums
% DISCLAIMER
% ADiGator is a general-purpose software distributed under the GNU General
% Public License version 3.0. While the software is distributed with the
% hope that it will be useful, both the software and generated code are
% provided 'AS IS' with NO WARRANTIES OF ANY KIND and no merchantability
% or fitness for any purpose or application.
function [Hes,Grd,Fun] = testNN_Hes(w)
gator_w.f = w;
gator_w.dw = ones(76,1);
v = testNN_ADiGatorHes(gator_w);
xind1 = v.dwdw_location(:,1);
xind2 = v.dwdw_location(:,2);
Hes = zeros(76,76);
Hes((xind2-1)*76 + xind1) = v.dwdw;
Grd = reshape(v.dw,[1 76]);
Fun = v.f;
end

View File

@ -0,0 +1,21 @@
% function [Jac,Fun] = testNN_Jac(w)
%
% Jacobian wrapper file generated by ADiGator
% ©2010-2014 Matthew J. Weinstein and Anil V. Rao
% ADiGator may be obtained at https://sourceforge.net/projects/adigator/
% Contact: mweinstein@ufl.edu
% Bugs/suggestions may be reported to the sourceforge forums
% DISCLAIMER
% ADiGator is a general-purpose software distributed under the GNU General
% Public License version 3.0. While the software is distributed with the
% hope that it will be useful, both the software and generated code are
% provided 'AS IS' with NO WARRANTIES OF ANY KIND and no merchantability
% or fitness for any purpose or application.
function [Jac,Fun] = testNN_Jac(w)
gator_w.f = w;
gator_w.dw = ones(76,1);
v = testNN_ADiGatorJac(gator_w);
Jac = reshape(v.dw,[1 76]);
Fun = v.f;
end

View File

@ -0,0 +1,372 @@
using LinearAlgebra: I, eigvals
function TestFunctions()
# function TF = TestFunctions()
#
# Produces a cell array of function handlers, useful to test unconstrained
# optimization algorithms.
#
# Each function in the array has the following interface:
#
#   ( v , g [ , H ] ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
#   f(), or nothing (empty).
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
#   on the unconstrained global optimum of f(); it can be -Inf if either
#   f() is not bounded below, or no such information is available. If x
#   is not nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector) is the first optional output. This
#   also depends on x: if x === nothing this is the standard starting
#   point of an optimization algorithm, otherwise it is the gradient of
#   f() at x, or a subgradient if f() is not differentiable at x.
#
# - H (real, [ n x n ] real matrix) is the second optional output: the
#   Hessian of f() at x. Functions for which no such information is
#   available simply do not return it (the informative call may return a
#   placeholder instead).
#
# The current list of functions is the following:
#
# 1 Standard 2x2 PSD quadratic function with nicely conditioned Hessian.
#
# 2 Standard 2x2 PSD quadratic function with less nicely conditioned
# Hessian.
#
# 3 Standard 2x2 PSD quadratic function with Hessian having one zero
# eigenvalue.
#
# 4 Standard 2x2 quadratic function with indefinite Hessian (one positive
# and one negative eigenvalue)
#
# 5 Standard 2x2 quadratic function with "very elongated" Hessian (a
# very small positive minimum eigenvalue, the other much larger)
#
# 6 the 2-dim Rosenbrock function
#
# 7 the "six-hump camel" function
#
# 8 the Ackley function
#
# 9 a 2-dim nondifferentiable function coming from Lasso regularization
#
# 10 a 76-dim (nonconvex, differentiable) function coming from a fitting
# problem with ( X , y ) both [ 288 , 1 ] (i.e., a fitting with only
# one feature) using a "rough" NN with 1 input, 1 output, 3 hidden
# layers of 5 nodes each, and tanh activation function
#
#  11 same as 10 plus a \lambda || x ||^2 / 2 ridge stabilising term
#     (the code below uses \lambda = 1e+2)
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 08-11-18
# Version 1.01
# Copyright Antonio Frangioni
# =======================================
#}
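#
# a minimal usage sketch (any solver expecting this interface, such as
# NWTN() in NWTN.jl, can consume the handles; the index 6 and the trial
# point below are arbitrary illustration choices):
#
#   TF = TestFunctions()
#   f = TF[6]                  # the 2-dim Rosenbrock function
#   fStar, x0, _ = f(nothing)  # informative call: optimal value and start
#   v, g, H = f([-1.2; 1.0])   # value, gradient and Hessian at a point
#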
TF = []
push!(TF, x -> genericquad([6 -2; -2 6], [10; 5], x))
# eigenvalues: 4, 8
push!(TF, x -> genericquad([5 -3; -3 5], [10; 5], x))
# eigenvalues: 2, 8
push!(TF, x -> genericquad([4 -4; -4 4], [10; 5], x))
# eigenvalues: 0, 8
push!(TF, x -> genericquad([3 -5; -5 3], [10; 5], x))
# eigenvalues: -2, 8
push!(TF, x -> genericquad([101 -99; -99 101], [10; 5], x))
# eigenvalues: 2, 200
# HBG: alpha = 0.0165 , beta = 0.678
push!(TF, rosenbrock)
push!(TF, sixhumpcamel)
push!(TF, ackley)
push!(TF, lasso)
push!(TF, myNN)
push!(TF, myNN2)
return TF
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function genericquad(Q, q, x::Union{Nothing, AbstractVecOrMat})
# generic quadratic function f(x) = x' * Q * x / 2 + q' * x
if x === nothing # informative call
if minimum(eigvals(Q)) > 1e-14
xStar = Q \ -q
v = 0.5 * xStar' * Q * xStar + q' * xStar
else
v = -Inf
end
return (v, zeros(size(q)), zeros(size(Q)))
else
        if length(x) != 2  # accept any 2-element (column) vector
            throw(ArgumentError("genericquad: x is of wrong size"))
        end
v = 0.5 * x' * Q * x + q' * x # f(x)
return (v, Q * x + q, Q)
end
end # genericquad
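# sanity-check sketch: for a PD Q the minimizer is xStar = Q \ -q (as in
# the informative call above), where the gradient Q * xStar + q vanishes
# (the values below are just the first test pair of TestFunctions()):
#   Q = [6 -2; -2 6]; q = [10.0; 5.0]
#   v, g, H = genericquad(Q, q, Q \ -q)
#   # g ≈ [0, 0] and v equals the optimal value of the informative call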
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function rosenbrock(x::Union{Nothing, AbstractVecOrMat})
# rosenbrock's valley-shaped function
# syms x y
# f = @(x, y) 100 * ( y - x^2 )^2 + ( x - 1 )^2
#
# diff( f , x )
# 2 * x - 400 * x * ( - x^2 + y ) - 2
#
# diff( f , y )
# - 200 * x^2 + 200 * y
#
# diff( f , x , 2 )
# 1200 * x^2 - 400 * y + 2
#
# diff( f , y , 2 )
# 200
#
# diff( f , x , y )
# -400 * x
if isnothing(x) # informative call
v = 0
return (v, [-1, 1], [0 0; 0 0])
else
v = 100 * (x[2] - x[1]^2 )^2 + ( x[1] - 1 )^2 # f(x)
g = zeros(2)
g[1] = 2 * x[1] - 400* x[1] * (x[2] - x[1]^2) - 2
g[2] = -200 * x[1]^2 + 200 * x[2]
H = zeros(2, 2)
H[1, 1] = 1200 * x[1]^2 -400 * x[2] + 2
H[2, 2] = 200
H[2, 1] = -400 * x[1]
H[1, 2] = H[2, 1]
return (v, g, H)
end
end # rosenbrock
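# sanity-check sketch: the unique global minimum is at (1, 1) with value 0
#   v, g, H = rosenbrock([1.0; 1.0])   # v == 0, g == [0, 0]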
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function sixhumpcamel(x::Union{Nothing, AbstractVecOrMat})
# six-hump-camel valley-shaped function
# syms x y
# f = @(x, y) ( 4 - 2.1 * x^2 + x^4 / 3 ) * x^2 + x * y + 4 * ( y^2 - 1 ) *
# y^2
#
# diff( f , x )
# 2 * x^5 - ( 42 * x^3 ) / 5 + 8 * x + y
#
# diff( f , y )
# 16 * y^3 - 8 * y + x
#
# diff( f , x , 2 )
# 10 * x^4 - ( 126 * x^2 ) / 5 + 8
#
# diff( f , y , 2 )
# 48 * y^2 - 8
#
# diff( f , x , y )
# 1
if isnothing(x) # informative call
v = -1.03162845349
return (v, [1, 1], [0 0; 0 0])
else
v = ( 4 - 2.1 * x[1]^2 + x[1]^4 / 3 ) * x[1]^2 + x[1] * x[2] +
4 * ( x[2]^2 - 1 ) * x[2]^2 # f(x)
g = zeros(2)
g[1] = 2 * x[1]^5 - (42 * x[1]^3) / 5 + 8 * x[1] + x[2]
g[2] = 16 * x[2]^3 - 8 * x[2] + x[1]
H = zeros(2, 2)
H[1, 1] = 10 * x[1]^4 - ( 126 * x[1]^2 ) / 5 + 8
H[2, 2] = 48 * x[2]^2 - 8
H[2, 1] = 1
H[1, 2] = H[2, 1]
return (v, g, H)
end
end # sixhumpcamel
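# note: the two global minimizers are approximately ( 0.0898 , -0.7126 )
# and ( -0.0898 , 0.7126 ), both attaining the value ≈ -1.0316 returned by
# the informative call above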
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function ackley(xx::Union{Nothing, AbstractVecOrMat})
# syms x y
# f = @(x, y) - 20 * exp( - 0.2 * sqrt( ( x^2 + y^2 ) / 2 ) ) ...
# - exp( ( cos( 2 * pi * x ) + cos( 2 * pi * y ) ) / 2 ) ...
# + 20 + exp(1)
#
ManuallyComputedfGH = true
if isnothing(xx) # informative call
v = 0
return (v, [2, 2], [0 0; 0 0])
else
        if size(xx, 1) != 2 || size(xx, 2) != 1
error("ackley: x is of wrong size")
end
if ManuallyComputedfGH
# diff( f , x )
# pi*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x) +
# (2*x*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2)
#
# diff( f , y )
# pi*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*y) +
# (2*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2)
#
# diff( f , x , 2 )
#
# (2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2) +
# 2*pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*cos(2*pi*x) -
# (x^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
# (x^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
# pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x)^2
#
# diff( f , y , 2 )
# (2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2) +
# 2*pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*cos(2*pi*y) -
# (y^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
# (y^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
# pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*y)^2
#
# diff( f , x , y)
# - (x*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
# (x*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
# pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x)*sin(2*pi*y)
x = xx[1]
y = xx[2]
sqn2 = (x^2 + y^2) / 2
cosx = cos(2 * π * x)
cosy = cos(2 * π * y)
comp1 = exp(-(sqn2)^(1/2) / 5)
comp2 = exp((cosx + cosy) / 2)
            v = -20 * comp1 - comp2 + 20 + ℯ   # ℯ == exp(1)
            sinx = sin(2 * π * x)
            siny = sin(2 * π * y)
            g = zeros(2)  # \nabla f(x)
            g[1] = π * comp2 * sinx + 2 * x * comp1 / sqn2^(1/2)
            g[2] = π * comp2 * siny + 2 * y * comp1 / sqn2^(1/2)
            H = zeros(2, 2)
            H[1, 1] = (2 * comp1) / sqn2^(1/2) + 2 * π^2 * comp2 * cosx -
                (x^2 * comp1) / (5 * sqn2) - (x^2 * comp1) / sqn2^(3/2) -
                π^2 * comp2 * sinx^2
            H[2, 2] = (2 * comp1) / sqn2^(1/2) + 2 * π^2 * comp2 * cosy -
                (y^2 * comp1) / (5 * sqn2) - (y^2 * comp1) / sqn2^(3/2) -
                π^2 * comp2 * siny^2
            H[1, 2] = -(x * y * comp1) / (5 * sqn2) -
                (x * y * comp1) / sqn2^(3/2) -
                π^2 * comp2 * sinx * siny
            H[2, 1] = H[1, 2]
        else
            # the ADiGator-generated ackley_Grd / ackley_Hes helpers have
            # not been ported to Julia, so this branch cannot run yet; the
            # code after the error() sketches what it would do
            error("first you need to find the ackley_Hes and ackley_Grd files :/")
            (H, g, v) = ackley_Hes(xx)
            g = g'
            v = -20 * exp(-((xx[1]^2 + xx[2]^2) / 2)^(1/2) / 5) -
                exp(cos(2 * π * xx[1]) / 2 + cos(2 * π * xx[2]) / 2) + 20 + ℯ
        end
return (v, g, H)
end
end # ackley
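# a finite-difference sanity check for the hand-coded derivatives above
# (a sketch: the test point and the step h = 1e-6 are arbitrary choices):
#   xx = [0.3; -0.7]; h = 1e-6
#   _, g, _ = ackley(xx)
#   fd1 = (ackley(xx + [h; 0])[1] - ackley(xx - [h; 0])[1]) / (2 * h)
#   # fd1 ≈ g[1] up to O(h^2); the same pattern checks g[2] and H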
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function lasso(x::Union{Nothing, AbstractVecOrMat})
# nondifferentiable lasso example:
#
# f( x , y ) = || 3 * x + 2 * y - 2 ||_2^2 + 10 ( | x | + | y | )
if isnothing(x) # informative call
v = ( 2 - 1/3 )^2 + 10/9 # optimal solution [ 1/9 , 0 ]
return (v, [0, 0])
else
        v = (3 * x[1] + 2 * x[2] - 2)^2 +
            10 * (abs(x[1]) + abs(x[2]))  # f(x)
g = zeros(2)
g[1] = 18 * x[1] + 12 * x[2] - 12 + 10 * sign( x[1] )
g[2] = 12 * x[1] + 8 * x[2] - 8 + 10 * sign( x[2] )
return (v, g)
end
end # lasso
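# note: at x[i] == 0, sign(0) == 0 selects the zero element of the
# subdifferential [ -10 , 10 ] of 10 * | x[i] |, so the g returned above is
# always a valid subgradient even where f is not differentiable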
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
include("./testNN_Jac.jl")
include("./testNN_Hes.jl")
include("testNN.jl")
function myNN(x::Union{Nothing, AbstractVecOrMat})
# 1 x 5 x 5 x 5 x 1 = 76 w NN for solving a 1D fitting problem
if isnothing(x) # informative call
v = -Inf; # optimal value unknown (although 0 may perhaps be good)
        # Xavier initialization: uniform random in [ - A , A ] with
        # A = \sqrt{6} / \sqrt{n + m}, with n and m the sizes of the input
        # and output layers. in our case n + m is either 6 or 10, so we
        # take A = 1
        #
        # note that the starting point is random, so each run will be
        # different (unless an explicit starting point is provided); if
        # stability is needed, the seed of the generator has to be set
        # externally
        return (v, 2 .* rand(76, 1) .- 1)
else
        v = testNN(x)  # f(x)
        # the Julia wrappers return tuples -- (Jac, Fun) and (Hes, Grd, Fun)
        # -- so take their first element before transposing
        return (v, first(testNN_Jac(x))', first(testNN_Hes(x))')
end
end # myNN
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function myNN2(x::Union{Nothing, AbstractVecOrMat})
# 1 x 5 x 5 x 5 x 1 = 76 w NN for solving a 1D fitting problem
# plus ridge stabilization \lambda || x ||^2 / 2
lambda = 1e+2
if isnothing(x) # informative call
v = -Inf # optimal value unknown (although 0 may perhaps be good)
        # Xavier initialization: uniform random in [ - A , A ] with
        # A = \sqrt{6} / \sqrt{n + m}, with n and m the sizes of the input
        # and output layers. in our case n + m is either 6 or 10, so we
        # take A = 1
        #
        # note that the starting point is random, so each run will be
        # different (unless an explicit starting point is provided); if
        # stability is needed, the seed of the generator has to be set
        # externally
        return (v, 2 .* rand(76, 1) .- 1)
else
        v = testNN(x) + lambda * sum(abs2, x) / 2  # f(x); sum(abs2, x) == || x ||^2
        # as in myNN(), unwrap the tuples returned by the wrappers
        return (v, first(testNN_Jac(x))' + lambda * x,
            first(testNN_Hes(x))' + lambda * I)
end
end # myNN2
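# note: I above is the UniformScaling object from LinearAlgebra (imported
# at the top of this file), so adding lambda * I shifts the Hessian
# diagonal by lambda without materialising a 76 x 76 identity matrix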
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View File

@ -0,0 +1,43 @@
function roughNN(w, x)
#
# v = roughNN( w , x )
#
# returns the value of the function v = f( x ) as currently estimated by
# a small NN with 1 input, 1 output, 3 hidden layers of 5 nodes each, and
# tanh activation function.
#
# Input:
#
# - w is the [ 76 x 1 ] real vector containing the weights of the NN,
# i.e., w is made as follows:
#   [ 1 .. 5 ] are the [ 5 x 1 ] weights of the first layer
#   [ 6 .. 10 ] are the [ 5 x 1 ] biases of the first layer
#   [ 11 .. 35 ] are the [ 5 x 5 ] weights of the second layer
#   [ 36 .. 40 ] are the [ 5 x 1 ] biases of the second layer
#   [ 41 .. 65 ] are the [ 5 x 5 ] weights of the third layer
#   [ 66 .. 70 ] are the [ 5 x 1 ] biases of the third layer
#   [ 71 .. 75 ] are the [ 5 x 1 ] weights of the fourth (output) layer
#   [ 76 ] is the [ 1 x 1 ] bias of the fourth (output) layer
#
# - x is the real scalar containing the input of f()
#
# Output:
#
# - v (real, scalar): v = f( x ) as estimated by the NN with weights w
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 28-08-22
# Version 1.00
# Copyright Antonio Frangioni
# =======================================
#}
g = tanh.(x .* w[1:5] .+ w[6:10])                     # first hidden layer
g = tanh.(reshape(w[11:35], (5, 5)) * g .+ w[36:40])  # second hidden layer
g = tanh.(reshape(w[41:65], (5, 5)) * g .+ w[66:70])  # third hidden layer
v = g' * w[71:75] + w[76]                             # linear output layer
return v
end
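# usage sketch (the weights below are a random example, not trained ones):
#   w = 2 .* rand(76) .- 1
#   v = roughNN(w, 0.5)   # the network's prediction at input x = 0.5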

View File

@ -0,0 +1,628 @@
include("./roughNN.jl")
function testNN(w)
#
# v = testNN( w )
#
# returns the value of the empirical error of the NN (or, in fact,
# whatever function is encoded in 'roughNN()') with the weights contained
# in w.
#
# The empirical error is estimated over a set of 288 input/output pairs
# ( X , y ), with X containing only one feature; the data is hard-coded
# into the function so that its gradient can be easily computed by ADiGator.
#
# Input:
#
# - w is the real vector containing the weights of the NN, see roughNN
# for details
#
# Output:
#
# - half the total squared error made by roughNN() on the given test set
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 28-08-22
# Version 1.00
# Copyright Antonio Frangioni
# =======================================
#}
N = 288 # size
# inputs
X = [
0.0000000000000000
0.0034843205574913
0.0069686411149826
0.0104529616724739
0.0139372822299652
0.0174216027874564
0.0209059233449477
0.0243902439024390
0.0278745644599303
0.0313588850174216
0.0348432055749129
0.0383275261324042
0.0418118466898955
0.0452961672473868
0.0487804878048781
0.0522648083623693
0.0557491289198606
0.0592334494773519
0.0627177700348432
0.0662020905923345
0.0696864111498258
0.0731707317073171
0.0766550522648084
0.0801393728222996
0.0836236933797909
0.0871080139372822
0.0905923344947735
0.0940766550522648
0.0975609756097561
0.1010452961672474
0.1045296167247387
0.1080139372822300
0.1114982578397213
0.1149825783972125
0.1184668989547038
0.1219512195121951
0.1254355400696864
0.1289198606271777
0.1324041811846690
0.1358885017421603
0.1393728222996516
0.1428571428571428
0.1463414634146341
0.1498257839721254
0.1533101045296167
0.1567944250871080
0.1602787456445993
0.1637630662020906
0.1672473867595819
0.1707317073170732
0.1742160278745645
0.1777003484320558
0.1811846689895470
0.1846689895470383
0.1881533101045296
0.1916376306620209
0.1951219512195122
0.1986062717770035
0.2020905923344948
0.2055749128919861
0.2090592334494774
0.2125435540069686
0.2160278745644599
0.2195121951219512
0.2229965156794425
0.2264808362369338
0.2299651567944251
0.2334494773519164
0.2369337979094077
0.2404181184668990
0.2439024390243902
0.2473867595818815
0.2508710801393728
0.2543554006968641
0.2578397212543554
0.2613240418118467
0.2648083623693380
0.2682926829268293
0.2717770034843205
0.2752613240418119
0.2787456445993031
0.2822299651567944
0.2857142857142857
0.2891986062717770
0.2926829268292683
0.2961672473867596
0.2996515679442509
0.3031358885017422
0.3066202090592334
0.3101045296167247
0.3135888501742160
0.3170731707317073
0.3205574912891986
0.3240418118466899
0.3275261324041812
0.3310104529616725
0.3344947735191638
0.3379790940766551
0.3414634146341464
0.3449477351916376
0.3484320557491289
0.3519163763066202
0.3554006968641115
0.3588850174216028
0.3623693379790941
0.3658536585365854
0.3693379790940767
0.3728222996515679
0.3763066202090593
0.3797909407665505
0.3832752613240418
0.3867595818815331
0.3902439024390244
0.3937282229965157
0.3972125435540070
0.4006968641114982
0.4041811846689896
0.4076655052264808
0.4111498257839721
0.4146341463414634
0.4181184668989547
0.4216027874564460
0.4250871080139373
0.4285714285714285
0.4320557491289199
0.4355400696864111
0.4390243902439024
0.4425087108013937
0.4459930313588850
0.4494773519163763
0.4529616724738676
0.4564459930313589
0.4599303135888502
0.4634146341463415
0.4668989547038327
0.4703832752613241
0.4738675958188153
0.4773519163763066
0.4808362369337979
0.4843205574912892
0.4878048780487805
0.4912891986062718
0.4947735191637631
0.4982578397212544
0.5017421602787456
0.5052264808362370
0.5087108013937283
0.5121951219512195
0.5156794425087108
0.5191637630662020
0.5226480836236933
0.5261324041811847
0.5296167247386759
0.5331010452961673
0.5365853658536586
0.5400696864111498
0.5435540069686411
0.5470383275261324
0.5505226480836236
0.5540069686411150
0.5574912891986064
0.5609756097560976
0.5644599303135889
0.5679442508710801
0.5714285714285714
0.5749128919860627
0.5783972125435540
0.5818815331010453
0.5853658536585367
0.5888501742160279
0.5923344947735192
0.5958188153310104
0.5993031358885017
0.6027874564459930
0.6062717770034843
0.6097560975609756
0.6132404181184670
0.6167247386759582
0.6202090592334495
0.6236933797909407
0.6271777003484320
0.6306620209059233
0.6341463414634146
0.6376306620209059
0.6411149825783973
0.6445993031358885
0.6480836236933798
0.6515679442508711
0.6550522648083623
0.6585365853658536
0.6620209059233449
0.6655052264808362
0.6689895470383276
0.6724738675958188
0.6759581881533101
0.6794425087108014
0.6829268292682926
0.6864111498257840
0.6898954703832753
0.6933797909407666
0.6968641114982579
0.7003484320557491
0.7038327526132404
0.7073170731707317
0.7108013937282229
0.7142857142857143
0.7177700348432056
0.7212543554006969
0.7247386759581882
0.7282229965156795
0.7317073170731707
0.7351916376306620
0.7386759581881532
0.7421602787456446
0.7456445993031359
0.7491289198606272
0.7526132404181185
0.7560975609756098
0.7595818815331010
0.7630662020905923
0.7665505226480837
0.7700348432055749
0.7735191637630662
0.7770034843205575
0.7804878048780488
0.7839721254355401
0.7874564459930313
0.7909407665505226
0.7944250871080140
0.7979094076655052
0.8013937282229965
0.8048780487804879
0.8083623693379791
0.8118466898954704
0.8153310104529616
0.8188153310104529
0.8222996515679443
0.8257839721254355
0.8292682926829268
0.8327526132404182
0.8362369337979094
0.8397212543554007
0.8432055749128919
0.8466898954703833
0.8501742160278746
0.8536585365853658
0.8571428571428572
0.8606271777003485
0.8641114982578397
0.8675958188153310
0.8710801393728222
0.8745644599303136
0.8780487804878049
0.8815331010452961
0.8850174216027875
0.8885017421602788
0.8919860627177700
0.8954703832752613
0.8989547038327526
0.9024390243902439
0.9059233449477352
0.9094076655052264
0.9128919860627178
0.9163763066202091
0.9198606271777003
0.9233449477351916
0.9268292682926830
0.9303135888501742
0.9337979094076655
0.9372822299651568
0.9407665505226481
0.9442508710801394
0.9477351916376306
0.9512195121951219
0.9547038327526133
0.9581881533101045
0.9616724738675958
0.9651567944250871
0.9686411149825784
0.9721254355400697
0.9756097560975610
0.9790940766550522
0.9825783972125436
0.9860627177700348
0.9895470383275261
0.9930313588850174
0.9965156794425087
1.0000000000000000]
# outputs
y = [
0.096798166000
0.143459740000
0.208317990000
-0.038018393000
0.148793230000
0.512799550000
-0.120798510000
0.177158750000
0.083816932000
0.000756494710
0.006887211700
0.213572840000
0.493783350000
0.035274935000
0.243769090000
0.087417919000
0.476797600000
0.271438160000
0.178877000000
0.302770820000
0.219586200000
0.397548740000
0.215089090000
0.086588415000
0.304056660000
0.513946170000
0.113409000000
0.270068060000
0.471061630000
0.046628439000
0.443157150000
0.477349380000
0.411852220000
0.280063680000
0.410626170000
0.442082230000
0.585090200000
0.561297160000
0.426446760000
0.739395540000
0.506414480000
0.409925250000
0.483992110000
0.696575460000
0.615166110000
0.737349800000
0.632542540000
1.013287300000
0.408451860000
0.613835270000
0.681370910000
0.724988310000
0.947395900000
0.779004190000
0.745667780000
0.789666080000
0.908202240000
0.707755840000
0.894037990000
0.606428220000
0.843615470000
0.727874550000
0.784348430000
0.937189250000
0.737952220000
0.769620390000
0.701166820000
0.604155740000
0.924881630000
1.130475900000
0.936493470000
0.935667120000
0.819976810000
1.219958800000
0.949769640000
1.185254200000
1.048672000000
0.957402250000
1.160938800000
1.147023700000
0.983283410000
1.194051400000
1.265849000000
0.987167510000
0.956395550000
1.052589900000
1.041239900000
1.105649800000
0.941725790000
1.082398200000
1.127045200000
0.990602660000
0.980803460000
0.763155870000
0.768571290000
0.718186990000
0.743430540000
0.899271220000
0.672586160000
1.243876900000
1.009891400000
0.580803050000
0.709665650000
0.858643730000
0.609667610000
0.789520360000
1.014111700000
0.817911210000
0.824534040000
0.676622590000
0.735885580000
0.609022520000
0.859070820000
0.729465540000
0.907844320000
0.969161960000
0.938595000000
0.765435590000
0.688922170000
0.574990840000
0.770659830000
0.891310740000
0.690971710000
0.711048000000
0.824634750000
0.857126400000
0.510549630000
0.748820900000
0.744129450000
0.688191070000
0.841053850000
0.648943870000
0.576231820000
0.738291460000
0.762720980000
0.658108930000
0.807248650000
0.457323660000
0.521077750000
0.218860160000
0.755337450000
0.525976310000
0.634217410000
0.821176590000
0.675074910000
0.599022390000
0.535501720000
0.624415250000
0.748616920000
0.428448630000
0.643341520000
0.768654000000
0.435878620000
0.747073780000
0.746823840000
0.509674810000
0.413964070000
0.702246380000
0.756141550000
0.719368010000
0.744580020000
0.450466060000
0.713008860000
0.536099090000
0.536595750000
0.385158420000
0.781369420000
0.640457830000
0.762680940000
0.836824400000
0.437730550000
0.703038130000
0.603083350000
0.740709380000
0.768477480000
0.724346000000
0.477804350000
0.580883120000
0.639146320000
1.073252500000
0.783713950000
0.948384040000
0.663369380000
0.634232460000
0.696070360000
0.526957260000
0.794798220000
0.587766610000
0.408654360000
0.749043110000
0.387306230000
0.350567280000
0.675537030000
0.495158740000
0.507149810000
0.625867220000
0.583647850000
0.630796900000
0.712643020000
0.504536230000
0.504499780000
0.381836730000
0.647114640000
0.814415180000
0.618741310000
0.808727320000
0.824111580000
0.901249190000
0.910594790000
0.668334220000
0.652467030000
0.797380800000
0.699257390000
1.025428600000
1.022629700000
0.837597600000
0.766407010000
0.913657810000
0.744506570000
0.829397600000
0.773018020000
0.872046570000
1.028215500000
0.972177970000
1.033239200000
0.724398150000
0.887466840000
0.710846670000
0.912868530000
0.899725750000
1.039970600000
1.003988400000
0.929601600000
0.747319110000
0.742110530000
0.495198080000
0.724133980000
0.546209190000
0.904975290000
0.886555800000
0.756973180000
0.663691170000
0.725449860000
0.927661000000
0.871628610000
0.583857660000
0.657822350000
0.445564610000
0.654537190000
0.685853290000
0.690412010000
0.306045040000
0.591718740000
0.366728870000
0.420310670000
0.575582700000
0.482907520000
0.394669790000
0.491601190000
0.627475460000
0.270874460000
0.144405290000
0.155561360000
0.171715630000
0.196642150000
0.368318080000
-0.046015957000
0.287831380000
0.121822920000
0.390236930000
0.084253654000
0.201575720000
0.048222309000
0.075602342000
0.128340910000
0.123106810000
0.069294711000
0.308367180000
0.213239800000
0.401070710000
0.073746174000
0.268322470000
-0.213145400000
0.191332180000
0.145485930000
0.028213679000
0.183566020000
0.206160990000]
# compute half the total squared error of the prediction on all ( X[i] , y[i] )
v = 0 # return value
for i = 1:N # for all input / output pairs
v = v + ( y[i] - roughNN( w , X[i] ) )^2
end
v = v / 2
return v
end
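# usage sketch: testNN() is the objective that myNN() / myNN2() in
# TestFunctions.jl minimize over the 76 weights (random untrained weights):
#   w = 2 .* rand(76) .- 1
#   v = testNN(w)   # half the squared fitting error of this weight vector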

View File

@ -0,0 +1,26 @@
# function (Hes, Grd, Fun) = testNN_Hes(w)
#
# Hessian wrapper file generated by ADiGator
# ©2010-2014 Matthew J. Weinstein and Anil V. Rao
# ADiGator may be obtained at https://sourceforge.net/projects/adigator/
# Contact: mweinstein@ufl.edu
# Bugs/suggestions may be reported to the sourceforge forums
# DISCLAIMER
# ADiGator is a general-purpose software distributed under the GNU General
# Public License version 3.0. While the software is distributed with the
# hope that it will be useful, both the software and generated code are
# provided 'AS IS' with NO WARRANTIES OF ANY KIND and no merchantability
# or fitness for any purpose or application.
function testNN_Hes(w)
    # note: this still needs a Julia port of the ADiGator-generated
    # testNN_ADiGatorHes(); a NamedTuple stands in for the MATLAB struct
    gator_w = (f = w, dw = ones(76, 1))
    v = testNN_ADiGatorHes(gator_w)
    xind1 = v.dwdw_location[:, 1]
    xind2 = v.dwdw_location[:, 2]
    Hes = zeros(76, 76)
    # scatter the nonzero second derivatives via column-major linear indices
    Hes[(xind2 .- 1) .* 76 .+ xind1] = v.dwdw
    Grd = reshape(v.dw, (1, 76))
    Fun = v.f
    return (Hes, Grd, Fun)
end

View File

@ -0,0 +1,23 @@
# function [Jac,Fun] = testNN_Jac(w)
#
# Jacobian wrapper file generated by ADiGator
# ©2010-2014 Matthew J. Weinstein and Anil V. Rao
# ADiGator may be obtained at https://sourceforge.net/projects/adigator/
# Contact: mweinstein@ufl.edu
# Bugs/suggestions may be reported to the sourceforge forums
# DISCLAIMER
# ADiGator is a general-purpose software distributed under the GNU General
# Public License version 3.0. While the software is distributed with the
# hope that it will be useful, both the software and generated code are
# provided 'AS IS' with NO WARRANTIES OF ANY KIND and no merchantability
# or fitness for any purpose or application.
function testNN_Jac(w)
    # a NamedTuple stands in for the MATLAB struct expected by the
    # ADiGator-generated testNN_ADiGatorJac() (not yet ported to Julia)
    gator_w = (f = w, dw = ones(76, 1))
v = testNN_ADiGatorJac(gator_w)
Jac = reshape(v.dw, (1, 76))
Fun = v.f
return (Jac, Fun)
end

570
11-09/Untitled.ipynb generated Normal file

File diff suppressed because one or more lines are too long

1864
11-09/lesson.ipynb generated Normal file

File diff suppressed because one or more lines are too long