Lesson 23/11 with methods from previous lessons

Author: elvis
Date: 2023-11-30 15:24:44 +01:00
Parent: 65c8842e83
Commit: c828453e94
4 changed files with 4793 additions and 0 deletions

11-23/HBG.jl Normal file

@@ -0,0 +1,266 @@
using LinearAlgebra, Printf, Plots
function HBG(f;
x::Union{Nothing, Vector}=nothing,
alpha::Real=1,
beta::Real=0.9,
eps::Real=1e-6,
MaxIter::Integer=300,
MInf::Real=-Inf,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
#function ( x , status ) = HBG( f ; x , alpha , beta , eps , MaxIter ,
#                               MInf , plt , plotatend , Plotf , printing )
#
# Apply a Heavy Ball Gradient approach for the minimization of the
# provided function f, which must have the following interface:
#
# ( v , g , _ ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
# f(), or nothing.
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
# on the unconstrained global optimum of f(); it can be -Inf if either f()
# is not bounded below, or no such information is available. If
# x !== nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector): this also depends on x. If x ===
# nothing this is the standard starting point from which the algorithm
# should start, otherwise it is the gradient of f() at x (or a subgradient
# if f() is not differentiable at x, which it should not be if you are
# applying the gradient method to it).
#
# The other [optional] input parameters are:
#
# - x (either [ n x 1 ] real vector or nothing, default nothing): starting
# point. If x === nothing, the default starting point provided by f() is
# used.
#
# - alpha (real scalar, optional, default value 1): the fixed stepsize of
# the Heavy Ball Gradient approach (along the anti-gradient).
#
# - beta (real scalar, optional, default value 0.9): the fixed weight of
# the momentum term
#
# beta * ( x^i - x^{i - 1} )
#
# Note that beta has to be >= 0, although 0 is accepted which turns the
# Heavy Ball Gradient approach into a "Light" Ball Gradient approach,
# i.e., a standard Gradient approach with fixed stepsize.
#
# - eps (real scalar, optional, default value 1e-6): the accuracy in the
# stopping criterion: the algorithm is stopped when the norm of the
# gradient is less than or equal to eps. If a negative value is provided,
# this is used in a *relative* stopping criterion: the algorithm is
# stopped when the norm of the gradient is less than or equal to
# ( - eps ) * ( norm of the gradient at the starting point ).
#
# - MaxIter (integer scalar, optional, default value 300): the maximum
# number of iterations == function evaluations.
#
# - MInf (real scalar, optional, default value -Inf): if the algorithm
# determines a value for f() <= MInf this is taken as an indication that
# the problem is unbounded below and computation is stopped
# (a "finite -Inf").
#
# Output:
#
# - x ([ n x 1 ] real column vector): the best solution found so far.
#
# - status (string): a string describing the status of the algorithm at
# termination
#
# = 'optimal': the algorithm terminated having proven that x is a(n
# approximately) optimal solution, i.e., the norm of the gradient at x
# is less than the required threshold
#
# = 'unbounded': the algorithm has determined an extremely large negative
# value for f() that is taken as an indication that the problem is
# unbounded below (a "finite -Inf", see MInf above)
#
# = 'stopped': the algorithm terminated having exhausted the maximum
# number of iterations: x is the best solution found so far, but not
# necessarily the optimal one
#
# = 'error': the algorithm encountered a numerical error that prevents it
# from continuing the optimization
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 10-11-22
# Version 1.01
# Copyright Antonio Frangioni
# =======================================
#}
# Plotf values:
# 0 = nothing is plotted
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted
local gap
if Plotf == 2
gap = []
end
PXY = Matrix{Real}(undef, 2, 0)
Interactive = false # if we pause at every iteration
# reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if alpha <= 0
error("alpha must be positive")
end
if beta < 0
error("beta must be non-negative")
end
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("Heavy Ball Gradient method\n")
if fStar > -Inf
@printf("feval\trel gap\t\tbest gap")
else
@printf("feval\tf(x)\tfbest")
end
@printf("\t|| g(x) ||\n\n")
end
if Plotf == 2 && isnothing(plt)
plt = plot(xlims=(0, MaxIter))
elseif isnothing(plt)
plt = plot()
end
(v, g, _) = f(x)
ng = norm(g)
vbest = v
local ng0
if eps < 0
ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1 # un-scaled stopping criterion
end
pastd = zeros(n) # the direction at the previous iteration
feval = 1 # f() evaluations count
status = "error"
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = (v - fStar)/max(abs(fStar), 1)
bstgapk = (vbest - fStar)/max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e\t%1.4e\n", feval, gapk, bstgapk, ng)
end
if Plotf == 2
push!(gap, gapk)
end
else
if printing
@printf("%4d\t%1.8e\t%1.8e\t\t%1.4e\n", feval, v, vbest, ng)
end
if Plotf == 2
push!(gap, v)
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng <= eps * ng0
status = "optimal"
break
end
if feval > MaxIter
status = "stopped"
break
end
if v <= MInf
status = "unbounded"
break
end
# compute deflected gradient direction- - - - - - - - - - - - - - - - -
d = -alpha * g .+ beta * pastd
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, x + d))
end
x += d
pastd .= d
# compute f() - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
(v, g, _) = f(x)
ng = norm(g)
if v < vbest
vbest = v
end
feval += 1
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Interactive
l = readline()
if l == "exit"
break
end
end
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if plotatend
if Plotf == 2
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
display(plt)
end
return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
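# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Usage sketch (an illustrative addition, not part of the original course
# code): a 2-D convex quadratic wrapped in the three-output interface that
# HBG() expects; hbg_quadf, its Hessian and its starting point are all
# assumed for the example.
function hbg_quadf(x::Union{Nothing, Vector})
    Q = [6.0 -2.0; -2.0 6.0] # symmetric positive definite, so f* = 0 at the origin
    if isnothing(x)
        return (0.0, [10.0, -5.0], nothing) # (lower bound, default start, unused)
    end
    return (dot(x, Q * x) / 2, Q * x, nothing) # (value, gradient, unused)
end
# (xopt, status) = HBG(hbg_quadf, alpha=0.05, beta=0.9, plotatend=false)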

11-23/NCG.jl Normal file

@@ -0,0 +1,555 @@
using LinearAlgebra, Printf, Plots
function NCG(f;
x::Union{Nothing, Vector}=nothing,
wf::Symbol=:Fletcher_Reeves,
rstart::Integer=0,
eps::Real=1e-6,
astart::Real=1,
MaxFeval::Integer=1000,
m1::Real=0.01,
m2::Real=0.9,
tau::Real=0.9,
sfgrd::Real=0.2,
MInf::Real=-Inf,
mina::Real=1e-16,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
#function ( x , status ) = NCG( f ; x , wf , rstart , eps , astart ,
#                               MaxFeval , m1 , m2 , tau , sfgrd , MInf ,
#                               mina , plt , plotatend , Plotf , printing )
#
# Apply a Nonlinear Conjugate Gradient algorithm for the minimization of
# the provided function f, which must have the following interface:
#
# ( v , g , _ ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
# f(), or nothing.
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
# on the unconstrained global optimum of f(); it can be -Inf if either f()
# is not bounded below, or no such information is available. If
# x !== nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector): this also depends on x. If x ===
# nothing this is the standard starting point from which the algorithm
# should start, otherwise it is the gradient of f() at x (or a subgradient
# if f() is not differentiable at x, which it should not be if you are
# applying the gradient method to it).
#
# The other [optional] input parameters are:
#
# - x (either [ n x 1 ] real vector or nothing, default nothing): starting
# point. If x === nothing, the default starting point provided by f() is
# used.
#
# - wf (Symbol, optional, default value :Fletcher_Reeves): which of the
# Nonlinear Conjugate Gradient formulae to use. Possible values are:
# = :Fletcher_Reeves
# = :Polak_Ribiere
# = :Hestenes_Stiefel
# = :Dai_Yuan
#
# - rstart (integer scalar, optional, default value 0): if > 0, restarts
# (setting beta = 0) are performed every n * rstart iterations
#
# - eps (real scalar, optional, default value 1e-6): the accuracy in the
# stopping criterion: the algorithm is stopped when the norm of the
# gradient is less than or equal to eps. If a negative value is provided,
# this is used in a *relative* stopping criterion: the algorithm is
# stopped when the norm of the gradient is less than or equal to
# ( - eps ) * ( norm of the gradient at the starting point ).
#
# - astart (real scalar, optional, default value 1): starting value of
# alpha in the line search (> 0)
#
# - MaxFeval (integer scalar, optional, default value 1000): the maximum
# number of function evaluations (hence, iterations will be not more than
# MaxFeval because at each iteration at least a function evaluation is
# performed, possibly more due to the line search).
#
# - m1 (real scalar, optional, default value 0.01): first parameter of the
# Armijo-Wolfe-type line search (sufficient decrease). Has to be in (0,1)
#
# - m2 (real scalar, optional, default value 0.9): typically the second
# parameter of the Armijo-Wolfe-type line search (strong curvature
# condition). It should be in (0,1); if not, it is taken to mean that
# the simpler Backtracking line search should be used instead
#
# - tau (real scalar, optional, default value 0.9): scaling parameter for
# the line search. In the Armijo-Wolfe line search it is used in the
# first phase: if the derivative is not positive, then the step is
# divided by tau (which is < 1, hence it is increased). In the
# Backtracking line search, each time the step is multiplied by tau
# (hence it is decreased).
#
# - sfgrd (real scalar, optional, default value 0.2): safeguard parameter
# for the line search. to avoid numerical problems that can occur with
# the quadratic interpolation if the derivative at one endpoint is too
# large w.r.t. the one at the other (which leads to choosing a point
# extremely near to the other endpoint), a *safeguarded* version of
# interpolation is used whereby the new point is chosen in the interval
# [ am + ( as - am ) * sfgrd , as - ( as - am ) * sfgrd ], being [ am , as ] the
# current interval, whatever quadratic interpolation says. If you
# experience problems with the line search taking too many iterations to
# converge at "nasty" points, try to increase this
#
# - MInf (real scalar, optional, default value -Inf): if the algorithm
# determines a value for f() <= MInf this is taken as an indication that
# the problem is unbounded below and computation is stopped
# (a "finite -Inf").
#
# - mina (real scalar, optional, default value 1e-16): if the algorithm
# determines a stepsize value <= mina, this is taken as an indication
# that something has gone wrong (the gradient is not a direction of
# descent, so maybe the function is not differentiable) and computation
# is stopped. It is legal to take mina = 0, thereby in fact skipping this
# test.
#
# Output:
#
# - x ([ n x 1 ] real column vector): the best solution found so far.
#
# - status (string): a string describing the status of the algorithm at
# termination
#
# = 'optimal': the algorithm terminated having proven that x is a(n
# approximately) optimal solution, i.e., the norm of the gradient at x
# is less than the required threshold
#
# = 'unbounded': the algorithm has determined an extremely large negative
# value for f() that is taken as an indication that the problem is
# unbounded below (a "finite -Inf", see MInf above)
#
# = 'stopped': the algorithm terminated having exhausted the maximum
# number of iterations: x is the best solution found so far, but not
# necessarily the optimal one
#
# = 'error': the algorithm encountered a numerical error that prevents it
# from continuing the optimization (see mina above)
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 10-11-22
# Version 1.21
# Copyright Antonio Frangioni
# =======================================
#}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function f2phi(alpha, derivate=false)
#
# computes and returns the value of the tomography at alpha
#
# phi( alpha ) = f( x + alpha * d )
#
# if Plotf > 2 saves the data in gap() for plotting
#
# if the second output parameter is required, put there the derivative
# of the tomography in alpha
#
# phi'( alpha ) = < \nabla f( x + alpha * d ) , d >
#
# saves the point in lastx, the gradient in lastg and increases feval
lastx = x + alpha * d
(phi, lastg, _) = f(lastx)
if Plotf > 2
if fStar > - Inf
push!(gap, (phi - fStar) / max(abs(fStar), 1))
else
push!(gap, phi)
end
end
feval += 1
if derivate
return (phi, dot(d, lastg))
end
return (phi, nothing)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau)
# performs an Armijo-Wolfe Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested: if phi'( as ) < 0 then as is
# divided by tau < 1 (hence it is increased) until this does not happen
# any longer
#
# m1 and m2 are the standard Armijo-Wolfe parameters; note that the strong
# Wolfe condition is used
#
# returns the optimal step and the optimal f-value
lsiter = 1 # count iterations of first phase
local phips, phia
while feval <= MaxFeval
phia, phips = f2phi(as, true)
if (phia <= phi0 + m1 * as * phip0) && (abs(phips) <= -m2 * phip0)
if printing
@printf("\t%2d", lsiter)
end
a = as
return (a, phia) # Armijo + strong Wolfe satisfied, we are done
end
if phips >= 0
break
end
as = as / tau
lsiter += 1
end
if printing
@printf("\t%2d ", lsiter)
end
lsiter = 1 # count iterations of second phase
am = 0
a = as
phipm = phip0
while (feval <= MaxFeval) && ((as - am) > mina) && (phips > 1e-12)
# compute the new value by safeguarded quadratic interpolation
a = (am * phips - as * phipm) / (phips - phipm)
a = max(am + ( as - am ) * sfgrd, min(as - ( as - am ) * sfgrd, a))
# compute phi(a)
phia, phip = f2phi(a, true)
if (phia <= phi0 + m1 * a * phip0) && (abs(phip) <= -m2 * phip0)
break # Armijo + strong Wolfe satisfied, we are done
end
# restrict the interval based on sign of the derivative in a
if phip < 0
am = a
phipm = phip
else
as = a
if as <= mina
break
end
phips = phip
end
lsiter += 1
end
if printing
@printf("%2d", lsiter)
end
return (a, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function BacktrackingLS( phi0 , phip0 , as , m1 , tau )
# performs a Backtracking Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested, which is decreased by
# multiplying it by tau < 1 until the Armijo condition with parameter
# m1 is satisfied
#
# returns the optimal step and the optimal f-value
local phia = phi0 # fallback value in case the loop body never runs
lsiter = 1 # count ls iterations
while feval <= MaxFeval && as > mina
phia, _ = f2phi(as)
if phia <= phi0 + m1 * as * phip0 # Armijo satisfied
break # we are done
end
as *= tau
lsiter += 1
end
if printing
@printf("\t%2d", lsiter)
end
return (as, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Plotf = 1;
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted, iteration-wise
# 3 = the function value / gap are plotted, function-evaluation-wise
# all the rest: nothing is plotted
Interactive = false # if we pause at every iteration
local gap
PXY = Matrix{Real}(undef, 2, 0)
status = "error"
if Plotf > 1
if Plotf == 2
MaxIter = 50 # expected number of iterations for the gap plot
else
MaxIter = 70 # expected number of iterations for the gap plot
end
gap = []
end
# reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if wf != :Fletcher_Reeves && wf != :Polak_Ribiere && wf != :Hestenes_Stiefel &&
wf != :Dai_Yuan
error("unknown NCG formula $wf")
end
if astart <= 0
error("astart must be > 0")
end
if m1 <= 0 || m1 >= 1
error("m1 is not in (0, 1)")
end
AWLS = (m2 > 0 && m2 < 1)
if tau <= 0 || tau >= 1
error("tau is not in (0, 1)")
end
if sfgrd <= 0 || sfgrd >= 1
error("sfgrd is not in (0, 1)")
end
if mina < 0
error("mina is < 0")
end
# "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lastx = zeros(n) # last point visited in the line search
lastg = zeros(n) # gradient of lastx
d = zeros(n) # NCG's direction
feval = 0 # f() evaluations count ("common" with LSs)
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("NCG method ")
if wf == :Fletcher_Reeves
@printf("(Fletcher-Reeves)\n")
elseif wf == :Polak_Ribiere
@printf("(Polak-Ribiere)\n")
elseif wf == :Hestenes_Stiefel
@printf("(Hestenes-Stiefel)\n")
elseif wf == :Dai_Yuan
@printf("(Dai-Yuan)\n")
end
if fStar > - Inf
@printf("feval\trel gap")
else
@printf("feval\tf(x)")
end
@printf("\t\t|| g(x) ||\tbeta\tls feval\ta*\n\n")
end
if Plotf > 1 && isnothing(plt)
if Plotf == 2
plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+1), yscale=:log10)
else
plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+4), yscale=:log10)
end
elseif isnothing(plt)
plt = plot()
end
v, _ = f2phi(0)
ng = norm(lastg)
if eps < 0
ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1 # un-scaled stopping criterion
end
iter = 1 # iterations count (as distinguished from f() evals)
local pastg, pastd
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = ( v - fStar ) / max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
end
if Plotf > 1
if Plotf == 2
push!(gap, gapk)
end
end
else
if printing
@printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng)
end
if Plotf > 1
if Plotf == 2
push!(gap, v)
end
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng <= eps * ng0
status = "optimal"
break
end
if feval > MaxFeval
status = "stopped"
break
end
# compute search direction- - - - - - - - - - - - - - - - - - - - - - -
# formulae could be streamlined somewhat and some norms could be saved
# from previous iterations
if iter == 1 # first iteration is off-line, standard gradient
d = -lastg
if printing
@printf("\t")
end
else # normal iterations, use appropriate NCG formula
if rstart > 0 && mod( iter , n * rstart ) == 0
# ... unless a restart is being performed
beta = 0
if printing
@printf("\t(res)")
end
else
if wf == :Fletcher_Reeves
beta = (ng / norm(pastg))^2
elseif wf == :Polak_Ribiere
beta = (dot(lastg, (lastg - pastg))) / norm(pastg)^2
beta = max(beta, 0)
elseif wf == :Hestenes_Stiefel
beta = (dot(lastg, (lastg - pastg))) / (dot((lastg - pastg), pastd))
if beta < 0
beta = 0
end
elseif wf == :Dai_Yuan
beta = ng^2 / (dot((lastg - pastg), pastd))
end
if printing
@printf("\t%1.4f", beta)
end
end
if beta != 0
d = -lastg + beta * pastd
else
d = -lastg
end
end
pastg = lastg # previous gradient
pastd = d # previous search direction
# compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
phip0 = dot(lastg, d)
if AWLS
(a, v) = ArmijoWolfeLS(v, phip0, astart, m1, m2, tau)
else
(a, v) = BacktrackingLS(v, phip0, astart, m1, tau)
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("\t%1.2e\n", a)
end
if a <= mina
status = "error"
break
end
if v <= MInf
status = "unbounded"
break
end
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, lastx))
end
x = lastx
ng = norm(lastg)
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iter += 1
if Interactive
readline()
end
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Plotf > 1
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
if plotatend
display(plt)
end
return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
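# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Usage sketch (an illustrative addition, not part of the original course
# code): the same three-output interface minimized with the Polak-Ribiere
# formula; ncg_testf and all its constants are assumed for the example.
function ncg_testf(x::Union{Nothing, Vector})
    Q = [3.0 1.0; 1.0 5.0] # symmetric positive definite
    q = [-1.0, 2.0]
    if isnothing(x)
        return (-Inf, [5.0, 5.0], nothing) # no known lower bound, start at (5, 5)
    end
    return (dot(x, Q * x) / 2 + dot(q, x), Q * x + q, nothing)
end
# (xopt, status) = NCG(ncg_testf, wf=:Polak_Ribiere, eps=1e-8, plotatend=false)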

11-23/SGM.jl Normal file

@@ -0,0 +1,339 @@
using LinearAlgebra, Printf, Plots
function SGM(f;
x::Union{Nothing, Vector}=nothing,
eps::Real=1e-6,
astart::Real=1e-4,
tau::Real=0.96,
MaxFeval::Integer=300,
MInf::Real=-Inf,
mina::Real=1e-16,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
# function ( x , status ) = SGM( f ; x , eps , astart , tau , MaxFeval ,
#                                MInf , mina , plt , plotatend , Plotf , printing )
#
# Apply the classical Subgradient Method for the minimization of the
# provided function f, which must have the following interface:
#
# ( v , g , _ ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
# f(), or nothing.
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
# on the unconstrained global optimum of f(); it can be -Inf if either f()
# is not bounded below, or no such information is available. If
# x !== nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector): this also depends on x. If x ===
# nothing this is the standard starting point from which the algorithm
# should start, otherwise it is a subgradient of f() at x (possibly the
# gradient, but you should not apply this algorithm to a differentiable
# f)
#
# The other [optional] input parameters are:
#
# - x (either [ n x 1 ] real vector or nothing, default nothing): starting
# point. If x === nothing, the default starting point provided by f() is
# used.
#
# - eps (real scalar, optional, default value 1e-6): the accuracy in the
# stopping criterion. If eps > 0, then a target-level Polyak stepsize
# with nonvanishing threshold is used, and eps is taken as the minimum
# *relative* value for the displacement, i.e.,
#
# delta^i >= eps * max( abs( f( x^i ) ) , 1 )
#
# is used as the minimum value for the displacement. If eps < 0 and
# v_* = f( nothing ) > -Inf, then the algorithm "cheats" and it does an
# *exact* Polyak stepsize with termination criterion
#
# ( f^i_{ref} - v_* ) <= ( - eps ) * max( abs( v_* ) , 1 )
#
# Finally, if eps == 0 the algorithm rather uses a DSS (diminishing
# square-summable) stepsize, i.e., astart * ( 1 / i ) [see below]
#
# - astart (real scalar, optional, default value 1e-4): if eps > 0, i.e.,
# a target-level Polyak stepsize with nonvanishing threshold is used,
# then astart is used as the relative value to which the displacement is
# reset each time f( x^{i + 1} ) <= f^i_{ref} - delta^i, i.e.,
#
# delta^{i + 1} = astart * max( abs( f^{i + 1}_{ref} ) , 1 )
#
# If eps == 0, i.e., a diminishing square-summable stepsize is used, then
# astart is used as the fixed scaling factor for the stepsize sequence
# astart * ( 1 / i ).
#
# - tau (real scalar, optional, default value 0.96): if eps > 0, i.e.,
# a target-level Polyak stepsize with nonvanishing threshold is used,
# then delta^{i + 1} = delta^i * tau each time
# f( x^{i + 1} ) > f^i_{ref} - delta^i
#
# - MaxFeval (integer scalar, optional, default value 300): the maximum
# number of function evaluations (hence, iterations, since there is
# exactly one function evaluation per iteration).
#
# - MInf (real scalar, optional, default value -Inf): if the algorithm
# determines a value for f() <= MInf this is taken as an indication that
# the problem is unbounded below and computation is stopped
# (a "finite -Inf").
#
# - mina (real scalar, optional, default value 1e-16): if the algorithm
# determines a stepsize value <= mina, this is taken as the fact that the
# algorithm has already obtained the most it can and computation is
# stopped. It is legal to take mina = 0.
#
# Output:
#
# - x ([ n x 1 ] real column vector): the best solution found so far.
#
# - status (string): a string describing the status of the algorithm at
# termination
#
# = 'optimal': the algorithm terminated having proven that x is a(n
# approximately) optimal solution; this only happens when "cheating",
# i.e., when v_* = f( nothing ) > -Inf is explicitly used, except in the
# unlikely case that f() spontaneously produces an almost-null
# subgradient
#
# = 'unbounded': the algorithm has determined an extremely large negative
# value for f() that is taken as an indication that the problem is
# unbounded below (a "finite -Inf", see MInf above)
#
# = 'stopped': the algorithm terminated having exhausted the maximum
# number of iterations: x is the best solution found so far, but not
# necessarily the optimal one
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 17-11-22
# Version 1.11
# Copyright Antonio Frangioni
# =======================================
#}
# Plotf values:
# 0 = nothing is plotted
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted
Interactive = false # if we pause at every iteration
# reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
local gap
PXY = Matrix{Real}(undef, 2, 0)
status = "error"
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if eps < 0 && fStar == - Inf
# no way of cheating since the true optimal value is unknown
eps = - eps # revert to ordinary target level stepsize
end
if astart <= 0
error("astart must be > 0")
end
if tau <= 0 || tau >= 1
error("tau is not in (0, 1)")
end
if mina < 0
error("mina is < 0")
end
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("Subradient method\n")
if fStar > - Inf
@printf("iter\trel gap\t\tbest gap\t|| g(x) ||\ta\n\n")
else
@printf("iter\tf(x)\t\tf best\t\t|| g(x) ||\ta\n\n")
end
end
if Plotf == 2
gap = []
end
if Plotf > 1 && isnothing(plt)
plt = plot(xlims=(0, MaxFeval))
elseif isnothing(plt)
plt = plot()
end
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iter = 1
xref = x
fref = Inf # best f-value found so far
if eps > 0
delta = 0 # required displacement from fref
end
while true
# compute function and subgradient- - - - - - - - - - - - - - - - - - - - -
(v, g, _) = f(x)
ng = norm(g)
if eps > 0 # target-level stepsize
if v <= fref - delta # found a "significantly" better point
delta = astart * max(abs(v), 1) # reset delta
else # decrease delta
delta = max(delta * tau, eps * max(abs(min(v, fref)), 1))
end
end
if v < fref # found a better f-value (however slightly better)
fref = v # update fref
xref = x # this is the incumbent solution
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = (v - fStar)/max(abs(fStar), 1)
bstgapk = (fref - fStar)/max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e\t%1.4e", iter, gapk, bstgapk, ng)
end
if Plotf == 2
push!(gap, gapk)
end
else
if printing
@printf("%4d\t%1.8e\t%1.8e\t\t%1.4e", iter, fref, v, ng)
end
if Plotf == 2
push!(gap, v)
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if eps < 0 && fref - fStar <= -eps * max(abs(fStar), 1)
xref = x
status = "optimal"
if printing
@printf("\n")
end
break
end
if ng < 1e-12 # unlikely, but it could happen
xref = x
status = "optimal"
if printing
@printf("\n")
end
break
end
if iter > MaxFeval
status = "stopped"
if printing
@printf("\n")
end
break
end
# compute stepsize- - - - - - - - - - - - - - - - - - - - - - - - - - -
if eps > 0 # Polyak stepsize with target level
a = ( v - fref + delta ) / ( ng * ng )
elseif eps < 0 # true Polyak stepsize (cheating)
a = ( v - fStar ) / ( ng * ng )
else # diminishing square-summable stepsize
a = astart * ( 1 / iter )
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("\t%1.4e", a)
@printf("\n")
end
if a <= mina
status = "stopped"
if printing
@printf("\n")
end
break
end
if v <= MInf
status = "unbounded"
if printing
@printf("\n")
end
break
end
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, x - a * g))
end
x = x - a * g
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iter += 1
if Interactive
readline()
end
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x = xref # return point corresponding to best value found so far
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if plotatend
if Plotf == 2
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
display(plt)
end
return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
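# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Usage sketch (an illustrative addition, not part of the original course
# code): a nondifferentiable f( x ) = || x ||_1, with sign.(x) as a valid
# subgradient, in the same three-output interface; sgm_testf and its
# starting point are assumed for the example. Since the true optimal value
# 0 is returned as the lower bound, eps < 0 lets SGM "cheat" with the exact
# Polyak stepsize.
function sgm_testf(x::Union{Nothing, Vector})
    if isnothing(x)
        return (0.0, [4.0, -3.0], nothing) # (f* = 0, default start, unused)
    end
    return (norm(x, 1), sign.(x), nothing) # (value, a subgradient, unused)
end
# (xopt, status) = SGM(sgm_testf, eps=-1e-6, plotatend=false)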

11-23/lesson.ipynb generated Normal file

File diff suppressed because one or more lines are too long