lesson 22/11, heavy ball method not translated yet
@@ -80,9 +80,8 @@ function NWTN(f;
                 if printing
                     @printf(" %2d", lsiter)
                 end
-                a = as;
+                a = as
                 return (a, phia) # Armijo + strong Wolfe satisfied, we are done
-
             end
             if phips ≥ 0
                 break
@@ -272,7 +271,7 @@ function NWTN(f;
         # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

         if fStar > -Inf
-            gapk = ( v - fStar ) / max(abs( fStar ), 1)
+            gapk = (v - fStar) / max(abs(fStar), 1)

             if printing
                 @printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
@@ -95,22 +95,22 @@ end

 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

-function genericquad(Q, q, x::Union{Nothing, Real})
+function genericquad(Q, q, x::Union{Nothing, AbstractVecOrMat})
     # generic quadratic function f(x) = x' * Q * x / 2 + q' * x

     if x === nothing # informative call
         if minimum(eigvals(Q)) > 1e-14
             xStar = Q \ -q
-            v = 0.5 * xStar' * Q * xStar + q' * xStar
+            v = 0.5 * dot(xStar, Q * xStar) + dot(q, xStar)
         else
             v = -Inf
         end
         return (v, zeros(size(q)), zeros(size(Q)))
     else
-        if size(x) ≠ (2, 1)
+        if size(x, 1) ≠ 2 || size(x, 2) ≠ 1
             throw(ArgumentError("genericquad: x is of wrong size"))
         end
-        v = 0.5 * x' * Q * x + q' * x # f(x)
+        v = 0.5 * dot(x, Q * x) + dot(q, x) # f(x)
         return (v, Q * x + q, Q)
     end
 end # genericquad
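A minimal sketch of how this two-call objective interface is meant to be driven (it is the convention the NWTN changes above and the new NCG.jl below rely on); genericquad from the hunk above is assumed to be in scope, and the Q and q values are only illustrative:

    using LinearAlgebra

    Q = [6.0 -2.0; -2.0 6.0]        # symmetric positive definite, so f is bounded below
    q = reshape([10.0, 5.0], 2, 1)  # [2 x 1] column, the shape genericquad checks for

    f = x -> genericquad(Q, q, x)

    fStar, x0, _ = f(nothing)       # "informative" call: best lower bound and default start
    v, g, H = f(x0)                 # regular call: value, gradient and Hessian at x0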
11-17/lesson.ipynb  (generated, 51 changed lines)
@@ -12,11 +12,56 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "7da43c78-28c9-4847-ae3b-1c928f79f5a3",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4-element Vector{Float64}:\n",
+       " 0.18246182305194636\n",
+       " -0.3668991739223996\n",
+       " 0.20260186844658193\n",
+       " -0.9242616501263348"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A = randn(4, 4)\n",
+    "y = randn(4)\n",
+    "\n",
+    "x = A \\ y"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "ae9b165b-aac9-4b2a-8575-4646e0627098",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4-element Vector{Float64}:\n",
+       " 0.0\n",
+       " -1.1102230246251565e-16\n",
+       " -1.1102230246251565e-16\n",
+       " -8.326672684688674e-17"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A*x - y"
+   ]
   }
  ],
  "metadata": {
11-22/NCG.jl  (new file, 546 lines)
@@ -0,0 +1,546 @@
using LinearAlgebra, Printf, Plots

function NCG(f;
             x::Union{Nothing, Vector}=nothing,
             wf::Integer=0,
             rstart::Integer=0,
             eps::Real=1e-6,
             astart::Real=1,
             MaxFeval::Integer=1000,
             m1::Real=0.01,
             m2::Real=0.9,
             tau::Real=0.9,
             sfgrd::Real=0.2,
             MInf::Real=-Inf,
             mina::Real=1e-16,
             plt::Union{Plots.Plot, Nothing}=nothing,
             plotatend::Bool=false,
             Plotf::Integer=0,
             printing::Bool=true)

    #function [ x , status ] = NCG( f , x , wf , rstart , eps , astart ,
    #                               MaxFeval , m1 , m2 , tau , sfgrd , MInf ,
    #                               mina )
    #
    # Apply a Nonlinear Conjugate Gradient algorithm for the minimization of
    # the provided function f, which must have the following interface:
    #
    #   ( v , g , _ ) = f( x )
    #
    # (in this Julia version f has to return at least three outputs, since the
    # calls unpack three of them; only the first two, value and gradient, are
    # actually used)
    #
    # Input:
    #
    # - x is either a [ n x 1 ] real (column) vector denoting the input of
    #   f(), or nothing.
    #
    # Output:
    #
    # - v (real, scalar): if x === nothing this is the best known lower bound
    #   on the unconstrained global optimum of f(); it can be -Inf if either
    #   f() is not bounded below, or no such information is available. If
    #   x !== nothing then v = f(x).
    #
    # - g (real, [ n x 1 ] real vector): this also depends on x. If
    #   x === nothing this is the standard starting point from which the
    #   algorithm should start, otherwise it is the gradient of f() at x (or a
    #   subgradient if f() is not differentiable at x, which it should not be
    #   if you are applying the gradient method to it).
    #
    # The other [optional] input parameters are:
    #
    # - x (either [ n x 1 ] real vector or nothing, default nothing): starting
    #   point. If x === nothing, the default starting point provided by f() is
    #   used.
    #
    # - wf (integer scalar, optional, default value 0): which of the Nonlinear
    #   Conjugate Gradient formulae to use. Possible values are:
    #   = 0: Fletcher-Reeves
    #   = 1: Polak-Ribiere
    #   = 2: Hestenes-Stiefel
    #   = 3: Dai-Yuan
    #
    # - rstart (integer scalar, optional, default value 0): if > 0, restarts
    #   (setting beta = 0) are performed every n * rstart iterations
    #
    # - eps (real scalar, optional, default value 1e-6): the accuracy in the
    #   stopping criterion: the algorithm is stopped when the norm of the
    #   gradient is less than or equal to eps. If a negative value is provided,
    #   this is used in a *relative* stopping criterion: the algorithm is
    #   stopped when the norm of the gradient is less than or equal to
    #   (- eps) * (norm of the first gradient).
    #
    # - astart (real scalar, optional, default value 1): starting value of
    #   alpha in the line search (> 0)
    #
    # - MaxFeval (integer scalar, optional, default value 1000): the maximum
    #   number of function evaluations (hence, iterations will not be more than
    #   MaxFeval because at each iteration at least one function evaluation is
    #   performed, possibly more due to the line search).
    #
    # - m1 (real scalar, optional, default value 0.01): first parameter of the
    #   Armijo-Wolfe-type line search (sufficient decrease). Has to be in (0,1)
    #
    # - m2 (real scalar, optional, default value 0.9): typically the second
    #   parameter of the Armijo-Wolfe-type line search (strong curvature
    #   condition). It should be in (0,1); if not, it is taken to mean that
    #   the simpler Backtracking line search should be used instead
    #
    # - tau (real scalar, optional, default value 0.9): scaling parameter for
    #   the line search. In the Armijo-Wolfe line search it is used in the
    #   first phase: if the derivative is not positive, then the step is
    #   divided by tau (which is < 1, hence it is increased). In the
    #   Backtracking line search, each time the step is multiplied by tau
    #   (hence it is decreased).
    #
    # - sfgrd (real scalar, optional, default value 0.2): safeguard parameter
    #   for the line search. To avoid numerical problems that can occur with
    #   the quadratic interpolation if the derivative at one endpoint is too
    #   large w.r.t. the one at the other (which leads to choosing a point
    #   extremely near to the other endpoint), a *safeguarded* version of
    #   interpolation is used whereby the new point is chosen in the interval
    #   [ am + ( as - am ) * sfgrd , as - ( as - am ) * sfgrd ], [ am , as ]
    #   being the current interval, whatever quadratic interpolation says. If
    #   you experience problems with the line search taking too many iterations
    #   to converge at "nasty" points, try to increase this
    #
    # - MInf (real scalar, optional, default value -Inf): if the algorithm
    #   determines a value for f() <= MInf this is taken as an indication that
    #   the problem is unbounded below and computation is stopped
    #   (a "finite -Inf").
    #
    # - mina (real scalar, optional, default value 1e-16): if the algorithm
    #   determines a stepsize value <= mina, this is taken as an indication
    #   that something has gone wrong (the gradient is not a direction of
    #   descent, so maybe the function is not differentiable) and computation
    #   is stopped. It is legal to take mina = 0, thereby in fact skipping this
    #   test.
    #
    # Output:
    #
    # - x ([ n x 1 ] real column vector): the best solution found so far.
    #
    # - status (string): a string describing the status of the algorithm at
    #   termination
    #
    #   = 'optimal': the algorithm terminated having proven that x is a(n
    #     approximately) optimal solution, i.e., the norm of the gradient at x
    #     is less than the required threshold
    #
    #   = 'unbounded': the algorithm has determined an extremely large negative
    #     value for f() that is taken as an indication that the problem is
    #     unbounded below (a "finite -Inf", see MInf above)
    #
    #   = 'stopped': the algorithm terminated having exhausted the maximum
    #     number of iterations: x is the best solution found so far, but not
    #     necessarily the optimal one
    #
    #   = 'error': the algorithm found a numerical error that prevents it from
    #     continuing optimization (see mina above)
    #
    #{
    # =======================================
    #  Author: Antonio Frangioni
    #  Date: 10-11-22
    #  Version 1.21
    #  Copyright Antonio Frangioni
    # =======================================
    #}

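    # A minimal illustrative call, assuming the genericquad objective from this
    # same commit is in scope (the Q and q values below are just an example):
    #
    #   Q = [6.0 -2.0; -2.0 6.0]; q = reshape([10.0, 5.0], 2, 1)
    #   x, status = NCG(x -> genericquad(Q, q, x); wf=1, eps=1e-8)
    #
    # wf = 1 selects the Polak-Ribiere formula from the list above.
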
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    function f2phi(alpha, derivate=false)
        # computes and returns the value of the tomography at alpha
        #
        #   phi( alpha ) = f( x + alpha * d )
        #
        # if Plotf > 2, saves the data in gap for plotting
        #
        # if derivate is true, also returns the derivative of the tomography
        # at alpha
        #
        #   phi'( alpha ) = < \nabla f( x + alpha * d ) , d >
        #
        # saves the point in lastx, the gradient in lastg and increases feval
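        # (here "tomography" is the one-dimensional restriction of f along the
        # current search direction d, i.e. the function the line search acts on)
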
        lastx = x + alpha * d
        phi, lastg, _ = f(lastx)

        if Plotf > 2
            if fStar > -Inf
                push!(gap, (phi - fStar) / max(abs(fStar), 1))
            else
                push!(gap, phi)
            end
        end

        feval += 1

        if derivate
            return (phi, dot(d, lastg))
        end
        return (phi, nothing)
    end

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau)
        # performs an Armijo-Wolfe Line Search.
        #
        # phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
        #
        # as > 0 is the first value to be tested: if phi'( as ) < 0 then as is
        # divided by tau < 1 (hence it is increased) until this does not happen
        # any longer
        #
        # m1 and m2 are the standard Armijo-Wolfe parameters; note that the
        # strong Wolfe condition is used
        #
        # returns the optimal step and the optimal f-value

        lsiter = 1 # count iterations of first phase
        local phips, phia
        while feval ≤ MaxFeval
            phia, phips = f2phi(as, true)

            if (phia ≤ phi0 + m1 * as * phip0) && (abs(phips) ≤ -m2 * phip0)
                if printing
                    @printf("\t%2d", lsiter)
                end
                a = as
                return (a, phia) # Armijo + strong Wolfe satisfied, we are done
            end
            if phips ≥ 0 # derivative is positive, break
                break
            end
            as = as / tau
            lsiter += 1
        end

        if printing
            @printf("\t%2d ", lsiter)
        end
        lsiter = 1 # count iterations of second phase
        am = 0
        a = as
        phipm = phip0
        while (feval ≤ MaxFeval) && (as - am > mina) && (phips > 1e-12)

            # compute the new value by safeguarded quadratic interpolation
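            # (the step below is the zero of the linear interpolant of phi'
            # between am and as, i.e. the minimum of the quadratic model of phi
            # on [ am , as ]; the following max/min clamp it away from the
            # endpoints by a fraction sfgrd of the interval width)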
            a = (am * phips - as * phipm) / (phips - phipm)
            a = max(am + (as - am) * sfgrd, min(as - (as - am) * sfgrd, a))

            # compute phi(a)
            phia, phip = f2phi(a, true)

            if (phia ≤ phi0 + m1 * a * phip0) && (abs(phip) ≤ -m2 * phip0)
                break # Armijo + strong Wolfe satisfied, we are done
            end

            # restrict the interval based on the sign of the derivative in a
            if phip < 0
                am = a
                phipm = phip
            else
                as = a
                if as ≤ mina
                    break
                end
                phips = phip
            end
            lsiter += 1
        end

        if printing
            @printf("%2d", lsiter)
        end
        return (a, phia)
    end

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    function BacktrackingLS(phi0, phip0, as, m1, tau)
        # performs a Backtracking Line Search.
        #
        # phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
        #
        # as > 0 is the first value to be tested, which is decreased by
        # multiplying it by tau < 1 until the Armijo condition with parameter
        # m1 is satisfied
        #
        # returns the optimal step and the optimal f-value

        lsiter = 1 # count ls iterations
        local phia # phia must remain visible after the loop below
        while feval ≤ MaxFeval && as > mina
            phia, _ = f2phi(as)
            if phia ≤ phi0 + m1 * as * phip0 # Armijo satisfied
                break # we are done
            end
            as *= tau
            lsiter += 1
        end

        if printing
            @printf("\t%2d", lsiter)
        end

        return (as, phia)
    end

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    Plotf = 1
    # 1 = the level sets of f and the trajectory are plotted (when n = 2)
    # 2 = the function value / gap are plotted, iteration-wise
    # 3 = the function value / gap are plotted, function-evaluation-wise
    # all the rest: nothing is plotted

    Interactive = false # if we pause at every iteration

    PXY = Matrix{Real}(undef, 2, 0)

    local gap
    if Plotf > 1
        if Plotf == 2
            MaxIter = 50 # expected number of iterations for the gap plot
        else
            MaxIter = 70 # expected number of iterations for the gap plot
        end
        gap = []
    end

    if Plotf == 2 && isnothing(plt)
        plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+1))
    end
    if Plotf > 1 && isnothing(plt)
        plt = plot(xlims=(0, MaxIter))
    end
    if isnothing(plt)
        plt = plot()
    end

    # reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    local fStar
    if isnothing(x)
        fStar, x, _ = f(nothing)
    else
        fStar, _, _ = f(nothing)
    end

    n = size(x, 1)

    if wf < 0 || wf > 3
        error("unknown NCG formula $wf")
    end
    if astart ≤ 0
        error("astart must be > 0")
    end

    if m1 ≤ 0 || m1 ≥ 1
        error("m1 is not in (0, 1)")
    end

    AWLS = (m2 > 0 && m2 < 1)

    if tau ≤ 0 || tau ≥ 1
        error("tau is not in (0, 1)")
    end

    if sfgrd ≤ 0 || sfgrd ≥ 1
        error("sfgrd is not in (0, 1)")
    end

    if mina < 0
        error("mina is < 0")
    end

    # "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    lastx = zeros(n, 1) # last point visited in the line search
    lastg = zeros(n, 1) # gradient of f at lastx
    pastg = zeros(n, 1)
    pastd = zeros(n, 1)
    d = zeros(n, 1) # NCG's direction
    feval = 0 # f() evaluations count ("common" with LSs)

    status = "error"

    # initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if printing
        @printf("NCG method ")
        if wf == 0
            @printf("(Fletcher-Reeves)\n")
        elseif wf == 1
            @printf("(Polak-Ribiere)\n")
        elseif wf == 2
            @printf("(Hestenes-Stiefel)\n")
        elseif wf == 3
            @printf("(Dai-Yuan)\n")
        end

        if fStar > -Inf
            @printf("feval\trel gap")
        else
            @printf("feval\tf(x)")
        end
        @printf("\t\t|| g(x) ||\tbeta\tls feval\ta*\n\n")
    end

    v, _ = f2phi(0)
    ng = norm(lastg)
    if eps < 0
        ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
    else
        ng0 = 1 # un-scaled stopping criterion
    end

    iter = 1 # iterations count (as distinguished from f() evals)

    # main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    while true
        # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

        if fStar > -Inf
            gapk = (v - fStar) / max(abs(fStar), 1)

            if printing
                @printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
            end
            if Plotf == 2
                push!(gap, gapk)
            end
        else
            if printing
                @printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng)
            end
            if Plotf == 2
                push!(gap, v)
            end
        end

        # stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -

        if ng ≤ eps * ng0
            status = "optimal"
            break
        end

        if feval > MaxFeval
            status = "stopped"
            break
        end

        # compute search direction- - - - - - - - - - - - - - - - - - - - - - -
        # formulae could be streamlined somewhat and some norms could be saved
        # from previous iterations

        if iter == 1 # first iteration is off-line, standard gradient
            d = -lastg
            if printing
                @printf("\t")
            end
        else # normal iterations, use appropriate NCG formula
            if rstart > 0 && mod(iter, n * rstart) == 0
                # ... unless a restart is being performed
                beta = 0
                if printing
                    @printf("\t(res)")
                end
            else
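                # with lastg the current gradient, pastg the previous one and
                # pastd the previous direction, the formulae below compute
                #   FR: beta = || lastg ||^2 / || pastg ||^2
                #   PR: beta = max( 0 , < lastg , lastg - pastg > / || pastg ||^2 )
                #   HS: beta = max( 0 , < lastg , lastg - pastg > / < lastg - pastg , pastd > )
                #   DY: beta = || lastg ||^2 / < lastg - pastg , pastd >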
                if wf == 0 # Fletcher-Reeves
                    beta = (ng / norm(pastg))^2
                elseif wf == 1 # Polak-Ribiere
                    beta = dot(lastg, lastg - pastg) / norm(pastg)^2
                    beta = max(beta, 0)
                elseif wf == 2 # Hestenes-Stiefel
                    beta = dot(lastg, lastg - pastg) / dot(lastg - pastg, pastd)
                    if beta < 0
                        beta = 0
                    end
                elseif wf == 3 # Dai-Yuan
                    beta = ng^2 / dot(lastg - pastg, pastd)
                end
                if printing
                    @printf("\t%1.4f", beta)
                end
            end

            if beta ≠ 0
                d = -lastg + beta * pastd
            else
                d = -lastg
            end
        end

        pastg = lastg # previous gradient
        pastd = d # previous search direction

        # compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -

        phip0 = dot(lastg, d)

        local a
        if AWLS
            a, v = ArmijoWolfeLS(v, phip0, astart, m1, m2, tau)
        else
            a, v = BacktrackingLS(v, phip0, astart, m1, tau)
        end

        # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

        if printing
            @printf("\t%1.2e\n", a)
        end

        if a ≤ mina
            status = "error"
            break
        end

        if v ≤ MInf
            status = "unbounded"
            break
        end

        # compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -

        # possibly plot the trajectory
        if n == 2 && Plotf == 1
            PXY = hcat(PXY, hcat(x, lastx))
        end

        x = lastx
        ng = norm(lastg)

        # iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        iter += 1

        if Interactive
            readline()
        end
    end

    # end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if Plotf ≥ 2
        plot!(plt, gap)
    elseif Plotf == 1 && n == 2
        plot!(plt, PXY[1, :], PXY[2, :])
    end
    if plotatend
        display(plt)
    end

    return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
11-22/lesson.ipynb  (generated, new file, 2906 lines)
File diff suppressed because one or more lines are too long