cmdla/11-09/NWTN.jl

using LinearAlgebra, Printf, Plots

function NWTN(f;
        x::Union{Nothing, Vector}=nothing,
        eps::Real=1e-6,
        MaxFeval::Integer=1000,
        m1::Real=1e-4,
        m2::Real=0.9,
        delta::Real=1e-6,
        tau::Real=0.9,
        sfgrd::Real=0.2,
        MInf::Real=-Inf,
        mina::Real=1e-16,
        plt::Union{Plots.Plot, Nothing}=nothing,
        plotatend::Bool=true,
        Plotf::Integer=0,
        printing::Bool=true)::Tuple{AbstractArray, String}

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    function f2phi(alpha, derivate=false)
        # computes and returns the value of the tomography at alpha
        #
        #    phi( alpha ) = f( x + alpha * d )
        #
        # if Plotf > 2 saves the data in gap() for plotting
        #
        # if the second output parameter is required, put there the derivative
        # of the tomography in alpha
        #
        #    phi'( alpha ) = < \nabla f( x + alpha * d ) , d >
        #
        # saves the point in lastx, the gradient in lastg, the Hessian in lasth,
        # and increases feval

        lastx = x + alpha * d
        phi, lastg, lastH = f(lastx)

        if Plotf > 2
            if fStar > - Inf
                push!(gap, (phi - fStar) / max(abs(fStar), 1))
            else
                push!(gap, phi)
            end
        end

        feval += 1

        if derivate
            return (phi, dot(d, lastg))
        end
        return (phi, nothing)
    end

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau)
        # performs an Armijo-Wolfe Line Search.
        #
        # phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
        #
        # as > 0 is the first value to be tested: if phi'( as ) < 0 then as is
        # divided by tau < 1 (hence it is increased) until this does not happen
        # any longer
        #
        # m1 and m2 are the standard Armijo-Wolfe parameters; note that the strong
        # Wolfe condition is used
        #
        # returns the optimal step and the optimal f-value

        lsiter = 1  # count iterations of first phase
        local phips, phia
        while feval ≤ MaxFeval
            phia, phips = f2phi(as, true)

            if (phia ≤ phi0 + m1 * as * phip0) && (abs(phips) ≤ - m2 * phip0)
                if printing
                    @printf("  %2d", lsiter)
                end
                a = as;
                return (a, phia)  # Armijo + strong Wolfe satisfied, we are done

            end
            if phips ≥ 0
                break
            end
            as = as / tau
            lsiter += 1
        end

        if printing
            @printf("  %2d ", lsiter)
        end
        lsiter = 1  # count iterations of second phase

        am = 0
        a = as
        phipm = phip0
        while (feval ≤ MaxFeval ) && ((as - am)) > mina && (phips > 1e-12)

            # compute the new value by safeguarded quadratic interpolation
            a = (am * phips - as * phipm) / (phips - phipm)
            a = max(am + ( as - am ) * sfgrd, min(as - ( as - am ) * sfgrd, a))

            # compute phi(a)
            phia, phip = f2phi(a, true)

            if (phia ≤ phi0 + m1 * a * phip0) && (abs(phip) ≤ -m2 * phip0)
                break  # Armijo + strong Wolfe satisfied, we are done
            end

            # restrict the interval based on sign of the derivative in a
            if phip < 0
                am = a
                phipm = phip
            else
                as = a
                if as ≤ mina
                    break
                end
                phips = phip
            end
            lsiter += 1
        end

        if printing
            @printf("%2d", lsiter)
        end
        return (a, phia)
    end

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    function BacktrackingLS(phi0, phip0, as, m1, tau)
        # performs a Backtracking Line Search.
        #
        # phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
        #
        # as > 0 is the first value to be tested, which is decreased by
        # multiplying it by tau < 1 until the Armijo condition with parameter
        # m1 is satisfied
        #
        # returns the optimal step and the optimal f-value

        lsiter = 1  # count ls iterations
        while feval ≤ MaxFeval && as > mina
            phia, _ = f2phi(as)
            if phia ≤ phi0 + m1 * as * phip0 # Armijo satisfied
                break                        # we are done
            end
            as *= tau
            lsiter += 1
        end

        if printing
            @printf("  %2d", lsiter)
        end

        return (as, phia)
    end

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    #Plotf = 2
    # 0 = nothing is plotted
    # 1 = the level sets of f and the trajectory are plotted (when n = 2)
    # 2 = the function value / gap are plotted, iteration-wise
    # 3 = the function value / gap are plotted, function-evaluation-wise

    Interactive = false

    PXY = Matrix{Real}(undef, 2, 0)

    local gap
    if Plotf > 1
        if Plotf == 2
            MaxIter = 50  # expected number of iterations for the gap plot
        else
            MaxIter = 70  # expected number of iterations for the gap plot
        end
        gap = []
    end

    if Plotf == 2 && plt == nothing
        plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+1))
    end
    if Plotf > 1 && plt == nothing
        plt = plot(xlims=(0, MaxIter))
    end
    if plt == nothing
        plt = plot()
    end

    local fStar
    if isnothing(x)
        (fStar, x, _) = f(nothing)
    else
        (fStar, _, _) = f(nothing)
    end

    n = size(x, 1)

    if m1 ≤ 0 || m1 ≥ 1
        throw(ArgumentError("m1 is not in (0 ,1)"))
    end

    AWLS = (m2 > 0 && m2 < 1)

    if delta < 0
        throw(ArgumentError("delta must be ≥ 0"))
    end

    if tau ≤ 0 || tau ≥ 1
        throw(ArgumentError("tau is not in (0 ,1)"))
    end

    if sfgrd ≤ 0 || sfgrd ≥ 1
        throw(ArgumentError("sfgrd is not in (0, 1)"))
    end

    if mina < 0
        throw(ArgumentError("mina must be ≥ 0"))
    end

    # "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    lastx = zeros(n)     # last point visited in the line search
    lastg = zeros(n)     # gradient of lastx
    lastH = zeros(n, n)  # Hessian of lastx
    d = zeros(n)         # Newton's direction
    feval = 0            # f() evaluations count ("common" with LSs)

    status = "error"

    # initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if fStar > -Inf
        prevv = Inf
    end

    if printing
        @printf("Newton's method\n")
        if fStar > -Inf
            @printf("feval\trel gap\t\t|| g(x) ||\trate\t\tdelta")
        else
            @printf("feval\tf(x)\t\t\t|| g(x) ||\tdelta")
        end
        @printf("\t\tls it\ta*")
        @printf("\n\n")
    end


    v, _ = f2phi(0)
    ng = norm(lastg)
    if eps < 0
        ng0 = -ng  # norm of first subgradient: why is there a "-"? ;-)
    else
        ng0 = 1    # un-scaled stopping criterion
    end

    # main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    while true

        # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

        if fStar > -Inf
            gapk = ( v - fStar ) / max(abs( fStar ), 1)

            if printing
                @printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
                if prevv < Inf
                    @printf("\t%1.4e", ( v - fStar ) / ( prevv - fStar ))
                else
                    @printf("\t\t")
                end
            end
            prevv = v

            if Plotf > 1
                if Plotf ≥ 2
                    push!(gap, gapk)
                end
            end
        else
            if printing
                @printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng)
            end

            if Plotf > 1
                if Plotf ≥ 2
                    push!(gap, v)
                end
            end
        end

        # stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -

        if ng ≤ eps * ng0
            status = "optimal"
            break
        end

        if feval > MaxFeval
            status = "stopped"
            break
        end

        # compute Newton's direction- - - - - - - - - - - - - - - - - - - - - -

        lambdan = eigmin(lastH)  # smallest eigenvalue
        if lambdan < delta
            if printing
                @printf("\t%1.4e", delta - lambdan)
            end
            lastH = lastH + (delta - lambdan) * I
        else
            if printing
                @printf("\t0.00e+00")
            end
        end
        d = -lastH \ lastg
        phip0 = lastg' * d

        # compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
        # in Newton's method, the default initial stepsize is 1

        if AWLS
            a, v = ArmijoWolfeLS(v, phip0, 1, m1, m2, tau)
        else
            a, v = BacktrackingLS(v, phip0, 1, m1, tau)
        end

        # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

        if printing
            @printf("\t%1.2e", a)
            @printf("\n")
        end

        if a ≤ mina
            status = "error"
            break
        end

        if v ≤ MInf
            status = "unbounded"
            break
        end

        # compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -

        # possibly plot the trajectory
        if n == 2 && Plotf == 1
            PXY = hcat(PXY, hcat(x, lastx))
        end

        x = lastx
        ng = norm(lastg)

        # iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        if Interactive
            readline()
        end
    end

    if plotatend
        if Plotf ≥ 2
            plot!(plt, gap)
        elseif Plotf == 1 && n == 2
            plot!(plt, PXY[1, :], PXY[2, :])
        end
        display(plt)
    end

    # end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    return (x, status)
end