using LinearAlgebra, Printf, Plots

"""
    HBG(f; x=nothing, alpha=1, beta=0.9, eps=1e-6, MaxIter=300, MInf=-Inf,
        plt=nothing, plotatend=true, Plotf=0, printing=true) -> (x, status)

Apply a Heavy Ball Gradient approach for the minimization of the provided
function `f`.

# Interface of `f`

`f` is called in two ways and must return a 3-tuple in both cases (the third
entry is ignored here):

- `f(nothing)` returns `(fStar, x0, _)`: `fStar` is the best known lower bound
  on the unconstrained global optimum of `f` (it can be `-Inf` if `f` is not
  bounded below or no such information is available), and `x0` is the standard
  starting point from which the algorithm should start.
- `f(x)` returns `(v, g, _)`: `v` is the value of `f` at `x` and `g` its
  gradient at `x` (or a subgradient if `f` is not differentiable at `x`, which
  it should not be if you are applying the gradient method to it).

# Keywords

- `x` (vector or `nothing`, default `nothing`): starting point. If `nothing`,
  the default starting point provided by `f(nothing)` is used.
- `alpha` (real scalar, default `1`): the fixed stepsize of the Heavy Ball
  Gradient approach (along the anti-gradient). Must be > 0.
- `beta` (real scalar, default `0.9`): the fixed weight of the momentum term

      beta * ( x^i - x^{i - 1} )

  which is added to `- alpha * g` to form the step. `beta` has to be >= 0; 0 is
  accepted and turns the Heavy Ball Gradient approach into a "Light" Ball
  Gradient approach, i.e., a standard Gradient approach with fixed stepsize.
- `eps` (real scalar, default `1e-6`): the accuracy in the stopping criterion:
  the algorithm is stopped when the norm of the gradient is less than or equal
  to `eps`. If a negative value is provided, this is used in a *relative*
  stopping criterion: the algorithm is stopped when the norm of the gradient
  is less than or equal to `(- eps) * || first gradient ||`.
- `MaxIter` (integer scalar, default `300`): bound on the number of function
  evaluations; the algorithm is stopped as soon as the number of evaluations
  of `f` performed so far exceeds `MaxIter`.
- `MInf` (real scalar, default `-Inf`): if the algorithm determines a value
  for `f` <= `MInf` this is taken as an indication that the problem is
  unbounded below and computation is stopped (a "finite -Inf").
- `plt` (`Plots.Plot` or `nothing`, default `nothing`): plot to draw on; a new
  one is created when `nothing`.
- `plotatend` (Bool, default `true`): if `true`, the collected data (if any,
  see `Plotf`) is added to `plt` and `plt` is displayed before returning.
- `Plotf` (integer, default `0`): what is collected for plotting:
  `0` = nothing, `1` = the trajectory of the iterates (only when n = 2),
  `2` = the relative gap at each evaluation (the function value if
  `fStar == -Inf`).
- `printing` (Bool, default `true`): print the header and one log line per
  function evaluation.

# Returns

- `x`: the last iterate computed by the algorithm.
- `status` (string) describing the status of the algorithm at termination:
  - `"optimal"`: the norm of the gradient at `x` is within the required
    threshold, i.e., `x` is a(n approximately) optimal solution;
  - `"unbounded"`: the algorithm has determined an extremely large negative
    value for `f` that is taken as an indication that the problem is
    unbounded below (a "finite -Inf", see `MInf` above);
  - `"stopped"`: the algorithm terminated having exhausted the maximum
    number of function evaluations: `x` is not necessarily optimal;
  - `"error"`: the norm of the gradient became NaN, which prevents the
    algorithm from continuing (also returned if the interactive loop, which
    is disabled in the code, is exited by the user).

# Throws

An `ErrorException` if `alpha <= 0` or `beta < 0`.
"""
function HBG(f;
             x::Union{Nothing, Vector}=nothing,
             alpha::Real=1,
             beta::Real=0.9,
             eps::Real=1e-6,
             MaxIter::Integer=300,
             MInf::Real=-Inf,
             plt::Union{Plots.Plot, Nothing}=nothing,
             plotatend::Bool=true,
             Plotf::Integer=0,
             printing::Bool=true)::Tuple{AbstractArray, String}
    # =======================================
    # Author: Antonio Frangioni
    # Date: 10-11-22
    # Version 1.01
    # Copyright Antonio Frangioni
    # =======================================

    Interactive = false  # if we pause at every iteration

    # reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # f(nothing) provides both the reference value and the default start
    (fStar, x0, _) = f(nothing)
    if isnothing(x)
        x = x0
    end

    n = size(x, 1)

    if alpha ≤ 0
        error("alpha must be positive")
    end

    if beta < 0
        error("beta must be non-negative")
    end

    # initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if printing
        @printf("Heavy Ball Gradient method\n")
        if fStar > -Inf
            @printf("feval\trel gap\t\tbest gap")
        else
            @printf("feval\tf(x)\tfbest")
        end
        @printf("\t|| g(x) ||\n\n")
    end

    if isnothing(plt)
        plt = Plotf == 2 ? plot(xlims=(0, MaxIter)) : plot()
    end

    # data collected for plotting; concretely typed, filled only if requested
    trackgap = Plotf == 2
    tracktraj = Plotf == 1 && n == 2
    gap = Float64[]      # gap (or function value) at each evaluation
    trajx = Float64[]    # first coordinate of the trajectory points
    trajy = Float64[]    # second coordinate of the trajectory points

    (v, g, _) = f(x)
    ng = norm(g)
    vbest = v

    # In the relative criterion eps is negative, hence the reference norm is
    # stored negated so that eps * ng0 == (- eps) * || first gradient || ≥ 0.
    ng0 = eps < 0 ? -ng : 1

    pastd = zeros(n)     # the direction at the previous iteration
    feval = 1            # f() evaluations count

    status = "error"

    # main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    while true
        # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

        if fStar > -Inf
            scale = max(abs(fStar), 1)
            gapk = (v - fStar) / scale
            bstgapk = (vbest - fStar) / scale

            if printing
                @printf("%4d\t%1.4e\t%1.4e\t%1.4e\n", feval, gapk, bstgapk, ng)
            end

            if trackgap
                push!(gap, gapk)
            end
        else
            if printing
                @printf("%4d\t%1.8e\t%1.8e\t\t%1.4e\n", feval, v, vbest, ng)
            end

            if trackgap
                push!(gap, v)
            end
        end

        # stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -

        # a NaN gradient norm makes every subsequent step NaN: give up now
        # rather than burning the remaining evaluations
        if isnan(ng)
            status = "error"
            break
        end

        if ng ≤ eps * ng0
            status = "optimal"
            break
        end

        if feval > MaxIter
            status = "stopped"
            break
        end

        if v ≤ MInf
            status = "unbounded"
            break
        end

        # compute deflected gradient direction- - - - - - - - - - - - - - - - -

        d = -alpha * g .+ beta * pastd

        # compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -

        # possibly record the segment [ x , x + d ] of the trajectory
        if tracktraj
            xnew = x + d
            push!(trajx, x[1], xnew[1])
            push!(trajy, x[2], xnew[2])
        end

        x += d      # rebinds x: the caller's starting vector is not modified
        pastd = d   # d is freshly allocated above, so no copy is needed

        # compute f() - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        (v, g, _) = f(x)
        ng = norm(g)
        if v < vbest
            vbest = v
        end
        feval += 1

        # iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        if Interactive
            l = readline()
            if l == "exit"
                break
            end
        end
    end

    # end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if plotatend
        # same condition used to collect gap, so gap is always defined here
        if trackgap
            plot!(plt, gap)
        elseif tracktraj
            plot!(plt, trajx, trajy)
        end
        display(plt)
    end

    return (x, status)
end  # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -