Added Project and Report

2024-07-30 14:43:25 +02:00
parent c828453e94
commit 3ad6f7f86f
311 changed files with 13490 additions and 3280 deletions
--- a/project/L-BFGS/BFGS.jl
+++ b/project/L-BFGS/BFGS.jl
@ -0,0 +1,314 @@
+module BFGS
+
+using LinearAlgebra: norm, I, dot, diagm, mul!
+
+using ..OracleFunction
+
+export BroydenFletcherGoldfarbShanno, BroydenFletcherGoldfarbShannoDogleg
+
+const armijiowolfeorexact = :exact # line-search selector read by the solvers below: `:exact` or `:armijiowolfe`
+BFGSorDFP = :BFGS # quasi-Newton update selector read by the solvers below: `:BFGS` or `:DFP`; NOTE(review): non-const global, presumably left reassignable on purpose — confirm
+
+# Line search along `p` from `x` that looks for a step satisfying both the Armijo
+# (sufficient decrease, constant `c1`) and the strong Wolfe (curvature, constant
+# `c2`) conditions on ϕ(α) = f(x + α * p).
+#
+# Phase 1 grows the step from `αinit` by the factor `τ` until the conditions hold
+# (the step is returned immediately) or the slope ϕ'(α) becomes non-negative.
+# Phase 2 interpolates the slope between the bracket ends `αlo` and `αhi`; the new
+# step is kept at least `safeguard * (αhi - αlo)` away from both ends, and the loop
+# stops when the bracket is narrower than `ϵα` or the upper slope drops below `ϵgrad`.
+#
+# Each trial step costs one `f.eval` and one `f.grad`, charged as 2 against
+# `MaxEvaluations`; the initial ϕ(0) is not charged.
+#
+# Returns the tuple `(α, remaining evaluations)`.
+function ArmijoWolfeLineSearch(
+        f::Union{LeastSquaresF, OracleF},
+        x::AbstractArray,
+        p::AbstractArray,
+        MaxEvaluations::Integer;
+        αinit::Real=1,
+        τ::Real=1.1,
+        c1::Real=1e-4,
+        c2::Real=0.9,
+        ϵα::Real=1e-16,
+        ϵgrad::Real=1e-12,
+        safeguard::Real=0.20,
+    )::Tuple{Real, Integer}
+
+    # value and directional derivative of f along p at step α
+    ϕ = (α) -> begin
+        point = x + α * p
+        v = f.eval(point)
+        gradient = f.grad(point)
+        return (v, dot(p, gradient))
+    end
+
+    α = αinit
+
+    ϕ_0, ϕd_0 = ϕ(0)
+
+    # start from the slope at 0 so that the code after the first loop stays well
+    # defined even when the budget is already exhausted and the loop never runs
+    αgrad = ϕd_0
+
+    while MaxEvaluations > 0
+        αcurr, αgrad = ϕ(α)
+        MaxEvaluations -= 2
+
+        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
+            return (α, MaxEvaluations)
+        end
+
+        if αgrad ≥ 0
+            break
+        end
+        α *= τ
+    end
+
+    αlo = zero(α)
+    αhi = α
+    αlograd = ϕd_0
+    αhigrad = αgrad
+
+    # the slope at the upper end of the bracket drives the loop: testing the slope
+    # of the latest trial instead would stop right after `αlo` has been moved
+    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (αhigrad > ϵgrad)
+        # root of the linear interpolation of the slope, then safeguarded
+        α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
+        α = max(
+            αlo + (αhi - αlo) * safeguard,
+            min(αhi - (αhi - αlo) * safeguard, α)
+        )
+
+        αcurr, αgrad = ϕ(α)
+        MaxEvaluations -= 2
+
+        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
+            break
+        end
+
+        if αgrad < 0
+            αlo = α
+            αlograd = αgrad
+        else
+            αhi = α
+            if αhi ≤ ϵα
+                break
+            end
+            αhigrad = αgrad
+        end
+    end
+
+    return (α, MaxEvaluations)
+end
+
+# Exact line search for a least squares problem: the step length along `p` is
+# obtained in closed form from `tomography`, and one evaluation is charged
+# against the budget. Returns `(step, remaining evaluations)`.
+function ExactLineSearch(
+        f::LeastSquaresF,
+        x::AbstractArray,
+        p::AbstractArray,
+        MaxEvaluations::Integer
+    )
+    stepsize = tomography(f, x, p)
+    return (stepsize, MaxEvaluations - 1)
+end
+
+
+@doc raw"""
+```julia
+BroydenFletcherGoldfarbShanno(f::Union{LeastSquaresF, OracleF}; x=nothing, ϵ=1e-6, MaxEvaluations::Integer=10000)
+```
+
+Computes the minimum of the input function `f` with a quasi-Newton method that keeps
+a dense approximation `H` of the inverse Hessian.
+
+### Input
+
+- `f` -- the input function to minimize.
+- `x` -- (keyword) the starting point, if not specified the default one for the function `f` is used.
+- `ϵ` -- (keyword) the tolerance for the stopping criterion: iterations stop when the norm of the
+  gradient is at most `ϵ` times the norm of the gradient at the starting point.
+- `MaxEvaluations` -- (keyword) maximum number of function evaluations. Both ```f.eval``` and ```f.grad``` are counted.
+
+### Output
+
+A named tuple containing:
+- `x` -- the minimum found
+- `eval` -- the value of the function at the minimum
+- `grad` -- the gradient of the function at the minimum
+- `RemainingEvaluations` -- the number of function evaluation not used.
+
+"""
+function BroydenFletcherGoldfarbShanno(
+        f::Union{LeastSquaresF, OracleF};
+        x::Union{Nothing, AbstractVector{T}}=nothing,
+        ϵ::T=1e-6,
+        MaxEvaluations::Integer=10000
+    )::NamedTuple where {T}
+
+    if isnothing(x)
+        x = f.starting_point
+    end
+
+    gradient = f.grad(x)
+    MaxEvaluations -= 1
+    normgradient0 = norm(gradient)
+    H = diagm(ones(length(x)))
+    tmp1 = similar(H)
+    tmp2 = similar(H)
+
+    firstEvaluation = true
+
+    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
+        p = -(H * gradient) # direction
+
+        # generic oracles have no closed-form step, so they always use Armijo-Wolfe
+        α, MaxEvaluations =
+            if armijiowolfeorexact === :armijiowolfe || f isa OracleF
+                ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
+            elseif armijiowolfeorexact === :exact
+                ExactLineSearch(f, x, p, MaxEvaluations)
+            end
+
+        previousx = x
+        x = x + α * p
+
+        previousgradient = gradient
+        gradient = f.grad(x)
+        MaxEvaluations -= 1
+
+        s = x - previousx
+        y = gradient - previousgradient
+        curvature = dot(y, s)
+
+        # skip the update when the curvature condition y's > 0 fails (or is NaN):
+        # dividing by it would fill H with Inf/NaN or make it indefinite
+        if !(curvature > 1e-16)
+            continue
+        end
+        ρ = inv(curvature)
+
+        # if its the first iteration then set H to an aproximation of the Hessian
+        if firstEvaluation
+            mul!(H, I, curvature/dot(y, y))
+            firstEvaluation = false
+        end
+
+        if BFGSorDFP == :DFP
+            # DFP update -------------------------------------------
+            # H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)
+
+            mul!(tmp1, H * y * y', H)
+            mul!(tmp2, s, s')
+            H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2
+        elseif BFGSorDFP == :BFGS
+            # BFGS update ------------------------------------------
+            # H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'
+
+            mul!(tmp1, H * y, s')
+            mul!(tmp2, s, s')
+            H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')
+        end
+    end
+
+    return (;
+        :x => x,
+        :eval => f.eval(x),
+        :grad => gradient,
+        :RemainingEvaluations => MaxEvaluations)
+end
+
+
+# Quasi-Newton minimization of `f` where the step is chosen with the dogleg
+# trust-region strategy instead of a line search.
+#
+# `B` approximates the Hessian and `H` its inverse; both are updated with the rule
+# selected by `BFGSorDFP`. The trust-region radius `Δ` is doubled or halved from
+# the ratio between actual and predicted reduction. Every step is accepted; the
+# ratio only drives the radius.
+#
+# Keywords: `x` starting point (defaults to `f.starting_point`), `ϵ` relative
+# tolerance on the gradient norm, `MaxEvaluations` budget of `f.eval`/`f.grad` calls.
+#
+# Returns a named tuple with `x`, `eval`, `grad` and `RemainingEvaluations`.
+function BroydenFletcherGoldfarbShannoDogleg(
+        f::Union{LeastSquaresF, OracleF};
+        x::Union{Nothing, AbstractVector{T}}=nothing,
+        ϵ::T=1e-6,
+        MaxEvaluations::Integer=10000
+    )::NamedTuple where {T}
+
+    if isnothing(x)
+        x = f.starting_point
+    end
+
+    Δ = 1.0 # initial size of trust region
+    smallestΔ = 1e-4 # smallest size where linear aproximation is applied
+
+    gradient = f.grad(x)
+    evalx = f.eval(x) # cached so the actual reduction needs one evaluation per step
+    MaxEvaluations -= 2
+    normgradient0 = norm(gradient)
+    normgradient = normgradient0
+    H = diagm(ones(length(x)))
+    B = copy(H)
+    tmp1 = similar(H)
+    tmp2 = similar(H)
+    tmp3 = similar(H)
+
+    firstEvaluation = true
+
+    while MaxEvaluations > 0 && normgradient > ϵ * normgradient0
+        # compute s by solving the subproblem min_s grad' * s + 0.5 s' * B * s with norm(s) ≤ Δ
+        gBg = dot(gradient, B, gradient)
+
+        if Δ ≤ smallestΔ || B == I
+            # the Cauchy point is enought for small regions (linear aproximation)
+            τ = gBg ≤ 0 ? 1.0 : min((normgradient^3)/(Δ * gBg), 1)
+            s = τ * (-(Δ/normgradient) * gradient)
+        else
+            pB = -(H * gradient)
+            pU = -dot(gradient, gradient)/gBg * gradient
+
+            if norm(pB) ≤ Δ
+                # the region is larger than the dogleg
+                s = pB
+            elseif Δ ≤ norm(pU)
+                # the region is smaller than the first step
+                s = Δ/norm(pU) * pU
+            else
+                # solve the quadratic sistem for the dogleg:
+                # norm(pU + (τ - 1) * (pB - pU)) = Δ with τ in [1, 2]
+                d = pB - pU
+                a = dot(pU, d)
+                b = dot(d, d)
+                c = dot(pU, pU)
+
+                τ = (-a + b + sqrt(a^2 - c * b + b * Δ^2))/b
+                s = pU + (τ - 1) * d
+            end
+        end
+
+        x = x + s
+
+        previousgradient = gradient
+        gradient = f.grad(x)
+        normgradient = norm(gradient)
+        nextevalx = f.eval(x)
+        MaxEvaluations -= 2
+
+        y = gradient - previousgradient
+
+        # both reductions refer to the step just taken: from the previous point
+        # (value `evalx`, gradient `previousgradient`) to the current one
+        ared = evalx - nextevalx # actual reduction
+        pred = -(dot(previousgradient, s) + 0.5 * dot(s, B, s)) # predicted reduction
+        evalx = nextevalx
+
+        # expand or contract the region
+        if (0.75 < ared/pred) && (0.8 * Δ < norm(s))
+            Δ = 2 * Δ
+        elseif (ared/pred < 0.1)
+            Δ = 0.5 * Δ
+        end
+
+        # skip the update when the curvature condition y's > 0 fails (or is NaN):
+        # dividing by it would fill H and B with Inf/NaN or make them indefinite
+        curvature = dot(y, s)
+        if !(curvature > 1e-16)
+            continue
+        end
+        ρ = inv(curvature)
+
+        # if its the first iteration then set H to an aproximation of the Hessian
+        # and B to its inverse, so that the two stay consistent
+        if firstEvaluation
+            mul!(H, I, curvature/dot(y, y))
+            mul!(B, I, dot(y, y)/curvature)
+            firstEvaluation = false
+        end
+
+        if BFGSorDFP == :DFP
+            # DFP update -------------------------------------------
+            # H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)
+
+            mul!(tmp1, H * y * y', H)
+            mul!(tmp2, s, s')
+            H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2
+
+            # B = (I - ρ * y * s') * B * (I - ρ * s * y') + ρ * y * y'
+            mul!(tmp1, y, s')
+            tmp2 = I - ρ * tmp1
+            mul!(tmp1, tmp2, B)
+            mul!(tmp3, tmp1, tmp2')
+            mul!(tmp2, y, y')
+            B .= tmp3 .+ ρ * tmp2
+        elseif BFGSorDFP == :BFGS
+            # BFGS update ------------------------------------------
+            # H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'
+
+            mul!(tmp1, H * y, s')
+            mul!(tmp2, s, s')
+            H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')
+
+            # B = B - (B * s * s' * B)/(s' * B * s) + (y * y')/(y' * s)
+            mul!(tmp1, B * s * s', B)
+            mul!(tmp2, y, y')
+            B .+= -tmp1/dot(s, B, s) .+ ρ * tmp2
+        end
+    end
+
+    return (;
+        :x => x,
+        :eval => evalx,
+        :grad => gradient,
+        :RemainingEvaluations => MaxEvaluations)
+end
+
+end # module BFGS
--- a/project/L-BFGS/LBFGS.jl
+++ b/project/L-BFGS/LBFGS.jl
@ -0,0 +1,193 @@
+module LBFGS
+
+using LinearAlgebra: norm, I, dot
+using DataStructures: CircularBuffer
+
+using ..OracleFunction
+
+export LimitedMemoryBFGS
+
+const armijiowolfeorexact = :exact # line-search selector read by LimitedMemoryBFGS: `:exact` or `:armijiowolfe`
+
+# Line search along `p` from `x` that looks for a step satisfying both the Armijo
+# (sufficient decrease, constant `c1`) and the strong Wolfe (curvature, constant
+# `c2`) conditions on ϕ(α) = f(x + α * p).
+#
+# Phase 1 grows the step from `αinit` by the factor `τ` until the conditions hold
+# (the step is returned immediately) or the slope ϕ'(α) becomes non-negative.
+# Phase 2 interpolates the slope between the bracket ends `αlo` and `αhi`; the new
+# step is kept at least `safeguard * (αhi - αlo)` away from both ends, and the loop
+# stops when the bracket is narrower than `ϵα` or the upper slope drops below `ϵgrad`.
+#
+# Each trial step costs one `f.eval` and one `f.grad`, charged as 2 against
+# `MaxEvaluations`; the initial ϕ(0) is not charged.
+#
+# Returns the tuple `(α, remaining evaluations)`.
+function ArmijoWolfeLineSearch(
+        f::Union{LeastSquaresF, OracleF},
+        x::AbstractArray,
+        p::AbstractArray,
+        MaxEvaluations::Integer;
+        αinit::Real=1,
+        τ::Real=1.1,
+        c1::Real=1e-4,
+        c2::Real=0.9,
+        ϵα::Real=1e-16,
+        ϵgrad::Real=1e-12,
+        safeguard::Real=0.20,
+    )::Tuple{Real, Integer}
+
+    # value and directional derivative of f along p at step α
+    ϕ = (α) -> begin
+        point = x + α * p
+        v = f.eval(point)
+        gradient = f.grad(point)
+        return (v, dot(p, gradient))
+    end
+
+    α = αinit
+
+    ϕ_0, ϕd_0 = ϕ(0)
+
+    # start from the slope at 0 so that the code after the first loop stays well
+    # defined even when the budget is already exhausted and the loop never runs
+    αgrad = ϕd_0
+
+    while MaxEvaluations > 0
+        αcurr, αgrad = ϕ(α)
+        MaxEvaluations -= 2
+
+        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
+            return (α, MaxEvaluations)
+        end
+
+        if αgrad ≥ 0
+            break
+        end
+        α *= τ
+    end
+
+    αlo = zero(α)
+    αhi = α
+    αlograd = ϕd_0
+    αhigrad = αgrad
+
+    # the slope at the upper end of the bracket drives the loop: testing the slope
+    # of the latest trial instead would stop right after `αlo` has been moved
+    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (αhigrad > ϵgrad)
+        # root of the linear interpolation of the slope, then safeguarded
+        α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
+        α = max(
+            αlo + (αhi - αlo) * safeguard,
+            min(αhi - (αhi - αlo) * safeguard, α)
+        )
+
+        αcurr, αgrad = ϕ(α)
+        MaxEvaluations -= 2
+
+        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
+            break
+        end
+
+        if αgrad < 0
+            αlo = α
+            αlograd = αgrad
+        else
+            αhi = α
+            if αhi ≤ ϵα
+                break
+            end
+            αhigrad = αgrad
+        end
+    end
+
+    return (α, MaxEvaluations)
+end
+
+# Exact line search for a least squares problem: the step length along `p` is
+# obtained in closed form from `tomography`, and one evaluation is charged
+# against the budget. Returns `(step, remaining evaluations)`.
+function ExactLineSearch(
+        f::LeastSquaresF,
+        x::AbstractArray,
+        p::AbstractArray,
+        MaxEvaluations::Integer
+    )
+    stepsize = tomography(f, x, p)
+    return (stepsize, MaxEvaluations - 1)
+end
+
+@doc raw"""
+```julia
+LimitedMemoryBFGS(f::Union{LeastSquaresF, OracleF}; x=nothing, ϵ=1e-6, m::Integer=3, MaxEvaluations::Integer=10000)
+```
+
+Minimizes the input function `f` with the limited-memory BFGS method: the search
+direction is built with the two-loop recursion from the last `m` stored pairs.
+
+### Input
+
+- `f` -- the function to minimize.
+- `x` -- (keyword) the starting point; when omitted the default one of `f` is used.
+- `ϵ` -- (keyword) the tolerance of the stopping criterion, relative to the norm of the initial gradient.
+- `m` -- (keyword) maximum number of stored pairs used to approximate the hessian.
+- `MaxEvaluations` -- (keyword) maximum number of function evaluations. Both ```f.eval``` and ```f.grad``` are counted.
+
+### Output
+
+A named tuple containing:
+- `x` -- the minimum found
+- `eval` -- the value of the function at the minimum
+- `grad` -- the gradient of the function at the minimum
+- `RemainingEvaluations` -- the number of function evaluations not used.
+
+See also [`QRhous`](@ref).
+"""
+function LimitedMemoryBFGS(
+        f::Union{LeastSquaresF, OracleF};
+        x::Union{Nothing, AbstractVector{T}}=nothing,
+        ϵ::T=1e-6,
+        m::Integer=3,
+        MaxEvaluations::Integer=10000
+    )::NamedTuple where {T}
+
+    x = isnothing(x) ? f.starting_point : x
+
+    gradient = f.grad(x)
+    MaxEvaluations -= 1
+    normgradient0 = norm(gradient)
+    history = CircularBuffer{NamedTuple}(m)
+    coefficients = Array{eltype(x)}(undef, 0)
+
+    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
+        # first loop of the recursion: newest pair to oldest pair
+        q = gradient
+        empty!(coefficients)
+        for pair ∈ reverse(history)
+            push!(coefficients, pair.ρ * dot(pair.s, q))
+            q -= coefficients[end] * pair.y
+        end
+
+        # initial matrix: identity at the start, otherwise scaled by the newest pair
+        scaling = if isempty(history)
+                I
+            else
+                newest = history[end]
+                (dot(newest.s, newest.y)/dot(newest.y, newest.y)) * I
+            end
+
+        # second loop: oldest pair to newest pair, consuming the coefficients
+        # in the reverse order they were stored
+        r = scaling * q
+        for pair ∈ history
+            β = pair.ρ * dot(pair.y, r)
+            r += pair.s * (pop!(coefficients) - β)
+        end
+        p = -r # direction
+
+        if armijiowolfeorexact === :armijiowolfe || f isa OracleF
+            α, MaxEvaluations = ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
+        elseif armijiowolfeorexact === :exact
+            α, MaxEvaluations = ExactLineSearch(f, x, p, MaxEvaluations)
+        end
+
+        oldx = x
+        x = x + α * p
+
+        oldgradient = gradient
+        gradient = f.grad(x)
+        MaxEvaluations -= 1
+
+        s = x - oldx
+        y = gradient - oldgradient
+        curvature = dot(s, y)
+
+        if curvature ≤ 1e-16
+            empty!(history) # restart from the gradient
+        else
+            push!(history, (ρ = inv(curvature), y = y, s = s))
+        end
+    end
+
+    return (
+        x = x,
+        eval = f.eval(x),
+        grad = gradient,
+        RemainingEvaluations = MaxEvaluations)
+end
+
+end # module LBFGS
--- a/project/L-BFGS/OracleFunction.jl
+++ b/project/L-BFGS/OracleFunction.jl
@ -0,0 +1,85 @@
+module OracleFunction
+
+using LinearAlgebra: norm
+
+export OracleF, LeastSquaresF, tomography
+
+@doc """
+```julia
+OracleF{T, F<:Function, G<:Function}
+```
+
+Struct that holds a generic function to evaluate.
+
+### Fields
+
+- `starting_point` -- the point from which minimization should start, an array with elements of type `T`.
+- `eval` -- the function that evaluates a point.
+- `grad` -- the function that computes the gradient at a point.
+"""
+struct OracleF{T, F<:Function, G<:Function}
+    starting_point::AbstractArray{T}
+    eval::F
+    grad::G
+end
+
+@doc """
+```julia
+LeastSquaresF{T, F<:Function, G<:Function}
+```
+
+Struct that holds an instance of a least squares problem. The interface is similar to the `OracleF` struct:
+the properties `eval`, `grad` and `starting_point` are forwarded to the wrapped `oracle`.
+
+### Fields
+
+- `oracle` -- the `OracleF` holding the starting point, the objective and its gradient.
+- `X` -- the matrix of the problem.
+- `y` -- the right-hand side of the problem.
+- `symm` -- filled with `X' * X` by the outer constructor.
+- `yX` -- filled with `y' * X` by the outer constructor.
+
+See also [`OracleF`](@ref).
+"""
+struct LeastSquaresF{T, F<:Function, G<:Function}
+    oracle::OracleF{T, F, G}
+    X::AbstractMatrix{T}
+    y::AbstractArray{T}
+    symm::AbstractMatrix{T}
+    yX::AbstractArray{T}
+end
+
+# Builds the least squares problem min_x norm(X * x - y)^2 starting from
+# `starting_point`: the objective and its gradient are wrapped in an `OracleF`,
+# and the products `X' * X` and `y' * X` are stored alongside for later reuse.
+function LeastSquaresF(starting_point::AbstractArray{T}, X::AbstractMatrix{T}, y::AbstractArray{T}) where T
+    residual(x) = X * x - y
+    objective(x) = norm(residual(x))^2
+    objectivegradient(x) = 2 * X' * residual(x)
+
+    problem = OracleF(starting_point, objective, objectivegradient)
+    return LeastSquaresF(problem, X, y, X' * X, y' * X)
+end
+
+# Builds the least squares problem from a named tuple that must provide the keys
+# `:X_hat`, `:y_hat` and `:start`; throws an `ArgumentError` when one is missing.
+function LeastSquaresF(t::NamedTuple)
+    required = [:X_hat, :y_hat, :start]
+    if !issubset(required, keys(t))
+        throw(ArgumentError("Input tuple does not contain necessary values, found: " * string(keys(t))))
+    end
+
+    return LeastSquaresF(t[:start], t[:X_hat], t[:y_hat])
+end
+
+@doc """
+```julia
+tomography(l::LeastSquaresF{T, F, G}, w::AbstractArray{T}, p::AbstractArray{T})
+```
+
+Returns the step length that minimizes the function `l` along the line that passes through `w` with direction `p`,
+computed from the stored `yX` and `symm` fields of `l`.
+
+See also [`LeastSquaresF`](@ref).
+"""
+function tomography(l::LeastSquaresF{T, F, G}, w::AbstractArray{T}, p::AbstractArray{T}) where {T, F, G}
+    slope = l.yX * p - w' * l.symm * p
+    return slope * inv(p' * l.symm * p)
+end
+
+# Property access for `LeastSquaresF`: `eval`, `grad` and `starting_point` are read
+# from the wrapped `oracle`, any other name is read as a field of `l` itself.
+function Base.getproperty(l::LeastSquaresF{T, F, G}, name::Symbol) where {T, F, G}
+    if name === :eval || name === :grad || name === :starting_point
+        return getfield(getfield(l, :oracle), name)
+    end
+    return getfield(l, name)
+end
+
+end ## module OracleFunction
--- a/project/L-BFGS/SR1.jl
+++ b/project/L-BFGS/SR1.jl
@ -0,0 +1,119 @@
+module SR1
+
+using LinearAlgebra: norm, I, dot, diagm, mul!
+
+using ..OracleFunction
+
+export SymmetricRank1
+
+
+# Minimization of `f` with the symmetric rank one (SR1) quasi-Newton update and a
+# dogleg trust-region step.
+#
+# `B` approximates the Hessian and `H` its inverse. A trial step `s` is accepted
+# only when the ratio between actual and predicted reduction exceeds `η`; the
+# trust-region radius `Δ` is doubled or halved from the same ratio. `B` and `H` are
+# updated from the trial step whether or not it was accepted, unless the skipping
+# rule (constant `r`) rejects one of the two denominators.
+#
+# Keywords: `x` starting point (defaults to `f.starting_point`), `ϵ` relative
+# tolerance on the gradient norm, `MaxEvaluations` budget of `f.eval`/`f.grad` calls.
+#
+# Returns a named tuple with `x`, `eval`, `grad` and `RemainingEvaluations`.
+function SymmetricRank1(
+        f::Union{LeastSquaresF, OracleF};
+        x::Union{Nothing, AbstractVector{T}}=nothing,
+        ϵ::T=1e-6,
+        η::T=1e-4, # threshold for ignoring direction
+        r::T=1e-8, # skipping rule for updating B and H
+        MaxEvaluations::Integer=10000
+    )::NamedTuple where {T}
+
+    Δ = 1.0 # initial size of trust region
+    smallestΔ = 1e-4 # smallest size where linear aproximation is applied
+
+    if isnothing(x)
+        x = f.starting_point
+    end
+
+    gradient = f.grad(x)
+    evalx = f.eval(x)
+    MaxEvaluations -= 2
+    normgradient0 = norm(gradient)
+    normgradient = normgradient0
+    B = diagm(ones(length(x)))
+    H = diagm(ones(length(x)))
+    residualB = similar(x, eltype(B)) # holds y - B * s
+    residualH = similar(x, eltype(H)) # holds s - H * y
+    tmp = similar(H)
+
+    while MaxEvaluations > 0 && normgradient > ϵ * normgradient0
+        # compute s by solving the subproblem min_s grad' * s + 0.5 s' * B * s with norm(s) ≤ Δ
+        gBg = dot(gradient, B, gradient)
+
+        if Δ ≤ smallestΔ || B == I
+            # the Cauchy point is enought for small regions (linear aproximation)
+            τ = gBg ≤ 0 ? 1.0 : min((normgradient^3)/(Δ * gBg), 1)
+            s = τ * (-(Δ/normgradient) * gradient)
+        else
+            pB = -(H * gradient)
+            pU = -dot(gradient, gradient)/gBg * gradient
+
+            if norm(pB) ≤ Δ
+                # the region is larger than the dogleg
+                s = pB
+            elseif Δ ≤ norm(pU)
+                # the region is smaller than the first step
+                s = Δ/norm(pU) * pU
+            else
+                # solve the quadratic sistem for the dogleg:
+                # norm(pU + (τ - 1) * (pB - pU)) = Δ with τ in [1, 2]
+                d = pB - pU
+                a = dot(pU, d)
+                b = dot(d, d)
+                c = dot(pU, pU)
+
+                τ = (-a + b + sqrt(a^2 - c * b + b * Δ^2))/b
+                s = pU + (τ - 1) * d
+            end
+        end
+
+        # ------
+        trial = x + s
+        nextgradient = f.grad(trial)
+        nextevalx = f.eval(trial)
+        MaxEvaluations -= 2
+
+        y = nextgradient - gradient
+        ared = evalx - nextevalx # actual reduction
+        pred = -(dot(gradient, s) + 0.5 * dot(s, B, s)) # predicted reduction
+
+        if ared/pred > η
+            # accept the step, reusing the gradient already computed at the trial point
+            x = trial
+            evalx = nextevalx
+            gradient = nextgradient
+            normgradient = norm(gradient)
+        end
+
+        # expand or contract the region
+        if (0.75 < ared/pred) && (0.8 * Δ < norm(s))
+            Δ = 2 * Δ
+        elseif (ared/pred < 0.1)
+            Δ = 0.5 * Δ
+        end
+
+        mul!(residualB, B, s)
+        residualB .= y .- residualB
+        mul!(residualH, H, y)
+        residualH .= s .- residualH
+        denominatorB = dot(residualB, s)
+        denominatorH = dot(residualH, y)
+
+        # skipping rule: both denominators must be nonzero and not too small,
+        # otherwise the division below would fill B or H with NaN/Inf; B and H are
+        # updated together so that they keep describing the same model
+        safeB = !iszero(denominatorB) && abs(denominatorB) ≥ r * norm(s) * norm(residualB)
+        safeH = !iszero(denominatorH) && abs(denominatorH) ≥ r * norm(y) * norm(residualH)
+
+        if safeB && safeH
+            # B = B + ((y - B * s)*(y - B * s)')/((y - B * s)' * s)
+            mul!(tmp, residualB, residualB')
+            tmp ./= denominatorB
+            B .+= tmp
+
+            # H = H + ((s - H * y) * (s - H * y)')/((s - H * y)' * y)
+            mul!(tmp, residualH, residualH')
+            tmp ./= denominatorH
+            H .+= tmp
+        end
+    end
+
+    return (;
+        :x => x,
+        :eval => evalx,
+        :grad => gradient,
+        :RemainingEvaluations => MaxEvaluations)
+end
+
+end # module SR1