module LBFGS

using LinearAlgebra: norm, I, dot
using DataStructures: CircularBuffer
using ..OracleFunction

export LimitedMemoryBFGS

# Line search selector: :armijiowolfe or :exact (the exact search is only defined for LeastSquaresF).
const armijiowolfeorexact = :exact

function ArmijoWolfeLineSearch(
    f::Union{LeastSquaresF, OracleF},
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer;
    αinit::Real=1,
    τ::Real=1.1,
    c1::Real=1e-4,
    c2::Real=0.9,
    ϵα::Real=1e-16,
    ϵgrad::Real=1e-12,
    safeguard::Real=0.20,
)::Tuple{Real, Integer}
    # ϕ(α) = f(x + α p) and its directional derivative along p.
    ϕ = (α) -> begin
        v = f.eval(x + α * p)
        gradient = f.grad(x + α * p)
        return (v, dot(p, gradient))
    end

    α = αinit
    ϕ_0, ϕd_0 = ϕ(0)
    αgrad = ϕd_0  # fallback in case the bracketing loop below does not run

    # Bracketing phase: grow α until the strong Wolfe conditions hold or the
    # directional derivative becomes non-negative (which brackets a minimizer).
    while MaxEvaluations > 0
        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2
        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            return (α, MaxEvaluations)
        end
        if αgrad ≥ 0
            break
        end
        α *= τ
    end

    # Interpolation phase: shrink the bracket [αlo, αhi] with safeguarded secant steps.
    αlo = 0
    αhi = α
    αlograd = ϕd_0
    αhigrad = αgrad
    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (αgrad > ϵgrad)
        # Secant step on ϕ′, kept away from the bracket ends by `safeguard`.
        α = (αlo * αhigrad - αhi * αlograd) / (αhigrad - αlograd)
        α = max(
            αlo + (αhi - αlo) * safeguard,
            min(αhi - (αhi - αlo) * safeguard, α)
        )
        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2
        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            break
        end
        if αgrad < 0
            αlo = α
            αlograd = αgrad
        else
            αhi = α
            if αhi ≤ ϵα
                break
            end
            αhigrad = αgrad
        end
    end
    return (α, MaxEvaluations)
end

function ExactLineSearch(
    f::LeastSquaresF,
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer
)
    MaxEvaluations -= 1
    return (tomography(f, x, p), MaxEvaluations)
end

@doc raw"""
```julia
LimitedMemoryBFGS(f::Union{LeastSquaresF{T}, OracleF{T, F, G}};
                  [x::AbstractVector{T}, ϵ::T=1e-6, m::Integer=3,
                   MaxEvaluations::Integer=10000])
```

Computes the minimum of the input function `f`.

### Input

- `f`              -- the input function to minimize.
- `x`              -- the starting point; if not specified, the default one for `f` (`f.starting_point`) is used.
- `ϵ`              -- the relative tolerance on the gradient norm used by the stopping criterion.
- `m`              -- maximum number of curvature pairs stored to build the approximate inverse Hessian.
- `MaxEvaluations` -- maximum number of function evaluations; both `f.eval` and `f.grad` calls are counted.

### Output

A named tuple containing:

- `x`    -- the minimum found
- `eval` -- the value of the function at the minimum
- `grad` -- the gradient of the function at the minimum
- `RemainingEvaluations` -- the number of function evaluations not used.

See also [`QRhous`](@ref).
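
### Example

A minimal usage sketch. The three-argument `OracleF` constructor shown below (objective, gradient, starting point) is only an assumed form of the `OracleFunction` API and may need adapting; the call to `LimitedMemoryBFGS` itself follows the signature above.

```julia
f = OracleF(x -> sum(abs2, x),   # f(x) = ‖x‖², minimized at the origin (assumed constructor)
            x -> 2 .* x,         # its gradient
            [1.0, -2.0])         # starting point
result = LimitedMemoryBFGS(f; ϵ=1e-8, m=5)
result.x      # should be ≈ [0.0, 0.0]
result.eval   # should be ≈ 0.0
```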
"""
function LimitedMemoryBFGS(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    m::Integer=3,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}
    if isnothing(x)
        x = f.starting_point
    end
    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    H = CircularBuffer{NamedTuple}(m)     # the last m curvature pairs (ρ, y, s)
    αstore = Array{eltype(x)}(undef, 0)

    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        # Two-loop recursion: apply the L-BFGS inverse Hessian approximation to
        # the gradient without ever forming it as a matrix.
        q = gradient
        empty!(αstore)
        for i ∈ reverse(H)                # first loop: newest to oldest pair
            push!(αstore, i[:ρ] * dot(i[:s], q))
            q -= αstore[end] * i[:y]
        end
        # Choose H0 as a scaled identity resembling the inverse Hessian.
        H0 = if isempty(H)
            I
        else
            (dot(H[end][:s], H[end][:y]) / dot(H[end][:y], H[end][:y])) * I
        end
        r = H0 * q
        for i ∈ H                         # second loop: oldest to newest pair
            βi = i[:ρ] * dot(i[:y], r)
            r += i[:s] * (pop!(αstore) - βi)
        end
        p = -r  # search direction

        # Line search along p.
        if armijiowolfeorexact === :armijiowolfe || f isa OracleF
            α, MaxEvaluations = ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
        elseif armijiowolfeorexact === :exact
            α, MaxEvaluations = ExactLineSearch(f, x, p, MaxEvaluations)
        end

        previousx = x
        x = x + α * p
        previousgradient = gradient
        gradient = f.grad(x)
        MaxEvaluations -= 1

        # Store the new curvature pair, or restart when the curvature condition fails.
        s = x - previousx
        y = gradient - previousgradient
        curvature = dot(s, y)
        if curvature ≤ 1e-16
            empty!(H)  # restart from the gradient
        else
            ρ = inv(curvature)
            push!(H, (; :ρ => ρ, :y => y, :s => s))
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations
    )
end

end # module LBFGS
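
#=
Illustration (a sketch, not part of the module): the two-loop recursion used in
`LimitedMemoryBFGS` returns the same vector as multiplying the gradient by the
explicitly updated inverse-Hessian approximation

    H⁺ = (I - ρ s yᵀ) H (I - ρ y sᵀ) + ρ s sᵀ,   ρ = 1 / (sᵀ y),

built from the same (s, y) pairs. The self-contained check below assumes nothing
from this package; all names (`pairs`, `γ`, ...) are local to the example.

    using LinearAlgebra

    n = 4
    A = randn(n, n); A = A' * A + I                  # an SPD "Hessian", so sᵀy > 0
    pairs = [(s = s, y = A * s) for s in (randn(n) for _ in 1:3)]
    g = randn(n)

    # Explicit inverse-Hessian update, oldest pair first, starting from γI.
    γ = dot(pairs[end].s, pairs[end].y) / dot(pairs[end].y, pairs[end].y)
    H = γ * Matrix{Float64}(I, n, n)
    for (s, y) in pairs
        ρ = 1 / dot(s, y)
        V = I - ρ * y * s'
        H = V' * H * V + ρ * s * s'
    end

    # Two-loop recursion, mirroring the main loop of `LimitedMemoryBFGS`.
    q = copy(g); αs = Float64[]
    for (s, y) in reverse(pairs)                     # newest to oldest
        α = dot(s, q) / dot(s, y)
        push!(αs, α)
        q -= α * y
    end
    r = γ * q
    for (s, y) in pairs                              # oldest to newest
        β = dot(y, r) / dot(s, y)
        r += s * (pop!(αs) - β)
    end

    @assert r ≈ H * g                                # same direction either way
=#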