Added Project and Report

project/L-BFGS/BFGS.jl (new file, 314 lines)
@@ -0,0 +1,314 @@
module BFGS

using LinearAlgebra: norm, I, dot, diagm, mul!

using ..OracleFunction

export BroydenFletcherGoldfarbShanno, BroydenFletcherGoldfarbShannoDogleg

const armijiowolfeorexact = :exact  # line-search selector: :exact (least squares only) or :armijiowolfe
BFGSorDFP = :BFGS                   # quasi-Newton update to use: :BFGS or :DFP
function ArmijoWolfeLineSearch(
    f::Union{LeastSquaresF, OracleF},
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer;
    αinit::Real=1,
    τ::Real=1.1,
    c1::Real=1e-4,
    c2::Real=0.9,
    ϵα::Real=1e-16,
    ϵgrad::Real=1e-12,
    safeguard::Real=0.20,
)::Tuple{Real, Integer}

    # ϕ(α) = f(x + α p) together with its directional derivative along p
    ϕ = (α) -> begin
        v = f.eval(x + α * p)
        gradient = f.grad(x + α * p)
        return (v, dot(p, gradient))
    end

    α = αinit
    local αgrad

    ϕ_0, ϕd_0 = ϕ(0)

    # expansion phase: enlarge α until the strong Wolfe conditions hold
    # or the directional derivative becomes non-negative (a minimizer is bracketed)
    while MaxEvaluations > 0
        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            return (α, MaxEvaluations)
        end

        if αgrad ≥ 0
            break
        end
        α *= τ
    end

    # interpolation (zoom) phase: shrink the bracket [αlo, αhi] with secant steps,
    # safeguarded so that each new α stays well inside the bracket
    αlo = 0
    αhi = α
    αlograd = ϕd_0
    αhigrad = αgrad

    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (αgrad > ϵgrad)
        α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
        α = max(
            αlo + (αhi - αlo) * safeguard,
            min(αhi - (αhi - αlo) * safeguard, α)
        )

        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            break
        end

        if αgrad < 0
            αlo = α
            αlograd = αgrad
        else
            αhi = α
            if αhi ≤ ϵα
                break
            end
            αhigrad = αgrad
        end
    end

    return (α, MaxEvaluations)
end
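
# The two acceptance tests inside the loops above are the strong Wolfe conditions.
# A minimal standalone sketch of that test (the helper name is illustrative and not
# part of the original code; the defaults mirror the keyword arguments above):
function _strong_wolfe_holds(ϕ_0, ϕd_0, ϕ_α, ϕd_α, α; c1=1e-4, c2=0.9)
    sufficient_decrease = ϕ_α ≤ ϕ_0 + c1 * α * ϕd_0   # Armijo condition
    curvature = abs(ϕd_α) ≤ -c2 * ϕd_0                # strong curvature condition
    return sufficient_decrease && curvature
end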

function ExactLineSearch(
    f::LeastSquaresF,
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer
)
    # closed-form minimizer along x + α p, available only for least squares (see tomography)
    MaxEvaluations -= 1
    return (tomography(f, x, p), MaxEvaluations)
end

@doc raw"""
```julia
BroydenFletcherGoldfarbShanno(f::Union{LeastSquaresF, OracleF}; x::AbstractVector=f.starting_point, ϵ::Real=1e-6, MaxEvaluations::Integer=10000)
```

Computes the minimum of the input function `f` with the BFGS quasi-Newton method.

### Input

- `f` -- the input function to minimize.
- `x` -- the starting point; if not specified, the default one for the function `f` is used.
- `ϵ` -- the tolerance of the stopping criterion (relative reduction of the gradient norm).
- `MaxEvaluations` -- maximum number of function evaluations. Both `f.eval` and `f.grad` calls are counted.

### Output

A named tuple containing:
- `x` -- the minimum found
- `eval` -- the value of the function at the minimum
- `grad` -- the gradient of the function at the minimum
- `RemainingEvaluations` -- the number of function evaluations not used.

"""
function BroydenFletcherGoldfarbShanno(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    if isnothing(x)
        x = f.starting_point
    end

    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    H = diagm(ones(length(x)))   # approximation of the inverse Hessian
    tmp1 = similar(H)
    tmp2 = similar(H)

    firstEvaluation = true

    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        p = -H * gradient # direction

        α, MaxEvaluations =
            if armijiowolfeorexact === :armijiowolfe || f isa OracleF
                ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
            elseif armijiowolfeorexact === :exact
                ExactLineSearch(f, x, p, MaxEvaluations)
            end

        previousx = x
        x = x + α * p

        previousgradient = gradient
        gradient = f.grad(x)
        MaxEvaluations -= 1

        s = x - previousx
        y = gradient - previousgradient
        ρ = inv(dot(y, s))

        # on the first iteration rescale H = ((y's)/(y'y)) I, the usual starting
        # approximation of the inverse Hessian
        if firstEvaluation
            mul!(H, I, dot(y, s)/dot(y, y))
            firstEvaluation = false
        end

        if BFGSorDFP == :DFP
            # DFP update -------------------------------------------
            # H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)

            mul!(tmp1, H * y * y', H)
            mul!(tmp2, s, s')
            H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2
        elseif BFGSorDFP == :BFGS
            # BFGS update ------------------------------------------
            # H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'

            mul!(tmp1, H * y, s')
            mul!(tmp2, s, s')
            H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end
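
# A minimal usage sketch of the solver above on a small least-squares instance.
# The data, the problem size and the helper name are illustrative only and not part of
# the committed code; `LeastSquaresF(start, X, y)` is the constructor defined in
# OracleFunction.jl.
function _bfgs_least_squares_demo()
    X = [1.0 2.0; 3.0 4.0; 5.0 6.0; 7.0 9.0]   # tall matrix: overdetermined system
    y = [1.0, 2.0, 3.0, 4.0]
    problem = LeastSquaresF(zeros(2), X, y)

    result = BroydenFletcherGoldfarbShanno(problem; ϵ=1e-8, MaxEvaluations=1000)
    return result.x, result.eval, result.RemainingEvaluations
end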

function BroydenFletcherGoldfarbShannoDogleg(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    if isnothing(x)
        x = f.starting_point
    end

    Δ = 1 # initial size of trust region
    smallestΔ = 1e-4 # smallest size where the linear approximation is applied

    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    normgradient = normgradient0
    H = diagm(ones(length(x)))   # approximation of the inverse Hessian
    B = copy(H)                  # approximation of the Hessian
    tmp1 = similar(H)
    tmp2 = similar(H)
    tmp3 = similar(H)

    firstEvaluation = true

    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        # compute s by solving the subproblem min_s grad' * s + 0.5 s' * B * s with norm(s) ≤ Δ
        CauchyPoint = -(Δ/normgradient) * gradient
        τ = if dot(gradient, B, gradient) ≤ 0
            1
        else
            min((normgradient^3)/(Δ * dot(gradient, B, gradient)), 1)
        end

        if Δ ≤ smallestΔ || B == I
            # the Cauchy point is enough for small regions (linear approximation)
            s = τ * CauchyPoint
        else
            pB = -H * gradient
            pU = -dot(gradient, gradient)/dot(gradient, B, gradient) * gradient

            if norm(pB) ≤ Δ
                # the region is larger than the dogleg
                s = pB
            elseif Δ ≤ norm(pU)
                # the region is smaller than the first step
                s = Δ/norm(pU) * pU
            else
                # solve the quadratic system for the dogleg
                one = dot(pU, (pB - pU))
                two = dot(pB - pU, pB - pU)
                three = dot(pU, pU)

                τ = (-one + two + sqrt(one^2 - three * two + two * Δ^2))/two
                s = pU + (τ - 1) * (pB - pU)
            end
        end

        previousx = x
        x = x + s

        previousgradient = gradient
        gradient = f.grad(x)
        normgradient = norm(gradient)
        MaxEvaluations -= 1

        y = gradient - previousgradient
        ρ = inv(dot(y, s))

        ared = f.eval(previousx) - f.eval(x) # actual reduction
        pred = -(dot(previousgradient, s) + 0.5 * dot(s, B, s)) # predicted reduction of the model
        MaxEvaluations -= 2

        # expand or contract the region
        if (0.75 < ared/pred) && (0.8 * Δ < norm(s))
            Δ = 2 * Δ
        elseif (ared/pred < 0.1)
            Δ = 0.5 * Δ
        end

        # on the first iteration rescale H = ((y's)/(y'y)) I, the usual starting
        # approximation of the inverse Hessian
        if firstEvaluation
            mul!(H, I, dot(y, s)/dot(y, y))
            firstEvaluation = false
        end

        if BFGSorDFP == :DFP
            # DFP update -------------------------------------------
            # H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)

            mul!(tmp1, H * y * y', H)
            mul!(tmp2, s, s')
            H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2

            mul!(tmp1, y, s')
            tmp2 = I - ρ * tmp1
            mul!(tmp1, tmp2, B)
            mul!(tmp3, tmp1, tmp2')
            mul!(tmp2, y, y')
            B .= tmp3 .+ ρ * tmp2
        elseif BFGSorDFP == :BFGS
            # BFGS update ------------------------------------------
            # H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'

            mul!(tmp1, H * y, s')
            mul!(tmp2, s, s')
            H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')

            mul!(tmp1, B * s * s', B)
            mul!(tmp2, y, y')
            B .+= -tmp1/dot(s, B, s) .+ ρ * tmp2
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end
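
# A standalone sketch of the dogleg step selection used in the trust-region loop above, so
# it can be inspected in isolation. It follows the same path: the full quasi-Newton step pB
# if it fits, otherwise the steepest-descent leg pU truncated at the radius Δ, otherwise the
# second leg from pU towards pB. The helper name is illustrative; B is the Hessian
# approximation and H its inverse, as in the loop above.
function _dogleg_step(gradient::AbstractVector, B::AbstractMatrix, H::AbstractMatrix, Δ::Real)
    pB = -H * gradient                                                   # full quasi-Newton step
    pU = -dot(gradient, gradient)/dot(gradient, B, gradient) * gradient # steepest-descent minimizer

    if norm(pB) ≤ Δ
        return pB                      # the full step already lies inside the region
    elseif Δ ≤ norm(pU)
        return Δ/norm(pU) * pU         # even the first leg leaves the region: truncate it
    end

    # second leg: find τ ∈ [1, 2] with norm(pU + (τ - 1)(pB - pU)) = Δ
    a = dot(pU, pB - pU)
    b = dot(pB - pU, pB - pU)
    c = dot(pU, pU)
    τ = (b - a + sqrt(a^2 - b * c + b * Δ^2))/b
    return pU + (τ - 1) * (pB - pU)
end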

end # module BFGS

project/L-BFGS/LBFGS.jl (new file, 193 lines)
@@ -0,0 +1,193 @@
module LBFGS

using LinearAlgebra: norm, I, dot
using DataStructures: CircularBuffer

using ..OracleFunction

export LimitedMemoryBFGS

const armijiowolfeorexact = :exact  # line-search selector: :exact (least squares only) or :armijiowolfe
function ArmijoWolfeLineSearch(
    f::Union{LeastSquaresF, OracleF},
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer;
    αinit::Real=1,
    τ::Real=1.1,
    c1::Real=1e-4,
    c2::Real=0.9,
    ϵα::Real=1e-16,
    ϵgrad::Real=1e-12,
    safeguard::Real=0.20,
)::Tuple{Real, Integer}

    # ϕ(α) = f(x + α p) together with its directional derivative along p
    ϕ = (α) -> begin
        v = f.eval(x + α * p)
        gradient = f.grad(x + α * p)
        return (v, dot(p, gradient))
    end

    α = αinit
    local αgrad

    ϕ_0, ϕd_0 = ϕ(0)

    # expansion phase: enlarge α until the strong Wolfe conditions hold
    # or the directional derivative becomes non-negative (a minimizer is bracketed)
    while MaxEvaluations > 0
        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            return (α, MaxEvaluations)
        end

        if αgrad ≥ 0
            break
        end
        α *= τ
    end

    # interpolation (zoom) phase: shrink the bracket [αlo, αhi] with secant steps,
    # safeguarded so that each new α stays well inside the bracket
    αlo = 0
    αhi = α
    αlograd = ϕd_0
    αhigrad = αgrad

    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (αgrad > ϵgrad)
        α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
        α = max(
            αlo + (αhi - αlo) * safeguard,
            min(αhi - (αhi - αlo) * safeguard, α)
        )

        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            break
        end

        if αgrad < 0
            αlo = α
            αlograd = αgrad
        else
            αhi = α
            if αhi ≤ ϵα
                break
            end
            αhigrad = αgrad
        end
    end

    return (α, MaxEvaluations)
end

function ExactLineSearch(
    f::LeastSquaresF,
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer
)
    # closed-form minimizer along x + α p, available only for least squares (see tomography)
    MaxEvaluations -= 1
    return (tomography(f, x, p), MaxEvaluations)
end

@doc raw"""
```julia
LimitedMemoryBFGS(f::Union{LeastSquaresF, OracleF}; x::AbstractVector=f.starting_point, ϵ::Real=1e-6, m::Integer=3, MaxEvaluations::Integer=10000)
```

Computes the minimum of the input function `f` with the limited-memory BFGS (L-BFGS) method.

### Input

- `f` -- the input function to minimize.
- `x` -- the starting point; if not specified, the default one for the function `f` is used.
- `ϵ` -- the tolerance of the stopping criterion (relative reduction of the gradient norm).
- `m` -- maximum number of `(s, y)` vector pairs stored to build the approximate inverse Hessian.
- `MaxEvaluations` -- maximum number of function evaluations. Both `f.eval` and `f.grad` calls are counted.

### Output

A named tuple containing:
- `x` -- the minimum found
- `eval` -- the value of the function at the minimum
- `grad` -- the gradient of the function at the minimum
- `RemainingEvaluations` -- the number of function evaluations not used.

See also [`QRhous`](@ref).
"""
function LimitedMemoryBFGS(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    m::Integer=3,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    if isnothing(x)
        x = f.starting_point
    end

    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    H = CircularBuffer{NamedTuple}(m)     # memory of the last m (ρ, y, s) triples
    αstore = Array{eltype(x)}(undef, 0)

    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        # two-loop recursion: apply the implicit inverse Hessian to the gradient
        q = gradient
        empty!(αstore)

        for i ∈ reverse(H)
            push!(αstore, i[:ρ] * dot(i[:s], q))
            q -= αstore[end] * i[:y]
        end
        # choose H0 as a scaled identity resembling the inverse Hessian
        H0 = if isempty(H)
            I
        else
            (dot(H[end][:s], H[end][:y])/dot(H[end][:y], H[end][:y])) * I
        end
        r = H0 * q
        for i ∈ H
            βi = i[:ρ] * dot(i[:y], r)
            r += i[:s] * (pop!(αstore) - βi)
        end
        p = -r # direction

        if armijiowolfeorexact === :armijiowolfe || f isa OracleF
            α, MaxEvaluations = ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
        elseif armijiowolfeorexact === :exact
            α, MaxEvaluations = ExactLineSearch(f, x, p, MaxEvaluations)
        end

        previousx = x
        x = x + α * p

        previousgradient = gradient
        gradient = f.grad(x)
        MaxEvaluations -= 1

        s = x - previousx
        y = gradient - previousgradient

        curvature = dot(s, y)
        ρ = inv(curvature)

        if curvature ≤ 1e-16
            empty!(H) # curvature condition failed: drop the memory and restart from a gradient step
        else
            push!(H, (; :ρ => ρ, :y => y, :s => s))
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end
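
# A minimal usage sketch with a hand-written oracle. The Rosenbrock function, its gradient
# and the helper name below are illustrative and not part of the committed code;
# `OracleF(starting_point, eval, grad)` is the struct defined in OracleFunction.jl.
function _lbfgs_rosenbrock_demo()
    f(x)  = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
    df(x) = [-2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2),
             200 * (x[2] - x[1]^2)]
    oracle = OracleF([-1.2, 1.0], f, df)

    # m controls how many (s, y) pairs the two-loop recursion keeps
    result = LimitedMemoryBFGS(oracle; m=5, ϵ=1e-8, MaxEvaluations=5000)
    return result.x, result.eval
end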

end # module LBFGS

project/L-BFGS/OracleFunction.jl (new file, 85 lines)
@@ -0,0 +1,85 @@
module OracleFunction

using LinearAlgebra: norm

export OracleF, LeastSquaresF, tomography

@doc """
```julia
OracleF{T, F<:Function, G<:Function}
```

Struct that holds a generic function to minimize through a first-order oracle:
`eval` evaluates the function at a point, `grad` computes its gradient, and
`starting_point` is the point from which minimization should start.
"""
struct OracleF{T, F<:Function, G<:Function}
    starting_point::AbstractArray{T}
    eval::F
    grad::G
end

@doc """
```julia
LeastSquaresF{T, F<:Function, G<:Function}
```

Struct that holds an instance of a least-squares problem. The interface is the same as the `OracleF` struct:
`eval` evaluates the function at a point, `grad` computes its gradient, and
`starting_point` is the point from which minimization should start.
The fields `symm = X' * X` and `yX = y' * X` are precomputed so that the exact line search is cheap.

See also [`OracleF`](@ref).
"""
struct LeastSquaresF{T, F<:Function, G<:Function}
    oracle::OracleF{T, F, G}
    X::AbstractMatrix{T}
    y::AbstractArray{T}
    symm::AbstractMatrix{T}
    yX::AbstractArray{T}
end

function LeastSquaresF(starting_point::AbstractArray{T}, X::AbstractMatrix{T}, y::AbstractArray{T}) where T
    f(x) = norm(X * x - y)^2
    df(x) = 2 * X' * (X * x - y)
    symm = X' * X   # precomputed for the exact line search (tomography)
    yX = y' * X

    o = OracleF(starting_point, f, df)
    LeastSquaresF(o, X, y, symm, yX)
end

function LeastSquaresF(t::NamedTuple)
    if [:X_hat, :y_hat, :start] ⊈ keys(t)
        throw(ArgumentError("Input tuple does not contain necessary values, found: " * string(keys(t))))
    end
    starting_point, X, y = t[:start], t[:X_hat], t[:y_hat]

    LeastSquaresF(starting_point, X, y)
end
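
# A small sketch of the NamedTuple entry point above; the field values and the helper name
# are illustrative. The keys :X_hat, :y_hat and :start are the ones the constructor checks for.
function _least_squares_from_tuple_demo()
    data = (X_hat = [1.0 0.0; 0.0 2.0; 1.0 1.0],
            y_hat = [1.0, 2.0, 3.0],
            start = zeros(2))
    problem = LeastSquaresF(data)
    return problem.eval(problem.starting_point)   # eval is forwarded to the wrapped oracle
end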

@doc """
```julia
tomography(l::LeastSquaresF{T, F, G}, w::AbstractArray{T}, p::AbstractArray{T})
```

Returns the step size that minimizes the function `l` along the line through `w` with direction `p`.

See also [`LeastSquaresF`](@ref).
"""
function tomography(l::LeastSquaresF{T, F, G}, w::AbstractArray{T}, p::AbstractArray{T}) where {T, F, G}
    (l.yX * p - w' * l.symm * p) * inv(p' * l.symm * p)
end
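
# Why the closed form above is the exact minimizer: with f(w + α p) = norm(X (w + α p) - y)^2,
# setting d f / d α = 2 (X p)' (X w + α X p - y) = 0 gives
#     α = (y' X p - w' X' X p) / (p' X' X p) = (yX * p - w' * symm * p) / (p' * symm * p),
# which is exactly the expression returned by `tomography`. A tiny self-check sketch
# (helper name and data are illustrative):
function _tomography_check()
    X = [1.0 2.0; 3.0 5.0; 0.0 1.0]
    y = [1.0, 0.0, 2.0]
    l = LeastSquaresF(zeros(2), X, y)
    w = [0.5, -0.5]
    p = [1.0, 1.0]
    α = tomography(l, w, p)
    # the objective along w + α p should not decrease slightly to either side of α
    return l.eval(w + α * p) ≤ min(l.eval(w + (α + 1e-3) * p), l.eval(w + (α - 1e-3) * p))
end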

function Base.getproperty(l::LeastSquaresF{T, F, G}, name::Symbol) where {T, F, G}
    # forward eval, grad and starting_point to the wrapped oracle
    if name === :eval
        return l.oracle.eval
    elseif name === :grad
        return l.oracle.grad
    elseif name === :starting_point
        return l.oracle.starting_point
    else
        getfield(l, name)
    end
end
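
# A small finite-difference sketch showing that the `grad` oracle is consistent with `eval`
# for either struct above; the helper name, the step size and its default are illustrative.
function _gradient_check(l::Union{LeastSquaresF, OracleF}, x::AbstractVector; h::Real=1e-6)
    g = l.grad(x)
    approx = similar(g)
    for i in eachindex(x)
        e = zeros(length(x)); e[i] = 1
        approx[i] = (l.eval(x + h * e) - l.eval(x - h * e)) / (2h)   # central difference
    end
    return norm(g - approx)   # should be close to 0, up to discretization error
end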

end # module OracleFunction

project/L-BFGS/SR1.jl (new file, 119 lines)
@@ -0,0 +1,119 @@
module SR1

using LinearAlgebra: norm, I, dot, diagm, mul!

using ..OracleFunction

export SymmetricRank1


# Trust-region method with the Symmetric Rank-1 (SR1) quasi-Newton update.
# Steps are accepted only when the ratio between actual and predicted reduction exceeds η;
# the rank-1 update of B and H is skipped when its denominator is too small (threshold r).
function SymmetricRank1(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    η::T=1e-4, # acceptance threshold for the reduction ratio ared/pred
    r::T=1e-8, # skipping-rule threshold for the B and H updates
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    Δ = 1 # initial size of trust region
    smallestΔ = 1e-4 # smallest size where the linear approximation is applied

    if isnothing(x)
        x = f.starting_point
    end

    gradient = f.grad(x)
    evalx = f.eval(x)
    nextevalx = 0
    MaxEvaluations -= 2
    normgradient0 = norm(gradient)
    normgradient = normgradient0
    B = diagm(ones(length(x)))   # approximation of the Hessian
    H = diagm(ones(length(x)))   # approximation of the inverse Hessian
    tmp1 = similar(x)
    tmp2 = similar(H)

    local s

    while MaxEvaluations > 0 && normgradient > ϵ * normgradient0
        # compute s by solving the subproblem min_s grad' * s + 0.5 s' * B * s with norm(s) ≤ Δ
        CauchyPoint = -(Δ/normgradient) * gradient
        τ = if gradient' * B * gradient ≤ 0
            1
        else
            min((normgradient^3)/(Δ * dot(gradient, B, gradient)), 1)
        end

        if Δ ≤ smallestΔ || B == I
            # the Cauchy point is enough for small regions (linear approximation)
            s = τ * CauchyPoint
        else
            pB = -H * gradient
            pU = -dot(gradient, gradient)/dot(gradient, B, gradient) * gradient

            if norm(pB) ≤ Δ
                # the region is larger than the dogleg
                s = pB
            elseif Δ ≤ norm(pU)
                # the region is smaller than the first step
                s = Δ/norm(pU) * pU
            else
                # solve the quadratic system for the dogleg
                one = dot(pU, (pB - pU))
                two = dot((pB - pU), (pB - pU))
                three = dot(pU, pU)

                τ = (-one + two + sqrt(one^2 - three * two + two * Δ^2))/two
                s = pU + (τ - 1) * (pB - pU)
            end
        end

        # evaluate the tentative step and decide whether to accept it
        y = f.grad(x + s) - gradient

        nextevalx = f.eval(x + s)
        ared = evalx - nextevalx # actual reduction
        pred = -(dot(gradient, s) + 0.5 * dot(s, B, s)) # predicted reduction

        MaxEvaluations -= 2

        if ared/pred > η
            x = x + s
            evalx = nextevalx
            gradient = f.grad(x)
            normgradient = norm(gradient)
            MaxEvaluations -= 1
        end

        # expand or contract the region
        if (0.75 < ared/pred) && (0.8 * Δ < norm(s))
            Δ = 2 * Δ
        elseif (ared/pred < 0.1)
            Δ = 0.5 * Δ
        end
        if abs(s' * (y - B * s)) ≥ r * norm(s) * norm(y - B * s) # if the denominator is not too small
            # B = B + ((y - B * s)*(y - B * s)')/((y - B * s)' * s)
            mul!(tmp1, B, -s)
            tmp1 .+= y
            mul!(tmp2, tmp1, tmp1')
            tmp2 ./= dot(tmp1, s)
            B .+= tmp2

            # H = H + ((s - H * y) * (s - H * y)')/((s - H * y)' * y)
            mul!(tmp1, H, -y)
            tmp1 .+= s
            mul!(tmp2, tmp1, tmp1')
            tmp2 ./= dot(tmp1, y)
            H .+= tmp2
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end
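
# A small sketch of the rank-1 update applied in the loop above: after
# B⁺ = B + ((y - B s)(y - B s)') / ((y - B s)' s), the secant equation B⁺ s = y holds exactly.
# The helper name and the random data are illustrative only; a real update safeguards the
# denominator, as the skipping rule in the loop above does.
function _sr1_secant_check(n::Integer=4)
    B = diagm(ones(n))
    s = randn(n)
    y = randn(n)
    v = y - B * s
    Bplus = B + (v * v') / dot(v, s)
    return norm(Bplus * s - y)   # close to 0, up to floating-point error
end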

end # module SR1