2023-11-17 12:42:12 +01:00
|
|
|
|
using LinearAlgebra: I, dot, eigvals
|
|
|
|
|
|
|
|
|
|
|
|
"""
    TestFunctions()

Produce a vector of functions useful to test unconstrained optimization
algorithms.

Each entry `f` of the returned vector is called as `f(x)`, where `x` is either
a real vector holding the input point of `f`, or `nothing` (an "informative"
call).

# Informative call: `f(nothing)`

Returns a tuple whose first two entries are

- `v` (real scalar): the best known lower bound on the unconstrained global
  optimum of `f`; it can be `-Inf` if either `f` is not bounded below, or no
  such information is available;
- `x0`: the standard starting point of an optimization algorithm.

Functions 1-8 additionally return an all-zero matrix as a third entry;
functions 9-11 return only `(v, x0)`.

# Evaluation call: `f(x)`

Returns a tuple `(v, g, H)` where

- `v` is `f(x)`;
- `g` is the gradient of `f` at `x`, or a subgradient if `f` is not
  differentiable at `x`;
- `H` is the Hessian of `f` at `x`.

Function 9 (lasso) is nondifferentiable and returns only `(v, g)`.

# Current list of functions

1.  Standard 2x2 PSD quadratic function with nicely conditioned Hessian.
2.  Standard 2x2 PSD quadratic function with less nicely conditioned Hessian.
3.  Standard 2x2 PSD quadratic function with Hessian having one zero
    eigenvalue.
4.  Standard 2x2 quadratic function with indefinite Hessian (one positive and
    one negative eigenvalue).
5.  Standard 2x2 quadratic function with "very elongated" Hessian (a very
    small positive minimum eigenvalue, the other much larger).
6.  The 2-dim Rosenbrock function.
7.  The "six-hump camel" function.
8.  The Ackley function.
9.  A 2-dim nondifferentiable function coming from Lasso regularization.
10. A 76-dim (nonconvex, differentiable) function coming from a fitting
    problem with ( X , y ) both [ 288 , 1 ] (i.e., a fitting with only one
    feature) using a "rough" NN with 1 input, 1 output, 3 hidden layers of 5
    nodes each, and tanh activation function.
11. Same as 10 plus a ridge stabilising term `lambda * || x ||^2 / 2`. This
    list originally documented `lambda = 1e-4`, while `myNN2` sets
    `lambda = 1e+2` -- TODO confirm which value is intended.
"""
function TestFunctions()
    # =======================================
    # Author: Antonio Frangioni
    # Date: 08-11-18
    # Version 1.01
    # Copyright Antonio Frangioni
    # =======================================

    # The quadratic test cases all share the linear term q = [10; 5]; the
    # matrices are built inside each closure so every call gets fresh arrays.
    return Function[
        x -> genericquad([6 -2; -2 6], [10; 5], x),        # eigenvalues: 4, 8
        x -> genericquad([5 -3; -3 5], [10; 5], x),        # eigenvalues: 2, 8
        x -> genericquad([4 -4; -4 4], [10; 5], x),        # eigenvalues: 0, 8
        x -> genericquad([3 -5; -5 3], [10; 5], x),        # eigenvalues: -2, 8
        x -> genericquad([101 -99; -99 101], [10; 5], x),  # eigenvalues: 2, 200
        # HBG: alpha = 0.0165 , beta = 0.678
        rosenbrock,
        sixhumpcamel,
        ackley,
        lasso,
        myNN,
        myNN2,
    ]
end
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
2023-11-25 20:16:56 +01:00
|
|
|
|
"""
    genericquad(Q, q, x)

Generic quadratic function `f(x) = x' * Q * x / 2 + q' * x`.

# Arguments
- `Q`: square matrix of the quadratic term.
- `q`: vector of the linear term.
- `x`: either `nothing` (informative call) or the point at which to evaluate
  `f`, as a vector or a single-column matrix with `size(Q, 2)` rows.

# Returns
- `x === nothing`: `(v, x0, H0)` where `v` is the optimal value of `f` when the
  smallest eigenvalue of `Q` is larger than `1e-14` and `-Inf` otherwise, `x0`
  is an all-zero starting point shaped like `q`, and `H0` is an all-zero array
  shaped like `Q`.
- otherwise: `(f(x), Q * x + q, Q)`, i.e. value, gradient and Hessian.

# Throws
- `ArgumentError` if `x` does not have `size(Q, 2)` rows and one column.
"""
function genericquad(Q, q, x::Union{Nothing, AbstractVecOrMat})
    if isnothing(x)  # informative call
        if minimum(eigvals(Q)) > 1e-14
            # Q is positive definite: the unique minimum solves Q * x = -q
            xStar = Q \ -q
            v = 0.5 * dot(xStar, Q * xStar) + dot(q, xStar)
        else
            # no finite lower bound is claimed when Q is not positive definite
            v = -Inf
        end
        return (v, zeros(size(q)), zeros(size(Q)))
    end

    # x must be a column conformable with Q (not hard-wired to dimension 2)
    if size(x, 1) ≠ size(Q, 2) || size(x, 2) ≠ 1
        throw(ArgumentError("genericquad: x is of wrong size"))
    end

    v = 0.5 * dot(x, Q * x) + dot(q, x)  # f(x)
    return (v, Q * x + q, Q)
end # genericquad
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
|
|
|
|
"""
    rosenbrock(x)

Rosenbrock's valley-shaped function

    f(x, y) = 100 * ( y - x^2 )^2 + ( x - 1 )^2

with `x[1]`, `x[2]` playing the role of `x`, `y`.

Called with `nothing` (informative call) it returns `(0, [-1, 1], [0 0; 0 0])`:
the lower bound 0, the starting point `[-1, 1]` and a zero matrix. Otherwise it
returns `(v, g, H)`: value, gradient (a `Float64` vector of length 2) and
Hessian (a 2x2 `Float64` matrix) at `x`.
"""
function rosenbrock(x::Union{Nothing, AbstractVecOrMat})
    # informative call
    isnothing(x) && return (0, [-1, 1], [0 0; 0 0])

    a = x[1]
    b = x[2]

    v = 100 * (b - a^2)^2 + (a - 1)^2  # f(x)

    # first derivatives:
    #   df/dx = 2 * x - 400 * x * ( - x^2 + y ) - 2
    #   df/dy = - 200 * x^2 + 200 * y
    dfdx = 2 * a - 400 * a * (b - a^2) - 2
    dfdy = -200 * a^2 + 200 * b
    g = Float64[dfdx, dfdy]

    # second derivatives:
    #   d2f/dx2  = 1200 * x^2 - 400 * y + 2
    #   d2f/dy2  = 200
    #   d2f/dxdy = -400 * x
    hxx = 1200 * a^2 - 400 * b + 2
    hxy = -400 * a
    H = Float64[hxx hxy; hxy 200]

    return (v, g, H)
end # rosenbrock
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
|
|
|
|
"""
    sixhumpcamel(x)

Six-hump-camel function

    f(x, y) = ( 4 - 2.1 * x^2 + x^4 / 3 ) * x^2 + x * y + 4 * ( y^2 - 1 ) * y^2

with `x[1]`, `x[2]` playing the role of `x`, `y`.

Called with `nothing` (informative call) it returns
`(-1.03162845349, [1, 1], [0 0; 0 0])`: lower bound, starting point and a zero
matrix. Otherwise it returns `(v, g, H)`: value, gradient (a `Float64` vector
of length 2) and Hessian (a 2x2 `Float64` matrix) at `x`.
"""
function sixhumpcamel(x::Union{Nothing, AbstractVecOrMat})
    # informative call
    isnothing(x) && return (-1.03162845349, [1, 1], [0 0; 0 0])

    a = x[1]
    b = x[2]

    v = (4 - 2.1 * a^2 + a^4 / 3) * a^2 + a * b + 4 * (b^2 - 1) * b^2  # f(x)

    # first derivatives:
    #   df/dx = 2 * x^5 - ( 42 * x^3 ) / 5 + 8 * x + y
    #   df/dy = 16 * y^3 - 8 * y + x
    dfdx = 2 * a^5 - (42 * a^3) / 5 + 8 * a + b
    dfdy = 16 * b^3 - 8 * b + a
    g = Float64[dfdx, dfdy]

    # second derivatives:
    #   d2f/dx2  = 10 * x^4 - ( 126 * x^2 ) / 5 + 8
    #   d2f/dy2  = 48 * y^2 - 8
    #   d2f/dxdy = 1
    hxx = 10 * a^4 - (126 * a^2) / 5 + 8
    hyy = 48 * b^2 - 8
    H = Float64[hxx 1; 1 hyy]

    return (v, g, H)
end # sixhumpcamel
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
|
|
|
|
"""
    ackley(xx)

2-dim Ackley function

    f(x, y) = - 20 * exp( - 0.2 * sqrt( ( x^2 + y^2 ) / 2 ) )
              - exp( ( cos( 2 * pi * x ) + cos( 2 * pi * y ) ) / 2 )
              + 20 + exp(1)

with `xx[1]`, `xx[2]` playing the role of `x`, `y`.

# Returns
- `xx === nothing` (informative call): `(0, [2, 2], [0 0; 0 0])`, i.e. the
  lower bound 0, the starting point `[2, 2]` and a zero matrix.
- otherwise: `(v, g, H)` with the value, the gradient (a `Float64` vector of
  length 2) and the Hessian (a 2x2 `Float64` matrix) at `xx`, all computed
  from the closed-form derivatives reported in the comments below.

At `xx = [0, 0]` the derivative formulas divide by `sqrt((x^2 + y^2) / 2) = 0`,
so `g` and `H` contain `NaN` there.

# Throws
- `ErrorException` if `xx` does not have 2 rows and 1 column.
"""
function ackley(xx::Union{Nothing, AbstractVecOrMat})
    # informative call
    isnothing(xx) && return (0, [2, 2], [0 0; 0 0])

    if size(xx, 1) ≠ 2 || size(xx, 2) ≠ 1
        error("ackley: x is of wrong size")
    end

    # diff( f , x )
    # pi*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x) +
    # (2*x*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2)
    #
    # diff( f , y )
    # pi*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*y) +
    # (2*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2)
    #
    # diff( f , x , 2 )
    # (2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2) +
    # 2*pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*cos(2*pi*x) -
    # (x^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
    # (x^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
    # pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x)^2
    #
    # diff( f , y , 2 )
    # (2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(1/2) +
    # 2*pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*cos(2*pi*y) -
    # (y^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
    # (y^2*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
    # pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*y)^2
    #
    # diff( f , x , y)
    # - (x*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(5*(x^2/2 + y^2/2)) -
    # (x*y*exp(-(x^2/2 + y^2/2)^(1/2)/5))/(x^2/2 + y^2/2)^(3/2) -
    # pi^2*exp(cos(2*pi*x)/2 + cos(2*pi*y)/2)*sin(2*pi*x)*sin(2*pi*y)

    x = xx[1]
    y = xx[2]

    # sub-expressions shared by value, gradient and Hessian
    sqn2 = (x^2 + y^2) / 2
    cosx = cos(2 * π * x)
    cosy = cos(2 * π * y)
    comp1 = exp(-(sqn2)^(1/2) / 5)
    comp2 = exp((cosx + cosy) / 2)

    v = -20 * comp1 - comp2 + 20 + ℯ  # f(x)

    sinx = sin(2 * π * x)
    siny = sin(2 * π * y)

    g = zeros(2)  # \nabla f(x)
    g[1] = π * comp2 * sinx + 2 * x * comp1 / (sqn2)^(1/2)
    g[2] = π * comp2 * siny + 2 * y * comp1 / (sqn2)^(1/2)

    H = zeros(2, 2)  # \nabla^2 f(x)
    H[1, 1] = (2 * comp1) / (sqn2)^(1/2) + 2 * π^2 * comp2 * cosx -
              (x^2 * comp1) / (5 * sqn2) - (x^2 * comp1) / (sqn2)^(3/2) -
              π^2 * comp2 * sinx^2
    H[2, 2] = (2 * comp1) / (sqn2)^(1/2) + 2 * π^2 * comp2 * cosy -
              (y^2 * comp1) / (5 * sqn2) - (y^2 * comp1) / (sqn2)^(3/2) -
              π^2 * comp2 * siny^2
    H[1, 2] = -(x * y * comp1) / (5 * (sqn2)) -
              (x * y * comp1) / (sqn2)^(3/2) -
              π^2 * comp2 * sinx * siny
    H[2, 1] = H[1, 2]

    return (v, g, H)
end # ackley
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
|
|
|
|
"""
    lasso(x)

Nondifferentiable lasso example

    f( x , y ) = || 3 * x + 2 * y - 2 ||_2^2 + 10 ( | x | + | y | )

with `x[1]`, `x[2]` playing the role of `x`, `y`.

# Returns
- `x === nothing` (informative call): `(v, [0, 0])`, where
  `v = ( 2 - 1/3 )^2 + 10/9` is the value at the optimal solution
  `[ 1/9 , 0 ]` and `[0, 0]` is the starting point.
- otherwise: `(v, g)` with `v = f(x)` and `g` a subgradient of `f` at `x`
  (a `Float64` vector of length 2). The absolute-value terms contribute
  `10 * sign(x[i])`, which is 0 where `x[i] == 0`.

No Hessian is returned, in either case.
"""
function lasso(x::Union{Nothing, AbstractVecOrMat})
    if isnothing(x)  # informative call
        v = (2 - 1/3)^2 + 10/9  # optimal solution [ 1/9 , 0 ]
        return (v, [0, 0])
    end

    # entries are read with square brackets: x( 1 ) would try to call the array
    v = (3 * x[1] + 2 * x[2] - 2)^2 +
        10 * (abs(x[1]) + abs(x[2]))  # f(x)

    g = zeros(2)
    g[1] = 18 * x[1] + 12 * x[2] - 12 + 10 * sign(x[1])
    g[2] = 12 * x[1] + 8 * x[2] - 8 + 10 * sign(x[2])

    return (v, g)
end # lasso
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
|
|
|
|
include("./testNN_Jac.jl")
|
|
|
|
|
|
include("./testNN_Hes.jl")
|
|
|
|
|
|
include("testNN.jl")
|
|
|
|
|
|
|
|
|
|
|
|
"""
    myNN(x)

1 x 5 x 5 x 5 x 1 = 76 w NN for solving a 1D fitting problem.

# Returns
- `x === nothing` (informative call): `(v, x0)` with `v = -Inf` (optimal value
  unknown) and `x0` a random `76 x 1` starting point with entries uniformly
  drawn in `[-1, 1]`.
- otherwise: `(testNN(x), testNN_Jac(x)', testNN_Hes(x)')`, i.e. the value
  returned by `testNN` together with the transposed outputs of `testNN_Jac`
  and `testNN_Hes`, used as gradient and Hessian.
"""
function myNN(x::Union{Nothing, AbstractVecOrMat})
    if isnothing(x)  # informative call
        v = -Inf  # optimal value unknown (although 0 may perhaps be good)
        # Xavier initialization: uniform random in [ - A , A ] with
        # A = \sqrt{6} / \sqrt{n + m}, with n and m the input and output
        # layers. in our case n + m is either 6 or 10, so we take A = 1
        #
        # note that the starting point is random, so each run will be different
        # (unless an explicit starting point is provided); if stability is
        # needed, the seed of the generator has to be set externally
        #
        # broadcasting (.* and .-) is required: Array - scalar is not defined
        return (v, 2 .* rand(76, 1) .- 1)
    end

    v = testNN(x)  # f(x)
    return (v, testNN_Jac(x)', testNN_Hes(x)')
end # myNN
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
|
|
|
|
|
|
|
|
|
"""
    myNN2(x)

1 x 5 x 5 x 5 x 1 = 76 w NN for solving a 1D fitting problem, plus the ridge
stabilization term `lambda * || x ||^2 / 2` with `lambda = 1e+2`.

# Returns
- `x === nothing` (informative call): `(v, x0)` with `v = -Inf` (optimal value
  unknown) and `x0` a random `76 x 1` starting point with entries uniformly
  drawn in `[-1, 1]`.
- otherwise: `(v, g, H)` with `v = testNN(x) + lambda * || x ||^2 / 2`,
  `g = testNN_Jac(x)' + lambda * x` and `H = testNN_Hes(x)' + lambda * I`.
"""
function myNN2(x::Union{Nothing, AbstractVecOrMat})
    # NOTE(review): the function list in TestFunctions mentions a 1e-4 ridge
    # term, while the value used here is 1e+2 -- confirm which is intended
    lambda = 1e+2

    if isnothing(x)  # informative call
        v = -Inf  # optimal value unknown (although 0 may perhaps be good)
        # Xavier initialization: uniform random in [ - A , A ] with
        # A = \sqrt{6} / \sqrt{n + m}, with n and m the input and output
        # layers. in our case n + m is either 6 or 10, so we take A = 1
        #
        # note that the starting point is random, so each run will be different
        # (unless an explicit starting point is provided); if stability is
        # needed, the seed of the generator has to be set externally
        #
        # broadcasting (.* and .-) is required: Array - scalar is not defined
        return (v, 2 .* rand(76, 1) .- 1)
    end

    # sum(abs2, x) is || x ||^2 as a scalar both for a vector and for an
    # n x 1 matrix (x' * x would be a 1 x 1 matrix in the latter case)
    v = testNN(x) + lambda * sum(abs2, x) / 2  # f(x)
    return (v, testNN_Jac(x)' + lambda * x, testNN_Hes(x)' + lambda * I)
end # myNN2
|
|
|
|
|
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|