Clean up backtracking gradient descent code
Drop experimental singularity handling strategies. Reduce the default tolerance to within 64-bit floating point precision. Report success.
This commit is contained in:
parent
25b09ebf92
commit
7b3efbc385
@ -6,7 +6,9 @@ using SparseArrays
|
|||||||
using Random
|
using Random
|
||||||
using Optim
|
using Optim
|
||||||
|
|
||||||
export rand_on_shell, Q, DescentHistory, realize_gram
|
export
|
||||||
|
rand_on_shell, Q, DescentHistory,
|
||||||
|
realize_gram_gradient, realize_gram_newton, realize_gram_optim, realize_gram
|
||||||
|
|
||||||
# === guessing ===
|
# === guessing ===
|
||||||
|
|
||||||
@ -82,7 +84,7 @@ struct DescentHistory{T}
|
|||||||
hess::Array{Hermitian{T, Matrix{T}}}
|
hess::Array{Hermitian{T, Matrix{T}}}
|
||||||
slope::Array{T}
|
slope::Array{T}
|
||||||
stepsize::Array{T}
|
stepsize::Array{T}
|
||||||
used_grad::Array{Bool}
|
positive::Array{Bool}
|
||||||
backoff_steps::Array{Int64}
|
backoff_steps::Array{Int64}
|
||||||
last_line_L::Array{Matrix{T}}
|
last_line_L::Array{Matrix{T}}
|
||||||
last_line_loss::Array{T}
|
last_line_loss::Array{T}
|
||||||
@ -94,12 +96,12 @@ struct DescentHistory{T}
|
|||||||
base_step = Array{Matrix{T}}(undef, 0),
|
base_step = Array{Matrix{T}}(undef, 0),
|
||||||
slope = Array{T}(undef, 0),
|
slope = Array{T}(undef, 0),
|
||||||
stepsize = Array{T}(undef, 0),
|
stepsize = Array{T}(undef, 0),
|
||||||
used_grad = Bool[],
|
positive = Bool[],
|
||||||
backoff_steps = Int64[],
|
backoff_steps = Int64[],
|
||||||
last_line_L = Array{Matrix{T}}(undef, 0),
|
last_line_L = Array{Matrix{T}}(undef, 0),
|
||||||
last_line_loss = Array{T}(undef, 0)
|
last_line_loss = Array{T}(undef, 0)
|
||||||
) where T
|
) where T
|
||||||
new(scaled_loss, neg_grad, hess, base_step, slope, stepsize, used_grad, backoff_steps, last_line_L, last_line_loss)
|
new(scaled_loss, neg_grad, hess, base_step, slope, stepsize, positive, backoff_steps, last_line_L, last_line_loss)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -305,7 +307,7 @@ end
|
|||||||
function realize_gram(
|
function realize_gram(
|
||||||
gram::SparseMatrixCSC{T, <:Any},
|
gram::SparseMatrixCSC{T, <:Any},
|
||||||
guess::Matrix{T};
|
guess::Matrix{T};
|
||||||
scaled_tol = 1e-30,
|
scaled_tol = 1e-16,
|
||||||
min_efficiency = 0.5,
|
min_efficiency = 0.5,
|
||||||
init_rate = 1.0,
|
init_rate = 1.0,
|
||||||
backoff = 0.9,
|
backoff = 0.9,
|
||||||
@ -358,54 +360,14 @@ function realize_gram(
|
|||||||
hess = Hermitian(hess)
|
hess = Hermitian(hess)
|
||||||
push!(history.hess, hess)
|
push!(history.hess, hess)
|
||||||
|
|
||||||
# choose a base step: the Newton step if the Hessian is non-singular, and
|
# regularize the Hessian
|
||||||
# the gradient descent direction otherwise
|
|
||||||
#=
|
|
||||||
sing = false
|
|
||||||
base_step = try
|
|
||||||
reshape(hess \ reshape(neg_grad, total_dim), dims)
|
|
||||||
catch ex
|
|
||||||
if isa(ex, SingularException)
|
|
||||||
sing = true
|
|
||||||
normalize(neg_grad)
|
|
||||||
else
|
|
||||||
throw(ex)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
=#
|
|
||||||
#=
|
|
||||||
if !sing
|
|
||||||
rate = one(T)
|
|
||||||
end
|
|
||||||
=#
|
|
||||||
#=
|
|
||||||
if cond(Float64.(hess)) < 1e5
|
|
||||||
sing = false
|
|
||||||
base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
|
|
||||||
else
|
|
||||||
sing = true
|
|
||||||
base_step = normalize(neg_grad)
|
|
||||||
end
|
|
||||||
=#
|
|
||||||
#=
|
|
||||||
if cond(Float64.(hess)) > 1e3
|
|
||||||
sing = true
|
|
||||||
hess += big"1e-5"*I
|
|
||||||
else
|
|
||||||
sing = false
|
|
||||||
end
|
|
||||||
base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
|
|
||||||
=#
|
|
||||||
min_eigval = minimum(eigvals(hess))
|
min_eigval = minimum(eigvals(hess))
|
||||||
if min_eigval < 0
|
push!(history.positive, min_eigval > 0)
|
||||||
|
if min_eigval <= 0
|
||||||
hess -= reg_scale * min_eigval * I
|
hess -= reg_scale * min_eigval * I
|
||||||
end
|
end
|
||||||
push!(history.used_grad, false)
|
|
||||||
base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
|
base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
|
||||||
push!(history.base_step, base_step)
|
push!(history.base_step, base_step)
|
||||||
#=
|
|
||||||
push!(history.used_grad, sing)
|
|
||||||
=#
|
|
||||||
|
|
||||||
# store the current position, loss, and slope
|
# store the current position, loss, and slope
|
||||||
L_last = L
|
L_last = L
|
||||||
@ -420,50 +382,32 @@ function realize_gram(
|
|||||||
empty!(history.last_line_L)
|
empty!(history.last_line_L)
|
||||||
empty!(history.last_line_loss)
|
empty!(history.last_line_loss)
|
||||||
rate = one(T)
|
rate = one(T)
|
||||||
|
step_success = false
|
||||||
for backoff_steps in 0:max_backoff_steps
|
for backoff_steps in 0:max_backoff_steps
|
||||||
history.stepsize[end] = rate
|
history.stepsize[end] = rate
|
||||||
|
L = L_last + rate * base_step
|
||||||
# try Newton step, but not on the first step. doing at least one step of
|
|
||||||
# gradient descent seems to help prevent getting stuck, for some reason?
|
|
||||||
if step > 0
|
|
||||||
L = L_last + rate * base_step
|
|
||||||
Δ_proj = proj_diff(gram, L'*Q*L)
|
|
||||||
loss = dot(Δ_proj, Δ_proj)
|
|
||||||
improvement = loss_last - loss
|
|
||||||
push!(history.last_line_L, L)
|
|
||||||
push!(history.last_line_loss, loss / scale_adjustment)
|
|
||||||
if improvement >= min_efficiency * rate * dot(neg_grad, base_step)
|
|
||||||
history.backoff_steps[end] = backoff_steps
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# try gradient descent step
|
|
||||||
slope = norm(neg_grad)
|
|
||||||
dir = neg_grad / slope
|
|
||||||
L = L_last + rate * grad_rate * dir
|
|
||||||
Δ_proj = proj_diff(gram, L'*Q*L)
|
Δ_proj = proj_diff(gram, L'*Q*L)
|
||||||
loss = dot(Δ_proj, Δ_proj)
|
loss = dot(Δ_proj, Δ_proj)
|
||||||
improvement = loss_last - loss
|
improvement = loss_last - loss
|
||||||
if improvement >= min_efficiency * rate * grad_rate * slope
|
push!(history.last_line_L, L)
|
||||||
grad_rate *= rate
|
push!(history.last_line_loss, loss / scale_adjustment)
|
||||||
history.used_grad[end] = true
|
if improvement >= min_efficiency * rate * dot(neg_grad, base_step)
|
||||||
history.backoff_steps[end] = backoff_steps
|
history.backoff_steps[end] = backoff_steps
|
||||||
|
step_success = true
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
|
|
||||||
rate *= backoff
|
rate *= backoff
|
||||||
end
|
end
|
||||||
|
|
||||||
# [DEBUG] if we've hit a wall, quit
|
# if we've hit a wall, quit
|
||||||
if history.backoff_steps[end] == max_backoff_steps
|
if !step_success
|
||||||
return L_last, history
|
return L_last, false, history
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# return the factorization and its history
|
# return the factorization and its history
|
||||||
push!(history.scaled_loss, loss / scale_adjustment)
|
push!(history.scaled_loss, loss / scale_adjustment)
|
||||||
L, history
|
L, true, history
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
|
|||||||
L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9)
|
L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9)
|
||||||
L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol)
|
L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol)
|
||||||
=#
|
=#
|
||||||
L, history = Engine.realize_gram(Float64.(gram), Float64.(guess))
|
L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess))
|
||||||
completed_gram = L'*Engine.Q*L
|
completed_gram = L'*Engine.Q*L
|
||||||
println("Completed Gram matrix:\n")
|
println("Completed Gram matrix:\n")
|
||||||
display(completed_gram)
|
display(completed_gram)
|
||||||
@ -99,5 +99,10 @@ println(
|
|||||||
)
|
)
|
||||||
println("Loss: ", history_pol2.scaled_loss[end], "\n")
|
println("Loss: ", history_pol2.scaled_loss[end], "\n")
|
||||||
=#
|
=#
|
||||||
println("\nSteps: ", size(history.scaled_loss, 1))
|
if success
|
||||||
|
println("\nTarget accuracy achieved!")
|
||||||
|
else
|
||||||
|
println("\nFailed to reach target accuracy")
|
||||||
|
end
|
||||||
|
println("Steps: ", size(history.scaled_loss, 1))
|
||||||
println("Loss: ", history.scaled_loss[end], "\n")
|
println("Loss: ", history.scaled_loss[end], "\n")
|
@ -52,7 +52,7 @@ guess = hcat(
|
|||||||
L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
|
L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
|
||||||
L_pol, history_pol = Engine.realize_gram_newton(gram, L)
|
L_pol, history_pol = Engine.realize_gram_newton(gram, L)
|
||||||
=#
|
=#
|
||||||
L, history = Engine.realize_gram(Float64.(gram), Float64.(guess))
|
L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess))
|
||||||
completed_gram = L'*Engine.Q*L
|
completed_gram = L'*Engine.Q*L
|
||||||
println("Completed Gram matrix:\n")
|
println("Completed Gram matrix:\n")
|
||||||
display(completed_gram)
|
display(completed_gram)
|
||||||
@ -60,7 +60,12 @@ display(completed_gram)
|
|||||||
println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1))
|
println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1))
|
||||||
println("Loss: ", history_pol.scaled_loss[end], "\n")
|
println("Loss: ", history_pol.scaled_loss[end], "\n")
|
||||||
=#
|
=#
|
||||||
println("\nSteps: ", size(history.scaled_loss, 1))
|
if success
|
||||||
|
println("\nTarget accuracy achieved!")
|
||||||
|
else
|
||||||
|
println("\nFailed to reach target accuracy")
|
||||||
|
end
|
||||||
|
println("Steps: ", size(history.scaled_loss, 1))
|
||||||
println("Loss: ", history.scaled_loss[end], "\n")
|
println("Loss: ", history.scaled_loss[end], "\n")
|
||||||
|
|
||||||
# === algebraic check ===
|
# === algebraic check ===
|
||||||
|
@ -37,9 +37,14 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[
|
|||||||
#=
|
#=
|
||||||
L, history = Engine.realize_gram_newton(gram, guess)
|
L, history = Engine.realize_gram_newton(gram, guess)
|
||||||
=#
|
=#
|
||||||
L, history = Engine.realize_gram(gram, guess, max_descent_steps = 50)
|
L, success, history = Engine.realize_gram(gram, guess)
|
||||||
completed_gram = L'*Engine.Q*L
|
completed_gram = L'*Engine.Q*L
|
||||||
println("Completed Gram matrix:\n")
|
println("Completed Gram matrix:\n")
|
||||||
display(completed_gram)
|
display(completed_gram)
|
||||||
println("\nSteps: ", size(history.scaled_loss, 1))
|
if success
|
||||||
|
println("\nTarget accuracy achieved!")
|
||||||
|
else
|
||||||
|
println("\nFailed to reach target accuracy")
|
||||||
|
end
|
||||||
|
println("Steps: ", size(history.scaled_loss, 1))
|
||||||
println("Loss: ", history.scaled_loss[end], "\n")
|
println("Loss: ", history.scaled_loss[end], "\n")
|
Loading…
Reference in New Issue
Block a user