Compare commits

...

5 Commits

Author SHA1 Message Date
Aaron Fenyes
b185fd4b83 Switch to backtracking Newton's method in Optim
This performs much better than the trust region Newton's method for the
actual `circles-in-triangle` problem. (The trust region method performs
better for the simplified problem produced by the conversion bug.)
2024-07-15 15:52:38 -07:00
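As a point of reference, here is a minimal, hypothetical sketch (not taken from this repository) contrasting the two Optim.jl flavors the message mentions: Newton's method with a backtracking line search versus trust-region Newton. The Rosenbrock objective and all variable names are placeholders.

using Optim, LineSearches

# stand-in objective: the Rosenbrock function
f(x) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
x0 = zeros(2)

# Newton's method with a backtracking line search
result_bt = optimize(f, x0, Newton(linesearch = BackTracking()); autodiff = :forward)

# trust-region Newton's method
result_tr = optimize(f, x0, NewtonTrustRegion(); autodiff = :forward)

println(Optim.minimizer(result_bt))
println(Optim.minimizer(result_tr))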
Aaron Fenyes
94e0d321d5 Switch back to BigFloat precision in examples 2024-07-15 14:31:30 -07:00
Aaron Fenyes
53d8c38047 Preserve explicit zeros in Gram matrix conversion
In previous commits, the `circles-in-triangle` example converged much
more slowly in BigFloat precision than in Float64 precision. This
turned out to be a sign of a bug in the Float64 computation: converting
the Gram matrix using `Float64.()` dropped the explicit zeros, removing
many constraints and making the problem much easier to solve. This
commit corrects the Gram matrix conversion. The Float64 search now
solves the same problem as the BigFloat search, with comparable
performance.
2024-07-15 14:08:57 -07:00
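A minimal sketch of the failure mode described above (the toy matrix is made up, not the `circles-in-triangle` data): broadcasting a type conversion over a sparse matrix drops explicitly stored zeros, while converting only the stored entries returned by `findnz` keeps them.

using SparseArrays

# toy Gram matrix with an explicitly stored zero constraint
gram = sparse([1, 1, 2], [1, 2, 2], BigFloat[1, 0, 1])
println(nnz(gram))                             # 3 stored entries, including the zero

# broadcasting the conversion drops the stored zero, and with it the constraint
println(nnz(Float64.(gram)))                   # 2

# converting only the stored values keeps every constrained entry
J, K, values = findnz(gram)
println(nnz(sparse(J, K, Float64.(values))))   # 3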
Aaron Fenyes
7b3efbc385 Clean up backtracking gradient descent code
Drop experimental singularity handling strategies. Reduce the default
tolerance to within 64-bit floating point precision. Report success.
2024-07-15 13:15:15 -07:00
Aaron Fenyes
25b09ebf92 Sketch backtracking Newton's method
This code is a mess, but I'm committing it to record a working state
before I start trying to clean up.
2024-07-15 11:32:04 -07:00
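As a generic illustration of the technique these commits build up (not the repository's code; every name and constant below is illustrative), one damped Newton step with Hessian regularization and a backtracking line search might look like this:

using LinearAlgebra

# one Newton step with backtracking, for a smooth objective f with gradient g and Hessian H
function backtracking_newton_step(f, g, H, x; backoff = 0.9, min_efficiency = 0.5, max_backoff_steps = 110)
    neg_grad = -g(x)
    hess = Symmetric(H(x))
    # shift the spectrum if the Hessian isn't positive definite
    min_eigval = minimum(eigvals(hess))
    if min_eigval <= 0
        hess += (1.1 * abs(min_eigval) + eps()) * I
    end
    base_step = hess \ neg_grad
    loss_last = f(x)
    rate = 1.0
    for _ in 0:max_backoff_steps
        x_new = x + rate * base_step
        # accept once the actual improvement is a decent fraction of the
        # first-order prediction along the step
        if loss_last - f(x_new) >= min_efficiency * rate * dot(neg_grad, base_step)
            return x_new, true
        end
        rate *= backoff
    end
    x, false   # no acceptable step found
end

The `realize_gram` routine added to the engine below follows the same pattern, with the loss, gradient, and Hessian specialized to the Gram matrix problem.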
4 changed files with 220 additions and 10 deletions

View File

@@ -1,10 +1,14 @@
module Engine
using LinearAlgebra
using GenericLinearAlgebra
using SparseArrays
using Random
using Optim
export rand_on_shell, Q, DescentHistory, realize_gram
export
rand_on_shell, Q, DescentHistory,
realize_gram_gradient, realize_gram_newton, realize_gram_optim, realize_gram
# === guessing ===
@@ -76,8 +80,11 @@ end
struct DescentHistory{T}
scaled_loss::Array{T}
neg_grad::Array{Matrix{T}}
base_step::Array{Matrix{T}}
hess::Array{Hermitian{T, Matrix{T}}}
slope::Array{T}
stepsize::Array{T}
positive::Array{Bool}
backoff_steps::Array{Int64}
last_line_L::Array{Matrix{T}}
last_line_loss::Array{T}
@@ -85,13 +92,16 @@ struct DescentHistory{T}
function DescentHistory{T}(
scaled_loss = Array{T}(undef, 0),
neg_grad = Array{Matrix{T}}(undef, 0),
base_step = Array{Matrix{T}}(undef, 0),
hess = Array{Hermitian{T, Matrix{T}}}(undef, 0),
slope = Array{T}(undef, 0),
stepsize = Array{T}(undef, 0),
positive = Bool[],
backoff_steps = Int64[],
last_line_L = Array{Matrix{T}}(undef, 0),
last_line_loss = Array{T}(undef, 0)
) where T
new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss)
new(scaled_loss, neg_grad, base_step, hess, slope, stepsize, positive, backoff_steps, last_line_L, last_line_loss)
end
end
@@ -101,7 +111,7 @@ function realize_gram_gradient(
gram::SparseMatrixCSC{T, <:Any},
guess::Matrix{T};
scaled_tol = 1e-30,
target_improvement = 0.5,
min_efficiency = 0.5,
init_stepsize = 1.0,
backoff = 0.9,
max_descent_steps = 600,
@@ -152,7 +162,7 @@ function realize_gram_gradient(
improvement = loss_last - loss
push!(history.last_line_L, L)
push!(history.last_line_loss, loss / scale_adjustment)
if improvement >= target_improvement * stepsize * slope
if improvement >= min_efficiency * stepsize * slope
history.backoff_steps[end] = backoff_steps
break
end
@@ -201,7 +211,7 @@ function realize_gram_newton(
scale_adjustment = sqrt(T(length(constrained)))
tol = scale_adjustment * scaled_tol
# use newton's method
# use Newton's method
L = copy(guess)
for step in 0:max_steps
# evaluate the loss function
@@ -229,8 +239,10 @@ function realize_gram_newton(
deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
end
hess = Hermitian(hess)
push!(history.hess, hess)
# compute the newton step
# compute the Newton step
step = hess \ reshape(neg_grad, total_dim)
L += rate * reshape(step, dims)
end
@@ -239,4 +251,168 @@ function realize_gram_newton(
L, history
end
# make `eigen!` work on symmetric BigFloat matrices by dispatching to the
# Hermitian method, which GenericLinearAlgebra provides
LinearAlgebra.eigen!(A::Symmetric{BigFloat, Matrix{BigFloat}}; sortby::Nothing) =
eigen!(Hermitian(A))
# convert the stored entries of a sparse matrix to the given type, keeping
# explicitly stored zeros
function convertnz(type, mat)
J, K, values = findnz(mat)
sparse(J, K, type.(values))
end
function realize_gram_optim(
gram::SparseMatrixCSC{T, <:Any},
guess::Matrix{T}
) where T <: Number
# find the dimension of the search space
dims = size(guess)
element_dim, construction_dim = dims
total_dim = element_dim * construction_dim
# list the constrained entries of the gram matrix
J, K, _ = findnz(gram)
constrained = zip(J, K)
# scale the loss function
scale_adjustment = length(constrained)
function loss(L_vec)
L = reshape(L_vec, dims)
Δ_proj = proj_diff(gram, L'*Q*L)
dot(Δ_proj, Δ_proj) / scale_adjustment
end
function loss_grad!(storage, L_vec)
L = reshape(L_vec, dims)
Δ_proj = proj_diff(gram, L'*Q*L)
storage .= reshape(-4*Q*L*Δ_proj, total_dim) / scale_adjustment
end
function loss_hess!(storage, L_vec)
L = reshape(L_vec, dims)
Δ_proj = proj_diff(gram, L'*Q*L)
indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
for (j, k) in indices
basis_mat = basis_matrix(T, j, k, dims)
neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) / scale_adjustment
storage[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
end
end
optimize(
loss, loss_grad!, loss_hess!,
reshape(guess, total_dim),
Newton()
)
end
# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
# explicit entry of `gram`. use Newton's method with backtracking, starting from `guess`
function realize_gram(
gram::SparseMatrixCSC{T, <:Any},
guess::Matrix{T};
scaled_tol = 1e-30,
min_efficiency = 0.5,
init_rate = 1.0,
backoff = 0.9,
reg_scale = 1.1,
max_descent_steps = 200,
max_backoff_steps = 110
) where T <: Number
# start history
history = DescentHistory{T}()
# find the dimension of the search space
dims = size(guess)
element_dim, construction_dim = dims
total_dim = element_dim * construction_dim
# list the constrained entries of the gram matrix
J, K, _ = findnz(gram)
constrained = zip(J, K)
# scale the tolerance
scale_adjustment = sqrt(T(length(constrained)))
tol = scale_adjustment * scaled_tol
# initialize variables
grad_rate = init_rate
L = copy(guess)
# use Newton's method with a backtracking line search, regularizing the Hessian
# when it isn't positive definite
Δ_proj = proj_diff(gram, L'*Q*L)
loss = dot(Δ_proj, Δ_proj)
for step in 1:max_descent_steps
# stop if the loss is tolerably low
if loss < tol
break
end
# find the negative gradient of loss function
neg_grad = 4*Q*L*Δ_proj
# find the negative Hessian of the loss function
hess = Matrix{T}(undef, total_dim, total_dim)
indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
for (j, k) in indices
basis_mat = basis_matrix(T, j, k, dims)
neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
end
hess = Hermitian(hess)
push!(history.hess, hess)
# regularize the Hessian
min_eigval = minimum(eigvals(hess))
push!(history.positive, min_eigval > 0)
if min_eigval <= 0
hess -= reg_scale * min_eigval * I
end
base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
push!(history.base_step, base_step)
# store the current position, loss, and slope
L_last = L
loss_last = loss
push!(history.scaled_loss, loss / scale_adjustment)
push!(history.neg_grad, neg_grad)
push!(history.slope, norm(neg_grad))
# find a good step size using backtracking line search
push!(history.stepsize, 0)
push!(history.backoff_steps, max_backoff_steps)
empty!(history.last_line_L)
empty!(history.last_line_loss)
rate = one(T)
step_success = false
for backoff_steps in 0:max_backoff_steps
history.stepsize[end] = rate
L = L_last + rate * base_step
Δ_proj = proj_diff(gram, L'*Q*L)
loss = dot(Δ_proj, Δ_proj)
improvement = loss_last - loss
push!(history.last_line_L, L)
push!(history.last_line_loss, loss / scale_adjustment)
if improvement >= min_efficiency * rate * dot(neg_grad, base_step)
history.backoff_steps[end] = backoff_steps
step_success = true
break
end
rate *= backoff
end
# if we've hit a wall, quit
if !step_success
return L_last, false, history
end
end
# return the factorization and its history
push!(history.scaled_loss, loss / scale_adjustment)
L, true, history
end
end

View File

@@ -81,16 +81,28 @@ guess = hcat(
=#
# complete the gram matrix using gradient descent followed by Newton's method
#=
L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9)
L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol)
=#
L, success, history = Engine.realize_gram(gram, guess)
completed_gram = L'*Engine.Q*L
println("Completed Gram matrix:\n")
display(completed_gram)
#=
println(
"\nSteps: ",
size(history.scaled_loss, 1),
" + ", size(history_pol.scaled_loss, 1),
" + ", size(history_pol2.scaled_loss, 1)
)
println("Loss: ", history_pol2.scaled_loss[end], "\n")
println("Loss: ", history_pol2.scaled_loss[end], "\n")
=#
if success
println("\nTarget accuracy achieved!")
else
println("\nFailed to reach target accuracy")
end
println("Steps: ", size(history.scaled_loss, 1))
println("Loss: ", history.scaled_loss[end], "\n")

View File

@@ -47,17 +47,30 @@ guess = hcat(
Engine.rand_on_shell(fill(BigFloat(-1), 2))
)
# complete the gram matrix using gradient descent followed by Newton's method
# complete the gram matrix
#=
L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
L_pol, history_pol = Engine.realize_gram_newton(gram, L)
=#
L, success, history = Engine.realize_gram(gram, guess)
completed_gram = L'*Engine.Q*L
println("Completed Gram matrix:\n")
display(completed_gram)
#=
println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1))
println("Loss: ", history_pol.scaled_loss[end], "\n")
=#
if success
println("\nTarget accuracy achieved!")
else
println("\nFailed to reach target accuracy")
end
println("Steps: ", size(history.scaled_loss, 1))
println("Loss: ", history.scaled_loss[end], "\n")
# === algebraic check ===
#=
R, gens = polynomial_ring(AbstractAlgebra.Rationals{BigInt}(), ["x", "t₁", "t₂", "t₃"])
x = gens[1]
t = gens[2:4]
@@ -85,3 +98,4 @@ x_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[1], [2], [ind
t₂_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[3], [2], [indep_val]))
x_vals = PolynomialRoots.roots(x_constraint.coeffs)
t₂_vals = PolynomialRoots.roots(t₂_constraint.coeffs)
=#

View File

@@ -33,10 +33,18 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[
1 1 1 1 1
] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5))
# complete the gram matrix using Newton's method
# complete the gram matrix
#=
L, history = Engine.realize_gram_newton(gram, guess)
=#
L, success, history = Engine.realize_gram(gram, guess)
completed_gram = L'*Engine.Q*L
println("Completed Gram matrix:\n")
display(completed_gram)
println("\nSteps: ", size(history.scaled_loss, 1))
if success
println("\nTarget accuracy achieved!")
else
println("\nFailed to reach target accuracy")
end
println("Steps: ", size(history.scaled_loss, 1))
println("Loss: ", history.scaled_loss[end], "\n")