From 25b09ebf925bc9a03f3f52014b7d0a8a3693b12e Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 11:32:04 -0700 Subject: [PATCH 1/5] Sketch backtracking Newton's method This code is a mess, but I'm committing it to record a working state before I start trying to clean up. --- engine-proto/gram-test/Engine.jl | 237 +++++++++++++++++- engine-proto/gram-test/circles-in-triangle.jl | 9 +- .../gram-test/overlapping-pyramids.jl | 11 +- .../gram-test/sphere-in-tetrahedron.jl | 5 +- 4 files changed, 254 insertions(+), 8 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index e6f0d97..78d1409 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -1,8 +1,10 @@ module Engine using LinearAlgebra +using GenericLinearAlgebra using SparseArrays using Random +using Optim export rand_on_shell, Q, DescentHistory, realize_gram @@ -76,8 +78,11 @@ end struct DescentHistory{T} scaled_loss::Array{T} neg_grad::Array{Matrix{T}} + base_step::Array{Matrix{T}} + hess::Array{Hermitian{T, Matrix{T}}} slope::Array{T} stepsize::Array{T} + used_grad::Array{Bool} backoff_steps::Array{Int64} last_line_L::Array{Matrix{T}} last_line_loss::Array{T} @@ -85,13 +90,16 @@ struct DescentHistory{T} function DescentHistory{T}( scaled_loss = Array{T}(undef, 0), neg_grad = Array{Matrix{T}}(undef, 0), + hess = Array{Hermitian{T, Matrix{T}}}(undef, 0), + base_step = Array{Matrix{T}}(undef, 0), slope = Array{T}(undef, 0), stepsize = Array{T}(undef, 0), + used_grad = Bool[], backoff_steps = Int64[], last_line_L = Array{Matrix{T}}(undef, 0), last_line_loss = Array{T}(undef, 0) ) where T - new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss) + new(scaled_loss, neg_grad, hess, base_step, slope, stepsize, used_grad, backoff_steps, last_line_L, last_line_loss) end end @@ -101,7 +109,7 @@ function realize_gram_gradient( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T}; scaled_tol = 1e-30, - target_improvement = 0.5, + min_efficiency = 0.5, init_stepsize = 1.0, backoff = 0.9, max_descent_steps = 600, @@ -152,7 +160,7 @@ function realize_gram_gradient( improvement = loss_last - loss push!(history.last_line_L, L) push!(history.last_line_loss, loss / scale_adjustment) - if improvement >= target_improvement * stepsize * slope + if improvement >= min_efficiency * stepsize * slope history.backoff_steps[end] = backoff_steps break end @@ -201,7 +209,7 @@ function realize_gram_newton( scale_adjustment = sqrt(T(length(constrained))) tol = scale_adjustment * scaled_tol - # use newton's method + # use Newton's method L = copy(guess) for step in 0:max_steps # evaluate the loss function @@ -229,8 +237,10 @@ function realize_gram_newton( deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim) end + hess = Hermitian(hess) + push!(history.hess, hess) - # compute the newton step + # compute the Newton step step = hess \ reshape(neg_grad, total_dim) L += rate * reshape(step, dims) end @@ -239,4 +249,221 @@ function realize_gram_newton( L, history end +LinearAlgebra.eigen!(A::Symmetric{BigFloat, Matrix{BigFloat}}; sortby::Nothing) = + eigen!(Hermitian(A)) + +function realize_gram_optim( + gram::SparseMatrixCSC{T, <:Any}, + guess::Matrix{T} +) where T <: Number + # find the dimension of the search space + dims = size(guess) + element_dim, construction_dim = dims + total_dim = element_dim * construction_dim + + # list the constrained entries of the gram matrix + J, K, _ = 
findnz(gram)
+  constrained = zip(J, K)
+
+  # scale the loss function
+  scale_adjustment = length(constrained)
+
+  function loss(L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    dot(Δ_proj, Δ_proj) / scale_adjustment
+  end
+
+  function loss_grad!(storage, L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    storage .= reshape(-4*Q*L*Δ_proj, total_dim) / scale_adjustment
+  end
+
+  function loss_hess!(storage, L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+    for (j, k) in indices
+      basis_mat = basis_matrix(T, j, k, dims)
+      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) / scale_adjustment
+      storage[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+    end
+  end
+
+  optimize(
+    loss, loss_grad!, loss_hess!,
+    reshape(guess, total_dim),
+    NewtonTrustRegion()
+  )
+end
+
+# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
+# explicit entry of `gram`. use Newton's method with backtracking, starting from `guess`
+function realize_gram(
+  gram::SparseMatrixCSC{T, <:Any},
+  guess::Matrix{T};
+  scaled_tol = 1e-30,
+  min_efficiency = 0.5,
+  init_rate = 1.0,
+  backoff = 0.9,
+  reg_scale = 1.1,
+  max_descent_steps = 200,
+  max_backoff_steps = 110
+) where T <: Number
+  # start history
+  history = DescentHistory{T}()
+
+  # find the dimension of the search space
+  dims = size(guess)
+  element_dim, construction_dim = dims
+  total_dim = element_dim * construction_dim
+
+  # list the constrained entries of the gram matrix
+  J, K, _ = findnz(gram)
+  constrained = zip(J, K)
+
+  # scale the tolerance
+  scale_adjustment = sqrt(T(length(constrained)))
+  tol = scale_adjustment * scaled_tol
+
+  # initialize variables
+  grad_rate = init_rate
+  L = copy(guess)
+
+  # use Newton's method with backtracking and gradient descent backup
+  Δ_proj = proj_diff(gram, L'*Q*L)
+  loss = dot(Δ_proj, Δ_proj)
+  for step in 1:max_descent_steps
+    # stop if the loss is tolerably low
+    if loss < tol
+      break
+    end
+
+    # find the negative gradient of the loss function
+    neg_grad = 4*Q*L*Δ_proj
+
+    # find the Hessian of the loss function
+    hess = Matrix{T}(undef, total_dim, total_dim)
+    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+    for (j, k) in indices
+      basis_mat = basis_matrix(T, j, k, dims)
+      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
+      hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+    end
+    hess = Hermitian(hess)
+    push!(history.hess, hess)
+
+    # choose a base step: the Newton step if the Hessian is non-singular, and
+    # the gradient descent direction otherwise
+    #=
+    sing = false
+    base_step = try
+      reshape(hess \ reshape(neg_grad, total_dim), dims)
+    catch ex
+      if isa(ex, SingularException)
+        sing = true
+        normalize(neg_grad)
+      else
+        throw(ex)
+      end
+    end
+    =#
+    #=
+    if !sing
+      rate = one(T)
+    end
+    =#
+    #=
+    if cond(Float64.(hess)) < 1e5
+      sing = false
+      base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
+    else
+      sing = true
+      base_step = normalize(neg_grad)
+    end
+    =#
+    #=
+    if cond(Float64.(hess)) > 1e3
+      sing = true
+      hess += big"1e-5"*I
+    else
+      sing = false
+    end
+    base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
+    =#
+    min_eigval = minimum(eigvals(hess))
+    if min_eigval < 0
+      hess -= reg_scale * 
min_eigval * I + end + push!(history.used_grad, false) + base_step = reshape(hess \ reshape(neg_grad, total_dim), dims) + push!(history.base_step, base_step) + #= + push!(history.used_grad, sing) + =# + + # store the current position, loss, and slope + L_last = L + loss_last = loss + push!(history.scaled_loss, loss / scale_adjustment) + push!(history.neg_grad, neg_grad) + push!(history.slope, norm(neg_grad)) + + # find a good step size using backtracking line search + push!(history.stepsize, 0) + push!(history.backoff_steps, max_backoff_steps) + empty!(history.last_line_L) + empty!(history.last_line_loss) + rate = one(T) + for backoff_steps in 0:max_backoff_steps + history.stepsize[end] = rate + + # try Newton step, but not on the first step. doing at least one step of + # gradient descent seems to help prevent getting stuck, for some reason? + if step > 0 + L = L_last + rate * base_step + Δ_proj = proj_diff(gram, L'*Q*L) + loss = dot(Δ_proj, Δ_proj) + improvement = loss_last - loss + push!(history.last_line_L, L) + push!(history.last_line_loss, loss / scale_adjustment) + if improvement >= min_efficiency * rate * dot(neg_grad, base_step) + history.backoff_steps[end] = backoff_steps + break + end + end + + # try gradient descent step + slope = norm(neg_grad) + dir = neg_grad / slope + L = L_last + rate * grad_rate * dir + Δ_proj = proj_diff(gram, L'*Q*L) + loss = dot(Δ_proj, Δ_proj) + improvement = loss_last - loss + if improvement >= min_efficiency * rate * grad_rate * slope + grad_rate *= rate + history.used_grad[end] = true + history.backoff_steps[end] = backoff_steps + break + end + + rate *= backoff + end + + # [DEBUG] if we've hit a wall, quit + if history.backoff_steps[end] == max_backoff_steps + return L_last, history + end + end + + # return the factorization and its history + push!(history.scaled_loss, loss / scale_adjustment) + L, history +end + end \ No newline at end of file diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index b031711..2f6caa7 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -81,16 +81,23 @@ guess = hcat( =# # complete the gram matrix using gradient descent followed by Newton's method +#= L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) +=# +L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) +#= println( "\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1), " + ", size(history_pol2.scaled_loss, 1) ) -println("Loss: ", history_pol2.scaled_loss[end], "\n") \ No newline at end of file +println("Loss: ", history_pol2.scaled_loss[end], "\n") +=# +println("\nSteps: ", size(history.scaled_loss, 1)) +println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index 51606e1..ae6fb88 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -47,17 +47,25 @@ guess = hcat( Engine.rand_on_shell(fill(BigFloat(-1), 2)) ) -# complete the gram matrix using gradient descent followed by Newton's method +# complete the gram matrix +#= L, history = 
Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L) +=# +L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) +#= println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1)) println("Loss: ", history_pol.scaled_loss[end], "\n") +=# +println("\nSteps: ", size(history.scaled_loss, 1)) +println("Loss: ", history.scaled_loss[end], "\n") # === algebraic check === +#= R, gens = polynomial_ring(AbstractAlgebra.Rationals{BigInt}(), ["x", "t₁", "t₂", "t₃"]) x = gens[1] t = gens[2:4] @@ -85,3 +93,4 @@ x_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[1], [2], [ind t₂_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[3], [2], [indep_val])) x_vals = PolynomialRoots.roots(x_constraint.coeffs) t₂_vals = PolynomialRoots.roots(t₂_constraint.coeffs) +=# \ No newline at end of file diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl index dc5588c..6b428cb 100644 --- a/engine-proto/gram-test/sphere-in-tetrahedron.jl +++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl @@ -33,8 +33,11 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[ 1 1 1 1 1 ] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5)) -# complete the gram matrix using Newton's method +# complete the gram matrix +#= L, history = Engine.realize_gram_newton(gram, guess) +=# +L, history = Engine.realize_gram(gram, guess, max_descent_steps = 50) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) From 7b3efbc385c5561ee19dfc6b3322d7e79748d65e Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 13:15:15 -0700 Subject: [PATCH 2/5] Clean up backtracking gradient descent code Drop experimental singularity handling strategies. Reduce the default tolerance to within 64-bit floating point precision. Report success. 
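For reference, the step acceptance rule this solver keeps is an Armijo-style
sufficient-decrease test: a step of size `rate` along `base_step` only counts
if it cuts the loss by at least `min_efficiency` times the decrease
`rate * dot(neg_grad, base_step)` predicted by the gradient. A standalone
sketch of the same rule, in which the names `f`, `g`, `x`, and `dir` are
illustrative rather than taken from the patch:

    using LinearAlgebra

    # accept the first step that achieves a `min_efficiency` fraction of the
    # decrease predicted by the gradient, shrinking by `backoff` on each failure
    function backtrack(f, g, x, dir; min_efficiency = 0.5, backoff = 0.9, max_backoff_steps = 110)
      slope = dot(-g(x), dir)   # predicted decrease per unit of step size
      loss_last = f(x)
      rate = 1.0
      for _ in 0:max_backoff_steps
        x_new = x + rate * dir
        if loss_last - f(x_new) >= min_efficiency * rate * slope
          return x_new, true    # sufficient decrease achieved: accept
        end
        rate *= backoff         # insufficient decrease: back off and retry
      end
      x, false                  # hit the backoff limit, mirroring `step_success`
    end

For example, `backtrack(x -> sum(abs2, x), x -> 2x, [1.0, 2.0], [-1.0, -2.0])`
accepts the unit step immediately, since it achieves exactly half the predicted
decrease.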
---
 engine-proto/gram-test/Engine.jl              | 96 ++++---------------
 engine-proto/gram-test/circles-in-triangle.jl |  9 +-
 .../gram-test/overlapping-pyramids.jl         |  9 +-
 .../gram-test/sphere-in-tetrahedron.jl        |  9 +-
 4 files changed, 41 insertions(+), 82 deletions(-)

diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl
index 78d1409..e52982d 100644
--- a/engine-proto/gram-test/Engine.jl
+++ b/engine-proto/gram-test/Engine.jl
@@ -6,7 +6,9 @@ using SparseArrays
 using Random
 using Optim
 
-export rand_on_shell, Q, DescentHistory, realize_gram
+export
+  rand_on_shell, Q, DescentHistory,
+  realize_gram_gradient, realize_gram_newton, realize_gram_optim, realize_gram
 
 # === guessing ===
 
@@ -82,7 +84,7 @@ struct DescentHistory{T}
   hess::Array{Hermitian{T, Matrix{T}}}
   slope::Array{T}
   stepsize::Array{T}
-  used_grad::Array{Bool}
+  positive::Array{Bool}
   backoff_steps::Array{Int64}
   last_line_L::Array{Matrix{T}}
   last_line_loss::Array{T}
@@ -94,12 +96,12 @@ struct DescentHistory{T}
     base_step = Array{Matrix{T}}(undef, 0),
     slope = Array{T}(undef, 0),
     stepsize = Array{T}(undef, 0),
-    used_grad = Bool[],
+    positive = Bool[],
     backoff_steps = Int64[],
     last_line_L = Array{Matrix{T}}(undef, 0),
     last_line_loss = Array{T}(undef, 0)
   ) where T
-    new(scaled_loss, neg_grad, hess, base_step, slope, stepsize, used_grad, backoff_steps, last_line_L, last_line_loss)
+    new(scaled_loss, neg_grad, base_step, hess, slope, stepsize, positive, backoff_steps, last_line_L, last_line_loss)
   end
 end
 
@@ -305,7 +307,7 @@ end
 function realize_gram(
   gram::SparseMatrixCSC{T, <:Any},
   guess::Matrix{T};
-  scaled_tol = 1e-30,
+  scaled_tol = 1e-16,
   min_efficiency = 0.5,
   init_rate = 1.0,
   backoff = 0.9,
@@ -358,54 +360,14 @@ function realize_gram(
     hess = Hermitian(hess)
     push!(history.hess, hess)
 
-    # choose a base step: the Newton step if the Hessian is non-singular, and
-    # the gradient descent direction otherwise
-    #=
-    sing = false
-    base_step = try
-      reshape(hess \ reshape(neg_grad, total_dim), dims)
-    catch ex
-      if isa(ex, SingularException)
-        sing = true
-        normalize(neg_grad)
-      else
-        throw(ex)
-      end
-    end
-    =#
-    #=
-    if !sing
-      rate = one(T)
-    end
-    =#
-    #=
-    if cond(Float64.(hess)) < 1e5
-      sing = false
-      base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
-    else
-      sing = true
-      base_step = normalize(neg_grad)
-    end
-    =#
-    #=
-    if cond(Float64.(hess)) > 1e3
-      sing = true
-      hess += big"1e-5"*I
-    else
-      sing = false
-    end
-    base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
-    =#
+    # regularize the Hessian
     min_eigval = minimum(eigvals(hess))
-    if min_eigval < 0
+    push!(history.positive, min_eigval > 0)
+    if min_eigval <= 0
       hess -= reg_scale * min_eigval * I
     end
-    push!(history.used_grad, false)
     base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
     push!(history.base_step, base_step)
-    #=
-    push!(history.used_grad, sing)
-    =#
 
     # store the current position, loss, and slope
     L_last = L
@@ -420,50 +382,32 @@ function realize_gram(
     empty!(history.last_line_L)
     empty!(history.last_line_loss)
     rate = one(T)
+    step_success = false
     for backoff_steps in 0:max_backoff_steps
      history.stepsize[end] = rate
-
-      # try Newton step, but not on the first step. doing at least one step of
-      # gradient descent seems to help prevent getting stuck, for some reason? 
- if step > 0 - L = L_last + rate * base_step - Δ_proj = proj_diff(gram, L'*Q*L) - loss = dot(Δ_proj, Δ_proj) - improvement = loss_last - loss - push!(history.last_line_L, L) - push!(history.last_line_loss, loss / scale_adjustment) - if improvement >= min_efficiency * rate * dot(neg_grad, base_step) - history.backoff_steps[end] = backoff_steps - break - end - end - - # try gradient descent step - slope = norm(neg_grad) - dir = neg_grad / slope - L = L_last + rate * grad_rate * dir + L = L_last + rate * base_step Δ_proj = proj_diff(gram, L'*Q*L) loss = dot(Δ_proj, Δ_proj) improvement = loss_last - loss - if improvement >= min_efficiency * rate * grad_rate * slope - grad_rate *= rate - history.used_grad[end] = true + push!(history.last_line_L, L) + push!(history.last_line_loss, loss / scale_adjustment) + if improvement >= min_efficiency * rate * dot(neg_grad, base_step) history.backoff_steps[end] = backoff_steps + step_success = true break end - rate *= backoff end - # [DEBUG] if we've hit a wall, quit - if history.backoff_steps[end] == max_backoff_steps - return L_last, history + # if we've hit a wall, quit + if !step_success + return L_last, false, history end end # return the factorization and its history push!(history.scaled_loss, loss / scale_adjustment) - L, history + L, true, history end end \ No newline at end of file diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index 2f6caa7..a919b8e 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) =# -L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) @@ -99,5 +99,10 @@ println( ) println("Loss: ", history_pol2.scaled_loss[end], "\n") =# -println("\nSteps: ", size(history.scaled_loss, 1)) +if success + println("\nTarget accuracy achieved!") +else + println("\nFailed to reach target accuracy") +end +println("Steps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index ae6fb88..3c16e35 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -52,7 +52,7 @@ guess = hcat( L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L) =# -L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) @@ -60,7 +60,12 @@ display(completed_gram) println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1)) println("Loss: ", history_pol.scaled_loss[end], "\n") =# -println("\nSteps: ", size(history.scaled_loss, 1)) +if success + println("\nTarget accuracy achieved!") +else + println("\nFailed to reach target accuracy") +end +println("Steps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") # === 
algebraic check === diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl index 6b428cb..273d3ad 100644 --- a/engine-proto/gram-test/sphere-in-tetrahedron.jl +++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl @@ -37,9 +37,14 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[ #= L, history = Engine.realize_gram_newton(gram, guess) =# -L, history = Engine.realize_gram(gram, guess, max_descent_steps = 50) +L, success, history = Engine.realize_gram(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) -println("\nSteps: ", size(history.scaled_loss, 1)) +if success + println("\nTarget accuracy achieved!") +else + println("\nFailed to reach target accuracy") +end +println("Steps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file From 53d8c380479546ed2b0cf2735502ba4afb671e1c Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 14:08:57 -0700 Subject: [PATCH 3/5] Preserve explicit zeros in Gram matrix conversion In previous commits, the `circles-in-triangle` example converged much more slowly in BigFloat precision than in Float64 precision. This turned out to be a sign of a bug in the Float64 computation: converting the Gram matrix using `Float64.()` dropped the explicit zeros, removing many constraints and making the problem much easier to solve. This commit corrects the Gram matrix conversion. The Float64 search now solves the same problem as the BigFloat search, with comparable performance. --- engine-proto/gram-test/Engine.jl | 5 +++++ engine-proto/gram-test/circles-in-triangle.jl | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index e52982d..01ca9a2 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -254,6 +254,11 @@ end LinearAlgebra.eigen!(A::Symmetric{BigFloat, Matrix{BigFloat}}; sortby::Nothing) = eigen!(Hermitian(A)) +function convertnz(type, mat) + J, K, values = findnz(mat) + sparse(J, K, type.(values)) +end + function realize_gram_optim( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T} diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index a919b8e..aa24c0f 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) =# -L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(Engine.convertnz(Float64, gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) From 94e0d321d5175f15e6a0a1f356ec8eff0c9b4b4a Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 14:31:30 -0700 Subject: [PATCH 4/5] Switch back to BigFloat precision in examples --- engine-proto/gram-test/Engine.jl | 2 +- engine-proto/gram-test/circles-in-triangle.jl | 2 +- engine-proto/gram-test/overlapping-pyramids.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index 01ca9a2..0a4303b 100644 --- a/engine-proto/gram-test/Engine.jl +++ 
b/engine-proto/gram-test/Engine.jl @@ -312,7 +312,7 @@ end function realize_gram( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T}; - scaled_tol = 1e-16, + scaled_tol = 1e-30, min_efficiency = 0.5, init_rate = 1.0, backoff = 0.9, diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index aa24c0f..12975c2 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) =# -L, success, history = Engine.realize_gram(Engine.convertnz(Float64, gram), Float64.(guess)) +L, success, history = Engine.realize_gram(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index 3c16e35..ee4c1fc 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -52,7 +52,7 @@ guess = hcat( L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L) =# -L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) From b185fd4b83ff494e36967008ae2f207a190eb610 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 15:52:38 -0700 Subject: [PATCH 5/5] Switch to backtracking Newton's method in Optim This performs much better than the trust region Newton's method for the actual `circles-in-triangle` problem. (The trust region method performs better for the simplified problem produced by the conversion bug.) --- engine-proto/gram-test/Engine.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index 0a4303b..10f4b02 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -303,7 +303,7 @@ function realize_gram_optim( optimize( loss, loss_grad!, loss_hess!, reshape(guess, total_dim), - NewtonTrustRegion() + Newton() ) end
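Two notes on the final state of the series.

First, `Newton()` above runs with Optim's default line search (HagerZhang, per
Optim's documented defaults at the time of writing), not a literal backtracking
rule. If the backtracking behavior named in the subject should be pinned down
explicitly, Optim accepts a line search from the LineSearches package. A
sketch, assuming the `linesearch` keyword interface:

    using Optim, LineSearches

    # hypothetical: request a backtracking line search instead of the default
    method = Newton(linesearch = LineSearches.BackTracking())
    # ...then, inside realize_gram_optim:
    # optimize(loss, loss_grad!, loss_hess!, reshape(guess, total_dim), method)

Second, the conversion pitfall fixed in patch 3 is easy to reproduce in
isolation. A toy sketch; the `nnz` values marked below reflect the behavior
the patch 3 message describes:

    using SparseArrays

    # two stored zeros, standing in for explicitly constrained Gram entries
    gram = sparse([1, 2], [2, 1], [BigFloat(0), BigFloat(0)], 2, 2)
    nnz(gram)              # 2: `sparse` keeps numerical zeros as stored entries
    nnz(Float64.(gram))    # 0 in the runs patch 3 describes: broadcast dropped them

    # `convertnz` survives because `findnz` lists stored entries, zeros included
    J, K, values = findnz(gram)
    nnz(sparse(J, K, Float64.(values)))   # 2: the constraints survive

One caveat on `convertnz` as written: `sparse(J, K, vals)` infers the matrix
dimensions from the largest stored indices, so a matrix whose last row or
column holds no stored entries would come back smaller than `mat`. Passing the
dimensions through, as in `sparse(J, K, type.(values), size(mat)...)`, would
pin them down; the Gram matrices in these tests appear to store entries in
every row and column, so the short form works here.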