From 25b09ebf925bc9a03f3f52014b7d0a8a3693b12e Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 11:32:04 -0700 Subject: [PATCH 1/5] Sketch backtracking Newton's method This code is a mess, but I'm committing it to record a working state before I start trying to clean up. --- engine-proto/gram-test/Engine.jl | 237 +++++++++++++++++- engine-proto/gram-test/circles-in-triangle.jl | 9 +- .../gram-test/overlapping-pyramids.jl | 11 +- .../gram-test/sphere-in-tetrahedron.jl | 5 +- 4 files changed, 254 insertions(+), 8 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index e6f0d97..78d1409 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -1,8 +1,10 @@ module Engine using LinearAlgebra +using GenericLinearAlgebra using SparseArrays using Random +using Optim export rand_on_shell, Q, DescentHistory, realize_gram @@ -76,8 +78,11 @@ end struct DescentHistory{T} scaled_loss::Array{T} neg_grad::Array{Matrix{T}} + base_step::Array{Matrix{T}} + hess::Array{Hermitian{T, Matrix{T}}} slope::Array{T} stepsize::Array{T} + used_grad::Array{Bool} backoff_steps::Array{Int64} last_line_L::Array{Matrix{T}} last_line_loss::Array{T} @@ -85,13 +90,16 @@ struct DescentHistory{T} function DescentHistory{T}( scaled_loss = Array{T}(undef, 0), neg_grad = Array{Matrix{T}}(undef, 0), + hess = Array{Hermitian{T, Matrix{T}}}(undef, 0), + base_step = Array{Matrix{T}}(undef, 0), slope = Array{T}(undef, 0), stepsize = Array{T}(undef, 0), + used_grad = Bool[], backoff_steps = Int64[], last_line_L = Array{Matrix{T}}(undef, 0), last_line_loss = Array{T}(undef, 0) ) where T - new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss) + new(scaled_loss, neg_grad, hess, base_step, slope, stepsize, used_grad, backoff_steps, last_line_L, last_line_loss) end end @@ -101,7 +109,7 @@ function realize_gram_gradient( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T}; scaled_tol = 1e-30, - target_improvement = 0.5, + min_efficiency = 0.5, init_stepsize = 1.0, backoff = 0.9, max_descent_steps = 600, @@ -152,7 +160,7 @@ function realize_gram_gradient( improvement = loss_last - loss push!(history.last_line_L, L) push!(history.last_line_loss, loss / scale_adjustment) - if improvement >= target_improvement * stepsize * slope + if improvement >= min_efficiency * stepsize * slope history.backoff_steps[end] = backoff_steps break end @@ -201,7 +209,7 @@ function realize_gram_newton( scale_adjustment = sqrt(T(length(constrained))) tol = scale_adjustment * scaled_tol - # use newton's method + # use Newton's method L = copy(guess) for step in 0:max_steps # evaluate the loss function @@ -229,8 +237,10 @@ function realize_gram_newton( deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim) end + hess = Hermitian(hess) + push!(history.hess, hess) - # compute the newton step + # compute the Newton step step = hess \ reshape(neg_grad, total_dim) L += rate * reshape(step, dims) end @@ -239,4 +249,221 @@ function realize_gram_newton( L, history end +LinearAlgebra.eigen!(A::Symmetric{BigFloat, Matrix{BigFloat}}; sortby::Nothing) = + eigen!(Hermitian(A)) + +function realize_gram_optim( + gram::SparseMatrixCSC{T, <:Any}, + guess::Matrix{T} +) where T <: Number + # find the dimension of the search space + dims = size(guess) + element_dim, construction_dim = dims + total_dim = element_dim * construction_dim + + # list the constrained entries of the gram matrix + J, K, _ = 
findnz(gram)
+  constrained = zip(J, K)
+
+  # scale the loss function
+  scale_adjustment = length(constrained)
+
+  function loss(L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    dot(Δ_proj, Δ_proj) / scale_adjustment
+  end
+
+  function loss_grad!(storage, L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    storage .= reshape(-4*Q*L*Δ_proj, total_dim) / scale_adjustment
+  end
+
+  function loss_hess!(storage, L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+    for (j, k) in indices
+      basis_mat = basis_matrix(T, j, k, dims)
+      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) / scale_adjustment
+      storage[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+    end
+  end
+
+  optimize(
+    loss, loss_grad!, loss_hess!,
+    reshape(guess, total_dim),
+    NewtonTrustRegion()
+  )
+end
+
+# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
+# explicit entry of `gram`. use Newton's method with backtracking, starting from `guess`
+function realize_gram(
+  gram::SparseMatrixCSC{T, <:Any},
+  guess::Matrix{T};
+  scaled_tol = 1e-30,
+  min_efficiency = 0.5,
+  init_rate = 1.0,
+  backoff = 0.9,
+  reg_scale = 1.1,
+  max_descent_steps = 200,
+  max_backoff_steps = 110
+) where T <: Number
+  # start history
+  history = DescentHistory{T}()
+
+  # find the dimension of the search space
+  dims = size(guess)
+  element_dim, construction_dim = dims
+  total_dim = element_dim * construction_dim
+
+  # list the constrained entries of the gram matrix
+  J, K, _ = findnz(gram)
+  constrained = zip(J, K)
+
+  # scale the tolerance
+  scale_adjustment = sqrt(T(length(constrained)))
+  tol = scale_adjustment * scaled_tol
+
+  # initialize variables
+  grad_rate = init_rate
+  L = copy(guess)
+
+  # use Newton's method with backtracking and gradient descent backup
+  Δ_proj = proj_diff(gram, L'*Q*L)
+  loss = dot(Δ_proj, Δ_proj)
+  for step in 1:max_descent_steps
+    # stop if the loss is tolerably low
+    if loss < tol
+      break
+    end
+
+    # find the negative gradient of the loss function
+    neg_grad = 4*Q*L*Δ_proj
+
+    # find the Hessian of the loss function
+    hess = Matrix{T}(undef, total_dim, total_dim)
+    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+    for (j, k) in indices
+      basis_mat = basis_matrix(T, j, k, dims)
+      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
+      hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+    end
+    hess = Hermitian(hess)
+    push!(history.hess, hess)
+
+    # choose a base step: the Newton step if the Hessian is non-singular, and
+    # the gradient descent direction otherwise
+    #=
+    sing = false
+    base_step = try
+      reshape(hess \ reshape(neg_grad, total_dim), dims)
+    catch ex
+      if isa(ex, SingularException)
+        sing = true
+        normalize(neg_grad)
+      else
+        throw(ex)
+      end
+    end
+    =#
+    #=
+    if !sing
+      rate = one(T)
+    end
+    =#
+    #=
+    if cond(Float64.(hess)) < 1e5
+      sing = false
+      base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
+    else
+      sing = true
+      base_step = normalize(neg_grad)
+    end
+    =#
+    #=
+    if cond(Float64.(hess)) > 1e3
+      sing = true
+      hess += big"1e-5"*I
+    else
+      sing = false
+    end
+    base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
+    =#
+    min_eigval = minimum(eigvals(hess))
+    if min_eigval < 0
+      hess -= reg_scale * 
min_eigval * I + end + push!(history.used_grad, false) + base_step = reshape(hess \ reshape(neg_grad, total_dim), dims) + push!(history.base_step, base_step) + #= + push!(history.used_grad, sing) + =# + + # store the current position, loss, and slope + L_last = L + loss_last = loss + push!(history.scaled_loss, loss / scale_adjustment) + push!(history.neg_grad, neg_grad) + push!(history.slope, norm(neg_grad)) + + # find a good step size using backtracking line search + push!(history.stepsize, 0) + push!(history.backoff_steps, max_backoff_steps) + empty!(history.last_line_L) + empty!(history.last_line_loss) + rate = one(T) + for backoff_steps in 0:max_backoff_steps + history.stepsize[end] = rate + + # try Newton step, but not on the first step. doing at least one step of + # gradient descent seems to help prevent getting stuck, for some reason? + if step > 0 + L = L_last + rate * base_step + Δ_proj = proj_diff(gram, L'*Q*L) + loss = dot(Δ_proj, Δ_proj) + improvement = loss_last - loss + push!(history.last_line_L, L) + push!(history.last_line_loss, loss / scale_adjustment) + if improvement >= min_efficiency * rate * dot(neg_grad, base_step) + history.backoff_steps[end] = backoff_steps + break + end + end + + # try gradient descent step + slope = norm(neg_grad) + dir = neg_grad / slope + L = L_last + rate * grad_rate * dir + Δ_proj = proj_diff(gram, L'*Q*L) + loss = dot(Δ_proj, Δ_proj) + improvement = loss_last - loss + if improvement >= min_efficiency * rate * grad_rate * slope + grad_rate *= rate + history.used_grad[end] = true + history.backoff_steps[end] = backoff_steps + break + end + + rate *= backoff + end + + # [DEBUG] if we've hit a wall, quit + if history.backoff_steps[end] == max_backoff_steps + return L_last, history + end + end + + # return the factorization and its history + push!(history.scaled_loss, loss / scale_adjustment) + L, history +end + end \ No newline at end of file diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index b031711..2f6caa7 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -81,16 +81,23 @@ guess = hcat( =# # complete the gram matrix using gradient descent followed by Newton's method +#= L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) +=# +L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) +#= println( "\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1), " + ", size(history_pol2.scaled_loss, 1) ) -println("Loss: ", history_pol2.scaled_loss[end], "\n") \ No newline at end of file +println("Loss: ", history_pol2.scaled_loss[end], "\n") +=# +println("\nSteps: ", size(history.scaled_loss, 1)) +println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index 51606e1..ae6fb88 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -47,17 +47,25 @@ guess = hcat( Engine.rand_on_shell(fill(BigFloat(-1), 2)) ) -# complete the gram matrix using gradient descent followed by Newton's method +# complete the gram matrix +#= L, history = 
Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L) +=# +L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) +#= println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1)) println("Loss: ", history_pol.scaled_loss[end], "\n") +=# +println("\nSteps: ", size(history.scaled_loss, 1)) +println("Loss: ", history.scaled_loss[end], "\n") # === algebraic check === +#= R, gens = polynomial_ring(AbstractAlgebra.Rationals{BigInt}(), ["x", "t₁", "t₂", "t₃"]) x = gens[1] t = gens[2:4] @@ -85,3 +93,4 @@ x_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[1], [2], [ind t₂_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[3], [2], [indep_val])) x_vals = PolynomialRoots.roots(x_constraint.coeffs) t₂_vals = PolynomialRoots.roots(t₂_constraint.coeffs) +=# \ No newline at end of file diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl index dc5588c..6b428cb 100644 --- a/engine-proto/gram-test/sphere-in-tetrahedron.jl +++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl @@ -33,8 +33,11 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[ 1 1 1 1 1 ] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5)) -# complete the gram matrix using Newton's method +# complete the gram matrix +#= L, history = Engine.realize_gram_newton(gram, guess) +=# +L, history = Engine.realize_gram(gram, guess, max_descent_steps = 50) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) From 7b3efbc385c5561ee19dfc6b3322d7e79748d65e Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 13:15:15 -0700 Subject: [PATCH 2/5] Clean up backtracking gradient descent code Drop experimental singularity handling strategies. Reduce the default tolerance to within 64-bit floating point precision. Report success. 
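For reference, the step acceptance rule this solver keeps is an Armijo-style
sufficient-decrease test: a step of size `rate` along `base_step` only counts
if it cuts the loss by at least `min_efficiency` times the decrease
`rate * dot(neg_grad, base_step)` predicted by the gradient. A standalone
sketch of the same rule, in which the names `f`, `g`, `x`, and `dir` are
illustrative rather than taken from the patch:

    using LinearAlgebra

    # accept the first step that achieves a `min_efficiency` fraction of the
    # decrease predicted by the gradient, shrinking by `backoff` on each failure
    function backtrack(f, g, x, dir; min_efficiency = 0.5, backoff = 0.9, max_backoff_steps = 110)
      slope = dot(-g(x), dir)   # predicted decrease per unit of step size
      loss_last = f(x)
      rate = 1.0
      for _ in 0:max_backoff_steps
        x_new = x + rate * dir
        if loss_last - f(x_new) >= min_efficiency * rate * slope
          return x_new, true    # sufficient decrease achieved: accept
        end
        rate *= backoff         # insufficient decrease: back off and retry
      end
      x, false                  # hit the backoff limit, mirroring `step_success`
    end

For example, `backtrack(x -> sum(abs2, x), x -> 2x, [1.0, 2.0], [-1.0, -2.0])`
accepts the unit step immediately, since it achieves exactly half the predicted
decrease.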
---
 engine-proto/gram-test/Engine.jl              | 96 ++++---------------
 engine-proto/gram-test/circles-in-triangle.jl |  9 +-
 .../gram-test/overlapping-pyramids.jl         |  9 +-
 .../gram-test/sphere-in-tetrahedron.jl        |  9 +-
 4 files changed, 41 insertions(+), 82 deletions(-)

diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl
index 78d1409..e52982d 100644
--- a/engine-proto/gram-test/Engine.jl
+++ b/engine-proto/gram-test/Engine.jl
@@ -6,7 +6,9 @@ using SparseArrays
 using Random
 using Optim
 
-export rand_on_shell, Q, DescentHistory, realize_gram
+export
+  rand_on_shell, Q, DescentHistory,
+  realize_gram_gradient, realize_gram_newton, realize_gram_optim, realize_gram
 
 # === guessing ===
 
@@ -82,7 +84,7 @@ struct DescentHistory{T}
   hess::Array{Hermitian{T, Matrix{T}}}
   slope::Array{T}
   stepsize::Array{T}
-  used_grad::Array{Bool}
+  positive::Array{Bool}
   backoff_steps::Array{Int64}
   last_line_L::Array{Matrix{T}}
   last_line_loss::Array{T}
@@ -94,12 +96,12 @@ struct DescentHistory{T}
     base_step = Array{Matrix{T}}(undef, 0),
     slope = Array{T}(undef, 0),
     stepsize = Array{T}(undef, 0),
-    used_grad = Bool[],
+    positive = Bool[],
     backoff_steps = Int64[],
     last_line_L = Array{Matrix{T}}(undef, 0),
     last_line_loss = Array{T}(undef, 0)
   ) where T
-    new(scaled_loss, neg_grad, hess, base_step, slope, stepsize, used_grad, backoff_steps, last_line_L, last_line_loss)
+    new(scaled_loss, neg_grad, base_step, hess, slope, stepsize, positive, backoff_steps, last_line_L, last_line_loss)
   end
 end
 
@@ -305,7 +307,7 @@ end
 function realize_gram(
   gram::SparseMatrixCSC{T, <:Any},
   guess::Matrix{T};
-  scaled_tol = 1e-30,
+  scaled_tol = 1e-16,
   min_efficiency = 0.5,
   init_rate = 1.0,
   backoff = 0.9,
@@ -358,54 +360,14 @@ function realize_gram(
     hess = Hermitian(hess)
     push!(history.hess, hess)
 
-    # choose a base step: the Newton step if the Hessian is non-singular, and
-    # the gradient descent direction otherwise
-    #=
-    sing = false
-    base_step = try
-      reshape(hess \ reshape(neg_grad, total_dim), dims)
-    catch ex
-      if isa(ex, SingularException)
-        sing = true
-        normalize(neg_grad)
-      else
-        throw(ex)
-      end
-    end
-    =#
-    #=
-    if !sing
-      rate = one(T)
-    end
-    =#
-    #=
-    if cond(Float64.(hess)) < 1e5
-      sing = false
-      base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
-    else
-      sing = true
-      base_step = normalize(neg_grad)
-    end
-    =#
-    #=
-    if cond(Float64.(hess)) > 1e3
-      sing = true
-      hess += big"1e-5"*I
-    else
-      sing = false
-    end
-    base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
-    =#
+    # regularize the Hessian
     min_eigval = minimum(eigvals(hess))
-    if min_eigval < 0
+    push!(history.positive, min_eigval > 0)
+    if min_eigval <= 0
       hess -= reg_scale * min_eigval * I
     end
-    push!(history.used_grad, false)
     base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
     push!(history.base_step, base_step)
-    #=
-    push!(history.used_grad, sing)
-    =#
 
     # store the current position, loss, and slope
     L_last = L
@@ -420,50 +382,32 @@ function realize_gram(
     empty!(history.last_line_L)
     empty!(history.last_line_loss)
     rate = one(T)
+    step_success = false
     for backoff_steps in 0:max_backoff_steps
      history.stepsize[end] = rate
-
-      # try Newton step, but not on the first step. doing at least one step of
-      # gradient descent seems to help prevent getting stuck, for some reason? 
- if step > 0 - L = L_last + rate * base_step - Δ_proj = proj_diff(gram, L'*Q*L) - loss = dot(Δ_proj, Δ_proj) - improvement = loss_last - loss - push!(history.last_line_L, L) - push!(history.last_line_loss, loss / scale_adjustment) - if improvement >= min_efficiency * rate * dot(neg_grad, base_step) - history.backoff_steps[end] = backoff_steps - break - end - end - - # try gradient descent step - slope = norm(neg_grad) - dir = neg_grad / slope - L = L_last + rate * grad_rate * dir + L = L_last + rate * base_step Δ_proj = proj_diff(gram, L'*Q*L) loss = dot(Δ_proj, Δ_proj) improvement = loss_last - loss - if improvement >= min_efficiency * rate * grad_rate * slope - grad_rate *= rate - history.used_grad[end] = true + push!(history.last_line_L, L) + push!(history.last_line_loss, loss / scale_adjustment) + if improvement >= min_efficiency * rate * dot(neg_grad, base_step) history.backoff_steps[end] = backoff_steps + step_success = true break end - rate *= backoff end - # [DEBUG] if we've hit a wall, quit - if history.backoff_steps[end] == max_backoff_steps - return L_last, history + # if we've hit a wall, quit + if !step_success + return L_last, false, history end end # return the factorization and its history push!(history.scaled_loss, loss / scale_adjustment) - L, history + L, true, history end end \ No newline at end of file diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index 2f6caa7..a919b8e 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) =# -L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) @@ -99,5 +99,10 @@ println( ) println("Loss: ", history_pol2.scaled_loss[end], "\n") =# -println("\nSteps: ", size(history.scaled_loss, 1)) +if success + println("\nTarget accuracy achieved!") +else + println("\nFailed to reach target accuracy") +end +println("Steps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index ae6fb88..3c16e35 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -52,7 +52,7 @@ guess = hcat( L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L) =# -L, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) @@ -60,7 +60,12 @@ display(completed_gram) println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1)) println("Loss: ", history_pol.scaled_loss[end], "\n") =# -println("\nSteps: ", size(history.scaled_loss, 1)) +if success + println("\nTarget accuracy achieved!") +else + println("\nFailed to reach target accuracy") +end +println("Steps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") # === 
algebraic check === diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl index 6b428cb..273d3ad 100644 --- a/engine-proto/gram-test/sphere-in-tetrahedron.jl +++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl @@ -37,9 +37,14 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[ #= L, history = Engine.realize_gram_newton(gram, guess) =# -L, history = Engine.realize_gram(gram, guess, max_descent_steps = 50) +L, success, history = Engine.realize_gram(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) -println("\nSteps: ", size(history.scaled_loss, 1)) +if success + println("\nTarget accuracy achieved!") +else + println("\nFailed to reach target accuracy") +end +println("Steps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file From 53d8c380479546ed2b0cf2735502ba4afb671e1c Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 14:08:57 -0700 Subject: [PATCH 3/5] Preserve explicit zeros in Gram matrix conversion In previous commits, the `circles-in-triangle` example converged much more slowly in BigFloat precision than in Float64 precision. This turned out to be a sign of a bug in the Float64 computation: converting the Gram matrix using `Float64.()` dropped the explicit zeros, removing many constraints and making the problem much easier to solve. This commit corrects the Gram matrix conversion. The Float64 search now solves the same problem as the BigFloat search, with comparable performance. --- engine-proto/gram-test/Engine.jl | 5 +++++ engine-proto/gram-test/circles-in-triangle.jl | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index e52982d..01ca9a2 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -254,6 +254,11 @@ end LinearAlgebra.eigen!(A::Symmetric{BigFloat, Matrix{BigFloat}}; sortby::Nothing) = eigen!(Hermitian(A)) +function convertnz(type, mat) + J, K, values = findnz(mat) + sparse(J, K, type.(values)) +end + function realize_gram_optim( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T} diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index a919b8e..aa24c0f 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) =# -L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(Engine.convertnz(Float64, gram), Float64.(guess)) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) From 94e0d321d5175f15e6a0a1f356ec8eff0c9b4b4a Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 14:31:30 -0700 Subject: [PATCH 4/5] Switch back to BigFloat precision in examples --- engine-proto/gram-test/Engine.jl | 2 +- engine-proto/gram-test/circles-in-triangle.jl | 2 +- engine-proto/gram-test/overlapping-pyramids.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index 01ca9a2..0a4303b 100644 --- a/engine-proto/gram-test/Engine.jl +++ 
b/engine-proto/gram-test/Engine.jl @@ -312,7 +312,7 @@ end function realize_gram( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T}; - scaled_tol = 1e-16, + scaled_tol = 1e-30, min_efficiency = 0.5, init_rate = 1.0, backoff = 0.9, diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index aa24c0f..12975c2 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -86,7 +86,7 @@ L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) =# -L, success, history = Engine.realize_gram(Engine.convertnz(Float64, gram), Float64.(guess)) +L, success, history = Engine.realize_gram(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index 3c16e35..ee4c1fc 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -52,7 +52,7 @@ guess = hcat( L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) L_pol, history_pol = Engine.realize_gram_newton(gram, L) =# -L, success, history = Engine.realize_gram(Float64.(gram), Float64.(guess)) +L, success, history = Engine.realize_gram(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) From b185fd4b83ff494e36967008ae2f207a190eb610 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Mon, 15 Jul 2024 15:52:38 -0700 Subject: [PATCH 5/5] Switch to backtracking Newton's method in Optim This performs much better than the trust region Newton's method for the actual `circles-in-triangle` problem. (The trust region method performs better for the simplified problem produced by the conversion bug.) --- engine-proto/gram-test/Engine.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index 0a4303b..10f4b02 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -303,7 +303,7 @@ function realize_gram_optim( optimize( loss, loss_grad!, loss_hess!, reshape(guess, total_dim), - NewtonTrustRegion() + Newton() ) end
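Two notes on the final state of the series.

First, `Newton()` above runs with Optim's default line search (HagerZhang, per
Optim's documented defaults at the time of writing), not a literal backtracking
rule. If the backtracking behavior named in the subject should be pinned down
explicitly, Optim accepts a line search from the LineSearches package. A
sketch, assuming the `linesearch` keyword interface:

    using Optim, LineSearches

    # hypothetical: request a backtracking line search instead of the default
    method = Newton(linesearch = LineSearches.BackTracking())
    # ...then, inside realize_gram_optim:
    # optimize(loss, loss_grad!, loss_hess!, reshape(guess, total_dim), method)

Second, the conversion pitfall fixed in patch 3 is easy to reproduce in
isolation. A toy sketch; the `nnz` values marked below reflect the behavior
the patch 3 message describes:

    using SparseArrays

    # two stored zeros, standing in for explicitly constrained Gram entries
    gram = sparse([1, 2], [2, 1], [BigFloat(0), BigFloat(0)], 2, 2)
    nnz(gram)              # 2: `sparse` keeps numerical zeros as stored entries
    nnz(Float64.(gram))    # 0 in the runs patch 3 describes: broadcast dropped them

    # `convertnz` survives because `findnz` lists stored entries, zeros included
    J, K, values = findnz(gram)
    nnz(sparse(J, K, Float64.(values)))   # 2: the constraints survive

One caveat on `convertnz` as written: `sparse(J, K, vals)` infers the matrix
dimensions from the largest stored indices, so a matrix whose last row or
column holds no stored entries would come back smaller than `mat`. Passing the
dimensions through, as in `sparse(J, K, type.(values), size(mat)...)`, would
pin them down; the Gram matrices in these tests appear to store entries in
every row and column, so the short form works here.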