Compare commits

..

6 Commits

Author SHA1 Message Date
Aaron Fenyes
3910b9f740 Use Newton's method for polishing 2024-07-11 13:43:52 -07:00
Aaron Fenyes
d538cbf716 Correct improvement threshold by using unit step
Our formula for the improvement threshold works when the step size is
an absolute distance. However, in commit `4d5ea06`, the step size was
measured relative to the current gradient instead. This commit scales
the base step to unit length, so now the step size really is an absolute
distance.
2024-07-10 23:31:44 -07:00
Aaron Fenyes
4d5ea062a3 Record gradient and last line search in history 2024-07-09 15:00:13 -07:00
Aaron Fenyes
5652719642 Require triangle sides to be planar 2024-07-09 14:10:23 -07:00
Aaron Fenyes
f84d475580 Visualize neighborhoods of global minima 2024-07-09 14:01:30 -07:00
Aaron Fenyes
77bc124170 Change loss function to match gradient 2024-07-09 14:00:24 -07:00
5 changed files with 241 additions and 30 deletions

View File

@@ -51,11 +51,21 @@ end
 # the Lorentz form
 Q = diagm([1, 1, 1, 1, -1])
 
+# project a matrix onto the subspace of matrices whose entries vanish away
+# from the given indices
+function proj_to_entries(mat, indices)
+    result = zeros(size(mat))
+    for (j, k) in indices
+        result[j, k] = mat[j, k]
+    end
+    result
+end
+
 # the difference between the matrices `target` and `attempt`, projected onto the
 # subspace of matrices whose entries vanish at each empty index of `target`
 function proj_diff(target::SparseMatrixCSC{T, <:Any}, attempt::Matrix{T}) where T
     J, K, values = findnz(target)
-    result = zeros(size(target)...)
+    result = zeros(size(target))
     for (j, k, val) in zip(J, K, values)
         result[j, k] = val - attempt[j, k]
     end
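
For intuition: `proj_to_entries` and `proj_diff` both project onto matrices supported on a fixed set of entries, so the residual is measured only where `gram` actually constrains `L'QL`. A standalone check on a hypothetical 2×2 example (the module name `Engine` is taken from the scripts below):

include("Engine.jl")
using SparseArrays

# `target` constrains only the diagonal, so the off-diagonal entries of
# `attempt` are ignored by the projection
target = sparse([1, 2], [1, 2], [1.0, -1.0])
attempt = [0.5 9.0; 9.0 -0.25]
Engine.proj_diff(target, attempt)   # expected: [0.5 0.0; 0.0 -0.75]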
@@ -65,23 +75,29 @@ end
 # a type for keeping track of gradient descent history
 struct DescentHistory{T}
     scaled_loss::Array{T}
+    neg_grad::Array{Matrix{T}}
     slope::Array{T}
     stepsize::Array{T}
     backoff_steps::Array{Int64}
+    last_line_L::Array{Matrix{T}}
+    last_line_loss::Array{T}
 
     function DescentHistory{T}(
         scaled_loss = Array{T}(undef, 0),
+        neg_grad = Array{Matrix{T}}(undef, 0),
         slope = Array{T}(undef, 0),
         stepsize = Array{T}(undef, 0),
-        backoff_steps = Int64[]
+        backoff_steps = Int64[],
+        last_line_L = Array{Matrix{T}}(undef, 0),
+        last_line_loss = Array{T}(undef, 0)
     ) where T
-        new(scaled_loss, slope, stepsize, backoff_steps)
+        new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss)
     end
 end
 
 # seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
 # explicit entry of `gram`. use gradient descent starting from `guess`
-function realize_gram(
+function realize_gram_gradient(
     gram::SparseMatrixCSC{T, <:Any},
     guess::Matrix{T};
     scaled_tol = 1e-30,
@@ -104,8 +120,8 @@ function realize_gram(
     # do gradient descent
     Δ_proj = proj_diff(gram, L'*Q*L)
-    loss = norm(Δ_proj)
+    loss = dot(Δ_proj, Δ_proj)
 
-    for step in 1:max_descent_steps
+    for _ in 1:max_descent_steps
         # stop if the loss is tolerably low
         if loss < tol
             break
@@ -114,28 +130,39 @@ function realize_gram(
         # find negative gradient of loss function
         neg_grad = 4*Q*L*Δ_proj
         slope = norm(neg_grad)
+        dir = neg_grad / slope
 
         # store current position, loss, and slope
         L_last = L
         loss_last = loss
         push!(history.scaled_loss, loss / scale_adjustment)
+        push!(history.neg_grad, neg_grad)
         push!(history.slope, slope)
 
         # find a good step size using backtracking line search
         push!(history.stepsize, 0)
         push!(history.backoff_steps, max_backoff_steps)
+        empty!(history.last_line_L)
+        empty!(history.last_line_loss)
         for backoff_steps in 0:max_backoff_steps
             history.stepsize[end] = stepsize
-            L = L_last + stepsize * neg_grad
+            L = L_last + stepsize * dir
             Δ_proj = proj_diff(gram, L'*Q*L)
-            loss = norm(Δ_proj)
+            loss = dot(Δ_proj, Δ_proj)
             improvement = loss_last - loss
+            push!(history.last_line_L, L)
+            push!(history.last_line_loss, loss / scale_adjustment)
             if improvement >= target_improvement * stepsize * slope
                 history.backoff_steps[end] = backoff_steps
                 break
             end
             stepsize *= backoff
         end
+
+        # [DEBUG] if we've hit a wall, quit
+        if history.backoff_steps[end] == max_backoff_steps
+            break
+        end
     end
 
     # return the factorization and its history
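
This hunk is the change described in commit d538cbf716: since `dir = neg_grad / slope` has unit length, `stepsize` is an absolute distance, and the test `improvement >= target_improvement * stepsize * slope` compares the realized decrease against the first-order prediction `stepsize * slope`. A minimal standalone sketch of the same backtracking rule, with a hypothetical quadratic loss and starting point:

using LinearAlgebra

# hypothetical quadratic loss, for illustration only
f(x) = dot(x, x)
grad(x) = 2x

function backtrack(x; target = 0.5, backoff = 0.5)
    neg_grad = -grad(x)
    slope = norm(neg_grad)    # predicted decrease per unit of distance
    dir = neg_grad / slope    # unit-length direction, so stepsize is an absolute distance
    stepsize = 1.0
    while f(x) - f(x + stepsize*dir) < target * stepsize * slope
        stepsize *= backoff   # back off until the decrease test passes
    end
    stepsize
end

backtrack([3.0, 4.0])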
@@ -143,4 +170,73 @@ function realize_gram(
     L, history
 end
+
+function basis_matrix(::Type{T}, j, k, dims) where T
+    result = zeros(T, dims)
+    result[j, k] = one(T)
+    result
+end
+
+# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
+# explicit entry of `gram`. use Newton's method starting from `guess`
+function realize_gram_newton(
+    gram::SparseMatrixCSC{T, <:Any},
+    guess::Matrix{T};
+    scaled_tol = 1e-30,
+    rate = 1,
+    max_steps = 100
+) where T <: Number
+    # start history
+    history = DescentHistory{T}()
+
+    # find the dimension of the search space
+    dims = size(guess)
+    element_dim, construction_dim = dims
+    total_dim = element_dim * construction_dim
+
+    # list the constrained entries of the gram matrix
+    J, K, _ = findnz(gram)
+    constrained = zip(J, K)
+
+    # scale the tolerance
+    scale_adjustment = sqrt(T(length(constrained)))
+    tol = scale_adjustment * scaled_tol
+
+    # use Newton's method
+    L = copy(guess)
+    for step in 0:max_steps
+        # evaluate the loss function
+        Δ_proj = proj_diff(gram, L'*Q*L)
+        loss = dot(Δ_proj, Δ_proj)
+
+        # store the current loss
+        push!(history.scaled_loss, loss / scale_adjustment)
+
+        # stop if the loss is tolerably low, or if we're out of steps
+        if loss < tol || step == max_steps
+            break
+        end
+
+        # find the negative gradient of loss function
+        neg_grad = 4*Q*L*Δ_proj
+
+        # find the negative Hessian of the loss function
+        hess = Matrix{T}(undef, total_dim, total_dim)
+        indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+        for (j, k) in indices
+            basis_mat = basis_matrix(T, j, k, dims)
+            neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+            neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+            deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
+            hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+        end
+
+        # compute the Newton step
+        newton_step = hess \ reshape(neg_grad, total_dim)
+        L += rate * reshape(newton_step, dims)
+    end
+
+    # return the factorization and its history
+    L, history
+end
 
 end
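
In `realize_gram_newton`, the Hessian is assembled one column per matrix entry: the column for entry (j, k) holds the directional derivative of the gradient along `basis_matrix(T, j, k, dims)`, stored at flat index `(k-1)*element_dim + j`. That index matches Julia's column-major `reshape`, which the solve `hess \ reshape(neg_grad, total_dim)` relies on. A standalone check of the convention, with hypothetical dimensions:

# verify that entry (j, k) lands at flat index (k-1)*element_dim + j
# under column-major reshape, as the Hessian assembly assumes
element_dim, construction_dim = 5, 9   # hypothetical sizes
dims = (element_dim, construction_dim)
for k in 1:construction_dim, j in 1:element_dim
    basis_mat = zeros(dims)
    basis_mat[j, k] = 1
    flat = reshape(basis_mat, element_dim * construction_dim)
    @assert findfirst(!iszero, flat) == (k-1)*element_dim + j
end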

View File

@@ -0,0 +1,99 @@
+include("Engine.jl")
+
+using LinearAlgebra
+using SparseArrays
+
+function sphere_in_tetrahedron_shape()
+    # initialize the partial gram matrix for a sphere inscribed in a regular
+    # tetrahedron
+    J = Int64[]
+    K = Int64[]
+    values = BigFloat[]
+    for j in 1:5
+        for k in 1:5
+            push!(J, j)
+            push!(K, k)
+            if j == k
+                push!(values, 1)
+            elseif (j <= 4 && k <= 4)
+                push!(values, -1/BigFloat(3))
+            else
+                push!(values, -1)
+            end
+        end
+    end
+    gram = sparse(J, K, values)
+
+    # plot loss along a slice
+    loss_lin = []
+    loss_sq = []
+    mesh = range(0.9, 1.1, 101)
+    for t in mesh
+        L = hcat(
+            Engine.plane(normalize(BigFloat[ 1,  1,  1]), BigFloat(1)),
+            Engine.plane(normalize(BigFloat[ 1, -1, -1]), BigFloat(1)),
+            Engine.plane(normalize(BigFloat[-1,  1, -1]), BigFloat(1)),
+            Engine.plane(normalize(BigFloat[-1, -1,  1]), BigFloat(1)),
+            Engine.sphere(BigFloat[0, 0, 0], BigFloat(t))
+        )
+        Δ_proj = Engine.proj_diff(gram, L'*Engine.Q*L)
+        push!(loss_lin, norm(Δ_proj))
+        push!(loss_sq, dot(Δ_proj, Δ_proj))
+    end
+    mesh, loss_lin, loss_sq
+end
+
+function circles_in_triangle_shape()
+    # initialize the partial gram matrix for circles inscribed in a triangle
+    J = Int64[]
+    K = Int64[]
+    values = BigFloat[]
+    for j in 1:8
+        for k in 1:8
+            filled = false
+            if j == k
+                push!(values, 1)
+                filled = true
+            elseif (j == 1 || k == 1)
+                push!(values, 0)
+                filled = true
+            elseif (j == 2 || k == 2)
+                push!(values, -1)
+                filled = true
+            end
+            #=elseif (j <= 5 && j != 2 && k == 9 || k == 9 && k <= 5 && k != 2)
+                push!(values, 0)
+                filled = true
+            end=#
+            if filled
+                push!(J, j)
+                push!(K, k)
+            end
+        end
+    end
+    append!(J, [6, 4, 6, 5, 7, 5, 7, 3, 8, 3, 8, 4])
+    append!(K, [4, 6, 5, 6, 5, 7, 3, 7, 3, 8, 4, 8])
+    append!(values, fill(-1, 12))
+    gram = sparse(J, K, values)
+
+    # plot loss along a slice
+    loss_lin = []
+    loss_sq = []
+    mesh = range(0.99, 1.01, 101)
+    for t in mesh
+        L = hcat(
+            Engine.plane(BigFloat[0, 0, 1], BigFloat(0)),
+            Engine.sphere(BigFloat[0, 0, 0], BigFloat(t)),
+            Engine.plane(BigFloat[1, 0, 0], BigFloat(1)),
+            Engine.plane(BigFloat[cos(2pi/3), sin(2pi/3), 0], BigFloat(1)),
+            Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)),
+            Engine.sphere(4//3*BigFloat[-1, 0, 0], BigFloat(1//3)),
+            Engine.sphere(4//3*BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//3)),
+            Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3))
+        )
+        Δ_proj = Engine.proj_diff(gram, L'*Engine.Q*L)
+        push!(loss_lin, norm(Δ_proj))
+        push!(loss_sq, dot(Δ_proj, Δ_proj))
+    end
+    mesh, loss_lin, loss_sq
+end
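
A sketch of how these slice profiles might be plotted, assuming the file above has been included and that Plots.jl is available (the plotting package is an assumption, not a dependency shown in this change):

using Plots

mesh, loss_lin, loss_sq = sphere_in_tetrahedron_shape()
plot(mesh, Float64.(loss_lin), label = "norm(Δ_proj)")         # loss used before this change
plot!(mesh, Float64.(loss_sq), label = "dot(Δ_proj, Δ_proj)")  # squared loss matching the gradient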

View File

@@ -1,20 +1,28 @@
 include("Engine.jl")
 
 using SparseArrays
-using AbstractAlgebra
-using PolynomialRoots
 
 # initialize the partial gram matrix for a sphere inscribed in a regular
 # tetrahedron
 J = Int64[]
 K = Int64[]
 values = BigFloat[]
-for j in 1:8
-    for k in 1:8
+for j in 1:9
+    for k in 1:9
         filled = false
         if j == k
-            push!(values, 1)
+            push!(values, j < 9 ? 1 : 0)
             filled = true
+        elseif (j == 9)
+            if (k <= 5 && k != 2)
+                push!(values, 0)
+                filled = true
+            end
+        elseif (k == 9)
+            if (j <= 5 && j != 2)
+                push!(values, 0)
+                filled = true
+            end
         elseif (j == 1 || k == 1)
             push!(values, 0)
             filled = true
@@ -47,7 +55,6 @@ gram = sparse(J, K, values)
 ## guess = Engine.rand_on_shell(fill(BigFloat(-1), 8))
 
 # set initial guess
-#=
 guess = hcat(
     Engine.plane(BigFloat[0, 0, 1], BigFloat(0)),
     Engine.sphere(BigFloat[0, 0, 0], BigFloat(1//2)),
@@ -56,9 +63,10 @@ guess = hcat(
     Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)),
     Engine.sphere(BigFloat[-1, 0, 0], BigFloat(1//5)),
     Engine.sphere(BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//5)),
-    Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5))
+    Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5)),
+    BigFloat[0, 0, 0, 1, 1]
 )
-=#
+#=
 guess = hcat(
     Engine.plane(BigFloat[0, 0, 1], BigFloat(0)),
     Engine.sphere(BigFloat[0, 0, 0], BigFloat(0.9)),
@@ -67,13 +75,22 @@ guess = hcat(
     Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)),
     Engine.sphere(4//3*BigFloat[-1, 0, 0], BigFloat(1//3)),
     Engine.sphere(4//3*BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//3)),
-    Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3))
+    Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3)),
+    BigFloat[0, 0, 0, 1, 1]
 )
+=#
 
-# complete the gram matrix using gradient descent
-L, history = Engine.realize_gram(gram, guess, max_descent_steps = 200)
+# complete the gram matrix using gradient descent followed by Newton's method
+L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
+L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9)
+L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol)
 completed_gram = L'*Engine.Q*L
 println("Completed Gram matrix:\n")
 display(completed_gram)
-println("\nSteps: ", size(history.stepsize, 1))
-println("Loss: ", history.scaled_loss[end], "\n")
+println(
+    "\nSteps: ",
+    size(history.scaled_loss, 1),
+    " + ", size(history_pol.scaled_loss, 1),
+    " + ", size(history_pol2.scaled_loss, 1)
+)
+println("Loss: ", history_pol2.scaled_loss[end], "\n")

View File

@@ -47,13 +47,14 @@ guess = hcat(
     Engine.rand_on_shell(fill(BigFloat(-1), 2))
 )
 
-# complete the gram matrix using gradient descent
-L, history = Engine.realize_gram(gram, guess)
+# complete the gram matrix using gradient descent followed by Newton's method
+L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
+L_pol, history_pol = Engine.realize_gram_newton(gram, L)
 completed_gram = L'*Engine.Q*L
 println("Completed Gram matrix:\n")
 display(completed_gram)
-println("\nSteps: ", size(history.stepsize, 1))
-println("Loss: ", history.scaled_loss[end], "\n")
+println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1))
+println("Loss: ", history_pol.scaled_loss[end], "\n")
 
 # === algebraic check ===

View File

@@ -1,8 +1,6 @@
 include("Engine.jl")
 
 using SparseArrays
-using AbstractAlgebra
-using PolynomialRoots
 using Random
 
 # initialize the partial gram matrix for a sphere inscribed in a regular
@@ -35,10 +33,10 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[
     1 1 1 1 1
 ] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5))
 
-# complete the gram matrix using gradient descent
-L, history = Engine.realize_gram(gram, guess)
+# complete the gram matrix using Newton's method
+L, history = Engine.realize_gram_newton(gram, guess)
 completed_gram = L'*Engine.Q*L
 println("Completed Gram matrix:\n")
 display(completed_gram)
-println("\nSteps: ", size(history.stepsize, 1))
+println("\nSteps: ", size(history.scaled_loss, 1))
 println("Loss: ", history.scaled_loss[end], "\n")