From 77bc124170e2ee72fe94ec55d92af7f689aa24f7 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Tue, 9 Jul 2024 14:00:24 -0700 Subject: [PATCH 1/6] Change loss function to match gradient --- engine-proto/gram-test/Engine.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index e8bda9c..2fb41e0 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -104,7 +104,7 @@ function realize_gram( # do gradient descent Δ_proj = proj_diff(gram, L'*Q*L) - loss = norm(Δ_proj) + loss = dot(Δ_proj, Δ_proj) for step in 1:max_descent_steps # stop if the loss is tolerably low if loss < tol @@ -128,7 +128,7 @@ function realize_gram( history.stepsize[end] = stepsize L = L_last + stepsize * neg_grad Δ_proj = proj_diff(gram, L'*Q*L) - loss = norm(Δ_proj) + loss = dot(Δ_proj, Δ_proj) improvement = loss_last - loss if improvement >= target_improvement * stepsize * slope history.backoff_steps[end] = backoff_steps From f84d475580391e5ef9b5ef2d0eca02af9c121087 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Tue, 9 Jul 2024 14:01:30 -0700 Subject: [PATCH 2/6] Visualize neighborhoods of global minima --- engine-proto/gram-test/basin-shapes.jl | 99 ++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 engine-proto/gram-test/basin-shapes.jl diff --git a/engine-proto/gram-test/basin-shapes.jl b/engine-proto/gram-test/basin-shapes.jl new file mode 100644 index 0000000..5c03c01 --- /dev/null +++ b/engine-proto/gram-test/basin-shapes.jl @@ -0,0 +1,99 @@ +include("Engine.jl") + +using LinearAlgebra +using SparseArrays + +function sphere_in_tetrahedron_shape() + # initialize the partial gram matrix for a sphere inscribed in a regular + # tetrahedron + J = Int64[] + K = Int64[] + values = BigFloat[] + for j in 1:5 + for k in 1:5 + push!(J, j) + push!(K, k) + if j == k + push!(values, 1) + elseif (j <= 4 && k <= 4) + push!(values, -1/BigFloat(3)) + else + push!(values, -1) + end + end + end + gram = sparse(J, K, values) + + # plot loss along a slice + loss_lin = [] + loss_sq = [] + mesh = range(0.9, 1.1, 101) + for t in mesh + L = hcat( + Engine.plane(normalize(BigFloat[ 1, 1, 1]), BigFloat(1)), + Engine.plane(normalize(BigFloat[ 1, -1, -1]), BigFloat(1)), + Engine.plane(normalize(BigFloat[-1, 1, -1]), BigFloat(1)), + Engine.plane(normalize(BigFloat[-1, -1, 1]), BigFloat(1)), + Engine.sphere(BigFloat[0, 0, 0], BigFloat(t)) + ) + Δ_proj = Engine.proj_diff(gram, L'*Engine.Q*L) + push!(loss_lin, norm(Δ_proj)) + push!(loss_sq, dot(Δ_proj, Δ_proj)) + end + mesh, loss_lin, loss_sq +end + +function circles_in_triangle_shape() + # initialize the partial gram matrix for a sphere inscribed in a regular + # tetrahedron + J = Int64[] + K = Int64[] + values = BigFloat[] + for j in 1:8 + for k in 1:8 + filled = false + if j == k + push!(values, 1) + filled = true + elseif (j == 1 || k == 1) + push!(values, 0) + filled = true + elseif (j == 2 || k == 2) + push!(values, -1) + filled = true + end + #=elseif (j <= 5 && j != 2 && k == 9 || k == 9 && k <= 5 && k != 2) + push!(values, 0) + filled = true + end=# + if filled + push!(J, j) + push!(K, k) + end + end + end + append!(J, [6, 4, 6, 5, 7, 5, 7, 3, 8, 3, 8, 4]) + append!(K, [4, 6, 5, 6, 5, 7, 3, 7, 3, 8, 4, 8]) + append!(values, fill(-1, 12)) + + # plot loss along a slice + loss_lin = [] + loss_sq = [] + mesh = range(0.99, 1.01, 101) + for t in mesh + L = hcat( + Engine.plane(BigFloat[0, 0, 1], BigFloat(0)), + Engine.sphere(BigFloat[0, 0, 0], BigFloat(t)), + Engine.plane(BigFloat[1, 0, 0], BigFloat(1)), + Engine.plane(BigFloat[cos(2pi/3), sin(2pi/3), 0], BigFloat(1)), + Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)), + Engine.sphere(4//3*BigFloat[-1, 0, 0], BigFloat(1//3)), + Engine.sphere(4//3*BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//3)), + Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3)) + ) + Δ_proj = Engine.proj_diff(gram, L'*Engine.Q*L) + push!(loss_lin, norm(Δ_proj)) + push!(loss_sq, dot(Δ_proj, Δ_proj)) + end + mesh, loss_lin, loss_sq +end \ No newline at end of file From 5652719642b236d36205b167a49510c9d21ae87b Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Tue, 9 Jul 2024 14:10:23 -0700 Subject: [PATCH 3/6] Require triangle sides to be planar --- engine-proto/gram-test/circles-in-triangle.jl | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index 2bca4f2..9dc3fac 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -1,20 +1,28 @@ include("Engine.jl") using SparseArrays -using AbstractAlgebra -using PolynomialRoots # initialize the partial gram matrix for a sphere inscribed in a regular # tetrahedron J = Int64[] K = Int64[] values = BigFloat[] -for j in 1:8 - for k in 1:8 +for j in 1:9 + for k in 1:9 filled = false if j == k - push!(values, 1) + push!(values, j < 9 ? 1 : 0) filled = true + elseif (j == 9) + if (k <= 5 && k != 2) + push!(values, 0) + filled = true + end + elseif (k == 9) + if (j <= 5 && j != 2) + push!(values, 0) + filled = true + end elseif (j == 1 || k == 1) push!(values, 0) filled = true @@ -56,7 +64,8 @@ guess = hcat( Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)), Engine.sphere(BigFloat[-1, 0, 0], BigFloat(1//5)), Engine.sphere(BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//5)), - Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5)) + Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5)), + BigFloat[0, 0, 0, 1, 1] ) =# guess = hcat( @@ -67,7 +76,8 @@ guess = hcat( Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)), Engine.sphere(4//3*BigFloat[-1, 0, 0], BigFloat(1//3)), Engine.sphere(4//3*BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//3)), - Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3)) + Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3)), + BigFloat[0, 0, 0, 1, 1] ) # complete the gram matrix using gradient descent From 4d5ea062a3dc47ae92472dda23e7f8bdccc6c614 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Tue, 9 Jul 2024 15:00:13 -0700 Subject: [PATCH 4/6] Record gradient and last line search in history --- engine-proto/gram-test/Engine.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index 2fb41e0..0539326 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -65,17 +65,23 @@ end # a type for keeping track of gradient descent history struct DescentHistory{T} scaled_loss::Array{T} + neg_grad::Array{Matrix{T}} slope::Array{T} stepsize::Array{T} backoff_steps::Array{Int64} + last_line_L::Array{Matrix{T}} + last_line_loss::Array{T} function DescentHistory{T}( scaled_loss = Array{T}(undef, 0), + neg_grad = Array{Matrix{T}}(undef, 0), slope = Array{T}(undef, 0), stepsize = Array{T}(undef, 0), - backoff_steps = Int64[] + backoff_steps = Int64[], + last_line_L = Array{Matrix{T}}(undef, 0), + last_line_loss = Array{T}(undef, 0) ) where T - new(scaled_loss, slope, stepsize, backoff_steps) + new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss) end end @@ -119,23 +125,33 @@ function realize_gram( L_last = L loss_last = loss push!(history.scaled_loss, loss / scale_adjustment) + push!(history.neg_grad, neg_grad) push!(history.slope, slope) # find a good step size using backtracking line search push!(history.stepsize, 0) push!(history.backoff_steps, max_backoff_steps) + empty!(history.last_line_L) + empty!(history.last_line_loss) for backoff_steps in 0:max_backoff_steps history.stepsize[end] = stepsize L = L_last + stepsize * neg_grad Δ_proj = proj_diff(gram, L'*Q*L) loss = dot(Δ_proj, Δ_proj) improvement = loss_last - loss + push!(history.last_line_L, L) + push!(history.last_line_loss, loss / scale_adjustment) if improvement >= target_improvement * stepsize * slope history.backoff_steps[end] = backoff_steps break end stepsize *= backoff end + + # [DEBUG] if we've hit a wall, quit + if history.backoff_steps[end] == max_backoff_steps + break + end end # return the factorization and its history From d538cbf716607f9b53a348c8dfa202363575abf0 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Wed, 10 Jul 2024 23:31:44 -0700 Subject: [PATCH 5/6] Correct improvement threshold by using unit step Our formula for the improvement theshold works when the step size is an absolute distance. However, in commit `4d5ea06`, the step size was measured relative to the current gradient instead. This commit scales the base step to unit length, so now the step size really is an absolute distance. --- engine-proto/gram-test/Engine.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index 0539326..a160291 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -120,6 +120,7 @@ function realize_gram( # find negative gradient of loss function neg_grad = 4*Q*L*Δ_proj slope = norm(neg_grad) + dir = neg_grad / slope # store current position, loss, and slope L_last = L @@ -135,7 +136,7 @@ function realize_gram( empty!(history.last_line_loss) for backoff_steps in 0:max_backoff_steps history.stepsize[end] = stepsize - L = L_last + stepsize * neg_grad + L = L_last + stepsize * dir Δ_proj = proj_diff(gram, L'*Q*L) loss = dot(Δ_proj, Δ_proj) improvement = loss_last - loss From 3910b9f740b7a775b5642892ac2b0575c1c73b22 Mon Sep 17 00:00:00 2001 From: Aaron Fenyes Date: Thu, 11 Jul 2024 13:43:52 -0700 Subject: [PATCH 6/6] Use Newton's method for polishing --- engine-proto/gram-test/Engine.jl | 85 ++++++++++++++++++- engine-proto/gram-test/circles-in-triangle.jl | 19 +++-- .../gram-test/overlapping-pyramids.jl | 9 +- .../gram-test/sphere-in-tetrahedron.jl | 8 +- 4 files changed, 103 insertions(+), 18 deletions(-) diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl index a160291..e6f0d97 100644 --- a/engine-proto/gram-test/Engine.jl +++ b/engine-proto/gram-test/Engine.jl @@ -51,11 +51,21 @@ end # the Lorentz form Q = diagm([1, 1, 1, 1, -1]) +# project a matrix onto the subspace of matrices whose entries vanish at the +# given indices +function proj_to_entries(mat, indices) + result = zeros(size(mat)) + for (j, k) in indices + result[j, k] = mat[j, k] + end + result +end + # the difference between the matrices `target` and `attempt`, projected onto the # subspace of matrices whose entries vanish at each empty index of `target` function proj_diff(target::SparseMatrixCSC{T, <:Any}, attempt::Matrix{T}) where T J, K, values = findnz(target) - result = zeros(size(target)...) + result = zeros(size(target)) for (j, k, val) in zip(J, K, values) result[j, k] = val - attempt[j, k] end @@ -87,7 +97,7 @@ end # seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every # explicit entry of `gram`. use gradient descent starting from `guess` -function realize_gram( +function realize_gram_gradient( gram::SparseMatrixCSC{T, <:Any}, guess::Matrix{T}; scaled_tol = 1e-30, @@ -111,7 +121,7 @@ function realize_gram( # do gradient descent Δ_proj = proj_diff(gram, L'*Q*L) loss = dot(Δ_proj, Δ_proj) - for step in 1:max_descent_steps + for _ in 1:max_descent_steps # stop if the loss is tolerably low if loss < tol break @@ -160,4 +170,73 @@ function realize_gram( L, history end +function basis_matrix(::Type{T}, j, k, dims) where T + result = zeros(T, dims) + result[j, k] = one(T) + result +end + +# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every +# explicit entry of `gram`. use Newton's method starting from `guess` +function realize_gram_newton( + gram::SparseMatrixCSC{T, <:Any}, + guess::Matrix{T}; + scaled_tol = 1e-30, + rate = 1, + max_steps = 100 +) where T <: Number + # start history + history = DescentHistory{T}() + + # find the dimension of the search space + dims = size(guess) + element_dim, construction_dim = dims + total_dim = element_dim * construction_dim + + # list the constrained entries of the gram matrix + J, K, _ = findnz(gram) + constrained = zip(J, K) + + # scale the tolerance + scale_adjustment = sqrt(T(length(constrained))) + tol = scale_adjustment * scaled_tol + + # use newton's method + L = copy(guess) + for step in 0:max_steps + # evaluate the loss function + Δ_proj = proj_diff(gram, L'*Q*L) + loss = dot(Δ_proj, Δ_proj) + + # store the current loss + push!(history.scaled_loss, loss / scale_adjustment) + + # stop if the loss is tolerably low + if loss < tol || step > max_steps + break + end + + # find the negative gradient of loss function + neg_grad = 4*Q*L*Δ_proj + + # find the negative Hessian of the loss function + hess = Matrix{T}(undef, total_dim, total_dim) + indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim] + for (j, k) in indices + basis_mat = basis_matrix(T, j, k, dims) + neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat + neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained) + deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) + hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim) + end + + # compute the newton step + step = hess \ reshape(neg_grad, total_dim) + L += rate * reshape(step, dims) + end + + # return the factorization and its history + L, history +end + end \ No newline at end of file diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl index 9dc3fac..b031711 100644 --- a/engine-proto/gram-test/circles-in-triangle.jl +++ b/engine-proto/gram-test/circles-in-triangle.jl @@ -55,7 +55,6 @@ gram = sparse(J, K, values) ## guess = Engine.rand_on_shell(fill(BigFloat(-1), 8)) # set initial guess -#= guess = hcat( Engine.plane(BigFloat[0, 0, 1], BigFloat(0)), Engine.sphere(BigFloat[0, 0, 0], BigFloat(1//2)), @@ -67,7 +66,7 @@ guess = hcat( Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5)), BigFloat[0, 0, 0, 1, 1] ) -=# +#= guess = hcat( Engine.plane(BigFloat[0, 0, 1], BigFloat(0)), Engine.sphere(BigFloat[0, 0, 0], BigFloat(0.9)), @@ -79,11 +78,19 @@ guess = hcat( Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3)), BigFloat[0, 0, 0, 1, 1] ) +=# -# complete the gram matrix using gradient descent -L, history = Engine.realize_gram(gram, guess, max_descent_steps = 200) +# complete the gram matrix using gradient descent followed by Newton's method +L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) +L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9) +L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) -println("\nSteps: ", size(history.stepsize, 1)) -println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file +println( + "\nSteps: ", + size(history.scaled_loss, 1), + " + ", size(history_pol.scaled_loss, 1), + " + ", size(history_pol2.scaled_loss, 1) +) +println("Loss: ", history_pol2.scaled_loss[end], "\n") \ No newline at end of file diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl index 97fc119..51606e1 100644 --- a/engine-proto/gram-test/overlapping-pyramids.jl +++ b/engine-proto/gram-test/overlapping-pyramids.jl @@ -47,13 +47,14 @@ guess = hcat( Engine.rand_on_shell(fill(BigFloat(-1), 2)) ) -# complete the gram matrix using gradient descent -L, history = Engine.realize_gram(gram, guess) +# complete the gram matrix using gradient descent followed by Newton's method +L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01) +L_pol, history_pol = Engine.realize_gram_newton(gram, L) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) -println("\nSteps: ", size(history.stepsize, 1)) -println("Loss: ", history.scaled_loss[end], "\n") +println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1)) +println("Loss: ", history_pol.scaled_loss[end], "\n") # === algebraic check === diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl index 3730390..dc5588c 100644 --- a/engine-proto/gram-test/sphere-in-tetrahedron.jl +++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl @@ -1,8 +1,6 @@ include("Engine.jl") using SparseArrays -using AbstractAlgebra -using PolynomialRoots using Random # initialize the partial gram matrix for a sphere inscribed in a regular @@ -35,10 +33,10 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[ 1 1 1 1 1 ] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5)) -# complete the gram matrix using gradient descent -L, history = Engine.realize_gram(gram, guess) +# complete the gram matrix using Newton's method +L, history = Engine.realize_gram_newton(gram, guess) completed_gram = L'*Engine.Q*L println("Completed Gram matrix:\n") display(completed_gram) -println("\nSteps: ", size(history.stepsize, 1)) +println("\nSteps: ", size(history.scaled_loss, 1)) println("Loss: ", history.scaled_loss[end], "\n") \ No newline at end of file