From 16df161fe761e7c550282ff02916e6f089ff17b6 Mon Sep 17 00:00:00 2001
From: Aaron Fenyes
Date: Thu, 24 Oct 2024 19:51:10 -0700
Subject: [PATCH] Test alternate projection technique

---
 engine-proto/gram-test/Engine.jl              | 133 +++++++++++++++++-
 engine-proto/gram-test/irisawa-hexlet.jl      |  11 +-
 .../gram-test/sphere-in-tetrahedron.jl        |  11 +-
 .../gram-test/tetrahedron-radius-ratio.jl     |  11 +-
 4 files changed, 159 insertions(+), 7 deletions(-)

diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl
index 22f5914..1eb72f5 100644
--- a/engine-proto/gram-test/Engine.jl
+++ b/engine-proto/gram-test/Engine.jl
@@ -8,7 +8,8 @@ using Optim
 export rand_on_shell, Q, DescentHistory,
-  realize_gram_gradient, realize_gram_newton, realize_gram_optim, realize_gram
+  realize_gram_gradient, realize_gram_newton, realize_gram_optim,
+  realize_gram_alt_proj, realize_gram

# === guessing ===

@@ -143,7 +144,7 @@ function realize_gram_gradient(
      break
    end

-    # find negative gradient of loss function
+    # find the negative gradient of the loss function
    neg_grad = 4*Q*L*Δ_proj
    slope = norm(neg_grad)
    dir = neg_grad / slope
@@ -232,7 +233,7 @@ function realize_gram_newton(
      break
    end

-    # find the negative gradient of loss function
+    # find the negative gradient of the loss function
    neg_grad = 4*Q*L*Δ_proj

    # find the negative Hessian of the loss function
@@ -313,6 +314,130 @@ function realize_gram_optim(
  )
end

# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
# explicit entry of `gram`. use gradient descent starting from `guess`, with an
# alternate technique for finding the projected base step from the unprojected
# Hessian
function realize_gram_alt_proj(
  gram::SparseMatrixCSC{T, <:Any},
  guess::Matrix{T},
  frozen = CartesianIndex[];
  scaled_tol = 1e-30,
  min_efficiency = 0.5,
  init_rate = 1.0,
  backoff = 0.9,
  reg_scale = 1.1,
  max_descent_steps = 200,
  max_backoff_steps = 110
) where T <: Number
  # start history
  history = DescentHistory{T}()

  # find the dimension of the search space
  dims = size(guess)
  element_dim, construction_dim = dims
  total_dim = element_dim * construction_dim

  # list the constrained entries of the gram matrix
  J, K, _ = findnz(gram)
  constrained = zip(J, K)

  # scale the tolerance
  scale_adjustment = sqrt(T(length(constrained)))
  tol = scale_adjustment * scaled_tol

  # convert the frozen indices to stacked format
  frozen_stacked = [(index[2]-1)*element_dim + index[1] for index in frozen]

  # initialize variables
  grad_rate = init_rate
  L = copy(guess)

  # use Newton's method with backtracking and gradient descent backup
  Δ_proj = proj_diff(gram, L'*Q*L)
  loss = dot(Δ_proj, Δ_proj)
  for step in 1:max_descent_steps
    # stop if the loss is tolerably low
    if loss < tol
      break
    end

    # find the negative gradient of the loss function
    neg_grad = 4*Q*L*Δ_proj

    # find the negative Hessian of the loss function
    hess = Matrix{T}(undef, total_dim, total_dim)
    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
    for (j, k) in indices
      basis_mat = basis_matrix(T, j, k, dims)
      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
      hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
    end
    hess_sym = Hermitian(hess)
    push!(history.hess, hess_sym)

    # regularize the Hessian
    min_eigval = minimum(eigvals(hess_sym))
    push!(history.positive, min_eigval > 0)
    if min_eigval <= 0
      hess -= reg_scale * min_eigval * I
    end

    # compute the Newton step
    neg_grad_stacked = reshape(neg_grad, total_dim)
    for k in frozen_stacked
      neg_grad_stacked[k] = 0
      hess[k, :] .= 0
      hess[:, k] .= 0
      hess[k, k] = 1
    end
    base_step_stacked = Hermitian(hess) \ neg_grad_stacked
    base_step = reshape(base_step_stacked, dims)
    push!(history.base_step, base_step)

    # store the current position, loss, and slope
    L_last = L
    loss_last = loss
    push!(history.scaled_loss, loss / scale_adjustment)
    push!(history.neg_grad, neg_grad)
    push!(history.slope, norm(neg_grad))

    # find a good step size using backtracking line search
    push!(history.stepsize, 0)
    push!(history.backoff_steps, max_backoff_steps)
    empty!(history.last_line_L)
    empty!(history.last_line_loss)
    rate = one(T)
    step_success = false
    for backoff_steps in 0:max_backoff_steps
      history.stepsize[end] = rate
      L = L_last + rate * base_step
      Δ_proj = proj_diff(gram, L'*Q*L)
      loss = dot(Δ_proj, Δ_proj)
      improvement = loss_last - loss
      push!(history.last_line_L, L)
      push!(history.last_line_loss, loss / scale_adjustment)
      if improvement >= min_efficiency * rate * dot(neg_grad, base_step)
        history.backoff_steps[end] = backoff_steps
        step_success = true
        break
      end
      rate *= backoff
    end

    # if we've hit a wall, quit
    if !step_success
      return L_last, false, history
    end
  end

  # return the factorization and its history
  push!(history.scaled_loss, loss / scale_adjustment)
  L, loss < tol, history
end

# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
# explicit entry of `gram`. use gradient descent starting from `guess`
function realize_gram(
@@ -365,7 +490,7 @@ function realize_gram(
      break
    end

-    # find the negative gradient of loss function
+    # find the negative gradient of the loss function
    neg_grad = 4*Q*L*Δ_proj

    # find the negative Hessian of the loss function
diff --git a/engine-proto/gram-test/irisawa-hexlet.jl b/engine-proto/gram-test/irisawa-hexlet.jl
index 67def8c..607db61 100644
--- a/engine-proto/gram-test/irisawa-hexlet.jl
+++ b/engine-proto/gram-test/irisawa-hexlet.jl
@@ -74,4 +74,13 @@ if success
    for k in 5:9
      println(" ", 1 / L[4,k], " sun")
    end
-end
\ No newline at end of file
+end
+
+# test an alternate technique for finding the projected base step from the
+# unprojected Hessian
+L_alt, success_alt, history_alt = Engine.realize_gram_alt_proj(gram, guess, frozen)
+completed_gram_alt = L_alt'*Engine.Q*L_alt
+println("\nDifference in result using alternate projection:\n")
+display(completed_gram_alt - completed_gram)
+println("\nDifference in steps: ", size(history_alt.scaled_loss, 1) - size(history.scaled_loss, 1))
+println("Difference in loss: ", history_alt.scaled_loss[end] - history.scaled_loss[end], "\n")
\ No newline at end of file
diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl
index 97f0720..5d479cf 100644
--- a/engine-proto/gram-test/sphere-in-tetrahedron.jl
+++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl
@@ -64,4 +64,13 @@ else
  println("\nFailed to reach target accuracy")
end
println("Steps: ", size(history.scaled_loss, 1))
-println("Loss: ", history.scaled_loss[end], "\n")
\ No newline at end of file
+println("Loss: ", history.scaled_loss[end], "\n")
+
+# test an alternate technique for finding the projected base step from the
+# unprojected Hessian
+L_alt, success_alt, history_alt = Engine.realize_gram_alt_proj(gram, guess, frozen)
+completed_gram_alt = L_alt'*Engine.Q*L_alt
+println("\nDifference in result using alternate projection:\n") +display(completed_gram_alt - completed_gram) +println("\nDifference in steps: ", size(history_alt.scaled_loss, 1) - size(history.scaled_loss, 1)) +println("Difference in loss: ", history_alt.scaled_loss[end] - history.scaled_loss[end], "\n") \ No newline at end of file diff --git a/engine-proto/gram-test/tetrahedron-radius-ratio.jl b/engine-proto/gram-test/tetrahedron-radius-ratio.jl index 7ceb794..9fec28e 100644 --- a/engine-proto/gram-test/tetrahedron-radius-ratio.jl +++ b/engine-proto/gram-test/tetrahedron-radius-ratio.jl @@ -93,4 +93,13 @@ if success infty = BigFloat[0, 0, 0, 0, 1] radius_ratio = dot(infty, Engine.Q * L[:,5]) / dot(infty, Engine.Q * L[:,6]) println("\nCircumradius / inradius: ", radius_ratio) -end \ No newline at end of file +end + +# test an alternate technique for finding the projected base step from the +# unprojected Hessian +L_alt, success_alt, history_alt = Engine.realize_gram_alt_proj(gram, guess, frozen) +completed_gram_alt = L_alt'*Engine.Q*L_alt +println("\nDifference in result using alternate projection:\n") +display(completed_gram_alt - completed_gram) +println("\nDifference in steps: ", size(history_alt.scaled_loss, 1) - size(history.scaled_loss, 1)) +println("Difference in loss: ", history_alt.scaled_loss[end] - history.scaled_loss[end], "\n") \ No newline at end of file