diff --git a/engine-proto/gram-test/Engine.jl b/engine-proto/gram-test/Engine.jl
index e6f0d97..10f4b02 100644
--- a/engine-proto/gram-test/Engine.jl
+++ b/engine-proto/gram-test/Engine.jl
@@ -1,10 +1,14 @@
 module Engine
 
 using LinearAlgebra
+using GenericLinearAlgebra
 using SparseArrays
 using Random
+using Optim
 
-export rand_on_shell, Q, DescentHistory, realize_gram
+export
+  rand_on_shell, Q, DescentHistory,
+  realize_gram_gradient, realize_gram_newton, realize_gram_optim, realize_gram
 
 # === guessing ===
 
@@ -76,8 +80,11 @@ end
 struct DescentHistory{T}
   scaled_loss::Array{T}
   neg_grad::Array{Matrix{T}}
+  base_step::Array{Matrix{T}}
+  hess::Array{Hermitian{T, Matrix{T}}}
   slope::Array{T}
   stepsize::Array{T}
+  positive::Array{Bool}
   backoff_steps::Array{Int64}
   last_line_L::Array{Matrix{T}}
   last_line_loss::Array{T}
@@ -85,13 +92,16 @@ struct DescentHistory{T}
   function DescentHistory{T}(
     scaled_loss = Array{T}(undef, 0),
     neg_grad = Array{Matrix{T}}(undef, 0),
+    base_step = Array{Matrix{T}}(undef, 0),
+    hess = Array{Hermitian{T, Matrix{T}}}(undef, 0),
     slope = Array{T}(undef, 0),
     stepsize = Array{T}(undef, 0),
+    positive = Bool[],
     backoff_steps = Int64[],
     last_line_L = Array{Matrix{T}}(undef, 0),
     last_line_loss = Array{T}(undef, 0)
   ) where T
-    new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss)
+    new(scaled_loss, neg_grad, base_step, hess, slope, stepsize, positive, backoff_steps, last_line_L, last_line_loss)
   end
 end
 
@@ -101,7 +111,7 @@ function realize_gram_gradient(
   gram::SparseMatrixCSC{T, <:Any},
   guess::Matrix{T};
   scaled_tol = 1e-30,
-  target_improvement = 0.5,
+  min_efficiency = 0.5,
   init_stepsize = 1.0,
   backoff = 0.9,
   max_descent_steps = 600,
@@ -152,7 +162,7 @@ function realize_gram_gradient(
       improvement = loss_last - loss
       push!(history.last_line_L, L)
       push!(history.last_line_loss, loss / scale_adjustment)
-      if improvement >= target_improvement * stepsize * slope
+      if improvement >= min_efficiency * stepsize * slope
         history.backoff_steps[end] = backoff_steps
         break
       end
@@ -201,7 +211,7 @@ function realize_gram_newton(
   scale_adjustment = sqrt(T(length(constrained)))
   tol = scale_adjustment * scaled_tol
 
-  # use newton's method
+  # use Newton's method
   L = copy(guess)
   for step in 0:max_steps
     # evaluate the loss function
@@ -229,8 +239,10 @@ function realize_gram_newton(
       deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
       hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
     end
+    hess = Hermitian(hess)
+    push!(history.hess, hess)
 
-    # compute the newton step
+    # compute the Newton step
     step = hess \ reshape(neg_grad, total_dim)
     L += rate * reshape(step, dims)
   end
@@ -239,4 +251,168 @@ function realize_gram_newton(
   L, history
 end
 
+LinearAlgebra.eigen!(A::Symmetric{BigFloat, Matrix{BigFloat}}; sortby::Nothing) =
+  eigen!(Hermitian(A))
+
+function convertnz(type, mat)
+  J, K, values = findnz(mat)
+  sparse(J, K, type.(values))
+end
+
+function realize_gram_optim(
+  gram::SparseMatrixCSC{T, <:Any},
+  guess::Matrix{T}
+) where T <: Number
+  # find the dimension of the search space
+  dims = size(guess)
+  element_dim, construction_dim = dims
+  total_dim = element_dim * construction_dim
+
+  # list the constrained entries of the gram matrix
+  J, K, _ = findnz(gram)
+  constrained = zip(J, K)
+
+  # scale the loss function
+  scale_adjustment = length(constrained)
+
+  function loss(L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    dot(Δ_proj, Δ_proj) / scale_adjustment
+  end
+
+  function loss_grad!(storage, L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    storage .= reshape(-4*Q*L*Δ_proj, total_dim) / scale_adjustment
+  end
+
+  function loss_hess!(storage, L_vec)
+    L = reshape(L_vec, dims)
+    Δ_proj = proj_diff(gram, L'*Q*L)
+    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+    for (j, k) in indices
+      basis_mat = basis_matrix(T, j, k, dims)
+      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj) / scale_adjustment
+      storage[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+    end
+  end
+
+  optimize(
+    loss, loss_grad!, loss_hess!,
+    reshape(guess, total_dim),
+    Newton()
+  )
+end
+
+# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
+# explicit entry of `gram`. use Newton's method starting from `guess`
+function realize_gram(
+  gram::SparseMatrixCSC{T, <:Any},
+  guess::Matrix{T};
+  scaled_tol = 1e-30,
+  min_efficiency = 0.5,
+  init_rate = 1.0,
+  backoff = 0.9,
+  reg_scale = 1.1,
+  max_descent_steps = 200,
+  max_backoff_steps = 110
+) where T <: Number
+  # start history
+  history = DescentHistory{T}()
+
+  # find the dimension of the search space
+  dims = size(guess)
+  element_dim, construction_dim = dims
+  total_dim = element_dim * construction_dim
+
+  # list the constrained entries of the gram matrix
+  J, K, _ = findnz(gram)
+  constrained = zip(J, K)
+
+  # scale the tolerance
+  scale_adjustment = sqrt(T(length(constrained)))
+  tol = scale_adjustment * scaled_tol
+
+  # initialize variables
+  grad_rate = init_rate
+  L = copy(guess)
+
+  # use Newton's method with backtracking and gradient descent backup
+  Δ_proj = proj_diff(gram, L'*Q*L)
+  loss = dot(Δ_proj, Δ_proj)
+  for step in 1:max_descent_steps
+    # stop if the loss is tolerably low
+    if loss < tol
+      break
+    end
+
+    # find the negative gradient of the loss function
+    neg_grad = 4*Q*L*Δ_proj
+
+    # find the Hessian of the loss function
+    hess = Matrix{T}(undef, total_dim, total_dim)
+    indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
+    for (j, k) in indices
+      basis_mat = basis_matrix(T, j, k, dims)
+      neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
+      neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
+      deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
+      hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
+    end
+    hess = Hermitian(hess)
+    push!(history.hess, hess)
+
+    # regularize the Hessian
+    min_eigval = minimum(eigvals(hess))
+    push!(history.positive, min_eigval > 0)
+    if min_eigval <= 0
+      hess -= reg_scale * min_eigval * I
+    end
+    base_step = reshape(hess \ reshape(neg_grad, total_dim), dims)
+    push!(history.base_step, base_step)
+
+    # store the current position, loss, and slope
+    L_last = L
+    loss_last = loss
+    push!(history.scaled_loss, loss / scale_adjustment)
+    push!(history.neg_grad, neg_grad)
+    push!(history.slope, norm(neg_grad))
+
+    # find a good step size using backtracking line search
+    push!(history.stepsize, 0)
+    push!(history.backoff_steps, max_backoff_steps)
+    empty!(history.last_line_L)
+    empty!(history.last_line_loss)
+    rate = one(T)
+    step_success = false
+    for backoff_steps in 0:max_backoff_steps
+      history.stepsize[end] = rate
+      L = L_last + rate * base_step
+      Δ_proj = proj_diff(gram, L'*Q*L)
+      loss = dot(Δ_proj, Δ_proj)
+      improvement = loss_last - loss
+      push!(history.last_line_L, L)
+      push!(history.last_line_loss, loss / scale_adjustment)
+      if improvement >= min_efficiency * rate * dot(neg_grad, base_step)
+        history.backoff_steps[end] = backoff_steps
+        step_success = true
+        break
+      end
+      rate *= backoff
+    end
+
+    # if we've hit a wall, quit
+    if !step_success
+      return L_last, false, history
+    end
+  end
+
+  # return the factorization and its history
+  push!(history.scaled_loss, loss / scale_adjustment)
+  L, true, history
+end
+
 end
\ No newline at end of file
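
The pivotal addition in `Engine.jl` is the regularization step in `realize_gram`: before each Newton solve, the smallest eigenvalue of the Hessian is checked, and an indefinite Hessian is shifted by `reg_scale` times that eigenvalue. A minimal standalone sketch of the idea, with a toy 2×2 matrix and values invented here for illustration (not taken from the patch); `GenericLinearAlgebra` is what makes `eigvals` available for `BigFloat` matrices:

```julia
using LinearAlgebra
using GenericLinearAlgebra  # extends eigvals/eigen to BigFloat element types

hess = Hermitian(BigFloat[2 0; 0 -1])  # indefinite: eigenvalues 2 and -1
reg_scale = big"1.1"
min_eigval = minimum(eigvals(hess))
if min_eigval <= 0
  # shift the spectrum upward, as `realize_gram` does, so the Newton system
  # `hess \ neg_grad` is solvable and its solution points downhill
  hess -= reg_scale * min_eigval * I  # eigenvalues become 0.1 and 3.1
end
@assert isposdef(hess)
```

Overshooting the shift by a factor `reg_scale > 1` leaves the regularized Hessian strictly positive-definite rather than merely singular.
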
diff --git a/engine-proto/gram-test/circles-in-triangle.jl b/engine-proto/gram-test/circles-in-triangle.jl
index b031711..12975c2 100644
--- a/engine-proto/gram-test/circles-in-triangle.jl
+++ b/engine-proto/gram-test/circles-in-triangle.jl
@@ -81,16 +81,28 @@ guess = hcat(
 =#
 
 # complete the gram matrix using gradient descent followed by Newton's method
+#=
 L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
 L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9)
 L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol)
+=#
+L, success, history = Engine.realize_gram(gram, guess)
 completed_gram = L'*Engine.Q*L
 println("Completed Gram matrix:\n")
 display(completed_gram)
 
+#=
 println(
   "\nSteps: ", size(history.scaled_loss, 1),
   " + ", size(history_pol.scaled_loss, 1),
   " + ", size(history_pol2.scaled_loss, 1)
 )
-println("Loss: ", history_pol2.scaled_loss[end], "\n")
\ No newline at end of file
+println("Loss: ", history_pol2.scaled_loss[end], "\n")
+=#
+if success
+  println("\nTarget accuracy achieved!")
+else
+  println("\nFailed to reach target accuracy")
+end
+println("Steps: ", size(history.scaled_loss, 1))
+println("Loss: ", history.scaled_loss[end], "\n")
\ No newline at end of file
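
Since `realize_gram` now records the Hessian, the positivity check, and each base step in the enlarged `DescentHistory`, a test script like the one above can also report how the descent went. A hypothetical follow-up snippet, assuming the `history` returned by the call above (these diagnostics are not part of the patch):

```julia
# count the steps where the Hessian needed regularizing, and see how much
# backtracking the line search had to do
println("Steps with indefinite Hessian: ", count(!, history.positive))
println("Total backoff steps: ", sum(history.backoff_steps))
```
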
diff --git a/engine-proto/gram-test/overlapping-pyramids.jl b/engine-proto/gram-test/overlapping-pyramids.jl
index 51606e1..ee4c1fc 100644
--- a/engine-proto/gram-test/overlapping-pyramids.jl
+++ b/engine-proto/gram-test/overlapping-pyramids.jl
@@ -47,17 +47,30 @@ guess = hcat(
   Engine.rand_on_shell(fill(BigFloat(-1), 2))
 )
 
-# complete the gram matrix using gradient descent followed by Newton's method
+# complete the gram matrix
+#=
 L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
 L_pol, history_pol = Engine.realize_gram_newton(gram, L)
+=#
+L, success, history = Engine.realize_gram(gram, guess)
 completed_gram = L'*Engine.Q*L
 println("Completed Gram matrix:\n")
 display(completed_gram)
+#=
 println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1))
 println("Loss: ", history_pol.scaled_loss[end], "\n")
+=#
+if success
+  println("\nTarget accuracy achieved!")
+else
+  println("\nFailed to reach target accuracy")
+end
+println("Steps: ", size(history.scaled_loss, 1))
+println("Loss: ", history.scaled_loss[end], "\n")
 
 # === algebraic check ===
 
+#=
 R, gens = polynomial_ring(AbstractAlgebra.Rationals{BigInt}(), ["x", "t₁", "t₂", "t₃"])
 x = gens[1]
 t = gens[2:4]
@@ -85,3 +98,4 @@ x_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[1], [2], [indep_val]))
 t₂_constraint = 25//16 * to_univariate(S, evaluate(rank_constraints[3], [2], [indep_val]))
 x_vals = PolynomialRoots.roots(x_constraint.coeffs)
 t₂_vals = PolynomialRoots.roots(t₂_constraint.coeffs)
+=#
\ No newline at end of file
diff --git a/engine-proto/gram-test/sphere-in-tetrahedron.jl b/engine-proto/gram-test/sphere-in-tetrahedron.jl
index dc5588c..273d3ad 100644
--- a/engine-proto/gram-test/sphere-in-tetrahedron.jl
+++ b/engine-proto/gram-test/sphere-in-tetrahedron.jl
@@ -33,10 +33,18 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[
   1 1 1 1 1
 ] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5))
 
-# complete the gram matrix using Newton's method
+# complete the gram matrix
+#=
 L, history = Engine.realize_gram_newton(gram, guess)
+=#
+L, success, history = Engine.realize_gram(gram, guess)
 completed_gram = L'*Engine.Q*L
 println("Completed Gram matrix:\n")
 display(completed_gram)
-println("\nSteps: ", size(history.scaled_loss, 1))
+if success
+  println("\nTarget accuracy achieved!")
+else
+  println("\nFailed to reach target accuracy")
+end
+println("Steps: ", size(history.scaled_loss, 1))
 println("Loss: ", history.scaled_loss[end], "\n")
\ No newline at end of file
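
None of the three test scripts exercise the Optim-backed solver yet. A hypothetical invocation, assuming `gram` and `guess` built as in the scripts above; unlike `realize_gram`, `realize_gram_optim` returns the `Optim.optimize` result object, so the minimizer has to be reshaped back into matrix form:

```julia
using Optim  # for the result accessors

result = Engine.realize_gram_optim(gram, guess)
L_opt = reshape(Optim.minimizer(result), size(guess))
println("Converged: ", Optim.converged(result))
println("Scaled loss: ", Optim.minimum(result))
```
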