Compare commits

..

6 Commits

Author SHA1 Message Date
Aaron Fenyes
3910b9f740 Use Newton's method for polishing 2024-07-11 13:43:52 -07:00
Aaron Fenyes
d538cbf716 Correct improvement threshold by using unit step
Our formula for the improvement threshold works when the step size is
an absolute distance. However, in commit `4d5ea06`, the step size was
measured relative to the current gradient instead. This commit scales
the base step to unit length, so now the step size really is an absolute
distance. (A worked sketch of this test appears after the commit list.)
2024-07-10 23:31:44 -07:00
Aaron Fenyes
4d5ea062a3 Record gradient and last line search in history 2024-07-09 15:00:13 -07:00
Aaron Fenyes
5652719642 Require triangle sides to be planar 2024-07-09 14:10:23 -07:00
Aaron Fenyes
f84d475580 Visualize neighborhoods of global minima 2024-07-09 14:01:30 -07:00
Aaron Fenyes
77bc124170 Change loss function to match gradient 2024-07-09 14:00:24 -07:00
5 changed files with 241 additions and 30 deletions
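As a worked sketch of the test described in commit d538cbf716 (an editorial illustration, not code from this repository): once the negative gradient is rescaled to a unit-length direction, `stepsize` is an absolute distance, so the backoff loop can compare the actual decrease in loss against the first-order estimate `target_improvement * stepsize * slope`. The toy loss, starting point, and parameter values below are assumptions chosen only to keep the sketch self-contained.

using LinearAlgebra

f(x) = dot(x, x)    # toy loss
grad(x) = 2x        # its gradient

function backtrack(x; stepsize = 1.0, target_improvement = 0.5, backoff = 0.9, max_backoff_steps = 110)
    neg_grad = -grad(x)
    slope = norm(neg_grad)
    dir = neg_grad / slope    # unit-length base step, as introduced in d538cbf716
    loss_last = f(x)
    for _ in 0:max_backoff_steps
        x_new = x + stepsize * dir
        improvement = loss_last - f(x_new)
        # with a unit direction, stepsize * slope is the first-order predicted decrease
        if improvement >= target_improvement * stepsize * slope
            return x_new, stepsize
        end
        stepsize *= backoff
    end
    return x, zero(stepsize)    # no acceptable step found
end

x_new, accepted_stepsize = backtrack([3.0, 4.0])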

View File

@@ -51,11 +51,21 @@ end
# the Lorentz form
Q = diagm([1, 1, 1, 1, -1])
# project a matrix onto the subspace of matrices whose entries vanish away
# from the given indices
function proj_to_entries(mat, indices)
result = zeros(size(mat))
for (j, k) in indices
result[j, k] = mat[j, k]
end
result
end
# the difference between the matrices `target` and `attempt`, projected onto the
# subspace of matrices whose entries vanish at each empty index of `target`
function proj_diff(target::SparseMatrixCSC{T, <:Any}, attempt::Matrix{T}) where T
J, K, values = findnz(target)
result = zeros(size(target)...)
result = zeros(size(target))
for (j, k, val) in zip(J, K, values)
result[j, k] = val - attempt[j, k]
end
@@ -65,23 +75,29 @@ end
# a type for keeping track of gradient descent history
struct DescentHistory{T}
scaled_loss::Array{T}
neg_grad::Array{Matrix{T}}
slope::Array{T}
stepsize::Array{T}
backoff_steps::Array{Int64}
last_line_L::Array{Matrix{T}}
last_line_loss::Array{T}
function DescentHistory{T}(
scaled_loss = Array{T}(undef, 0),
neg_grad = Array{Matrix{T}}(undef, 0),
slope = Array{T}(undef, 0),
stepsize = Array{T}(undef, 0),
backoff_steps = Int64[]
backoff_steps = Int64[],
last_line_L = Array{Matrix{T}}(undef, 0),
last_line_loss = Array{T}(undef, 0)
) where T
new(scaled_loss, slope, stepsize, backoff_steps)
new(scaled_loss, neg_grad, slope, stepsize, backoff_steps, last_line_L, last_line_loss)
end
end
# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
# explicit entry of `gram`. use gradient descent starting from `guess`
function realize_gram(
function realize_gram_gradient(
gram::SparseMatrixCSC{T, <:Any},
guess::Matrix{T};
scaled_tol = 1e-30,
@@ -104,8 +120,8 @@ function realize_gram(
# do gradient descent
Δ_proj = proj_diff(gram, L'*Q*L)
loss = norm(Δ_proj)
for step in 1:max_descent_steps
loss = dot(Δ_proj, Δ_proj)
for _ in 1:max_descent_steps
# stop if the loss is tolerably low
if loss < tol
break
@@ -114,28 +130,39 @@ function realize_gram(
# find negative gradient of loss function
neg_grad = 4*Q*L*Δ_proj
slope = norm(neg_grad)
dir = neg_grad / slope
# store current position, loss, and slope
L_last = L
loss_last = loss
push!(history.scaled_loss, loss / scale_adjustment)
push!(history.neg_grad, neg_grad)
push!(history.slope, slope)
# find a good step size using backtracking line search
push!(history.stepsize, 0)
push!(history.backoff_steps, max_backoff_steps)
empty!(history.last_line_L)
empty!(history.last_line_loss)
for backoff_steps in 0:max_backoff_steps
history.stepsize[end] = stepsize
L = L_last + stepsize * neg_grad
L = L_last + stepsize * dir
Δ_proj = proj_diff(gram, L'*Q*L)
loss = norm(Δ_proj)
loss = dot(Δ_proj, Δ_proj)
improvement = loss_last - loss
push!(history.last_line_L, L)
push!(history.last_line_loss, loss / scale_adjustment)
if improvement >= target_improvement * stepsize * slope
history.backoff_steps[end] = backoff_steps
break
end
stepsize *= backoff
end
# [DEBUG] if we've hit a wall, quit
if history.backoff_steps[end] == max_backoff_steps
break
end
end
# return the factorization and its history
@@ -143,4 +170,73 @@ function realize_gram(
L, history
end
function basis_matrix(::Type{T}, j, k, dims) where T
result = zeros(T, dims)
result[j, k] = one(T)
result
end
# seek a matrix `L` for which `L'QL` matches the sparse matrix `gram` at every
# explicit entry of `gram`. use Newton's method starting from `guess`
function realize_gram_newton(
gram::SparseMatrixCSC{T, <:Any},
guess::Matrix{T};
scaled_tol = 1e-30,
rate = 1,
max_steps = 100
) where T <: Number
# start history
history = DescentHistory{T}()
# find the dimension of the search space
dims = size(guess)
element_dim, construction_dim = dims
total_dim = element_dim * construction_dim
# list the constrained entries of the gram matrix
J, K, _ = findnz(gram)
constrained = zip(J, K)
# scale the tolerance
scale_adjustment = sqrt(T(length(constrained)))
tol = scale_adjustment * scaled_tol
# use Newton's method
L = copy(guess)
for step in 0:max_steps
# evaluate the loss function
Δ_proj = proj_diff(gram, L'*Q*L)
loss = dot(Δ_proj, Δ_proj)
# store the current loss
push!(history.scaled_loss, loss / scale_adjustment)
# stop if the loss is tolerably low
if loss < tol || step > max_steps
break
end
# find the negative gradient of loss function
neg_grad = 4*Q*L*Δ_proj
# find the Hessian of the loss function
hess = Matrix{T}(undef, total_dim, total_dim)
indices = [(j, k) for k in 1:construction_dim for j in 1:element_dim]
for (j, k) in indices
basis_mat = basis_matrix(T, j, k, dims)
neg_dΔ = basis_mat'*Q*L + L'*Q*basis_mat
neg_dΔ_proj = proj_to_entries(neg_dΔ, constrained)
deriv_grad = 4*Q*(-basis_mat*Δ_proj + L*neg_dΔ_proj)
hess[:, (k-1)*element_dim + j] = reshape(deriv_grad, total_dim)
end
# compute the newton step
step = hess \ reshape(neg_grad, total_dim)
L += rate * reshape(step, dims)
end
# return the factorization and its history
L, history
end
end
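
For readers checking the algebra behind `neg_grad = 4*Q*L*Δ_proj` (and the commit "Change loss function to match gradient"): the following is an editorial sketch, not part of the diff, that verifies the formula against a centered finite difference of the squared loss. `Q` and `proj_diff` are copied from the file above so the snippet stands alone; the small test matrices are made-up data.

using LinearAlgebra
using SparseArrays

Q = diagm([1, 1, 1, 1, -1])

function proj_diff(target, attempt)
    J, K, values = findnz(target)
    result = zeros(size(target))
    for (j, k, val) in zip(J, K, values)
        result[j, k] = val - attempt[j, k]
    end
    result
end

loss(gram, L) = (Δ = proj_diff(gram, L'*Q*L); dot(Δ, Δ))

# a small symmetric target with one explicit off-diagonal entry (made-up data)
gram = sparse([1, 2, 1, 2], [1, 2, 2, 1], [1.0, 1.0, -0.5, -0.5])
L = randn(5, 2)

Δ_proj = proj_diff(gram, L'*Q*L)
neg_grad = 4*Q*L*Δ_proj

# centered finite difference of the loss along a random direction dL
dL = randn(size(L)...)
h = 1e-6
fd = (loss(gram, L + h*dL) - loss(gram, L - h*dL)) / (2h)
println(isapprox(fd, -dot(neg_grad, dL), rtol = 1e-4))    # expected: true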

View File

@@ -0,0 +1,99 @@
include("Engine.jl")
using LinearAlgebra
using SparseArrays
function sphere_in_tetrahedron_shape()
# initialize the partial gram matrix for a sphere inscribed in a regular
# tetrahedron
J = Int64[]
K = Int64[]
values = BigFloat[]
for j in 1:5
for k in 1:5
push!(J, j)
push!(K, k)
if j == k
push!(values, 1)
elseif (j <= 4 && k <= 4)
push!(values, -1/BigFloat(3))
else
push!(values, -1)
end
end
end
gram = sparse(J, K, values)
# plot loss along a slice
loss_lin = []
loss_sq = []
mesh = range(0.9, 1.1, 101)
for t in mesh
L = hcat(
Engine.plane(normalize(BigFloat[ 1, 1, 1]), BigFloat(1)),
Engine.plane(normalize(BigFloat[ 1, -1, -1]), BigFloat(1)),
Engine.plane(normalize(BigFloat[-1, 1, -1]), BigFloat(1)),
Engine.plane(normalize(BigFloat[-1, -1, 1]), BigFloat(1)),
Engine.sphere(BigFloat[0, 0, 0], BigFloat(t))
)
Δ_proj = Engine.proj_diff(gram, L'*Engine.Q*L)
push!(loss_lin, norm(Δ_proj))
push!(loss_sq, dot(Δ_proj, Δ_proj))
end
mesh, loss_lin, loss_sq
end
function circles_in_triangle_shape()
# initialize the partial gram matrix for circles inscribed in a triangle
J = Int64[]
K = Int64[]
values = BigFloat[]
for j in 1:8
for k in 1:8
filled = false
if j == k
push!(values, 1)
filled = true
elseif (j == 1 || k == 1)
push!(values, 0)
filled = true
elseif (j == 2 || k == 2)
push!(values, -1)
filled = true
end
#=elseif (j <= 5 && j != 2 && k == 9 || k == 9 && k <= 5 && k != 2)
push!(values, 0)
filled = true
end=#
if filled
push!(J, j)
push!(K, k)
end
end
end
append!(J, [6, 4, 6, 5, 7, 5, 7, 3, 8, 3, 8, 4])
append!(K, [4, 6, 5, 6, 5, 7, 3, 7, 3, 8, 4, 8])
append!(values, fill(-1, 12))
# plot loss along a slice
loss_lin = []
loss_sq = []
mesh = range(0.99, 1.01, 101)
for t in mesh
L = hcat(
Engine.plane(BigFloat[0, 0, 1], BigFloat(0)),
Engine.sphere(BigFloat[0, 0, 0], BigFloat(t)),
Engine.plane(BigFloat[1, 0, 0], BigFloat(1)),
Engine.plane(BigFloat[cos(2pi/3), sin(2pi/3), 0], BigFloat(1)),
Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)),
Engine.sphere(4//3*BigFloat[-1, 0, 0], BigFloat(1//3)),
Engine.sphere(4//3*BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//3)),
Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3))
)
Δ_proj = Engine.proj_diff(gram, L'*Engine.Q*L)
push!(loss_lin, norm(Δ_proj))
push!(loss_sq, dot(Δ_proj, Δ_proj))
end
mesh, loss_lin, loss_sq
end
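
A hypothetical usage sketch (editorial, not part of the diff): each function above returns a slice mesh and the loss values along it, which can then be plotted to inspect the neighborhood of a global minimum. Plots.jl is an assumption here; any plotting package would do.

using Plots    # assumed available
mesh, loss_lin, loss_sq = sphere_in_tetrahedron_shape()
plot(mesh, Float64.(loss_sq), label = "squared loss")     # convert BigFloat for plotting
plot!(mesh, Float64.(loss_lin), label = "linear loss")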

View File

@@ -1,20 +1,28 @@
include("Engine.jl")
using SparseArrays
using AbstractAlgebra
using PolynomialRoots
# initialize the partial gram matrix for circles inscribed in a triangle
J = Int64[]
K = Int64[]
values = BigFloat[]
for j in 1:8
for k in 1:8
for j in 1:9
for k in 1:9
filled = false
if j == k
push!(values, 1)
push!(values, j < 9 ? 1 : 0)
filled = true
elseif (j == 9)
if (k <= 5 && k != 2)
push!(values, 0)
filled = true
end
elseif (k == 9)
if (j <= 5 && j != 2)
push!(values, 0)
filled = true
end
elseif (j == 1 || k == 1)
push!(values, 0)
filled = true
@@ -47,7 +55,6 @@ gram = sparse(J, K, values)
## guess = Engine.rand_on_shell(fill(BigFloat(-1), 8))
# set initial guess
#=
guess = hcat(
Engine.plane(BigFloat[0, 0, 1], BigFloat(0)),
Engine.sphere(BigFloat[0, 0, 0], BigFloat(1//2)),
@@ -56,9 +63,10 @@ guess = hcat(
Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)),
Engine.sphere(BigFloat[-1, 0, 0], BigFloat(1//5)),
Engine.sphere(BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//5)),
Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5))
Engine.sphere(BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//5)),
BigFloat[0, 0, 0, 1, 1]
)
=#
#=
guess = hcat(
Engine.plane(BigFloat[0, 0, 1], BigFloat(0)),
Engine.sphere(BigFloat[0, 0, 0], BigFloat(0.9)),
@@ -67,13 +75,22 @@ guess = hcat(
Engine.plane(BigFloat[cos(-2pi/3), sin(-2pi/3), 0], BigFloat(1)),
Engine.sphere(4//3*BigFloat[-1, 0, 0], BigFloat(1//3)),
Engine.sphere(4//3*BigFloat[cos(-pi/3), sin(-pi/3), 0], BigFloat(1//3)),
Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3))
Engine.sphere(4//3*BigFloat[cos(pi/3), sin(pi/3), 0], BigFloat(1//3)),
BigFloat[0, 0, 0, 1, 1]
)
=#
# complete the gram matrix using gradient descent
L, history = Engine.realize_gram(gram, guess, max_descent_steps = 200)
# complete the gram matrix using gradient descent followed by Newton's method
L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
L_pol, history_pol = Engine.realize_gram_newton(gram, L, rate = 0.3, scaled_tol = 1e-9)
L_pol2, history_pol2 = Engine.realize_gram_newton(gram, L_pol)
completed_gram = L'*Engine.Q*L
println("Completed Gram matrix:\n")
display(completed_gram)
println("\nSteps: ", size(history.stepsize, 1))
println("Loss: ", history.scaled_loss[end], "\n")
println(
"\nSteps: ",
size(history.scaled_loss, 1),
" + ", size(history_pol.scaled_loss, 1),
" + ", size(history_pol2.scaled_loss, 1)
)
println("Loss: ", history_pol2.scaled_loss[end], "\n")

View File

@@ -47,13 +47,14 @@ guess = hcat(
Engine.rand_on_shell(fill(BigFloat(-1), 2))
)
# complete the gram matrix using gradient descent
L, history = Engine.realize_gram(gram, guess)
# complete the gram matrix using gradient descent followed by Newton's method
L, history = Engine.realize_gram_gradient(gram, guess, scaled_tol = 0.01)
L_pol, history_pol = Engine.realize_gram_newton(gram, L)
completed_gram = L'*Engine.Q*L
println("Completed Gram matrix:\n")
display(completed_gram)
println("\nSteps: ", size(history.stepsize, 1))
println("Loss: ", history.scaled_loss[end], "\n")
println("\nSteps: ", size(history.scaled_loss, 1), " + ", size(history_pol.scaled_loss, 1))
println("Loss: ", history_pol.scaled_loss[end], "\n")
# === algebraic check ===

View File

@@ -1,8 +1,6 @@
include("Engine.jl")
using SparseArrays
using AbstractAlgebra
using PolynomialRoots
using Random
# initialize the partial gram matrix for a sphere inscribed in a regular
@@ -35,10 +33,10 @@ guess = sqrt(1/BigFloat(3)) * BigFloat[
1 1 1 1 1
] + 0.2*Engine.rand_on_shell(fill(BigFloat(-1), 5))
# complete the gram matrix using gradient descent
L, history = Engine.realize_gram(gram, guess)
# complete the gram matrix using Newton's method
L, history = Engine.realize_gram_newton(gram, guess)
completed_gram = L'*Engine.Q*L
println("Completed Gram matrix:\n")
display(completed_gram)
println("\nSteps: ", size(history.stepsize, 1))
println("\nSteps: ", size(history.scaled_loss, 1))
println("Loss: ", history.scaled_loss[end], "\n")