Correct improvement threshold by using unit step
Our formula for the improvement threshold works when the step size is an absolute distance. However, in commit `4d5ea06`, the step size was measured relative to the current gradient instead. This commit scales the base step to unit length, so now the step size really is an absolute distance.
commit d538cbf716
parent 4d5ea062a3
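As a minimal sketch of the distinction the message draws (separate from `realize_gram` itself, with made-up vectors and an illustrative `stepsize`), normalizing the base step makes the step length equal to `stepsize` rather than `stepsize` times the gradient's magnitude:

```julia
using LinearAlgebra

# an example negative gradient with norm 5, and a base step size
neg_grad = [3.0, 4.0]
stepsize = 0.1

# step measured relative to the gradient: its length is stepsize * norm(neg_grad)
step_rel = stepsize * neg_grad
println(norm(step_rel))   # 0.5

# step along the unit-length direction: its length is exactly stepsize
dir = neg_grad / norm(neg_grad)
step_abs = stepsize * dir
println(norm(step_abs))   # 0.1
```

Only in the second case does a threshold that treats `stepsize` as a distance behave as intended.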
@@ -120,6 +120,7 @@ function realize_gram(
         # find negative gradient of loss function
         neg_grad = 4*Q*L*Δ_proj
         slope = norm(neg_grad)
+        dir = neg_grad / slope
 
         # store current position, loss, and slope
         L_last = L
@@ -135,7 +136,7 @@ function realize_gram(
         empty!(history.last_line_loss)
         for backoff_steps in 0:max_backoff_steps
             history.stepsize[end] = stepsize
-            L = L_last + stepsize * neg_grad
+            L = L_last + stepsize * dir
             Δ_proj = proj_diff(gram, L'*Q*L)
             loss = dot(Δ_proj, Δ_proj)
             improvement = loss_last - loss