LSLOpt  1.0
LBFGSStorage.hpp
1 #pragma once
2 
3 #include "DefineOutputMacros.hpp"
4 #include "Utils.hpp"
5 
6 
7 namespace LSLOpt {
8 
9 namespace Implementation {
10 
31 template<typename Scalar, typename OutputFunction>
32 struct LBFGSStorage {
33 
42  Eigen::Index n,
43  Eigen::Index m,
44  Scalar epsilon,
45  OutputFunction& output_function);
46 
55  void reset();
56 
68  const Vector<Scalar>& v,
69  const Vector<Scalar>& STv,
70  const Vector<Scalar>& YTv);
71 
81 
90  Scalar calculate_vHv(const Vector<Scalar>& v);
91 
103  const Vector<Scalar>& v,
104  const Vector<Scalar>& STv,
105  const Vector<Scalar>& YTv);
106 
116 
127  Scalar calculate_vBv(const Vector<Scalar>& v);
128 
137  [[deprecated]] Matrix<Scalar> calculate_B();
138 
151  bool update(
152  const Vector<Scalar>& s,
153  const Vector<Scalar>& y,
154  const Vector<Scalar>& g);
155 
165  void resize(Eigen::Index b);
166 
168  Eigen::Index n;
170  Eigen::Index m;
172  Eigen::Index b;
173 
178 
193 
198 
200  Scalar gamma = Scalar{1};
201 
203  Scalar epsilon;
204 
206  OutputFunction& output_function;
207 };
208 
209 template<typename Scalar, typename OutputFunction>
211  Eigen::Index n,
212  Eigen::Index m,
213  Scalar epsilon,
214  OutputFunction& output_function)
215 : n(n)
216 , m(m)
217 , b(0)
218 , epsilon(epsilon)
219 , output_function(output_function)
220 {
221  this->reset();
222 }
223 
224 template<typename Scalar, typename OutputFunction>
226 {
227  b = 0;
228 
229  W = Matrix<Scalar>::Zero(n, 0);
230  M = Matrix<Scalar>::Zero(n, 0);
231 
232  S = Matrix<Scalar>::Zero(n, 0);
233  Y = Matrix<Scalar>::Zero(n, 0);
234  R = Matrix<Scalar>::Zero(0, 0);
235  L = Matrix<Scalar>::Zero(0, 0);
236  D = Matrix<Scalar>::Zero(0, 0);
237  YTY = Matrix<Scalar>::Zero(0, 0);
238  STS = Matrix<Scalar>::Zero(0, 0);
239 
240  LOW = Matrix<Scalar>::Zero(0, 0);
241  UPP = Matrix<Scalar>::Zero(0, 0);
242 
243  // initial scaling of H and B is 1.0
244  gamma = Scalar{1};
245 
246 }
247 
248 template<typename Scalar, typename OutputFunction>
250  const Vector<Scalar>& v,
251  const Vector<Scalar>& STv,
252  const Vector<Scalar>& YTv)
253 {
254  // O(m^2)
255  Vector<Scalar> RSTv = solve_triangular_system_and_check(
256  R, STv, TriangleMatrixType::Upper, epsilon, output_function);
257 
258  // we need these static casts if we're not using `double`
259  Vector<Scalar> RTYTv = solve_triangular_system_and_check(
260  R.transpose(),
261  YTv, TriangleMatrixType::Lower, epsilon, output_function);
262 
263  Vector<Scalar> tmp = solve_triangular_system_and_check(
264  R.transpose(),
265  (D + gamma * YTY) * RSTv,
266  TriangleMatrixType::Lower, epsilon, output_function);
267 
268  // 2*n*m
269  Vector<Scalar> Hv = gamma * v + S * (tmp - gamma * RTYTv) + gamma * Y * (-RSTv);
270 
271  return Hv;
272 }
273 
274 template<typename Scalar, typename OutputFunction>
276 {
277  return calculate_Hv(v, S.transpose() * v, Y.transpose() * v);
278 }
279 
280 template<typename Scalar, typename OutputFunction>
282 {
283  return v.dot(calculate_Hv(v));
284 }
285 
286 template<typename Scalar, typename OutputFunction>
288  const Vector<Scalar>& v,
289  const Vector<Scalar>& STv,
290  const Vector<Scalar>& YTv)
291 {
292  // O(m)
293  Vector<Scalar> p (2*b);
294  p << YTv.head(b), Scalar{1} / gamma * STv.head(b);
295 
296  Vector<Scalar> p_ = solve_triangular_system_and_check(
297  LOW, p, TriangleMatrixType::Lower, epsilon, output_function);
298 
299  // O(m^2)
300  p = solve_triangular_system_and_check(
301  UPP, p_, TriangleMatrixType::Upper, epsilon, output_function);
302 
303  // 2*n*m
304  Vector<Scalar> Bv = Scalar{1} / gamma * v - Y * p.head(b) - Scalar{1} / gamma * S * p.tail(b);
305 
306  return Bv;
307 }
308 
309 template<typename Scalar, typename OutputFunction>
311 {
312  return calculate_Bv(v, S.transpose() * v, Y.transpose() * v);
313 }
314 
315 template<typename Scalar, typename OutputFunction>
317 {
318  return v.dot(calculate_Bv(v));
319 }
320 
321 template<typename Scalar, typename OutputFunction>
323 {
324  return Scalar{1} / gamma * Matrix<Scalar>::Identity(n, n) - W * M * W.transpose();
325 }
326 
327 template<typename Scalar, typename OutputFunction>
329  const Vector<Scalar>& s,
330  const Vector<Scalar>& y,
331  const Vector<Scalar>& g)
332 {
333  // b is the actual size of the history
334  b = std::min(m, b + 1);
335  // if the history is not full (yet), increase the size
336  // if the history is already full, move the content
337  this->resize(b);
338 
339  // O(n)
340  S.col(b-1) = s;
341  Y.col(b-1) = y;
342 
343  // O(n*m)
344  // set the b-1 column of R to the scalar product of
345  // each column of S with the b-1 column of Y
346  R.row(b-1).head(b-1).setZero();
347  R.col(b-1).noalias() = S.transpose() * Y.col(b-1);
348 
349  // O(n*m)
350  // set the b-1 row of L to the scalar product of
351  // each column of Y with the b-1 column of S
352  // (the diagonal is 0.0 !)
353  L.row(b-1).head(b-1).noalias() = Y.transpose().topLeftCorner(b-1, n) * S.col(b-1);
354  L.col(b-1).setZero();
355 
356  // O(n*m)
357  YTY.col(b-1).noalias() = Y.transpose() * Y.col(b-1);
358  YTY.row(b-1).head(b-1) = YTY.col(b-1).head(b-1).eval();
359 
360  // O(m)
361  D.col(b-1).setZero();
362  D.row(b-1).setZero();
363  D(b - 1, b - 1) = R(b - 1, b - 1);
364 
365  // O(n*m)
366  STS.col(b-1).noalias() = S.transpose() * S.col(b-1);
367  STS.row(b-1).head(b-1) = STS.col(b-1).head(b-1).eval();
368 
369  // O(1)
370  gamma = R(b - 1, b - 1) / YTY(b - 1, b - 1);
371 
372  // convert the matrix to an explicit diagonal matrix to make the inverse efficient
373  DiagonalMatrix<Scalar> dD(D.diagonal());
374 
375  // O(m)
376  DiagonalMatrix<Scalar> dDI = dD.inverse();
377 
378  // O(m)
379  DiagonalMatrix<Scalar> dD_sq(b);
380  dD_sq.diagonal().array() = dD.diagonal().array().sqrt();
381 
382  // O(m), inverting diagonal matrix should be more efficient than sqrt
383  DiagonalMatrix<Scalar> dD_sqI = dD_sq.inverse();
384 
385  Matrix<Scalar> to_factorize = Scalar{1} / gamma * STS + L * dDI * L.transpose();
386  // compute cholesky factorization, O(m^3)
387  Eigen::LLT<Matrix<Scalar>> llt (to_factorize);
388  Matrix<Scalar> J = llt.matrixL();
389  Matrix<Scalar> JT = llt.matrixU();
390 
391  /*
392  * This is a step that can potentially suffer from (and also uncover)
393  * numerical instabilities and problems, like indefinite matrices
394  * (i.e. the initial matrix could have very small negative or very small
395  * complex eigenvalues, that should not be there).
396  */
397 
398  // check if cholesky factorization was successful
399  Matrix<Scalar> JJT = J * JT;
400  Scalar re{0};
401  // if the absolute error is != 0.0, then one of these values is also != 0.0
402  if (to_factorize.norm() != Scalar{0}) {
403  re = (to_factorize - JJT).norm() / to_factorize.norm();
404  }
405  else if (JJT.norm() != Scalar{0}) {
406  re = (to_factorize - JJT).norm() / JJT.norm();
407  }
408 
409  if (re > epsilon) {
410  this->reset();
412  "Relative error of Cholesky decomposition is " << re
413  << " and larger than " << epsilon);
414  // reset and terminate with error if we have a problem
415  return false;
416  }
417  else {
419  "Relative error of Cholesky decomposition is " << re
420  << " and smaller or equal than " << epsilon);
421  }
422 
423  // o(m^2) (because diagonal)
424  Matrix<Scalar> D_sqILT = dD_sqI * L.transpose();
425 
426  // o(m^2) (because diagonal)
427  Matrix<Scalar> LD_sqI = L * dD_sqI;
428 
429  // replace this by map!
430 
431  // O(m^2)
432  LOW = Matrix<Scalar>::Zero(2*b, 2*b);
433  LOW.topLeftCorner(b, b).diagonal() = dD_sq.diagonal();
434  LOW.bottomLeftCorner(b, b) = -LD_sqI;
435  LOW.bottomRightCorner(b, b) = J;
436 
437  // O(m^2)
438  UPP = Matrix<Scalar>::Zero(2*b, 2*b);
439  UPP.topLeftCorner(b, b).diagonal() = -dD_sq.diagonal();
440  UPP.topRightCorner(b, b) = D_sqILT;
441  UPP.bottomRightCorner(b, b) = JT;
442 
443  // O(m^2)
444  W = Matrix<Scalar>::Zero(n, 2 * b);
445  W.topLeftCorner(n, b) = Y;
446  W.topRightCorner(n, b) = 1.0 / gamma * S;
447 
448  // O(m^2)
449  Matrix<Scalar> M_ = Matrix<Scalar>::Zero(2 * b, 2 * b);
450  M_.topLeftCorner(b, b) = -D;
451  M_.topRightCorner(b, b) = L.transpose();
452  M_.bottomLeftCorner(b, b) = L;
453  M_.bottomRightCorner(b, b) = 1.0 / gamma * STS;
454 
455  /*
456  * This is a step that can potentially suffer from (and also uncover)
457  * numerical instabilities and problems, like indefinite matrices
458  * (i.e. the initial matrix could have very small negative or very small
459  * complex eigenvalues, that should not be there).
460  */
461 
462  // O(m^3)
463  M = Eigen::FullPivLU<Matrix<Scalar>>(M_).inverse();
464 
465  Matrix<Scalar> identity = Matrix<Scalar>::Identity(2*b, 2*b);
466  Matrix<Scalar> should_be_identity = M * M_;
467 
468  re = (identity - should_be_identity).norm() / identity.norm();
469 
470  if (re > epsilon) {
471  this->reset();
473  "Relative error of matrix inversion is " << re
474  << " and larger than " << epsilon);
475  // reset and return false in case of such an error
476  return false;
477  }
478  else {
480  "Relative error of matrix inversion is " << re
481  << " and smaller or equal than " << epsilon);
482  }
483 
484  return true;
485 }
486 
487 template<typename Scalar, typename OutputFunction>
489 {
490  if (S.cols() < b) {
491  this->b = b;
492  // S is a n x b matrix
493  S.conservativeResize(Eigen::NoChange, b);
494  // Y is a n x b matrix
495  Y.conservativeResize(Eigen::NoChange, b);
496  // R is a b x b matrix (and upper triangle)
497  R.conservativeResize(b, b);
498  // L is a b x b matrix (and lower triangle)
499  L.conservativeResize(b, b);
500  // D is a b x b matrix
501  D.conservativeResize(b, b);
502  // YY is a b x b matrix
503  YTY.conservativeResize(b, b);
504  // SS is a b x b matrix
505  STS.conservativeResize(b, b);
506  }
507  else {
508  S.topLeftCorner(n, b-1) = S.topRightCorner(n, b-1);
509  Y.topLeftCorner(n, b-1) = Y.topRightCorner(n, b-1);
510 
511  R.topLeftCorner(b-1, b-1) = R.bottomRightCorner(b-1, b-1);
512  L.topLeftCorner(b-1, b-1) = L.bottomRightCorner(b-1, b-1);
513  D.topLeftCorner(b-1, b-1) = D.bottomRightCorner(b-1, b-1);
514  YTY.topLeftCorner(b-1, b-1) = YTY.bottomRightCorner(b-1, b-1);
515  STS.topLeftCorner(b-1, b-1) = STS.bottomRightCorner(b-1, b-1);
516  }
517 }
518 
519 }
520 
521 }
Matrix< Scalar > YTY
matrix storing
Definition: LBFGSStorage.hpp:190
show status messages
Scalar gamma
current scaling of the inverse Hessian
Definition: LBFGSStorage.hpp:200
Matrix< Scalar > L
helper matrix
Definition: LBFGSStorage.hpp:186
OutputFunction & output_function
output function for status messages.
Definition: LBFGSStorage.hpp:206
Eigen::Index b
Current number of stored update pairs.
Definition: LBFGSStorage.hpp:172
Matrix< Scalar > S
matrix storing the last vectors
Definition: LBFGSStorage.hpp:180
Matrix< Scalar > R
helper matrix
Definition: LBFGSStorage.hpp:184
Matrix< Scalar > UPP
working matrix
Definition: LBFGSStorage.hpp:197
LBFGSStorage(Eigen::Index n, Eigen::Index m, Scalar epsilon, OutputFunction &output_function)
Construct a BFGS storage.
Definition: LBFGSStorage.hpp:210
void resize(Eigen::Index b)
Function that resizes the storage to b.
Definition: LBFGSStorage.hpp:488
Matrix< Scalar > LOW
working matrix
Definition: LBFGSStorage.hpp:195
Eigen::Index m
Maximal number of update pairs to store.
Definition: LBFGSStorage.hpp:170
Eigen::DiagonalMatrix< Scalar, Eigen::Dynamic, Eigen::Dynamic > DiagonalMatrix
Diagonal matrix type used.
Definition: Types.hpp:33
Eigen::Index n
Dimensionality of the problem.
Definition: LBFGSStorage.hpp:168
Matrix< Scalar > STS
matrix storing
Definition: LBFGSStorage.hpp:192
Matrix< Scalar > calculate_B()
Calculate the Hessian matrix approximation .
Definition: LBFGSStorage.hpp:322
bool update(const Vector< Scalar > &s, const Vector< Scalar > &y, const Vector< Scalar > &g)
Update the (inverse) Hessian approximation.
Definition: LBFGSStorage.hpp:328
Eigen::Matrix< Scalar, Eigen::Dynamic, 1 > Vector
Vector type used.
Definition: Types.hpp:15
Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::Dynamic > Matrix
Matrix type used.
Definition: Types.hpp:24
Matrix< Scalar > W
working matrix
Definition: LBFGSStorage.hpp:175
Scalar calculate_vHv(const Vector< Scalar > &v)
Calculate normalized scalar product of vector with inverse Hessian approximation ...
Definition: LBFGSStorage.hpp:281
Scalar epsilon
numerical stability check epsilon
Definition: LBFGSStorage.hpp:203
Matrix< Scalar > Y
matrix storing the last vectors
Definition: LBFGSStorage.hpp:182
BFGS optimizations.
Definition: BFGS.hpp:24
Matrix< Scalar > M
working matrix
Definition: LBFGSStorage.hpp:177
L-BFGS storage.
Definition: LBFGSStorage.hpp:32
void reset()
Reset the (inverse) Hessian approximation to identity matrix.
Definition: LBFGSStorage.hpp:225
Vector< Scalar > calculate_Bv(const Vector< Scalar > &v, const Vector< Scalar > &STv, const Vector< Scalar > &YTv)
Calculate product of Hessian approximation with vector .
Definition: LBFGSStorage.hpp:287
Vector< Scalar > calculate_Hv(const Vector< Scalar > &v, const Vector< Scalar > &STv, const Vector< Scalar > &YTv)
Calculate product of inverse Hessian approximation with vector .
Definition: LBFGSStorage.hpp:249
Scalar calculate_vBv(const Vector< Scalar > &v)
Calculate normalized scalar product of vector with Hessian approximation .
Definition: LBFGSStorage.hpp:316
Matrix< Scalar > D
helper matrix
Definition: LBFGSStorage.hpp:188