en/latest/LBFGSStorage_8hpp_source.html

 #pragma once

 #include "DefineOutputMacros.hpp"
 #include "Utils.hpp"


 namespace LSLOpt {

 namespace Implementation {

 /**
  * L-BFGS storage.
  *
  * Keeps the last (at most `m`) update pairs `s`, `y` as columns of `S` and `Y`
  * together with the small helper matrices derived from them, and offers
  * products of a vector with the Hessian approximation `B` and with the
  * inverse Hessian approximation `H`.
  *
  * @tparam Scalar         Floating point type used for all matrices and vectors.
  * @tparam OutputFunction Type of the callback handed to the output macros.
  */
 template<typename Scalar, typename OutputFunction>
 struct LBFGSStorage {

     /// Construct an (empty) storage for an `n`-dimensional problem that keeps
     /// at most `m` update pairs; `epsilon` is the threshold of the numerical
     /// checks, `output_function` receives status messages (stored by reference).
     LBFGSStorage(Eigen::Index n, Eigen::Index m, Scalar epsilon, OutputFunction& output_function);

     /// Drop all stored update pairs and set the scaling `gamma` back to 1.
     void reset();

     /// Product of the inverse Hessian approximation with `v`, given the
     /// already computed products `STv = S^T v` and `YTv = Y^T v`.
     Vector<Scalar> calculate_Hv(const Vector<Scalar>& v, const Vector<Scalar>& STv, const Vector<Scalar>& YTv);

     /// Product of the inverse Hessian approximation with `v`.
     Vector<Scalar> calculate_Hv(const Vector<Scalar>& v);

     /// Scalar product of `v` with `H v`.
     Scalar calculate_vHv(const Vector<Scalar>& v);

     /// Product of the Hessian approximation with `v`, given the already
     /// computed products `STv = S^T v` and `YTv = Y^T v`.
     Vector<Scalar> calculate_Bv(const Vector<Scalar>& v, const Vector<Scalar>& STv, const Vector<Scalar>& YTv);

     /// Product of the Hessian approximation with `v`.
     Vector<Scalar> calculate_Bv(const Vector<Scalar>& v);

     /// Scalar product of `v` with `B v`.
     Scalar calculate_vBv(const Vector<Scalar>& v);

     /// Dense `n x n` Hessian approximation built from `W` and `M`.
     [[deprecated]] Matrix<Scalar> calculate_B();

     /// Append the update pair (`s`, `y`) and rebuild all derived matrices.
     /// Returns `false` (after calling `reset()`) if one of the numerical
     /// checks fails, `true` otherwise.
     bool update(const Vector<Scalar>& s, const Vector<Scalar>& y, const Vector<Scalar>& g);

     /// Make room for `b` update pairs: grow the matrices or, if they already
     /// have `b` columns, shift out the oldest pair.
     void resize(Eigen::Index b);

     // --- dimensions -------------------------------------------------------

     /// Dimensionality of the problem.
     Eigen::Index n;
     /// Maximal number of update pairs to store.
     Eigen::Index m;
     /// Current number of stored update pairs.
     Eigen::Index b;

     // --- dense representation used by calculate_B() -----------------------

     /// Working matrix, `n x 2b`.
     Matrix<Scalar> W;
     /// Working matrix, inverse of the `2b x 2b` middle matrix.
     Matrix<Scalar> M;

     // --- history and helper matrices --------------------------------------

     /// `n x b` matrix whose columns are the stored `s` vectors.
     Matrix<Scalar> S;
     /// `n x b` matrix whose columns are the stored `y` vectors.
     Matrix<Scalar> Y;
     /// `b x b` upper triangular helper matrix.
     Matrix<Scalar> R;
     /// `b x b` lower triangular helper matrix with zero diagonal.
     Matrix<Scalar> L;
     /// `b x b` diagonal helper matrix (diagonal of `R`).
     Matrix<Scalar> D;
     /// `b x b` matrix storing `Y^T Y`.
     Matrix<Scalar> YTY;
     /// `b x b` matrix storing `S^T S`.
     Matrix<Scalar> STS;

     // --- triangular factors used by calculate_Bv() ------------------------

     /// `2b x 2b` lower triangular working matrix.
     Matrix<Scalar> LOW;
     /// `2b x 2b` upper triangular working matrix.
     Matrix<Scalar> UPP;

     /// Current scaling of the inverse Hessian.
     Scalar gamma = Scalar{1};

     /// Threshold used by the numerical stability checks.
     Scalar epsilon;

     /// Output function for status messages.
     OutputFunction& output_function;
 };

 /**
  * Construct a BFGS storage.
  *
  * Stores the problem size `n`, the history capacity `m`, the check threshold
  * `epsilon` and a reference to `output_function`, then brings every matrix
  * into its empty state via reset().
  */
 template<typename Scalar, typename OutputFunction>
 LBFGSStorage<Scalar, OutputFunction>::LBFGSStorage(
     Eigen::Index n,
     Eigen::Index m,
     Scalar epsilon,
     OutputFunction& output_function)
 : n(n),
   m(m),
   b(0),
   epsilon(epsilon),
   output_function(output_function)
 {
   // all matrices start out empty, the scaling starts at 1
   reset();
 }

 /**
  * Reset the (inverse) Hessian approximation to the identity matrix.
  *
  * Empties the history (`b = 0`), shrinks every stored matrix to its
  * zero-column / zero-size shape and sets the scaling `gamma` back to 1.
  */
 template<typename Scalar, typename OutputFunction>
 void LBFGSStorage<Scalar, OutputFunction>::reset()
 {
   b = 0;

   // W is n x 2b and M is 2b x 2b (see update()); with b == 0 that is
   // n x 0 and 0 x 0. M must not be n x 0, otherwise the product
   // W * M * W^T in calculate_B() has mismatching inner dimensions.
   W = Matrix<Scalar>::Zero(n, 0);
   M = Matrix<Scalar>::Zero(0, 0);

   // history of update pairs, one column per pair
   S = Matrix<Scalar>::Zero(n, 0);
   Y = Matrix<Scalar>::Zero(n, 0);

   // b x b helper matrices
   R = Matrix<Scalar>::Zero(0, 0);
   L = Matrix<Scalar>::Zero(0, 0);
   D = Matrix<Scalar>::Zero(0, 0);
   YTY = Matrix<Scalar>::Zero(0, 0);
   STS = Matrix<Scalar>::Zero(0, 0);

   // 2b x 2b triangular factors
   LOW = Matrix<Scalar>::Zero(0, 0);
   UPP = Matrix<Scalar>::Zero(0, 0);

   // initial scaling of H and B is 1.0
   gamma = Scalar{1};
 }

 /**
  * Calculate the product of the inverse Hessian approximation H with vector v.
  *
  * @param v   vector to multiply
  * @param STv precomputed product S^T v
  * @param YTv precomputed product Y^T v
  * @return    H v
  *
  * Each triangular solve goes through solve_triangular_system_and_check(),
  * which receives `epsilon` and `output_function`.
  */
 template<typename Scalar, typename OutputFunction>
 Vector<Scalar> LBFGSStorage<Scalar, OutputFunction>::calculate_Hv(
     const Vector<Scalar>& v,
     const Vector<Scalar>& STv,
     const Vector<Scalar>& YTv)
 {
   // R^{-1} S^T v, O(m^2)
   Vector<Scalar> r_inv_stv = solve_triangular_system_and_check(
       R, STv, TriangleMatrixType::Upper, epsilon, output_function);

   // R^{-T} Y^T v
   Vector<Scalar> rt_inv_ytv = solve_triangular_system_and_check(
       R.transpose(),
       YTv, TriangleMatrixType::Lower, epsilon, output_function);

   // R^{-T} (D + gamma Y^T Y) R^{-1} S^T v
   Vector<Scalar> s_coeff = solve_triangular_system_and_check(
       R.transpose(),
       (D + gamma * YTY) * r_inv_stv,
       TriangleMatrixType::Lower, epsilon, output_function);

   // combine with the history matrices, 2*n*m
   return gamma * v + S * (s_coeff - gamma * rt_inv_ytv) + gamma * Y * (-r_inv_stv);
 }

 /**
  * Calculate the product of the inverse Hessian approximation H with vector v.
  *
  * Convenience overload that forms S^T v and Y^T v itself and forwards to the
  * three-argument version.
  */
 template<typename Scalar, typename OutputFunction>
 Vector<Scalar> LBFGSStorage<Scalar, OutputFunction>::calculate_Hv(const Vector<Scalar>& v)
 {
   const Vector<Scalar> s_proj = S.transpose() * v;
   const Vector<Scalar> y_proj = Y.transpose() * v;
   return calculate_Hv(v, s_proj, y_proj);
 }

 /**
  * Calculate the scalar product of v with H v, where H is the inverse
  * Hessian approximation.
  */
 template<typename Scalar, typename OutputFunction>
 Scalar LBFGSStorage<Scalar, OutputFunction>::calculate_vHv(const Vector<Scalar>& v)
 {
   const Vector<Scalar> Hv = calculate_Hv(v);
   return v.dot(Hv);
 }

 /**
  * Calculate the product of the Hessian approximation B with vector v.
  *
  * @param v   vector to multiply
  * @param STv precomputed product S^T v (only the first b entries are read)
  * @param YTv precomputed product Y^T v (only the first b entries are read)
  * @return    B v
  *
  * Uses the triangular factors LOW and UPP assembled by update().
  */
 template<typename Scalar, typename OutputFunction>
 Vector<Scalar> LBFGSStorage<Scalar, OutputFunction>::calculate_Bv(
     const Vector<Scalar>& v,
     const Vector<Scalar>& STv,
     const Vector<Scalar>& YTv)
 {
   const Scalar inv_gamma = Scalar{1} / gamma;

   // stacked right-hand side [Y^T v; (1/gamma) S^T v]
   Vector<Scalar> rhs (2*b);
   rhs << YTv.head(b), inv_gamma * STv.head(b);

   // forward substitution with the lower factor
   Vector<Scalar> forward = solve_triangular_system_and_check(
       LOW, rhs, TriangleMatrixType::Lower, epsilon, output_function);

   // backward substitution with the upper factor, O(m^2)
   Vector<Scalar> coeff = solve_triangular_system_and_check(
       UPP, forward, TriangleMatrixType::Upper, epsilon, output_function);

   // combine with the history matrices, 2*n*m
   return inv_gamma * v - Y * coeff.head(b) - inv_gamma * S * coeff.tail(b);
 }

 /**
  * Calculate the product of the Hessian approximation B with vector v.
  *
  * Convenience overload that forms S^T v and Y^T v itself and forwards to the
  * three-argument version.
  */
 template<typename Scalar, typename OutputFunction>
 Vector<Scalar> LBFGSStorage<Scalar, OutputFunction>::calculate_Bv(const Vector<Scalar>& v)
 {
   const Vector<Scalar> s_proj = S.transpose() * v;
   const Vector<Scalar> y_proj = Y.transpose() * v;
   return calculate_Bv(v, s_proj, y_proj);
 }

 /**
  * Calculate the scalar product of v with B v, where B is the Hessian
  * approximation.
  */
 template<typename Scalar, typename OutputFunction>
 Scalar LBFGSStorage<Scalar, OutputFunction>::calculate_vBv(const Vector<Scalar>& v)
 {
   const Vector<Scalar> Bv = calculate_Bv(v);
   return v.dot(Bv);
 }

 /**
  * Calculate the dense n x n Hessian matrix approximation
  * B = (1/gamma) I - W M W^T from the working matrices W and M.
  *
  * Deprecated; the matrix-free products (calculate_Bv) do not build the
  * n x n matrix.
  */
 template<typename Scalar, typename OutputFunction>
 [[deprecated]] Matrix<Scalar> LBFGSStorage<Scalar, OutputFunction>::calculate_B()
 {
   const Scalar inv_gamma = Scalar{1} / gamma;
   return inv_gamma * Matrix<Scalar>::Identity(n, n) - W * M * W.transpose();
 }

 /**
  * Update the (inverse) Hessian approximation with a new pair (s, y).
  *
  * The pair is appended to the history (dropping the oldest pair once `m`
  * pairs are stored), the helper matrices R, L, D, YTY and STS get their new
  * last row/column, the scaling `gamma` is refreshed and the derived matrices
  * LOW, UPP, W and M are rebuilt.
  *
  * Two numerical checks guard the result: the relative error of the Cholesky
  * factorization and the relative error of the inversion of the middle matrix.
  * If either error is larger than `epsilon` -- or is not a number, which
  * happens e.g. for `y == 0` or `s^T y == 0` -- the storage is reset and
  * `false` is returned.
  *
  * @param s new step vector, stored as last column of S
  * @param y new gradient difference, stored as last column of Y
  * @param g not used by this function
  * @return  `true` if the update was stored, `false` if the storage was reset
  */
 template<typename Scalar, typename OutputFunction>
 bool LBFGSStorage<Scalar, OutputFunction>::update(
     const Vector<Scalar>& s,
     const Vector<Scalar>& y,
     const Vector<Scalar>& g)
 {
   // b is the actual size of the history
   b = std::min(m, b + 1);
   // if the history is not full (yet), increase the size
   // if the history is already full, move the content
   this->resize(b);

   // O(n)
   S.col(b-1) = s;
   Y.col(b-1) = y;

   // O(n*m)
   // set the b-1 column of R to the scalar product of
   // each column of S with the b-1 column of Y
   R.row(b-1).head(b-1).setZero();
   R.col(b-1).noalias() = S.transpose() * Y.col(b-1);

   // O(n*m)
   // set the b-1 row of L to the scalar product of
   // each column of Y with the b-1 column of S
   // (the diagonal is 0.0 !)
   L.row(b-1).head(b-1).noalias() = Y.transpose().topLeftCorner(b-1, n) * S.col(b-1);
   L.col(b-1).setZero();

   // O(n*m)
   YTY.col(b-1).noalias() = Y.transpose() * Y.col(b-1);
   YTY.row(b-1).head(b-1) = YTY.col(b-1).head(b-1).eval();

   // O(m)
   D.col(b-1).setZero();
   D.row(b-1).setZero();
   D(b - 1, b - 1) = R(b - 1, b - 1);

   // O(n*m)
   STS.col(b-1).noalias() = S.transpose() * S.col(b-1);
   STS.row(b-1).head(b-1) = STS.col(b-1).head(b-1).eval();

   // O(1)
   gamma = R(b - 1, b - 1) / YTY(b - 1, b - 1);

   // convert the matrix to an explicit diagonal matrix to make the inverse efficient
   DiagonalMatrix<Scalar> dD(D.diagonal());

   // O(m)
   DiagonalMatrix<Scalar> dDI = dD.inverse();

   // O(m)
   DiagonalMatrix<Scalar> dD_sq(b);
   dD_sq.diagonal().array() = dD.diagonal().array().sqrt();

   // O(m), inverting diagonal matrix should be more efficient than sqrt
   DiagonalMatrix<Scalar> dD_sqI = dD_sq.inverse();

   Matrix<Scalar> to_factorize = Scalar{1} / gamma * STS + L * dDI * L.transpose();
   // compute cholesky factorization, O(m^3)
   Eigen::LLT<Matrix<Scalar>> llt (to_factorize);
   Matrix<Scalar> J = llt.matrixL();
   Matrix<Scalar> JT = llt.matrixU();

   /*
    * This is a step that can potentially suffer from (and also uncover)
    * numerical instabilities and problems, like indefinite matrices
    * (i.e. the initial matrix could have very small negative or very small
    *  complex eigenvalues, that should not be there).
    */

   // check if cholesky factorization was successful
   Matrix<Scalar> JJT = J * JT;
   Scalar re{0};
   // if the absolute error is != 0.0, then one of these values is also != 0.0
   if (to_factorize.norm() != Scalar{0}) {
     re = (to_factorize - JJT).norm() / to_factorize.norm();
   }
   else if (JJT.norm() != Scalar{0}) {
     re = (to_factorize - JJT).norm() / JJT.norm();
   }

   // written as !(re <= epsilon) so that a NaN error (every comparison with
   // NaN is false) is treated as a failure instead of slipping through
   if (!(re <= epsilon)) {
     this->reset();
     LSL_OUTPUT(output_function, OutputLevel::Warning,
         "Relative error of Cholesky decomposition is " << re
           << " and larger than " << epsilon);
     // reset and terminate with error if we have a problem
     return false;
   }
   else {
     LSL_OUTPUT(output_function, OutputLevel::Debug,
         "Relative error of Cholesky decomposition is " << re
           << " and smaller or equal than " << epsilon);
   }

   // O(m^2) (because diagonal)
   Matrix<Scalar> D_sqILT = dD_sqI * L.transpose();

   // O(m^2) (because diagonal)
   Matrix<Scalar> LD_sqI = L * dD_sqI;

   // replace this by map!

   // O(m^2)
   LOW = Matrix<Scalar>::Zero(2*b, 2*b);
   LOW.topLeftCorner(b, b).diagonal() = dD_sq.diagonal();
   LOW.bottomLeftCorner(b, b) = -LD_sqI;
   LOW.bottomRightCorner(b, b) = J;

   // O(m^2)
   UPP = Matrix<Scalar>::Zero(2*b, 2*b);
   UPP.topLeftCorner(b, b).diagonal() = -dD_sq.diagonal();
   UPP.topRightCorner(b, b) = D_sqILT;
   UPP.bottomRightCorner(b, b) = JT;

   // O(m^2)
   // use Scalar{1} (not the double literal 1.0) so the scale is computed in
   // the same precision as in to_factorize above and in calculate_Bv()
   W = Matrix<Scalar>::Zero(n, 2 * b);
   W.topLeftCorner(n, b) = Y;
   W.topRightCorner(n, b) = Scalar{1} / gamma * S;

   // O(m^2)
   Matrix<Scalar> M_ = Matrix<Scalar>::Zero(2 * b, 2 * b);
   M_.topLeftCorner(b, b) = -D;
   M_.topRightCorner(b, b) = L.transpose();
   M_.bottomLeftCorner(b, b) = L;
   M_.bottomRightCorner(b, b) = Scalar{1} / gamma * STS;

   /*
    * This is a step that can potentially suffer from (and also uncover)
    * numerical instabilities and problems, like indefinite matrices
    * (i.e. the initial matrix could have very small negative or very small
    *  complex eigenvalues, that should not be there).
    */

   // O(m^3)
   M = Eigen::FullPivLU<Matrix<Scalar>>(M_).inverse();

   Matrix<Scalar> identity = Matrix<Scalar>::Identity(2*b, 2*b);
   Matrix<Scalar> should_be_identity = M * M_;

   re = (identity - should_be_identity).norm() / identity.norm();

   // NaN-safe comparison, see the Cholesky check above
   if (!(re <= epsilon)) {
     this->reset();
     LSL_OUTPUT(output_function, OutputLevel::Warning,
         "Relative error of matrix inversion is " << re
           << " and larger than " << epsilon);
     // reset and return false in case of such an error
     return false;
   }
   else {
     LSL_OUTPUT(output_function, OutputLevel::Debug,
         "Relative error of matrix inversion is " << re
           << " and smaller or equal than " << epsilon);
   }

   return true;
 }

 /**
  * Function that resizes the storage to b update pairs.
  *
  * If the history matrices currently have fewer than `b` columns they are
  * grown (existing entries are kept, the new row/column is left for update()
  * to fill) and the member `b` is set. Otherwise the oldest pair is dropped:
  * every matrix is shifted by one column (and row) towards the top left, so
  * that the last column/row is free for the new pair.
  *
  * @param b requested number of update pairs
  */
 template<typename Scalar, typename OutputFunction>
 void LBFGSStorage<Scalar, OutputFunction>::resize(Eigen::Index b)
 {
   if (S.cols() < b) {
     this->b = b;
     // S is a n x b matrix
     S.conservativeResize(Eigen::NoChange, b);
     // Y is a n x b matrix
     Y.conservativeResize(Eigen::NoChange, b);
     // R is a b x b matrix (and upper triangle)
     R.conservativeResize(b, b);
     // L is a b x b matrix (and lower triangle)
     L.conservativeResize(b, b);
     // D is a b x b matrix
     D.conservativeResize(b, b);
     // YY is a b x b matrix
     YTY.conservativeResize(b, b);
     // SS is a b x b matrix
     STS.conservativeResize(b, b);
   }
   else {
     // Source and destination blocks overlap inside the same matrix, so the
     // source is copied into a temporary first (.eval()); without it the
     // result would depend on the order in which Eigen walks the coefficients.
     S.topLeftCorner(n, b-1) = S.topRightCorner(n, b-1).eval();
     Y.topLeftCorner(n, b-1) = Y.topRightCorner(n, b-1).eval();

     R.topLeftCorner(b-1, b-1) = R.bottomRightCorner(b-1, b-1).eval();
     L.topLeftCorner(b-1, b-1) = L.bottomRightCorner(b-1, b-1).eval();
     D.topLeftCorner(b-1, b-1) = D.bottomRightCorner(b-1, b-1).eval();
     YTY.topLeftCorner(b-1, b-1) = YTY.bottomRightCorner(b-1, b-1).eval();
     STS.topLeftCorner(b-1, b-1) = STS.bottomRightCorner(b-1, b-1).eval();
   }
 }

 }

 }
LSLOpt::Implementation::LBFGSStorage::YTY
Matrix< Scalar > YTY
$b \times b$ matrix storing $Y^T Y$
Definition: LBFGSStorage.hpp:190

LSLOpt::OutputLevel::Debug
show status messages

LSLOpt::Implementation::LBFGSStorage::gamma
Scalar gamma
current scaling of the inverse Hessian
Definition: LBFGSStorage.hpp:200

LSLOpt::Implementation::LBFGSStorage::L
Matrix< Scalar > L
$L$ helper matrix ($b \times b$, lower triangular with zero diagonal)
Definition: LBFGSStorage.hpp:186

LSLOpt::Implementation::LBFGSStorage::output_function
OutputFunction & output_function
output function for status messages.
Definition: LBFGSStorage.hpp:206

LSLOpt::Implementation::LBFGSStorage::b
Eigen::Index b
Current number of stored update pairs.
Definition: LBFGSStorage.hpp:172

LSLOpt::Implementation::LBFGSStorage::S
Matrix< Scalar > S
$n \times b$ matrix storing the last $b$ vectors $s$ (one per column)
Definition: LBFGSStorage.hpp:180

LSLOpt::Implementation::LBFGSStorage::R
Matrix< Scalar > R
$R$ helper matrix ($b \times b$, upper triangular)
Definition: LBFGSStorage.hpp:184

LSLOpt::Implementation::LBFGSStorage::UPP
Matrix< Scalar > UPP
$2b \times 2b$ upper triangular working matrix
Definition: LBFGSStorage.hpp:197

LSLOpt::Implementation::LBFGSStorage::LBFGSStorage
LBFGSStorage(Eigen::Index n, Eigen::Index m, Scalar epsilon, OutputFunction &output_function)
Construct a BFGS storage.
Definition: LBFGSStorage.hpp:210

LSLOpt::Implementation::LBFGSStorage::resize
void resize(Eigen::Index b)
Function that resizes the storage to b.
Definition: LBFGSStorage.hpp:488

LSLOpt::Implementation::LBFGSStorage::LOW
Matrix< Scalar > LOW
$2b \times 2b$ lower triangular working matrix
Definition: LBFGSStorage.hpp:195

LSLOpt::Implementation::LBFGSStorage::m
Eigen::Index m
Maximal number of update pairs to store.
Definition: LBFGSStorage.hpp:170

LSLOpt::DiagonalMatrix
Eigen::DiagonalMatrix< Scalar, Eigen::Dynamic, Eigen::Dynamic > DiagonalMatrix
Diagonal matrix type used.
Definition: Types.hpp:33

LSLOpt::Implementation::LBFGSStorage::n
Eigen::Index n
Dimensionality of the problem.
Definition: LBFGSStorage.hpp:168

LSLOpt::Implementation::LBFGSStorage::STS
Matrix< Scalar > STS
$b \times b$ matrix storing $S^T S$
Definition: LBFGSStorage.hpp:192

LSLOpt::Implementation::LBFGSStorage::calculate_B
Matrix< Scalar > calculate_B()
Calculate the Hessian matrix approximation $B$.
Definition: LBFGSStorage.hpp:322

LSLOpt::Implementation::LBFGSStorage::update
bool update(const Vector< Scalar > &s, const Vector< Scalar > &y, const Vector< Scalar > &g)
Update the (inverse) Hessian approximation.
Definition: LBFGSStorage.hpp:328

LSLOpt::OutputLevel::Warning
show fatal errors

LSLOpt::Vector
Eigen::Matrix< Scalar, Eigen::Dynamic, 1 > Vector
Vector type used.
Definition: Types.hpp:15

LSLOpt::Matrix
Eigen::Matrix< Scalar, Eigen::Dynamic, Eigen::Dynamic > Matrix
Matrix type used.
Definition: Types.hpp:24

LSLOpt::Implementation::LBFGSStorage::W
Matrix< Scalar > W
$W$ working matrix ($n \times 2b$)
Definition: LBFGSStorage.hpp:175

LSLOpt::Implementation::LBFGSStorage::calculate_vHv
Scalar calculate_vHv(const Vector< Scalar > &v)
Calculate normalized scalar product of vector $v$ with inverse Hessian approximation $H$ ...
Definition: LBFGSStorage.hpp:281

LSLOpt::Implementation::LBFGSStorage::epsilon
Scalar epsilon
numerical stability check epsilon
Definition: LBFGSStorage.hpp:203

LSLOpt::Implementation::LBFGSStorage::Y
Matrix< Scalar > Y
$n \times b$ matrix storing the last $b$ vectors $y$ (one per column)
Definition: LBFGSStorage.hpp:182

LSLOpt
BFGS optimizations.
Definition: BFGS.hpp:24

LSLOpt::Implementation::LBFGSStorage::M
Matrix< Scalar > M
$M$ working matrix ($2b \times 2b$)
Definition: LBFGSStorage.hpp:177

LSLOpt::Implementation::LBFGSStorage
L-BFGS storage.
Definition: LBFGSStorage.hpp:32

LSLOpt::Implementation::LBFGSStorage::reset
void reset()
Reset the (inverse) Hessian approximation to identity matrix.
Definition: LBFGSStorage.hpp:225

LSLOpt::Implementation::LBFGSStorage::calculate_Bv
Vector< Scalar > calculate_Bv(const Vector< Scalar > &v, const Vector< Scalar > &STv, const Vector< Scalar > &YTv)
Calculate product of Hessian approximation $B$ with vector $v$.
Definition: LBFGSStorage.hpp:287

LSLOpt::Implementation::LBFGSStorage::calculate_Hv
Vector< Scalar > calculate_Hv(const Vector< Scalar > &v, const Vector< Scalar > &STv, const Vector< Scalar > &YTv)
Calculate product of inverse Hessian approximation $H$ with vector $v$.
Definition: LBFGSStorage.hpp:249

LSLOpt::Implementation::LBFGSStorage::calculate_vBv
Scalar calculate_vBv(const Vector< Scalar > &v)
Calculate normalized scalar product of vector $v$ with Hessian approximation $B$.
Definition: LBFGSStorage.hpp:316

LSLOpt::Implementation::LBFGSStorage::D
Matrix< Scalar > D
$D$ helper matrix ($b \times b$, diagonal)
Definition: LBFGSStorage.hpp:188