update files
This commit is contained in:
838
NeuralTraj/src/plan_manage/include/arcPlan/lbfgs.hpp
Executable file
838
NeuralTraj/src/plan_manage/include/arcPlan/lbfgs.hpp
Executable file
@@ -0,0 +1,838 @@
|
||||
#ifndef LBFGS_HPP
|
||||
#define LBFGS_HPP
|
||||
|
||||
#include <Eigen/Eigen>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
namespace lbfgs
|
||||
{
|
||||
// ----------------------- Data Type Part -----------------------
|
||||
|
||||
/**
|
||||
* L-BFGS optimization parameters.
|
||||
*/
|
||||
struct lbfgs_parameter_t
|
||||
{
|
||||
/**
|
||||
* The number of corrections to approximate the inverse hessian matrix.
|
||||
* The L-BFGS routine stores the computation results of previous m
|
||||
* iterations to approximate the inverse hessian matrix of the current
|
||||
* iteration. This parameter controls the size of the limited memories
|
||||
* (corrections). The default value is 8. Values less than 3 are
|
||||
* not recommended. Large values will result in excessive computing time.
|
||||
*/
|
||||
int mem_size = 8;
|
||||
|
||||
/**
|
||||
* Epsilon for grad convergence test. DO NOT USE IT in nonsmooth cases!
|
||||
* Set it to 0.0 and use past-delta-based test for nonsmooth functions.
|
||||
* This parameter determines the accuracy with which the solution is to
|
||||
* be found. A minimization terminates when
|
||||
* ||g(x)||_inf / max(1, ||x||_inf) < g_epsilon,
|
||||
* where ||.||_inf is the infinity norm. The default value is 1.0e-5.
|
||||
* This should be greater than 1.0e-6 in practice because L-BFGS does
|
||||
* not directly reduce first-order residual. It still needs the function
|
||||
* value which can be corrupted by machine_prec when ||g|| is small.
|
||||
*/
|
||||
double g_epsilon = 1.0e-5;
|
||||
|
||||
/**
|
||||
* Distance for delta-based convergence test.
|
||||
* This parameter determines the distance, in iterations, to compute
|
||||
* the rate of decrease of the cost function. If the value of this
|
||||
* parameter is zero, the library does not perform the delta-based
|
||||
* convergence test. The default value is 3.
|
||||
*/
|
||||
int past = 3;
|
||||
|
||||
/**
|
||||
* Delta for convergence test.
|
||||
* This parameter determines the minimum rate of decrease of the
|
||||
* cost function. The library stops iterations when the following
|
||||
* condition is met:
|
||||
* |f' - f| / max(1, |f|) < delta,
|
||||
* where f' is the cost value of past iterations ago, and f is
|
||||
* the cost value of the current iteration.
|
||||
* The default value is 1.0e-6.
|
||||
*/
|
||||
double delta = 1.0e-6;
|
||||
|
||||
/**
|
||||
* The maximum number of iterations.
|
||||
* The lbfgs_optimize() function terminates an minimization process with
|
||||
* ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
|
||||
* exceedes this parameter. Setting this parameter to zero continues an
|
||||
* minimization process until a convergence or error. The default value
|
||||
* is 0.
|
||||
*/
|
||||
int max_iterations = 0;
|
||||
|
||||
/**
|
||||
* The maximum number of trials for the line search.
|
||||
* This parameter controls the number of function and gradients evaluations
|
||||
* per iteration for the line search routine. The default value is 64.
|
||||
*/
|
||||
int max_linesearch = 64;
|
||||
|
||||
/**
|
||||
* The minimum step of the line search routine.
|
||||
* The default value is 1.0e-20. This value need not be modified unless
|
||||
* the exponents are too large for the machine being used, or unless the
|
||||
* problem is extremely badly scaled (in which case the exponents should
|
||||
* be increased).
|
||||
*/
|
||||
double min_step = 1.0e-20;
|
||||
|
||||
/**
|
||||
* The maximum step of the line search.
|
||||
* The default value is 1.0e+20. This value need not be modified unless
|
||||
* the exponents are too large for the machine being used, or unless the
|
||||
* problem is extremely badly scaled (in which case the exponents should
|
||||
* be increased).
|
||||
*/
|
||||
double max_step = 1.0e+20;
|
||||
|
||||
/**
|
||||
* A parameter to control the accuracy of the line search routine.
|
||||
* The default value is 1.0e-4. This parameter should be greater
|
||||
* than zero and smaller than 1.0.
|
||||
*/
|
||||
double f_dec_coeff = 1.0e-4;
|
||||
|
||||
/**
|
||||
* A parameter to control the accuracy of the line search routine.
|
||||
* The default value is 0.9. If the function and gradient
|
||||
* evaluations are inexpensive with respect to the cost of the
|
||||
* iteration (which is sometimes the case when solving very large
|
||||
* problems) it may be advantageous to set this parameter to a small
|
||||
* value. A typical small value is 0.1. This parameter should be
|
||||
* greater than the f_dec_coeff parameter and smaller than 1.0.
|
||||
*/
|
||||
double s_curv_coeff = 0.9;
|
||||
|
||||
/**
|
||||
* A parameter to ensure the global convergence for nonconvex functions.
|
||||
* The default value is 1.0e-6. The parameter performs the so called
|
||||
* cautious update for L-BFGS, especially when the convergence is
|
||||
* not sufficient. The parameter must be positive but might as well
|
||||
* be less than 1.0e-3 in practice.
|
||||
*/
|
||||
double cautious_factor = 1.0e-6;
|
||||
|
||||
/**
|
||||
* The machine precision for floating-point values. The default is 1.0e-16.
|
||||
* This parameter must be a positive value set by a client program to
|
||||
* estimate the machine precision.
|
||||
*/
|
||||
double machine_prec = 1.0e-16;
|
||||
};
|
||||
|
||||
/**
|
||||
* Return values of lbfgs_optimize().
|
||||
* Roughly speaking, a negative value indicates an error.
|
||||
*/
|
||||
enum
|
||||
{
|
||||
/** L-BFGS reaches convergence. */
|
||||
LBFGS_CONVERGENCE = 0,
|
||||
/** L-BFGS satisfies stopping criteria. */
|
||||
LBFGS_STOP,
|
||||
/** The iteration has been canceled by the monitor callback. */
|
||||
LBFGS_CANCELED,
|
||||
|
||||
/** Unknown error. */
|
||||
LBFGSERR_UNKNOWNERROR = -1024,
|
||||
/** Invalid number of variables specified. */
|
||||
LBFGSERR_INVALID_N,
|
||||
/** Invalid parameter lbfgs_parameter_t::mem_size specified. */
|
||||
LBFGSERR_INVALID_MEMSIZE,
|
||||
/** Invalid parameter lbfgs_parameter_t::g_epsilon specified. */
|
||||
LBFGSERR_INVALID_GEPSILON,
|
||||
/** Invalid parameter lbfgs_parameter_t::past specified. */
|
||||
LBFGSERR_INVALID_TESTPERIOD,
|
||||
/** Invalid parameter lbfgs_parameter_t::delta specified. */
|
||||
LBFGSERR_INVALID_DELTA,
|
||||
/** Invalid parameter lbfgs_parameter_t::min_step specified. */
|
||||
LBFGSERR_INVALID_MINSTEP,
|
||||
/** Invalid parameter lbfgs_parameter_t::max_step specified. */
|
||||
LBFGSERR_INVALID_MAXSTEP,
|
||||
/** Invalid parameter lbfgs_parameter_t::f_dec_coeff specified. */
|
||||
LBFGSERR_INVALID_FDECCOEFF,
|
||||
/** Invalid parameter lbfgs_parameter_t::s_curv_coeff specified. */
|
||||
LBFGSERR_INVALID_SCURVCOEFF,
|
||||
/** Invalid parameter lbfgs_parameter_t::machine_prec specified. */
|
||||
LBFGSERR_INVALID_MACHINEPREC,
|
||||
/** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */
|
||||
LBFGSERR_INVALID_MAXLINESEARCH,
|
||||
/** The function value became NaN or Inf. */
|
||||
LBFGSERR_INVALID_FUNCVAL,
|
||||
/** The line-search step became smaller than lbfgs_parameter_t::min_step. */
|
||||
LBFGSERR_MINIMUMSTEP,
|
||||
/** The line-search step became larger than lbfgs_parameter_t::max_step. */
|
||||
LBFGSERR_MAXIMUMSTEP,
|
||||
/** Line search reaches the maximum, assumptions not satisfied or precision not achievable.*/
|
||||
LBFGSERR_MAXIMUMLINESEARCH,
|
||||
/** The algorithm routine reaches the maximum number of iterations. */
|
||||
LBFGSERR_MAXIMUMITERATION,
|
||||
/** Relative search interval width is at least lbfgs_parameter_t::machine_prec. */
|
||||
LBFGSERR_WIDTHTOOSMALL,
|
||||
/** A logic error (negative line-search step) occurred. */
|
||||
LBFGSERR_INVALIDPARAMETERS,
|
||||
/** The current search direction increases the cost function value. */
|
||||
LBFGSERR_INCREASEGRADIENT,
|
||||
};
|
||||
|
||||
/**
|
||||
* Callback interface to provide cost function and gradient evaluations.
|
||||
*
|
||||
* The lbfgs_optimize() function call this function to obtain the values of cost
|
||||
* function and its gradients when needed. A client program must implement
|
||||
* this function to evaluate the values of the cost function and its
|
||||
* gradients, given current values of variables.
|
||||
*
|
||||
* @param instance The user data sent for lbfgs_optimize() function by the client.
|
||||
* @param x The current values of variables.
|
||||
* @param g The gradient vector. The callback function must compute
|
||||
* the gradient values for the current variables.
|
||||
* @retval double The value of the cost function for the current variables.
|
||||
*/
|
||||
typedef double (*lbfgs_evaluate_t)(void *instance,
|
||||
const Eigen::VectorXd &x,
|
||||
Eigen::VectorXd &g);
|
||||
|
||||
/**
|
||||
* Callback interface to provide an upper bound at the beginning of the current line search.
|
||||
*
|
||||
* The lbfgs_optimize() function call this function to obtain the values of the
|
||||
* upperbound of the stepsize to search in, provided with the beginning values of
|
||||
* variables before the line search, and the current step vector (can be descent direction).
|
||||
* A client program can implement this function for more efficient linesearch. Any step
|
||||
* larger than this bound should not be considered. For example, it has a very large or even
|
||||
* inf function value. Note that the function value at the provided bound should be FINITE!
|
||||
* If it is not used, just set it nullptr.
|
||||
*
|
||||
* @param instance The user data sent for lbfgs_optimize() function by the client.
|
||||
* @param xp The values of variables before current line search.
|
||||
* @param d The step vector. It can be the descent direction.
|
||||
* @retval double The upperboud of the step in current line search routine,
|
||||
* such that (stpbound * d) is the maximum reasonable step.
|
||||
*/
|
||||
typedef double (*lbfgs_stepbound_t)(void *instance,
|
||||
const Eigen::VectorXd &xp,
|
||||
const Eigen::VectorXd &d);
|
||||
|
||||
/**
|
||||
* Callback interface to monitor the progress of the minimization process.
|
||||
*
|
||||
* The lbfgs_optimize() function call this function for each iteration. Implementing
|
||||
* this function, a client program can store or display the current progress
|
||||
* of the minimization process. If it is not used, just set it nullptr.
|
||||
*
|
||||
* @param instance The user data sent for lbfgs_optimize() function by the client.
|
||||
* @param x The current values of variables.
|
||||
* @param g The current gradient values of variables.
|
||||
* @param fx The current value of the cost function.
|
||||
* @param step The line-search step used for this iteration.
|
||||
* @param k The iteration count.
|
||||
* @param ls The number of evaluations called for this iteration.
|
||||
* @retval int Zero to continue the minimization process. Returning a
|
||||
* non-zero value will cancel the minimization process.
|
||||
*/
|
||||
typedef int (*lbfgs_progress_t)(void *instance,
|
||||
const Eigen::VectorXd &x,
|
||||
const Eigen::VectorXd &g,
|
||||
const double fx,
|
||||
const double step,
|
||||
const int k,
|
||||
const int ls);
|
||||
|
||||
/**
|
||||
* Callback data struct
|
||||
*/
|
||||
struct callback_data_t
|
||||
{
|
||||
void *instance = nullptr;
|
||||
lbfgs_evaluate_t proc_evaluate = nullptr;
|
||||
lbfgs_stepbound_t proc_stepbound = nullptr;
|
||||
lbfgs_progress_t proc_progress = nullptr;
|
||||
};
|
||||
|
||||
// ----------------------- L-BFGS Part -----------------------
|
||||
|
||||
/**
|
||||
* Line search method for smooth or nonsmooth functions.
|
||||
* This function performs line search to find a point that satisfy
|
||||
* both the Armijo condition and the weak Wolfe condition. It is
|
||||
* as robust as the backtracking line search but further applies
|
||||
* to continuous and piecewise smooth functions where the strong
|
||||
* Wolfe condition usually does not hold.
|
||||
*
|
||||
* @see
|
||||
* Adrian S. Lewis and Michael L. Overton. Nonsmooth optimization
|
||||
* via quasi-Newton methods. Mathematical Programming, Vol 141,
|
||||
* No 1, pp. 135-163, 2013.
|
||||
*/
|
||||
inline int line_search_lewisoverton(Eigen::VectorXd &x,
|
||||
double &f,
|
||||
Eigen::VectorXd &g,
|
||||
double &stp,
|
||||
const Eigen::VectorXd &s,
|
||||
const Eigen::VectorXd &xp,
|
||||
const Eigen::VectorXd &gp,
|
||||
const double stpmin,
|
||||
const double stpmax,
|
||||
const callback_data_t &cd,
|
||||
const lbfgs_parameter_t ¶m)
|
||||
{
|
||||
int count = 0;
|
||||
bool brackt = false, touched = false;
|
||||
double finit, dginit, dgtest, dstest;
|
||||
double mu = 0.0, nu = stpmax;
|
||||
|
||||
/* Check the input parameters for errors. */
|
||||
if (!(stp > 0.0))
|
||||
{
|
||||
return LBFGSERR_INVALIDPARAMETERS;
|
||||
}
|
||||
|
||||
/* Compute the initial gradient in the search direction. */
|
||||
dginit = gp.dot(s);
|
||||
/* Make sure that s points to a descent direction. */
|
||||
if (0.0 < dginit)
|
||||
{
|
||||
return LBFGSERR_INCREASEGRADIENT;
|
||||
}
|
||||
|
||||
/* The initial value of the cost function. */
|
||||
finit = f;
|
||||
dgtest = param.f_dec_coeff * dginit;
|
||||
dstest = param.s_curv_coeff * dginit;
|
||||
|
||||
while (true)
|
||||
{
|
||||
x = xp + stp * s;
|
||||
|
||||
/* Evaluate the function and gradient values. */
|
||||
f = cd.proc_evaluate(cd.instance, x, g);
|
||||
++count;
|
||||
|
||||
/* Test for errors. */
|
||||
if (std::isinf(f) || std::isnan(f))
|
||||
{
|
||||
return LBFGSERR_INVALID_FUNCVAL;
|
||||
}
|
||||
|
||||
if(param.past > 0 && fabs(finit-f)/(fabs(finit)+1.0)<param.delta/param.past)
|
||||
{
|
||||
return count;
|
||||
}
|
||||
/* Check the Armijo condition. */
|
||||
if (f > finit + stp * dgtest)
|
||||
{
|
||||
nu = stp;
|
||||
brackt = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Check the weak Wolfe condition. */
|
||||
if (g.dot(s) < dstest)
|
||||
{
|
||||
mu = stp;
|
||||
}
|
||||
else
|
||||
{
|
||||
return count;
|
||||
}
|
||||
}
|
||||
if (param.max_linesearch <= count)
|
||||
{
|
||||
// std::cout<< fabs(dginit)/finit<<std::endl;
|
||||
// std::cout << fabs(finit-f)/(fabs(finit)+1.0) << std::endl;
|
||||
/* Maximum number of iteration. */
|
||||
return LBFGSERR_MAXIMUMLINESEARCH;
|
||||
}
|
||||
if (brackt && (nu - mu) < param.machine_prec * nu)
|
||||
{
|
||||
/* Relative interval width is at least machine_prec. */
|
||||
return LBFGSERR_WIDTHTOOSMALL;
|
||||
}
|
||||
|
||||
if (brackt)
|
||||
{
|
||||
stp = 0.5 * (mu + nu);
|
||||
}
|
||||
else
|
||||
{
|
||||
stp *= 2.0;
|
||||
}
|
||||
|
||||
if (stp < stpmin)
|
||||
{
|
||||
/* The step is the minimum value. */
|
||||
return LBFGSERR_MINIMUMSTEP;
|
||||
}
|
||||
if (stp > stpmax)
|
||||
{
|
||||
if (touched)
|
||||
{
|
||||
/* The step is the maximum value. */
|
||||
return LBFGSERR_MAXIMUMSTEP;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The maximum value should be tried once. */
|
||||
touched = true;
|
||||
stp = stpmax;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a L-BFGS optimization.
|
||||
* Assumptions: 1. f(x) is either C2 or C0 but piecewise C2;
|
||||
* 2. f(x) is lower bounded;
|
||||
* 3. f(x) has bounded level sets;
|
||||
* 4. g(x) is either the gradient or subgradient;
|
||||
* 5. The gradient exists at the initial guess x0.
|
||||
* A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation
|
||||
* callback) and pass the pointer to the callback function to lbfgs_optimize()
|
||||
* arguments. Similarly, a user can implement a function compatible with
|
||||
* ::lbfgs_stepbound_t to provide an external upper bound for stepsize, and
|
||||
* ::lbfgs_progress_t (progress callback) to obtain the current progress
|
||||
* (e.g., variables, function, and gradient, etc) and to cancel the iteration
|
||||
* process if necessary. Implementation of the stepbound and the progress callback
|
||||
* is optional: a user can pass nullptr if progress notification is not necessary.
|
||||
*
|
||||
*
|
||||
* @param x The vector of decision variables.
|
||||
* THE INITIAL GUESS x0 SHOULD BE SET BEFORE THE CALL!
|
||||
* A client program can receive decision variables
|
||||
* through this vector, at which the cost and its
|
||||
* gradient are queried during minimization.
|
||||
* @param f The ref to the variable that receives the final
|
||||
* value of the cost function for the variables.
|
||||
* @param proc_evaluate The callback function to provide function f(x) and
|
||||
* gradient g(x) evaluations given a current values of
|
||||
* variables x. A client program must implement a
|
||||
* callback function compatible with lbfgs_evaluate_t
|
||||
* and pass the pointer to the callback function.
|
||||
* @param proc_stepbound The callback function to provide values of the
|
||||
* upperbound of the stepsize to search in, provided
|
||||
* with the beginning values of variables before the
|
||||
* line search, and the current step vector (can be
|
||||
* negative gradient). A client program can implement
|
||||
* this function for more efficient linesearch. If it is
|
||||
* not used, just set it nullptr.
|
||||
* @param proc_progress The callback function to receive the progress
|
||||
* (the number of iterations, the current value of
|
||||
* the cost function) of the minimization
|
||||
* process. This argument can be set to nullptr if
|
||||
* a progress report is unnecessary.
|
||||
* @param instance A user data pointer for client programs. The callback
|
||||
* functions will receive the value of this argument.
|
||||
* @param param The parameters for L-BFGS optimization.
|
||||
* @retval int The status code. This function returns a nonnegative
|
||||
* integer if the minimization process terminates without
|
||||
* an error. A negative integer indicates an error.
|
||||
*/
|
||||
inline int lbfgs_optimize(Eigen::VectorXd &x,
|
||||
double &f,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
lbfgs_stepbound_t proc_stepbound,
|
||||
lbfgs_progress_t proc_progress,
|
||||
void *instance,
|
||||
const lbfgs_parameter_t ¶m)
|
||||
{
|
||||
int ret, i, j, k, ls, end, bound;
|
||||
double step, step_min, step_max, fx, ys, yy;
|
||||
double gnorm_inf, xnorm_inf, beta, rate, cau;
|
||||
|
||||
const int n = x.size();
|
||||
const int m = param.mem_size;
|
||||
|
||||
/* Check the input parameters for errors. */
|
||||
if (n <= 0)
|
||||
{
|
||||
return LBFGSERR_INVALID_N;
|
||||
}
|
||||
if (m <= 0)
|
||||
{
|
||||
return LBFGSERR_INVALID_MEMSIZE;
|
||||
}
|
||||
if (param.g_epsilon < 0.0)
|
||||
{
|
||||
return LBFGSERR_INVALID_GEPSILON;
|
||||
}
|
||||
if (param.past < 0)
|
||||
{
|
||||
return LBFGSERR_INVALID_TESTPERIOD;
|
||||
}
|
||||
if (param.delta < 0.0)
|
||||
{
|
||||
return LBFGSERR_INVALID_DELTA;
|
||||
}
|
||||
if (param.min_step < 0.0)
|
||||
{
|
||||
return LBFGSERR_INVALID_MINSTEP;
|
||||
}
|
||||
if (param.max_step < param.min_step)
|
||||
{
|
||||
return LBFGSERR_INVALID_MAXSTEP;
|
||||
}
|
||||
if (!(param.f_dec_coeff > 0.0 &&
|
||||
param.f_dec_coeff < 1.0))
|
||||
{
|
||||
return LBFGSERR_INVALID_FDECCOEFF;
|
||||
}
|
||||
if (!(param.s_curv_coeff < 1.0 &&
|
||||
param.s_curv_coeff > param.f_dec_coeff))
|
||||
{
|
||||
return LBFGSERR_INVALID_SCURVCOEFF;
|
||||
}
|
||||
if (!(param.machine_prec > 0.0))
|
||||
{
|
||||
return LBFGSERR_INVALID_MACHINEPREC;
|
||||
}
|
||||
if (param.max_linesearch <= 0)
|
||||
{
|
||||
return LBFGSERR_INVALID_MAXLINESEARCH;
|
||||
}
|
||||
|
||||
/* Prepare intermediate variables. */
|
||||
Eigen::VectorXd xp(n);
|
||||
Eigen::VectorXd g(n);
|
||||
Eigen::VectorXd gp(n);
|
||||
Eigen::VectorXd d(n);
|
||||
Eigen::VectorXd pf(std::max(1, param.past));
|
||||
|
||||
/* Initialize the limited memory. */
|
||||
Eigen::VectorXd lm_alpha = Eigen::VectorXd::Zero(m);
|
||||
Eigen::MatrixXd lm_s = Eigen::MatrixXd::Zero(n, m);
|
||||
Eigen::MatrixXd lm_y = Eigen::MatrixXd::Zero(n, m);
|
||||
Eigen::VectorXd lm_ys = Eigen::VectorXd::Zero(m);
|
||||
|
||||
/* Construct a callback data. */
|
||||
callback_data_t cd;
|
||||
cd.instance = instance;
|
||||
cd.proc_evaluate = proc_evaluate;
|
||||
cd.proc_stepbound = proc_stepbound;
|
||||
cd.proc_progress = proc_progress;
|
||||
|
||||
/* Evaluate the function value and its gradient. */
|
||||
fx = cd.proc_evaluate(cd.instance, x, g);
|
||||
|
||||
/* Store the initial value of the cost function. */
|
||||
pf(0) = fx;
|
||||
|
||||
/*
|
||||
Compute the direction;
|
||||
we assume the initial hessian matrix H_0 as the identity matrix.
|
||||
*/
|
||||
d = -g;
|
||||
|
||||
/*
|
||||
Make sure that the initial variables are not a stationary point.
|
||||
*/
|
||||
gnorm_inf = g.cwiseAbs().maxCoeff();
|
||||
xnorm_inf = x.cwiseAbs().maxCoeff();
|
||||
|
||||
if (gnorm_inf / std::max(1.0, xnorm_inf) < param.g_epsilon)
|
||||
{
|
||||
/* The initial guess is already a stationary point. */
|
||||
ret = LBFGS_CONVERGENCE;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
Compute the initial step:
|
||||
*/
|
||||
step = 1.0 / d.norm();
|
||||
|
||||
k = 1;
|
||||
end = 0;
|
||||
bound = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
/* Store the current position and gradient vectors. */
|
||||
xp = x;
|
||||
gp = g;
|
||||
|
||||
/* If the step bound can be provied dynamically, then apply it. */
|
||||
step_min = param.min_step;
|
||||
step_max = param.max_step;
|
||||
if (cd.proc_stepbound)
|
||||
{
|
||||
step_max = cd.proc_stepbound(cd.instance, xp, d);
|
||||
step_max = step_max < param.max_step ? step_max : param.max_step;
|
||||
step = step < step_max ? step : 0.5 * step_max;
|
||||
}
|
||||
|
||||
/* Search for an optimal step. */
|
||||
ls = line_search_lewisoverton(x, fx, g, step, d, xp, gp, step_min, step_max, cd, param);
|
||||
|
||||
// int idx = 1;
|
||||
// step = 1.0;
|
||||
// if(d.norm()>1.0){
|
||||
// d = d.normalized().eval()*1.0;
|
||||
// }
|
||||
// while(1){
|
||||
// // std::cout<<"step * d"<<(step * d).transpose()<<std::endl;
|
||||
// x = xp + step * d;
|
||||
// /* Evaluate the function and gradient values. */
|
||||
// double f = cd.proc_evaluate(cd.instance, x, g);
|
||||
// if(g.norm()>1.0){
|
||||
// g = g.normalized().eval()*1.0;
|
||||
// }
|
||||
// std::cout<<"fx: "<<fx<<" f: "<<f<<"step: "<<step<<std::endl;
|
||||
// if(f < fx || step <= step_min){
|
||||
// fx = f;
|
||||
// break;
|
||||
// }
|
||||
|
||||
// step = 1.0 / (++idx);
|
||||
// }
|
||||
// ls = 1;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if (ls < 0)
|
||||
{
|
||||
/* Revert to the previous point. */
|
||||
x = xp;
|
||||
g = gp;
|
||||
ret = ls;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Report the progress. */
|
||||
if (cd.proc_progress)
|
||||
{
|
||||
if (cd.proc_progress(cd.instance, x, g, fx, step, k, ls))
|
||||
{
|
||||
ret = LBFGS_CANCELED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Convergence test.
|
||||
The criterion is given by the following formula:
|
||||
||g(x)||_inf / max(1, ||x||_inf) < g_epsilon
|
||||
*/
|
||||
gnorm_inf = g.cwiseAbs().maxCoeff();
|
||||
xnorm_inf = x.cwiseAbs().maxCoeff();
|
||||
if (gnorm_inf / std::max(1.0, xnorm_inf) < param.g_epsilon)
|
||||
{
|
||||
/* Convergence. */
|
||||
ret = LBFGS_CONVERGENCE;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
Test for stopping criterion.
|
||||
The criterion is given by the following formula:
|
||||
|f(past_x) - f(x)| / max(1, |f(x)|) < \delta.
|
||||
*/
|
||||
if (0 < param.past)
|
||||
{
|
||||
/* We don't test the stopping criterion while k < past. */
|
||||
if (param.past <= k)
|
||||
{
|
||||
/* The stopping criterion. */
|
||||
rate = std::fabs(pf(k % param.past) - fx) / std::max(1.0, std::fabs(fx));
|
||||
|
||||
if (rate < param.delta)
|
||||
{
|
||||
ret = LBFGS_STOP;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store the current value of the cost function. */
|
||||
pf(k % param.past) = fx;
|
||||
}
|
||||
|
||||
if (param.max_iterations != 0 && param.max_iterations <= k)
|
||||
{
|
||||
/* Maximum number of iterations. */
|
||||
ret = LBFGSERR_MAXIMUMITERATION;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Count the iteration number. */
|
||||
++k;
|
||||
|
||||
/*
|
||||
Update vectors s and y:
|
||||
s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
|
||||
y_{k+1} = g_{k+1} - g_{k}.
|
||||
*/
|
||||
lm_s.col(end) = x - xp;
|
||||
lm_y.col(end) = g - gp;
|
||||
|
||||
/*
|
||||
Compute scalars ys and yy:
|
||||
ys = y^t \cdot s = 1 / \rho.
|
||||
yy = y^t \cdot y.
|
||||
Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
|
||||
*/
|
||||
ys = lm_y.col(end).dot(lm_s.col(end));
|
||||
yy = lm_y.col(end).squaredNorm();
|
||||
lm_ys(end) = ys;
|
||||
|
||||
/* Compute the negative of gradients. */
|
||||
d = -g;
|
||||
|
||||
/*
|
||||
Only cautious update is performed here as long as
|
||||
(y^t \cdot s) / ||s_{k+1}||^2 > \epsilon * ||g_{k}||^\alpha,
|
||||
where \epsilon is the cautious factor and a proposed value
|
||||
for \alpha is 1.
|
||||
This is not for enforcing the PD of the approxomated Hessian
|
||||
since ys > 0 is already ensured by the weak Wolfe condition.
|
||||
This is to ensure the global convergence as described in:
|
||||
Dong-Hui Li and Masao Fukushima. On the global convergence of
|
||||
the BFGS method for nonconvex unconstrained optimization problems.
|
||||
SIAM Journal on Optimization, Vol 11, No 4, pp. 1054-1064, 2011.
|
||||
*/
|
||||
cau = lm_s.col(end).squaredNorm() * gp.norm() * param.cautious_factor;
|
||||
|
||||
if (ys > cau)
|
||||
{
|
||||
/*
|
||||
Recursive formula to compute dir = -(H \cdot g).
|
||||
This is described in page 779 of:
|
||||
Jorge Nocedal.
|
||||
Updating Quasi-Newton Matrices with Limited Storage.
|
||||
Mathematics of Computation, Vol. 35, No. 151,
|
||||
pp. 773--782, 1980.
|
||||
*/
|
||||
++bound;
|
||||
bound = m < bound ? m : bound;
|
||||
end = (end + 1) % m;
|
||||
|
||||
j = end;
|
||||
for (i = 0; i < bound; ++i)
|
||||
{
|
||||
j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
|
||||
/* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
|
||||
lm_alpha(j) = lm_s.col(j).dot(d) / lm_ys(j);
|
||||
/* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
|
||||
d += (-lm_alpha(j)) * lm_y.col(j);
|
||||
}
|
||||
|
||||
d *= ys / yy;
|
||||
|
||||
for (i = 0; i < bound; ++i)
|
||||
{
|
||||
/* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamm_{i}. */
|
||||
beta = lm_y.col(j).dot(d) / lm_ys(j);
|
||||
/* \gamm_{i+1} = \gamm_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
|
||||
d += (lm_alpha(j) - beta) * lm_s.col(j);
|
||||
j = (j + 1) % m; /* if (++j == m) j = 0; */
|
||||
}
|
||||
}
|
||||
|
||||
/* The search direction d is ready. We try step = 1 first. */
|
||||
step = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the final value of the cost function. */
|
||||
f = fx;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get string description of an lbfgs_optimize() return code.
|
||||
*
|
||||
* @param err A value returned by lbfgs_optimize().
|
||||
*/
|
||||
inline const char *lbfgs_strerror(const int err)
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case LBFGS_CONVERGENCE:
|
||||
return "Success: reached convergence (g_epsilon).";
|
||||
|
||||
case LBFGS_STOP:
|
||||
return "Success: met stopping criteria (past f decrease less than delta).";
|
||||
|
||||
case LBFGS_CANCELED:
|
||||
return "The iteration has been canceled by the monitor callback.";
|
||||
|
||||
case LBFGSERR_UNKNOWNERROR:
|
||||
return "Unknown error.";
|
||||
|
||||
case LBFGSERR_INVALID_N:
|
||||
return "Invalid number of variables specified.";
|
||||
|
||||
case LBFGSERR_INVALID_MEMSIZE:
|
||||
return "Invalid parameter lbfgs_parameter_t::mem_size specified.";
|
||||
|
||||
case LBFGSERR_INVALID_GEPSILON:
|
||||
return "Invalid parameter lbfgs_parameter_t::g_epsilon specified.";
|
||||
|
||||
case LBFGSERR_INVALID_TESTPERIOD:
|
||||
return "Invalid parameter lbfgs_parameter_t::past specified.";
|
||||
|
||||
case LBFGSERR_INVALID_DELTA:
|
||||
return "Invalid parameter lbfgs_parameter_t::delta specified.";
|
||||
|
||||
case LBFGSERR_INVALID_MINSTEP:
|
||||
return "Invalid parameter lbfgs_parameter_t::min_step specified.";
|
||||
|
||||
case LBFGSERR_INVALID_MAXSTEP:
|
||||
return "Invalid parameter lbfgs_parameter_t::max_step specified.";
|
||||
|
||||
case LBFGSERR_INVALID_FDECCOEFF:
|
||||
return "Invalid parameter lbfgs_parameter_t::f_dec_coeff specified.";
|
||||
|
||||
case LBFGSERR_INVALID_SCURVCOEFF:
|
||||
return "Invalid parameter lbfgs_parameter_t::s_curv_coeff specified.";
|
||||
|
||||
case LBFGSERR_INVALID_MACHINEPREC:
|
||||
return "Invalid parameter lbfgs_parameter_t::machine_prec specified.";
|
||||
|
||||
case LBFGSERR_INVALID_MAXLINESEARCH:
|
||||
return "Invalid parameter lbfgs_parameter_t::max_linesearch specified.";
|
||||
|
||||
case LBFGSERR_INVALID_FUNCVAL:
|
||||
return "The function value became NaN or Inf.";
|
||||
|
||||
case LBFGSERR_MINIMUMSTEP:
|
||||
return "The line-search step became smaller than lbfgs_parameter_t::min_step.";
|
||||
|
||||
case LBFGSERR_MAXIMUMSTEP:
|
||||
return "The line-search step became larger than lbfgs_parameter_t::max_step.";
|
||||
|
||||
case LBFGSERR_MAXIMUMLINESEARCH:
|
||||
return "Line search reaches the maximum try number, assumptions not satisfied or precision not achievable.";
|
||||
|
||||
case LBFGSERR_MAXIMUMITERATION:
|
||||
return "The algorithm routine reaches the maximum number of iterations.";
|
||||
|
||||
case LBFGSERR_WIDTHTOOSMALL:
|
||||
return "Relative search interval width is at least lbfgs_parameter_t::machine_prec.";
|
||||
|
||||
case LBFGSERR_INVALIDPARAMETERS:
|
||||
return "A logic error (negative line-search step) occurred.";
|
||||
|
||||
case LBFGSERR_INCREASEGRADIENT:
|
||||
return "The current search direction increases the cost function value.";
|
||||
|
||||
default:
|
||||
return "(unknown)";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace lbfgs
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user