ROL
ROL_TypeP_TrustRegionAlgorithm_Def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Rapid Optimization Library (ROL) Package
4 //
5 // Copyright 2014 NTESS and the ROL contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef ROL_TYPEP_TRUSTREGIONALGORITHM_DEF_HPP
11 #define ROL_TYPEP_TRUSTREGIONALGORITHM_DEF_HPP
12 
13 #include <deque>
14 
15 namespace ROL {
16 namespace TypeP {
17 
18 
19 template<typename Real>
21  const Ptr<Secant<Real>> &secant) {
22  // Set status test
23  status_->reset();
24  status_->add(makePtr<StatusTest<Real>>(list));
25 
26  ParameterList &trlist = list.sublist("Step").sublist("Trust Region");
27  // Trust-Region Parameters
28  state_->searchSize = trlist.get("Initial Radius", -1.0);
29  delMax_ = trlist.get("Maximum Radius", ROL_INF<Real>());
30  eta0_ = trlist.get("Step Acceptance Threshold", 0.05);
31  eta1_ = trlist.get("Radius Shrinking Threshold", 0.05);
32  eta2_ = trlist.get("Radius Growing Threshold", 0.9);
33  gamma0_ = trlist.get("Radius Shrinking Rate (Negative rho)", 0.0625);
34  gamma1_ = trlist.get("Radius Shrinking Rate (Positive rho)", 0.25);
35  gamma2_ = trlist.get("Radius Growing Rate", 2.5);
36  TRsafe_ = trlist.get("Safeguard Size", 100.0);
37  eps_ = TRsafe_*ROL_EPSILON<Real>();
38  interpRad_ = trlist.get("Use Radius Interpolation", false);
39  verbosity_ = trlist.sublist("General").get("Output Level", 0);
40  initProx_ = trlist.get("Apply Prox to Initial Guess", false);
41  t0_ = list.sublist("Status Test").get("Proximal Gradient Parameter", 1.0);
42  // Nonmonotone Parameters
43  storageNM_ = trlist.get("Nonmonotone Storage Size", 0);
44  useNM_ = (storageNM_ <= 0 ? false : true);
45  // Algorithm-Specific Parameters
46  ROL::ParameterList &lmlist = trlist.sublist("TRN");
47  mu0_ = lmlist.get("Sufficient Decrease Parameter", 1e-2);
48  spexp_ = lmlist.get("Relative Tolerance Exponent", 1.0);
49  spexp_ = std::max(static_cast<Real>(1),std::min(spexp_,static_cast<Real>(2)));
50  redlim_ = lmlist.sublist("Cauchy Point").get("Maximum Number of Reduction Steps", 10);
51  explim_ = lmlist.sublist("Cauchy Point").get("Maximum Number of Expansion Steps", 10);
52  alpha_ = lmlist.sublist("Cauchy Point").get("Initial Step Size", 1.0);
53  normAlpha_ = lmlist.sublist("Cauchy Point").get("Normalize Initial Step Size", false);
54  interpf_ = lmlist.sublist("Cauchy Point").get("Reduction Rate", 0.1);
55  extrapf_ = lmlist.sublist("Cauchy Point").get("Expansion Rate", 10.0);
56  qtol_ = lmlist.sublist("Cauchy Point").get("Decrease Tolerance", 1e-8);
57  // Subsolver (general) parameters
58  lambdaMin_ = lmlist.sublist("Solver").get("Minimum Spectral Step Size", 1e-8);
59  lambdaMax_ = lmlist.sublist("Solver").get("Maximum Spectral Step Size", 1e8);
60  gamma_ = lmlist.sublist("Solver").get("Sufficient Decrease Tolerance", 1e-4);
61  maxSize_ = lmlist.sublist("Solver").get("Maximum Storage Size", 10);
62  maxit_ = lmlist.sublist("Solver").get("Iteration Limit", 25);
63  tol1_ = lmlist.sublist("Solver").get("Absolute Tolerance", 1e-4);
64  tol2_ = lmlist.sublist("Solver").get("Relative Tolerance", 1e-2);
65  // Subsolver (spectral projected gradient) parameters
66  useMin_ = lmlist.sublist("Solver").get("Use Smallest Model Iterate", true);
67  useNMSP_ = lmlist.sublist("Solver").get("Use Nonmonotone Search", false);
68  std::string ssname = lmlist.sublist("Solver").get("Subproblem Solver", "SPG");
69  algSelect_ = StringToETrustRegionP(ssname);
70  // Subsolver (nonlinear conjugate gradient) parameters)
71  ncgType_ = lmlist.sublist("Solver").sublist("NCG").get("Nonlinear CG Type", 4);
72  etaNCG_ = lmlist.sublist("Solver").sublist("NCG").get("Truncation Parameter for HZ CG", 1e-2);
73  desPar_ = lmlist.sublist("Solver").sublist("NCG").get("Descent Parameter", 0.2);
74  // Inexactness Information
75  ParameterList &glist = list.sublist("General");
76  useInexact_.clear();
77  useInexact_.push_back(glist.get("Inexact Objective Function", false));
78  useInexact_.push_back(glist.get("Inexact Gradient", false));
79  useInexact_.push_back(glist.get("Inexact Hessian-Times-A-Vector", false));
80  // Trust-Region Inexactness Parameters
81  ParameterList &ilist = trlist.sublist("Inexact").sublist("Gradient");
82  scale0_ = ilist.get("Tolerance Scaling", static_cast<Real>(0.1));
83  scale1_ = ilist.get("Relative Tolerance", static_cast<Real>(2));
84  // Inexact Function Evaluation Information
85  ParameterList &vlist = trlist.sublist("Inexact").sublist("Value");
86  scale_ = vlist.get("Tolerance Scaling", static_cast<Real>(1.e-1));
87  omega_ = vlist.get("Exponent", static_cast<Real>(0.9));
88  force_ = vlist.get("Forcing Sequence Initial Value", static_cast<Real>(1.0));
89  updateIter_ = vlist.get("Forcing Sequence Update Frequency", static_cast<int>(10));
90  forceFactor_ = vlist.get("Forcing Sequence Reduction Factor", static_cast<Real>(0.1));
91  // Output Parameters
92  verbosity_ = list.sublist("General").get("Output Level",0);
93  writeHeader_ = verbosity_ > 2;
94  // Secant Information
95  useSecantPrecond_ = list.sublist("General").sublist("Secant").get("Use as Preconditioner", false);
96  useSecantHessVec_ = list.sublist("General").sublist("Secant").get("Use as Hessian", false);
98  if (useSecantPrecond_ && !useSecantHessVec_) mode = SECANTMODE_INVERSE;
99  else if (useSecantHessVec_ && !useSecantPrecond_) mode = SECANTMODE_FORWARD;
100  // Initialize trust region model
101  model_ = makePtr<TrustRegionModel_U<Real>>(list,secant,mode);
102  if (secant == nullPtr) {
103  std::string secantType = list.sublist("General").sublist("Secant").get("Type","Limited-Memory BFGS");
104  esec_ = StringToESecant(secantType);
105  }
106 }
107 
108 template<typename Real>
110  const Vector<Real> &g,
111  Real ftol,
112  Objective<Real> &sobj,
113  Objective<Real> &nobj,
114  Vector<Real> &px,
115  Vector<Real> &dg,
116  std::ostream &outStream) {
117  // Initialize data
119  nhess_ = 0;
120  // Update approximate gradient and approximate objective function.
121  if (initProx_){
122  nobj.prox(*state_->iterateVec,x,t0_, ftol); state_->nprox++;
123  x.set(*state_->iterateVec);
124  }
125  sobj.update(x,UpdateType::Initial,state_->iter);
126  state_->svalue = sobj.value(x,ftol); state_->nsval++;
127  nobj.update(x, UpdateType::Initial,state_->iter);
128  state_->nvalue = nobj.value(x,ftol); state_->nnval++;
129  state_->value = state_->svalue + state_->nvalue;
130  computeGradient(x,*state_->gradientVec,px,dg,*state_->stepVec,state_->searchSize,sobj,nobj,true,gtol_,state_->gnorm,outStream);
131 
132  state_->snorm = ROL_INF<Real>();
133  // Normalize initial CP step length
134  if (normAlpha_) alpha_ /= state_->gradientVec->norm();//change here?
135  // Compute initial trust region radius if desired.
136  if ( state_->searchSize <= static_cast<Real>(0) )
137  state_->searchSize = state_->gradientVec->norm();
138  SPiter_ = 0;
139  SPflag_ = 0;
140 }
141 
142 template<typename Real>
144  Real &outTol,
145  Real pRed,
146  Real &fold,
147  int iter,
148  const Vector<Real> &x,
149  const Vector<Real> &xold,
150  Objective<Real> &sobj) {
151  outTol = std::sqrt(ROL_EPSILON<Real>());
152  if ( useInexact_[0] ) {
153  if (!(iter%updateIter_) && (iter!=0)) force_ *= forceFactor_;
154  const Real one(1);
155  Real eta = static_cast<Real>(0.999)*std::min(eta1_,one-eta2_);
156  outTol = scale_*std::pow(eta*std::min(pRed,force_),one/omega_);
157  if (inTol > outTol) {
158  fold = sobj.value(xold,outTol); state_->nsval++;
159  }
160  }
161  // Evaluate objective function at new iterate
162  sobj.update(x,UpdateType::Trial);
163  Real fval = sobj.value(x,outTol); state_->nsval++;
164  return fval;
165 }
166 
167 template<typename Real>
169  Vector<Real> &g,
170  Vector<Real> &px,
171  Vector<Real> &dg,
172  Vector<Real> &step,
173  Real del,
174  Objective<Real> &sobj,
175  Objective<Real> &nobj,
176  bool accept,
177  Real &gtol,
178  Real &gnorm,
179  std::ostream &outStream) const {
180  if ( useInexact_[1] ) {
181  Real gtol0 = scale0_*del;
182  if (accept) gtol = gtol0 + static_cast<Real>(1);
183  else gtol0 = scale0_*std::min(gnorm,del);
184  while ( gtol > gtol0 ) {
185  gtol = gtol0;
186  sobj.gradient(g,x,gtol); state_->ngrad++;
187  dg.set(g.dual());
188  pgstep(px, step, nobj, x, dg, t0_, gtol0); // change gtol? one or ocScale?
189  gnorm = step.norm() / t0_;
190  gtol0 = scale0_*std::min(gnorm,del);
191  }
192  }
193  else {
194  if (accept) {
195  gtol = std::sqrt(ROL_EPSILON<Real>());
196  sobj.gradient(g,x,gtol); state_->ngrad++;
197  dg.set(g.dual());
198  pgstep(px, step, nobj, x, dg, t0_, gtol);
199  gnorm = step.norm() / t0_;
200  }
201  }
202 }
203 
204 template<typename Real>
206  const Vector<Real> &g,
207  Objective<Real> &sobj,
208  Objective<Real> &nobj,
209  std::ostream &outStream ) {
210  const Real zero(0), one(1);
211  //Real tol0 = std::sqrt(ROL_EPSILON<Real>());
212  Real inTol = static_cast<Real>(0.1)*ROL_OVERFLOW<Real>(), outTol(inTol);
213  Real strial(0), ntrial(0), smodel(0), Ftrial(0), pRed(0), rho(1);
214  // Initialize trust-region data
215  std::vector<std::string> output;
216  Ptr<Vector<Real>> gmod = g.clone();
217  Ptr<Vector<Real>> px = x.clone();
218  Ptr<Vector<Real>> dg = x.clone();
219  // Initialize Algorithm
220  initialize(x,g,inTol,sobj,nobj, *px, *dg, outStream);
221  // Initialize storage vectors
222  Ptr<Vector<Real>> pwa1 = x.clone(), pwa2 = x.clone();
223  Ptr<Vector<Real>> pwa3 = x.clone(), pwa4 = x.clone();
224  Ptr<Vector<Real>> pwa5 = x.clone(), pwa6 = x.clone();
225  Ptr<Vector<Real>> pwa7 = x.clone();
226  Ptr<Vector<Real>> dwa1 = g.clone(), dwa2 = g.clone();
227  // Initialize nonmonotone data
228  Real rhoNM(0), sigmac(0), sigmar(0);
229  Real fr(state_->value), fc(state_->value), fmin(state_->value);
230  TRUtils::ETRFlag TRflagNM;
231  int L(0);
232  // Output
233  if (verbosity_ > 0) writeOutput(outStream,true);
234 
235  while (status_->check(*state_)) {
236  // Build trust-region model
237  model_->setData(sobj,*state_->iterateVec,*state_->gradientVec,gtol_);
238 
239  /**** SOLVE TRUST-REGION SUBPROBLEM ****/
240  //q = state_->svalue + state_->nvalue;//q is no longer used
241  gmod->set(*state_->gradientVec);
242  smodel = state_->svalue;
243  ntrial = state_->nvalue;
244  switch (algSelect_) {
245  case TRUSTREGION_P_SPG:
246  default:
247  // Compute Cauchy point (TRON notation: x = x[1])
248  dcauchy(*state_->stepVec,alpha_, smodel, ntrial,
249  *state_->iterateVec, *dg, state_->searchSize,
250  *model_, nobj, *px, *dwa1, *dwa2, outStream); // Solve 1D optimization problem for alpha
251  x.plus(*state_->stepVec); // Set x = x[0] + alpha*g
252  // Model gradient at s = x[1] - x[0]
253  gmod->plus(*dwa1); // hessVec from Cauchy point computation
254 
255  // Apply SPG starting from the Cauchy point->change input
256  dspg(x,smodel,ntrial,*gmod,*state_->iterateVec,state_->searchSize,
257  *model_,nobj,*pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*pwa7,*dwa1,
258  outStream);
259  pRed = state_->value - (smodel+ntrial);
260  break;
261  case TRUSTREGION_P_SPG2:
262  dspg2(x,smodel, ntrial, pRed, *gmod, *state_->iterateVec,
263  state_->searchSize, *model_, nobj,
264  *pwa1, *pwa2, *px, *dwa1, outStream);
265  break;
266  case TRUSTREGION_P_NCG:
267  dncg(x,smodel,ntrial,*gmod,*state_->iterateVec,state_->searchSize,
268  *model_,nobj,*pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*dwa1,
269  outStream);
270  pRed = state_->value - (smodel+ntrial);
271  break;
272  }
273 
274  // Update storage and compute predicted reduction
275  //pRed = -q; // now updated in dcauchy/dspg
276  state_->stepVec->set(x); state_->stepVec->axpy(-one,*state_->iterateVec);
277  state_->snorm = state_->stepVec->norm();
278 
279  // Compute trial objective value
280  strial = computeValue(inTol,outTol,pRed,state_->svalue,state_->iter,x,*state_->iterateVec,sobj);
281  Ftrial = strial + ntrial;
282 
283  // Compute ratio of actual and predicted reduction
284  TRflag_ = TRUtils::SUCCESS;
285  TRUtils::analyzeRatio<Real>(rho,TRflag_,state_->value,Ftrial,pRed,eps_,outStream,verbosity_>1);
286  if (useNM_) {
287  TRUtils::analyzeRatio<Real>(rhoNM,TRflagNM,fr,Ftrial,pRed+sigmar,eps_,outStream,verbosity_>1);
288  TRflag_ = (rho < rhoNM ? TRflagNM : TRflag_);
289  rho = (rho < rhoNM ? rhoNM : rho );
290  }
291 
292  // Update algorithm state
293  state_->iter++;
294  // Accept/reject step and update trust region radius
295  if ((rho < eta0_ && TRflag_ == TRUtils::SUCCESS) || (TRflag_ >= 2)) { // Step Rejected
296  x.set(*state_->iterateVec);
297  sobj.update(x,UpdateType::Revert,state_->iter);
298  nobj.update(x,UpdateType::Revert,state_->iter);
299  if (interpRad_ && (rho < zero && TRflag_ != TRUtils::TRNAN)) {
300  // Negative reduction, interpolate to find new trust-region radius
301  state_->searchSize = TRUtils::interpolateRadius<Real>(*state_->gradientVec,*state_->stepVec,
302  state_->snorm,pRed,state_->value,Ftrial,state_->searchSize,gamma0_,gamma1_,eta2_,
303  outStream,verbosity_>1);
304  }
305  else { // Shrink trust-region radius
306  state_->searchSize = gamma1_*std::min(state_->snorm,state_->searchSize);
307  }
308  computeGradient(x,*state_->gradientVec,*px,*dg,*pwa1,state_->searchSize,sobj,nobj,false,gtol_,state_->gnorm,outStream);
309  }
310  else if ((rho >= eta0_ && TRflag_ != TRUtils::NPOSPREDNEG)
311  || (TRflag_ == TRUtils::POSPREDNEG)) { // Step Accepted
312  state_->value = Ftrial;
313  state_->svalue = strial;
314  state_->nvalue = ntrial;
315  sobj.update(x,UpdateType::Accept,state_->iter);
316  nobj.update(x,UpdateType::Accept,state_->iter);
317  inTol = outTol;
318  if (useNM_) {
319  sigmac += pRed; sigmar += pRed;
320  if (Ftrial < fmin) { fmin = Ftrial; fc = fmin; sigmac = zero; L = 0; }
321  else {
322  L++;
323  if (Ftrial > fc) { fc = Ftrial; sigmac = zero; }
324  if (L == storageNM_) { fr = fc; sigmar = sigmac; }
325  }
326  }
327  // Increase trust-region radius
328  if (rho >= eta2_) state_->searchSize = std::min(gamma2_*state_->searchSize, delMax_);
329  // Compute gradient at new iterate
330  dwa1->set(*state_->gradientVec);
331  computeGradient(x,*state_->gradientVec,*px,*dg,*pwa1,state_->searchSize,sobj,nobj,true,gtol_,state_->gnorm,outStream);
332  state_->iterateVec->set(x);
333  // Update secant information in trust-region model
334  model_->update(x,*state_->stepVec,*dwa1,*state_->gradientVec,
335  state_->snorm,state_->iter);
336  }
337 
338  // Update Output
339  if (verbosity_ > 0) writeOutput(outStream,writeHeader_);
340  }
341  if (verbosity_ > 0) TypeP::Algorithm<Real>::writeExitStatus(outStream);
342 }
343 
344 template<typename Real>
346  Real &alpha,
347  Real &sval,
348  Real &nval,
349  const Vector<Real> &x,
350  const Vector<Real> &g,
351  const Real del,
353  Objective<Real> &nobj,
354  Vector<Real> &px,
355  Vector<Real> &dwa,
356  Vector<Real> &dwa1,
357  std::ostream &outStream) {
358  const Real half(0.5), sold(sval), nold(nval);
359  Real tol = std::sqrt(ROL_EPSILON<Real>());
360  bool interp = false;
361  Real gs(0), snorm(0), Qk(0), pRed(0);
362  // Compute s = P(x[0] - alpha g[0]) - x[0]
363  pgstep(px, s, nobj, x, g, alpha, tol);
364  snorm = s.norm();
365  if (snorm > del) {
366  interp = true;
367  }
368  else {
369  model.hessVec(dwa,s,x,tol); nhess_++;
370  nobj.update(px, UpdateType::Trial);
371  nval = nobj.value(px, tol); state_->nnval++;
372  gs = s.dot(g);
373  sval = sold + gs + half * s.apply(dwa);
374  pRed = (sold + nold) - (sval + nval);
375  Qk = gs + nval - nold;
376  interp = (pRed < -mu0_*Qk);
377  }
378  // Either increase or decrease alpha to find approximate Cauchy point
379  int cnt = 0;
380  if (interp) {//decrease loop
381  bool search = true;
382  while (search) {
383  alpha *= interpf_;
384  pgstep(px, s, nobj, x, g, alpha, tol);
385  snorm = s.norm();
386  if (snorm <= del) {
387  model.hessVec(dwa,s,x,tol); nhess_++;
388  nobj.update(px, UpdateType::Trial);
389  nval = nobj.value(px, tol); state_->nnval++;
390  gs = s.dot(g);
391  sval = sold + gs + half * s.apply(dwa);
392  pRed = (sold + nold) - (sval + nval);
393  Qk = gs + nval - nold;
394  search = ((pRed < -mu0_*Qk) && (cnt < redlim_)) ;
395  }
396  cnt++;
397  }
398  }
399  else {
400  bool search = true;
401  Real alphas = alpha;
402  Real mvals = pRed;
403  Real svals = sval;
404  dwa1.set(dwa);
405  while (search) {
406  alpha *= extrapf_;
407  pgstep(px, s, nobj, x, g, alpha, tol);
408  snorm = s.norm();
409  if (snorm <= del && cnt < explim_){// && mnew < mold + mu0_*Qk) {
410  model.hessVec(dwa,s,x,tol); nhess_++;
411  nobj.update(px, UpdateType::Trial);
412  nval = nobj.value(px, tol); state_->nnval++;
413  gs = s.dot(g);
414  sval = sold + gs + half * s.apply(dwa);
415  pRed = (sold + nold) - (sval + nval);
416  Qk = gs + nval - nold;
417  if (pRed >= -mu0_*Qk && std::abs(pRed-mvals) > qtol_*std::abs(mvals)) {
418  dwa1.set(dwa);
419  alphas = alpha;
420  mvals = pRed;
421  svals = sval;
422  search = true;
423  }
424  else {
425  dwa.set(dwa1);
426  pRed = mvals;
427  sval = svals;
428  search = false;
429  }
430  }
431  else {
432  search = false;
433  }
434  cnt++;
435  }
436  alpha = alphas;
437  pgstep(px, s, nobj, x, g, alpha, tol);
438  snorm = s.norm();
439  }
440  if (verbosity_ > 1) {
441  outStream << " Cauchy point" << std::endl;
442  outStream << " Step length (alpha): " << alpha << std::endl;
443  outStream << " Step length (alpha*g): " << snorm << std::endl;
444  outStream << " Model decrease (pRed): " << pRed << std::endl;
445  if (!interp)
446  outStream << " Number of extrapolation steps: " << cnt << std::endl;
447  }
448  return snorm;
449 }
450 
451 template<typename Real>
453  Real &sval,
454  Real &nval,
455  Real &pRed,
456  Vector<Real> &gmod,
457  const Vector<Real> &x,
458  Real del,
460  Objective<Real> &nobj,
461  Vector<Real> &pwa,
462  Vector<Real> &pwa1,
463  Vector<Real> &pwa2,
464  Vector<Real> &dwa,
465  std::ostream &outStream) {
466  // Use SPG to approximately solve TR subproblem:
467  // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> subject to y\in C, ||y|| \le del
468  //
469  // Inpute:
470  // y = Primal vector
471  // x = Current iterate
472  // g = Current gradient
473  const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
474  const Real mprev(sval+nval);
475  Real tol(std::sqrt(ROL_EPSILON<Real>()));
476  Real coeff(1), alpha(1), alphaMax(1), lambda(1), lambdaTmp(1);
477  Real gs(0), ss(0), gnorm(0), s0s0(0), ss0(0), sHs(0), snorm(0), nold(nval);
478  pwa1.zero();
479 
480  // Set y = x
481  y.set(x);
482 
483  // Compute initial step
484  coeff = one / gmod.norm();
485  lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
486  pgstep(pwa2, pwa, nobj, y, gmod.dual(), lambda, tol);
487  gs = gmod.apply(pwa); // gs = <step, model gradient>
488  ss = pwa.dot(pwa); // Norm squared of step
489  snorm = std::sqrt(ss); // norm(step)
490  gnorm = snorm / lambda; // norm(step) / lambda
491 
492  // Compute initial projected gradient norm
493  const Real gtol = std::min(tol1_,tol2_*gnorm);
494 
495  if (verbosity_ > 1)
496  outStream << " Spectral Projected Gradient" << std::endl;
497 
498  SPiter_ = 0;
499  while (SPiter_ < maxit_) {
500  SPiter_++;
501 
502  // Evaluate model Hessian
503  model.hessVec(dwa,pwa,x,tol); nhess_++; // dwa = H step
504  sHs = dwa.apply(pwa); // sHs = <step, H step>
505 
506  // Evaluate nonsmooth term
507  nobj.update(pwa2,UpdateType::Trial);
508  nval = nobj.value(pwa2,tol); state_->nnval++;
509 
510  // Perform line search
511  alphaMax = one;
512  if (snorm >= del-safeguard) { // Trust-region constraint is violated
513  ss0 = pwa1.dot(pwa);
514  alphaMax = std::min(one, (-ss0 + std::sqrt(ss0*ss0 - ss*(s0s0-del*del)))/ss);
515  }
516  alpha = (sHs <= safeguard) ? alphaMax : std::min(alphaMax, -(gs + nval - nold)/sHs);
517 
518  // Update model quantities
519  if (alpha == one) {
520  y.set(pwa2);
521  sval += gs + half * sHs;
522  gmod.plus(dwa);
523  }
524  else {
525  y.axpy(alpha,pwa); // New iterate
526  nobj.update(y,UpdateType::Trial);
527  nval = nobj.value(y, tol); state_->nnval++;
528  sval += alpha * (gs + half * alpha * sHs);
529  gmod.axpy(alpha,dwa);
530  }
531  nold = nval;
532  pRed = mprev - (sval+nval);
533 
534  // Check trust-region constraint violation
535  pwa1.set(y); pwa1.axpy(-one,x);
536  s0s0 = pwa1.dot(pwa1);
537  snorm = std::sqrt(s0s0);
538 
539  if (verbosity_ > 1) {
540  outStream << std::endl;
541  outStream << " Iterate: " << SPiter_ << std::endl;
542  outStream << " Spectral step length (lambda): " << lambda << std::endl;
543  outStream << " Step length (alpha): " << alpha << std::endl;
544  outStream << " Model decrease (pRed): " << pRed << std::endl;
545  outStream << " Optimality criterion: " << gnorm << std::endl;
546  outStream << " Step norm: " << snorm << std::endl;
547  outStream << std::endl;
548  }
549 
550  if (snorm >= del - safeguard) { SPflag_ = 2; break; }
551 
552  // Compute new spectral step
553  lambdaTmp = (sHs <= safeguard) ? one/gmod.norm() : ss/sHs;
554  lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
555 
556  pgstep(pwa2, pwa, nobj, y, gmod.dual(), alpha, tol); // pass pwa by reference? *pwa?
557  gs = gmod.apply(pwa);
558  ss = pwa.dot(pwa);
559  gnorm = std::sqrt(ss) / lambda;
560 
561  if (gnorm <= gtol) { SPflag_ = 0; break; }
562  }
563  SPflag_ = (SPiter_==maxit_) ? 1 : SPflag_;
564 }
565 
566 template<typename Real>
568  Real &sval,
569  Real &nval,
570  Vector<Real> &gmod,
571  const Vector<Real> &x,
572  Real del,
574  Objective<Real> &nobj,
575  Vector<Real> &ymin,
576  Vector<Real> &pwa,
577  Vector<Real> &pwa1,
578  Vector<Real> &pwa2,
579  Vector<Real> &pwa3,
580  Vector<Real> &pwa4,
581  Vector<Real> &pwa5,
582  Vector<Real> &dwa,
583  std::ostream &outStream) {
584  // Use SPG to approximately solve TR subproblem:
585  // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> + phi(y) subject to ||y|| \le del
586  //
587  // Inpute:
588  // y = Cauchy step
589  // x = Current iterate
590  // g = Current gradient
591  const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
592  const Real mval(sval+nval);
593  Real tol(std::sqrt(ROL_EPSILON<Real>()));
594  Real mcomp(0), mval_min(0), sval_min(0), nval_min(0);
595  Real alpha(1), coeff(1), lambda(1), lambdaTmp(1);
596  Real snew(sval), nnew(nval), mnew(mval);
597  Real sold(sval), nold(nval), mold(mval);
598  Real sHs(0), ss(0), gs(0), Qk(0), gnorm(0);
599  std::deque<Real> mqueue; mqueue.push_back(mold);
600 
601  if (useNMSP_ && useMin_) {
602  mval_min = mval; sval_min = sval; nval_min = nval; ymin.set(y);
603  }
604 
605  // Compute initial proximal gradient norm
606  pwa1.set(gmod.dual());
607  pwa.set(y); pwa.axpy(-t0_,pwa1);
608  dprox(pwa,x,t0_,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream);
609  pwa.axpy(-one,y);
610  gnorm = pwa.norm() / t0_;
611  const Real gtol = std::min(tol1_,tol2_*gnorm);
612 
613  // Compute initial spectral step size
614  coeff = one / gmod.norm();
615  lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
616 
617  if (verbosity_ > 1)
618  outStream << " Spectral Projected Gradient" << std::endl;
619 
620  SPiter_ = 0;
621  while (SPiter_ < maxit_) {
622  SPiter_++;
623 
624  // Compuate SPG step
625  alpha = one;
626  pwa.set(y); pwa.axpy(-lambda,pwa1); // pwa = y - lambda gmod.dual()
627  dprox(pwa,x,lambda,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream); // pwa = P(y - lambda gmod.dual())
628  pwa2.set(pwa); // pwa2 = P(y - lambda gmod.dual())
629  pwa.axpy(-one,y); // pwa = P(y - lambda gmod.dual()) - y = step
630  ss = pwa.dot(pwa); // Norm squared of step
631 
632  // Evaluate model Hessian
633  model.hessVec(dwa,pwa,x,tol); nhess_++; // dwa = H step
634  nobj.update(pwa2, UpdateType::Trial);
635  nnew = nobj.value(pwa2, tol); state_->nnval++;
636  sHs = dwa.apply(pwa); // sHs = <step, H step>
637  gs = gmod.apply(pwa); // gs = <step, model gradient>
638  snew = half * sHs + gs + sold;
639  mnew = snew + nnew;
640  Qk = gs + nnew - nold;
641 
642  // Perform line search
643  //mcomp = useNMSP_ ? *std::max_element(mqueue.begin(),mqueue.end()) : mold;
644  //while( mnew > mcomp + mu0_*Qk ) {
645  // alpha *= interpf_;
646  // pwa2.set(y); pwa2.axpy(alpha,pwa);
647  // nobj.update(pwa2, UpdateType::Trial);
648  // nnew = nobj.value(pwa2, tol); state_->nnval++;
649  // snew = half * alpha * alpha * sHs + alpha * gs + sold;
650  // mnew = nnew + snew;
651  // Qk = alpha * gs + nnew - nold;
652  //}
653  if (useNMSP_) { // Nonmonotone
654  mcomp = *std::max_element(mqueue.begin(),mqueue.end());
655  while( mnew > mcomp + mu0_*Qk ) {
656  alpha *= interpf_;
657  pwa2.set(y); pwa2.axpy(alpha,pwa);
658  nobj.update(pwa2, UpdateType::Trial);
659  nnew = nobj.value(pwa2, tol); state_->nnval++;
660  snew = half * alpha * alpha * sHs + alpha * gs + sold;
661  mnew = nnew + snew;
662  Qk = alpha * gs + nnew - nold;
663  }
664  }
665  else {
666  alpha = (sHs <= safeguard) ? one : std::min(one,-(gs + nnew - nold)/sHs);
667  }
668 
669  // Update model quantities
670  y.set(pwa2);
671  sold = snew;
672  nold = nnew;
673  mold = mnew;
674  gmod.axpy(alpha,dwa); // Update model gradient
675  nobj.update(y, UpdateType::Accept);
676 
677  // Update nonmonotone line search information
678  if (useNMSP_) {
679  if (static_cast<int>(mqueue.size())==maxSize_) mqueue.pop_front();
680  mqueue.push_back(sval+nval);
681  if (useMin_ && mval <= mval_min) {
682  mval_min = mval; sval_min = sval; nval_min = nval; ymin.set(y);
683  }
684  }
685 
686  // Compute projected gradient norm
687  pwa1.set(gmod.dual());
688  pwa.set(y); pwa.axpy(-t0_,pwa1);
689  dprox(pwa,x,t0_,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream);
690  pwa.axpy(-one,y);
691  gnorm = pwa.norm() / t0_;
692 
693  if (verbosity_ > 1) {
694  outStream << std::endl;
695  outStream << " Iterate: " << SPiter_ << std::endl;
696  outStream << " Spectral step length (lambda): " << lambda << std::endl;
697  outStream << " Step length (alpha): " << alpha << std::endl;
698  outStream << " Model decrease (pRed): " << mval-mold << std::endl;
699  outStream << " Optimality criterion: " << gnorm << std::endl;
700  outStream << std::endl;
701  }
702  if (gnorm < gtol) break;
703 
704  // Compute new spectral step
705  lambdaTmp = (sHs == 0 ? coeff : ss / sHs);
706  lambda = std::max(lambdaMin_, std::min(lambdaTmp, lambdaMax_));
707  }
708  if (useNMSP_ && useMin_) {
709  sval = sval_min; nval = nval_min; y.set(ymin);
710  }
711  else {
712  sval = sold; nval = nold;
713  }
714  SPflag_ = (SPiter_==maxit_) ? 1 : 0;
715 }
716 
717 template<typename Real>
719  const Vector<Real> &x0,
720  Real t,
721  Real del,
722  Objective<Real> &nobj,
723  Vector<Real> &y0,
724  Vector<Real> &y1,
725  Vector<Real> &yc,
726  Vector<Real> &pwa,
727  std::ostream &outStream) const {
728  // Solve ||P(t*x0 + (1-t)*(x-x0))-x0|| = del using Brent's method
729  const Real zero(0), half(0.5), one(1), two(2), three(3);
730  const Real eps(ROL_EPSILON<Real>()), tol0(1e1*eps), fudge(1.0-1e-2*sqrt(eps));
731  Real f0(0), f1(0), fc(0), t0(0), t1(1), tc(0), d1(1), d2(1), tol(1);
732  Real p(0), q(0), r(0), s(0), m(0);
733  int cnt(state_->nprox);
734  nobj.prox(y1, x, t, tol); state_->nprox++;
735  pwa.set(y1); pwa.axpy(-one,x0);
736  f1 = pwa.norm();
737  if (f1 <= del) {
738  x.set(y1);
739  return;
740  }
741  y0.set(x0);
742  tc = t0; fc = f0; yc.set(y0);
743  d1 = t1-t0; d2 = d1;
744  int code = 0;
745  while (true) {
746  if (std::abs(fc-del) < std::abs(f1-del)) {
747  t0 = t1; t1 = tc; tc = t0;
748  f0 = f1; f1 = fc; fc = f0;
749  y0.set(y1); y1.set(yc); yc.set(y0);
750  }
751  tol = two*eps*std::abs(t1) + half*tol0;
752  m = half*(tc - t1);
753  if (std::abs(m) <= tol) { code = 1; break; }
754  if ((f1 >= fudge*del && f1 <= del)) break;
755  if (std::abs(d1) < tol || std::abs(f0-del) <= std::abs(f1-del)) {
756  d1 = m; d2 = d1;
757  }
758  else {
759  s = (f1-del)/(f0-del);
760  if (t0 == tc) {
761  p = two*m*s;
762  q = one-s;
763  }
764  else {
765  q = (f0-del)/(fc-del);
766  r = (f1-del)/(fc-del);
767  p = s*(two*m*q*(q-r)-(t1-t0)*(r-one));
768  q = (q-one)*(r-one)*(s-one);
769  }
770  if (p > zero) q = -q;
771  else p = -p;
772  s = d1;
773  d1 = d2;
774  if (two*p < three*m*q-std::abs(tol*q) && p < std::abs(half*s*q)) {
775  d2 = p/q;
776  }
777  else {
778  d1 = m; d2 = d1;
779  }
780  }
781  t0 = t1; f0 = f1; y0.set(y1);
782  if (std::abs(d2) > tol) t1 += d2;
783  else if (m > zero) t1 += tol;
784  else t1 -= tol;
785  pwa.set(x); pwa.scale(t1); pwa.axpy(one-t1,x0);
786  nobj.prox(y1, pwa, t1*t, tol); state_->nprox++;
787  pwa.set(y1); pwa.axpy(-one,x0);
788  f1 = pwa.norm();
789  if ((f1 > del && fc > del) || (f1 <= del && fc <= del)) {
790  tc = t0; fc = f0; yc.set(y0);
791  d1 = t1-t0; d2 = d1;
792  }
793  }
794  if (code==1 && f1>del) x.set(yc);
795  else x.set(y1);
796  if (verbosity_ > 1) {
797  outStream << std::endl;
798  outStream << " Trust-Region Subproblem Proximity Operator" << std::endl;
799  outStream << " Number of proxes: " << state_->nprox-cnt << std::endl;
800  if (code == 1 && f1 > del) {
801  outStream << " Transformed Multiplier: " << tc << std::endl;
802  outStream << " Dual Residual: " << fc-del << std::endl;
803  }
804  else {
805  outStream << " Transformed Multiplier: " << t1 << std::endl;
806  outStream << " Dual Residual: " << f1-del << std::endl;
807  }
808  outStream << " Exit Code: " << code << std::endl;
809  outStream << std::endl;
810  }
811 }
812 
813 template<typename Real>
814 void TrustRegionAlgorithm<Real>::dbls(Real &alpha, Real &nval, Real &pred,
815  const Vector<Real> &y,
816  const Vector<Real> &s,
817  Real lambda, Real tmax,
818  Real kappa, Real gs,
819  Objective<Real> &nobj,
820  Vector<Real> &pwa) {
821  Real tol(std::sqrt(ROL_EPSILON<Real>()));
822  const Real eps(1e-2*std::sqrt(ROL_EPSILON<Real>())), tol0(1e4*tol);
823  const Real eps0(1e2*ROL_EPSILON<Real>());
824  const unsigned maxit(50);
825  const Real zero(0), half(0.5), one(1), two(2);
826  const Real lam(0.5*(3.0-std::sqrt(5.0)));
827  const Real nold(nval);
828  const Real mu(1e-4);
829 
830  // Evaluate model at initial left end point (t = 0)
831  Real tL(0), pL(0);
832  // Evaluate model at right end point (t = tmax)
833  Real tR = tmax;
834  pwa.set(y); pwa.axpy(tR, s);
835  nobj.update(pwa,UpdateType::Trial);
836  Real nR = nobj.value(pwa,tol); state_->nnval++;
837  Real pR = tR * (half * tR * kappa + gs) + nR - nold;
838 
839  // Compute minimizer of quadratic upper bound
840  Real t0 = tR, n0 = nR;
841  if (tmax > lambda) {
842  t0 = lambda;
843  pwa.set(y); pwa.axpy(t0, s);
844  nobj.update(pwa,UpdateType::Trial);
845  n0 = nobj.value(pwa,tol); state_->nnval++;
846  }
847  Real ts = (kappa > 0) ? std::min(tR,(((nold - n0) / kappa) / t0) - (gs / kappa)) : tR;
848  Real t = std::min(t0,ts);
849  bool useOptT = true;
850  if (t <= tL) {
851  t = (t0 < tR ? t0 : half*tR);
852  useOptT = false;
853  }
854 
855  // Evaluate model at t (t = minimizer of quadratic upper bound)
856  pwa.set(y); pwa.axpy(t, s);
857  nobj.update(pwa,UpdateType::Trial);
858  Real nt = nobj.value(pwa,tol); state_->nnval++;
859  Real Qt = t * gs + nt - nold, Qu = Qt;
860  Real pt = half * t * t * kappa + Qt;
861 
862  // If phi(x) = phi(x+tmax s) = phi(x+ts), then
863  // phi(x+ts) is constant for all t, so the TR
864  // model is quadratic---use the minimizer
865  if (useOptT && nt == nold && nR == nold) {
866  alpha = ts;
867  pred = ts * (half * ts * kappa + gs);
868  nval = nold;
869  return;
870  }
871 
872  // If pt >= max(pL, pR), then the model is concave
873  // and the minimum is obtained at the end points
874  if (pt >= std::max(pL, pR)) {
875  alpha = tR;
876  pred = pR;
877  nval = nR;
878  return;
879  }
880 
881  // Run Brent's method (quadratic interpolation + golden section)
882  // to minimize m_k(x+ts) with respect to t
883  Real w = t, v = t, pw = pt, pv = pt, d(0), pu(0), nu(0);
884  Real u(0), p(0), q(0), r(0), etmp(0), e(0), dL(0), dR(0);
885  Real tm = half * (tL + tR);
886  Real tol1 = tol0 * std::abs(t)+eps;
887  Real tol2 = two * tol1;
888  Real tol3 = eps;
889  for (unsigned it = 0u; it < maxit; ++it) {
890  dL = tL-t; dR = tR-t;
891  if (std::abs(e) > tol1) {
892  r = (t-w)*(pt-pv);
893  q = (t-v)*(pt-pw);
894  p = (t-v)*q-(t-w)*r;
895  q = two*(q-r);
896  if (q > zero) p = -p;
897  q = std::abs(q);
898  etmp = e;
899  e = d;
900  if ( std::abs(p) >= std::abs(half*q*etmp) || p <= q*dL || p >= q*dR ) {
901  e = (t > tm ? dL : dR);
902  d = lam * e;
903  }
904  else {
905  d = p/q;
906  u = t+d;
907  if (u-tL < tol2 || tR-u < tol2) d = (tm >= t ? tol1 : -tol1);
908  }
909  }
910  else {
911  e = (t > tm ? dL : dR);
912  d = lam * e;
913  }
914  u = t + (std::abs(d) >= tol1 ? d : (d >= zero ? tol1 : -tol1));
915  pwa.set(y); pwa.axpy(u, s);
916  nobj.update(pwa,UpdateType::Trial);
917  nu = nobj.value(pwa,tol); state_->nnval++;
918  Qu = u * gs + nu - nold;
919  pu = half * u * u * kappa + Qu;
920  if (pu <= pt) {
921  if (u >= t) tL = t;
922  else tR = t;
923  v = w; w = t; t = u;
924  pv = pw; pw = pt; pt = pu;
925  nt = nu; Qt = Qu;
926  }
927  else {
928  if (u < t) tL = u;
929  else tR = u;
930  if (pu <= pw || w == t) {
931  v = w; w = u;
932  pv = pw; pw = pu;
933  }
934  else if (pu <= pv || v == t || v == w) {
935  v = u; pv = pu;
936  }
937  }
938  tm = half * (tL+tR);
939  tol1 = tol0*std::abs(t)+eps;
940  tol2 = two*tol1;
941  tol3 = eps0 * std::max(std::abs(Qt),one);
942  if (pt <= (mu*std::min(zero,Qt)+tol3) && std::abs(t-tm) <= (tol2-half*(tR-tL))) break;
943  }
944  alpha = t;
945  pred = pt;
946  nval = nt;
947 }
948 
949 // NCG Subsolver
950 template<typename Real>
952  Real &sval,
953  Real &nval,
954  Vector<Real> &gmod,
955  const Vector<Real> &x,
956  Real del,
958  Objective<Real> &nobj,
959  Vector<Real> &s,
960  Vector<Real> &pwa1,
961  Vector<Real> &pwa2,
962  Vector<Real> &pwa3,
963  Vector<Real> &pwa4,
964  Vector<Real> &pwa5,
965  Vector<Real> &dwa,
966  std::ostream &outStream) {
967  // Use NCG to approximately solve TR subproblem:
968  // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> + phi(y) subject to ||y-x|| \le del
969  //
970  // Inpute:
971  // y = computed iterate
972  // sval = smooth model value
973  // nval = nonsmooth value
974  // gmod = current gradient
975  // x = current iterate
976  // del = trust region radius
977  // model = trust region model
978  // nobj = nonsmooth objective function
979  // s = the current step
980  // pwa1 = the SPG iterate
981  // pwa2 = the "negative gradient"
982  // pwa3 = y - x
983  // pwa4 = the previous "negative gradient"
984  // pwa5 = temporary storage
985  // dwa = the Hessian applied to the step
986  const Real zero(0), half(0.5), one(1), two(2);
987  const Real del2(del*del);
988  Real tol(std::sqrt(ROL_EPSILON<Real>())), safeguard(tol);
989  Real mold(sval+nval), nold(nval);
990  Real snorm(0), snorm0(0), gnorm(0), gnorm0(0), gnorm2(0);
991  Real alpha(1), beta(1), lambdaTmp(1), lambda(1), eta(etaNCG_);
992  Real alphaMax(1), pred(0), lam1(1);
993  Real sy(0), gg(0), sHs(0), gs(0), ds(0), ss(0), ss0(0);
994  bool reset(true);
995 
996  // Set y = x
997  y.set(x);
998  pwa3.zero(); // Initially y - x = 0
999 
1000  // Compute initial spectral step length
1001  lambdaTmp = t0_ / gmod.norm();
1002  lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
1003  lam1 = lambda;
1004 
1005  // Compute Cauchy point via SPG
1006  pgstep(pwa1, pwa2, nobj, y, gmod.dual(), lambda, tol); // pwa1 = prox(x-lambda g), pwa2 = pwa1 - x
1007  pwa2.scale(one/lambda); // pwa2 = (pwa1 - x) / lambda (for smooth: pwa2 = negative gradient)
1008  s.set(pwa2); // s = (pwa1 - x) / lambda
1009  gs = gmod.apply(s); // gs = <g, prox(x-lambda g)-x> / lambda
1010  snorm = s.norm(); // snorm = norm(prox(x-lambda g)-x) / lambda
1011  gnorm = snorm;
1012  const Real gtol = std::min(tol1_,tol2_*gnorm);
1013 
1014  if (verbosity_>1) outStream << " Nonlinear Conjugate Gradient" << std::endl;
1015 
1016  SPiter_ = 0;
1017  SPflag_ = 1;
1018  while (SPiter_ < maxit_) {
1019  SPiter_++;
1020 
1021  // Compute the model curvature
1022  model.hessVec(dwa,s,x,tol); nhess_++; // dwa = H step
1023  sHs = dwa.apply(s); // sHs = <step, H step>
1024 
1025  // Compute alpha as the 1D minimize in the s direction
1026  ss = snorm*snorm;
1027  ds = s.dot(pwa3);
1028  alphaMax = (-ds + std::sqrt(ds*ds + ss*(del2 - snorm0*snorm0)))/ss;
1029  dbls(alpha,nold,pred,y,s,lam1,alphaMax,sHs,gs,nobj,pwa5);
1030 
1031  //if (sHs <= safeguard) alpha = alphaMax;
1032  //else {
1033  // pwa5.set(y); pwa5.axpy(alphaMax, s);
1034  // nobj.update(pwa5,UpdateType::Trial);
1035  // nmax = nobj.value(pwa5,tol); state_->nnval++;
1036  // alpha = alphaMax * std::min(one, -(alphaMax * gs + nmax - nold)/(alphaMax * alphaMax * sHs));
1037  // if (alpha <= safeguard) alpha = std::min(one, -(gs + nval - nold)/sHs);
1038  //}
1039 
1040  // Update quantities to evaluate quadratic model value and gradient
1041  y.axpy(alpha, s);
1042  gmod.axpy(alpha, dwa); // need dual here?
1043  sval += alpha*(gs + half*alpha*sHs);
1044 
1045  // Check step size
1046  pwa3.set(y); pwa3.axpy(-one,x);
1047  ss0 += alpha*(alpha*ss + two*ds);
1048  snorm0 = std::sqrt(ss0); // pwa3.norm();
1049 
1050  if (snorm0 >= (one-safeguard)*del) { SPflag_ = 2; break; }
1051 
1052  // Update spectral step length
1053  lambdaTmp = (sHs <= safeguard) ? t0_/gmod.norm() : ss/sHs;
1054  lambda = std::max(lambdaMin_, std::min(lambdaMax_, lambdaTmp));
1055 
1056  // Compute SPG direction
1057  pwa4.set(pwa2); // store previous "negative gradient"
1058  pgstep(pwa1, pwa2, nobj, y, gmod.dual(), lambda, tol); // pwa1 = prox(x-lambda g), pwa2 = pwa1 - x
1059  pwa2.scale(one/lambda); // pwa2 = (pwa1 - x) / lambda (for smooth: pwa2 = negative gradient)
1060  gnorm0 = gnorm;
1061  gnorm = pwa2.norm();
1062  if (gnorm <= gtol) { SPflag_ = 0; break; }
1063 
1064  gnorm2 = gnorm * gnorm;
1065  switch (ncgType_) {
1066  case 0: // Fletcher-Reeves
1067  beta = gnorm2/(gnorm0 * gnorm0);
1068  break;
1069  default:
1070  case 1: // Polyak-Ribiere+
1071  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1072  beta = std::max(zero, -pwa5.dot(pwa2)/(gnorm0*gnorm0));
1073  break;
1074  case 2: // Hager-Zhang
1075  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1076  sy = s.dot(pwa5);
1077  gg = pwa2.dot(pwa4);
1078  eta = -one/(s.norm()*std::min(etaNCG_, gnorm0));
1079  beta = std::max(eta, (gnorm2-gg-two*pwa2.dot(s)*(gnorm2-two*gg+(gnorm0*gnorm0))/sy)/sy);
1080  break;
1081  case 3: // Hestenes-Stiefel+
1082  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1083  beta = std::max(zero, -pwa2.dot(pwa5)/s.dot(pwa5));
1084  break;
1085  case 4: // Dai-Yuan+
1086  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1087  beta = std::max(zero, gnorm2/s.dot(pwa5));
1088  break;
1089  case 5: // Fletcher-Reeves-Polyak-Ribiere
1090  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1091  beta = std::max(-gnorm2, std::min(gnorm2, -pwa5.dot(pwa2)))/(gnorm0*gnorm0);
1092  break;
1093  case 6: //Dai-Yuan-Hestenes-Stiefles
1094  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1095  beta = std::max(zero, std::min(-pwa2.dot(pwa5), gnorm2)/s.dot(pwa5));
1096  break;
1097  }
1098 
1099  reset = true;
1100  if (beta != zero && beta < ROL_INF<Real>()){
1101  pwa5.set(pwa2); // pwa5 = pwa2 (SPG step)
1102  pwa5.axpy(beta, s); // pwa5 = pwa2 + beta*s
1103  pwa4.set(y); // pwa4 = y
1104  pwa4.plus(pwa5); // pwa4 = y + pwa5
1105  nobj.update(pwa4,UpdateType::Trial);
1106  nval = nobj.value(pwa4,tol); state_->nnval++;
1107  gs = gmod.apply(pwa5);
1108  if (gs + nval - nold <= -(one - desPar_)*gnorm2) {
1109  s.set(pwa5);
1110  lam1 = one;
1111  reset = false;
1112  }
1113  }
1114  if (reset){ // Reset because either beta=0 or step does not produce descent
1115  s.set(pwa2);
1116  gs = gmod.apply(s);
1117  lam1 = lambda;
1118  beta = zero;
1119  }
1120  snorm = s.norm();
1121 
1122  if (verbosity_ > 1) {
1123  outStream << std::endl;
1124  outStream << " Iterate: " << SPiter_ << std::endl;
1125  outStream << " Spectral step length (lambda): " << lambda << std::endl;
1126  outStream << " Step length (alpha): " << alpha << std::endl;
1127  outStream << " NCG parameter (beta): " << beta << std::endl;
1128  outStream << " Model decrease (pRed): " << mold-(sval+nold) << std::endl;
1129  outStream << " Step size: " << snorm0 << std::endl;
1130  outStream << " Optimality criterion: " << gnorm << std::endl;
1131  outStream << " Optimality tolerance: " << gtol << std::endl;
1132  outStream << std::endl;
1133  }
1134  }
1135  nval = nold;
1136 }
1137 
1138 template<typename Real>
1139 void TrustRegionAlgorithm<Real>::writeHeader( std::ostream& os ) const {
1140  std::ios_base::fmtflags osFlags(os.flags());
1141  if (verbosity_ > 1) {
1142  os << std::string(114,'-') << std::endl;
1143  switch (algSelect_) {
1144  default:
1145  case TRUSTREGION_P_SPG: os << " SPG "; break;
1146  case TRUSTREGION_P_SPG2: os << " Simplified SPG "; break;
1147  case TRUSTREGION_P_NCG: os << " NCG "; break;
1148  }
1149  os << "trust-region method status output definitions" << std::endl << std::endl;
1150  os << " iter - Number of iterates (steps taken)" << std::endl;
1151  os << " value - Objective function value" << std::endl;
1152  os << " gnorm - Norm of the gradient" << std::endl;
1153  os << " snorm - Norm of the step (update to optimization vector)" << std::endl;
1154  os << " delta - Trust-Region radius" << std::endl;
1155  os << " #sval - Number of times the smooth objective function was evaluated" << std::endl;
1156  os << " #nval - Number of times the nonsmooth objective function was evaluated" << std::endl;
1157  os << " #grad - Number of times the gradient was computed" << std::endl;
1158  os << " #hess - Number of times the Hessian was applied" << std::endl;
1159  os << " #prox - Number of times the proximal operator was computed" << std::endl;
1160  os << std::endl;
1161  os << " tr_flag - Trust-Region flag" << std::endl;
1162  for( int flag = TRUtils::SUCCESS; flag != TRUtils::UNDEFINED; ++flag ) {
1163  os << " " << NumberToString(flag) << " - "
1164  << TRUtils::ETRFlagToString(static_cast<TRUtils::ETRFlag>(flag)) << std::endl;
1165  }
1166  os << std::endl;
1167  os << " iterSP - Number of Spectral Projected Gradient iterations" << std::endl << std::endl;
1168  os << " flagSP - Trust-Region Spectral Projected Gradient flag" << std::endl;
1169  os << " 0 - Converged" << std::endl;
1170  os << " 1 - Iteration Limit Exceeded" << std::endl;
1171  os << std::string(114,'-') << std::endl;
1172  }
1173  os << " ";
1174  os << std::setw(6) << std::left << "iter";
1175  os << std::setw(15) << std::left << "value";
1176  os << std::setw(15) << std::left << "gnorm";
1177  os << std::setw(15) << std::left << "snorm";
1178  os << std::setw(15) << std::left << "delta";
1179  os << std::setw(10) << std::left << "#sval";
1180  os << std::setw(10) << std::left << "#nval";
1181  os << std::setw(10) << std::left << "#grad";
1182  os << std::setw(10) << std::left << "#hess";
1183  os << std::setw(10) << std::left << "#prox";
1184  os << std::setw(10) << std::left << "tr_flag";
1185  os << std::setw(10) << std::left << "iterSP";
1186  os << std::setw(10) << std::left << "flagSP";
1187  os << std::endl;
1188  os.flags(osFlags);
1189 }
1190 
1191 template<typename Real>
1192 void TrustRegionAlgorithm<Real>::writeName( std::ostream& os ) const {
1193  std::ios_base::fmtflags osFlags(os.flags());
1194  os << std::endl;
1195  switch (algSelect_) {
1196  default:
1197  case TRUSTREGION_P_SPG: os << "SPG "; break;
1198  case TRUSTREGION_P_SPG2: os << "Simplified SPG "; break;
1199  case TRUSTREGION_P_NCG: os << "NCG "; break;
1200  }
1201  os << "Trust-Region Method (Type P)" << std::endl;
1202  os.flags(osFlags);
1203 }
1204 
1205 template<typename Real>
1206 void TrustRegionAlgorithm<Real>::writeOutput( std::ostream& os, bool write_header ) const {
1207  std::ios_base::fmtflags osFlags(os.flags());
1208  os << std::scientific << std::setprecision(6);
1209  if ( state_->iter == 0 ) writeName(os);
1210  if ( write_header ) writeHeader(os);
1211  if ( state_->iter == 0 ) {
1212  os << " ";
1213  os << std::setw(6) << std::left << state_->iter;
1214  os << std::setw(15) << std::left << state_->value;
1215  os << std::setw(15) << std::left << state_->gnorm;
1216  os << std::setw(15) << std::left << "---";
1217  os << std::setw(15) << std::left << state_->searchSize;
1218  os << std::setw(10) << std::left << state_->nsval;
1219  os << std::setw(10) << std::left << state_->nnval;
1220  os << std::setw(10) << std::left << state_->ngrad;
1221  os << std::setw(10) << std::left << nhess_;
1222  os << std::setw(10) << std::left << state_->nprox;
1223  os << std::setw(10) << std::left << "---";
1224  os << std::setw(10) << std::left << "---";
1225  os << std::setw(10) << std::left << "---";
1226  os << std::endl;
1227  }
1228  else {
1229  os << " ";
1230  os << std::setw(6) << std::left << state_->iter;
1231  os << std::setw(15) << std::left << state_->value;
1232  os << std::setw(15) << std::left << state_->gnorm;
1233  os << std::setw(15) << std::left << state_->snorm;
1234  os << std::setw(15) << std::left << state_->searchSize;
1235  os << std::setw(10) << std::left << state_->nsval;
1236  os << std::setw(10) << std::left << state_->nnval;
1237  os << std::setw(10) << std::left << state_->ngrad;
1238  os << std::setw(10) << std::left << nhess_;
1239  os << std::setw(10) << std::left << state_->nprox;
1240  os << std::setw(10) << std::left << TRflag_;
1241  os << std::setw(10) << std::left << SPiter_;
1242  os << std::setw(10) << std::left << SPflag_;
1243  os << std::endl;
1244  }
1245  os.flags(osFlags);
1246 }
1247 
1248 } // namespace TypeP
1249 } // namespace ROL
1250 
1251 #endif
Provides the interface to evaluate objective functions.
void dspg2(Vector< Real > &y, Real &sval, Real &nval, Real &pRed, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &pwa, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &dwa, std::ostream &outStream=std::cout)
virtual const Vector & dual() const
Return dual representation of , for example, the result of applying a Riesz map, or change of basis...
Definition: ROL_Vector.hpp:192
Real computeValue(Real inTol, Real &outTol, Real pRed, Real &fold, int iter, const Vector< Real > &x, const Vector< Real > &xold, Objective< Real > &obj)
virtual void scale(const Real alpha)=0
Compute where .
void dspg(Vector< Real > &y, Real &sval, Real &nval, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &ymin, Vector< Real > &pwa, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &pwa3, Vector< Real > &pwa4, Vector< Real > &pwa5, Vector< Real > &dwa, std::ostream &outStream=std::cout)
virtual ROL::Ptr< Vector > clone() const =0
Clone to make a new (uninitialized) vector.
virtual Real apply(const Vector< Real > &x) const
Apply to a dual vector. This is equivalent to the call .
Definition: ROL_Vector.hpp:204
TrustRegionAlgorithm(ParameterList &list, const Ptr< Secant< Real >> &secant=nullPtr)
virtual void plus(const Vector &x)=0
Compute , where .
virtual void axpy(const Real alpha, const Vector &x)
Compute where .
Definition: ROL_Vector.hpp:119
void initialize(Vector< Real > &x, const Vector< Real > &g, Real ftol, Objective< Real > &sobj, Objective< Real > &nobj, Vector< Real > &px, Vector< Real > &dg, std::ostream &outStream=std::cout)
virtual Real value(const Vector< Real > &x, Real &tol)=0
Compute value.
virtual void prox(Vector< Real > &Pv, const Vector< Real > &v, Real t, Real &tol)
Compute the proximity operator.
ESecant StringToESecant(std::string s)
Definition: ROL_Types.hpp:513
virtual void zero()
Set to zero vector.
Definition: ROL_Vector.hpp:133
Defines the linear algebra or vector space interface.
Definition: ROL_Vector.hpp:46
Real dcauchy(Vector< Real > &s, Real &alpha, Real &sval, Real &nval, const Vector< Real > &x, const Vector< Real > &g, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &px, Vector< Real > &dwa, Vector< Real > &dwa1, std::ostream &outStream=std::cout)
virtual void update(const Vector< Real > &x, UpdateType type, int iter=-1)
Update objective function.
virtual Real dot(const Vector &x) const =0
Compute where .
Objective_SerialSimOpt(const Ptr< Obj > &obj, const V &ui) z0_ zero()
void dbls(Real &alpha, Real &nval, Real &pred, const Vector< Real > &y, const Vector< Real > &s, Real lambda, Real tmax, Real kappa, Real gs, Objective< Real > &nobj, Vector< Real > &pwa)
virtual void hessVec(Vector< Real > &hv, const Vector< Real > &v, const Vector< Real > &s, Real &tol) override
Apply Hessian approximation to vector.
ETRFlag
Enumation of flags used by trust-region solvers.
ETrustRegionP StringToETrustRegionP(std::string s)
virtual void gradient(Vector< Real > &g, const Vector< Real > &x, Real &tol)
Compute gradient.
std::string NumberToString(T Number)
Definition: ROL_Types.hpp:47
Provides the interface to evaluate trust-region model functions.
ESecantMode
Definition: ROL_Secant.hpp:23
void run(Vector< Real > &x, const Vector< Real > &g, Objective< Real > &sobj, Objective< Real > &nobj, std::ostream &outStream=std::cout) override
Run algorithm on unconstrained problems (Type-U). This general interface supports the use of dual opt...
Provides interface for and implements limited-memory secant operators.
Definition: ROL_Secant.hpp:45
Provides an interface to check status of optimization algorithms.
std::string ETRFlagToString(ETRFlag trf)
void writeOutput(std::ostream &os, bool write_header=false) const override
Print iterate status.
void dprox(Vector< Real > &x, const Vector< Real > &x0, Real t, Real del, Objective< Real > &nobj, Vector< Real > &y0, Vector< Real > &y1, Vector< Real > &yc, Vector< Real > &pwa, std::ostream &outStream=std::cout) const
void dncg(Vector< Real > &y, Real &sval, Real &nval, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &s, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &pwa3, Vector< Real > &pwa4, Vector< Real > &pwa5, Vector< Real > &dwa, std::ostream &outStream=std::cout)
virtual void set(const Vector &x)
Set where .
Definition: ROL_Vector.hpp:175
virtual Real norm() const =0
Returns where .
virtual void writeExitStatus(std::ostream &os) const
void initialize(const Vector< Real > &x, const Vector< Real > &g)
void writeName(std::ostream &os) const override
Print step name.
void computeGradient(const Vector< Real > &x, Vector< Real > &g, Vector< Real > &px, Vector< Real > &dg, Vector< Real > &pwa, Real del, Objective< Real > &sobj, Objective< Real > &nobj, bool accept, Real &gtol, Real &gnorm, std::ostream &outStream=std::cout) const
void writeHeader(std::ostream &os) const override
Print iterate header.