ROL
ROL_TypeP_TrustRegionAlgorithm_Def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ************************************************************************
3 //
4 // Rapid Optimization Library (ROL) Package
5 // Copyright (2014) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact lead developers:
38 // Drew Kouri (dpkouri@sandia.gov) and
39 // Denis Ridzal (dridzal@sandia.gov)
40 //
41 // ************************************************************************
42 // @HEADER
43 
44 #ifndef ROL_TYPEP_TRUSTREGIONALGORITHM_DEF_HPP
45 #define ROL_TYPEP_TRUSTREGIONALGORITHM_DEF_HPP
46 
47 #include <deque>
48 
49 namespace ROL {
50 namespace TypeP {
51 
52 
53 template<typename Real>
55  const Ptr<Secant<Real>> &secant) {
56  // Set status test
57  status_->reset();
58  status_->add(makePtr<StatusTest<Real>>(list));
59 
60  ParameterList &trlist = list.sublist("Step").sublist("Trust Region");
61  // Trust-Region Parameters
62  state_->searchSize = trlist.get("Initial Radius", -1.0);
63  delMax_ = trlist.get("Maximum Radius", ROL_INF<Real>());
64  eta0_ = trlist.get("Step Acceptance Threshold", 0.05);
65  eta1_ = trlist.get("Radius Shrinking Threshold", 0.05);
66  eta2_ = trlist.get("Radius Growing Threshold", 0.9);
67  gamma0_ = trlist.get("Radius Shrinking Rate (Negative rho)", 0.0625);
68  gamma1_ = trlist.get("Radius Shrinking Rate (Positive rho)", 0.25);
69  gamma2_ = trlist.get("Radius Growing Rate", 2.5);
70  TRsafe_ = trlist.get("Safeguard Size", 100.0);
71  eps_ = TRsafe_*ROL_EPSILON<Real>();
72  interpRad_ = trlist.get("Use Radius Interpolation", false);
73  verbosity_ = trlist.sublist("General").get("Output Level", 0);
74  initProx_ = trlist.get("Apply Prox to Initial Guess", false);
75  t0_ = list.sublist("Status Test").get("Proximal Gradient Parameter", 1.0);
76  // Nonmonotone Parameters
77  storageNM_ = trlist.get("Nonmonotone Storage Size", 0);
78  useNM_ = (storageNM_ <= 0 ? false : true);
79  // Algorithm-Specific Parameters
80  ROL::ParameterList &lmlist = trlist.sublist("TRN");
81  mu0_ = lmlist.get("Sufficient Decrease Parameter", 1e-2);
82  spexp_ = lmlist.get("Relative Tolerance Exponent", 1.0);
83  spexp_ = std::max(static_cast<Real>(1),std::min(spexp_,static_cast<Real>(2)));
84  redlim_ = lmlist.sublist("Cauchy Point").get("Maximum Number of Reduction Steps", 10);
85  explim_ = lmlist.sublist("Cauchy Point").get("Maximum Number of Expansion Steps", 10);
86  alpha_ = lmlist.sublist("Cauchy Point").get("Initial Step Size", 1.0);
87  normAlpha_ = lmlist.sublist("Cauchy Point").get("Normalize Initial Step Size", false);
88  interpf_ = lmlist.sublist("Cauchy Point").get("Reduction Rate", 0.1);
89  extrapf_ = lmlist.sublist("Cauchy Point").get("Expansion Rate", 10.0);
90  qtol_ = lmlist.sublist("Cauchy Point").get("Decrease Tolerance", 1e-8);
91  // Subsolver (general) parameters
92  lambdaMin_ = lmlist.sublist("Solver").get("Minimum Spectral Step Size", 1e-8);
93  lambdaMax_ = lmlist.sublist("Solver").get("Maximum Spectral Step Size", 1e8);
94  gamma_ = lmlist.sublist("Solver").get("Sufficient Decrease Tolerance", 1e-4);
95  maxSize_ = lmlist.sublist("Solver").get("Maximum Storage Size", 10);
96  maxit_ = lmlist.sublist("Solver").get("Iteration Limit", 25);
97  tol1_ = lmlist.sublist("Solver").get("Absolute Tolerance", 1e-4);
98  tol2_ = lmlist.sublist("Solver").get("Relative Tolerance", 1e-2);
99  // Subsolver (spectral projected gradient) parameters
100  useMin_ = lmlist.sublist("Solver").get("Use Smallest Model Iterate", true);
101  useNMSP_ = lmlist.sublist("Solver").get("Use Nonmonotone Search", false);
102  std::string ssname = lmlist.sublist("Solver").get("Subproblem Solver", "SPG");
103  algSelect_ = StringToETrustRegionP(ssname);
104  // Subsolver (nonlinear conjugate gradient) parameters)
105  ncgType_ = lmlist.sublist("Solver").sublist("NCG").get("Nonlinear CG Type", 4);
106  etaNCG_ = lmlist.sublist("Solver").sublist("NCG").get("Truncation Parameter for HZ CG", 1e-2);
107  desPar_ = lmlist.sublist("Solver").sublist("NCG").get("Descent Parameter", 0.2);
108  // Inexactness Information
109  ParameterList &glist = list.sublist("General");
110  useInexact_.clear();
111  useInexact_.push_back(glist.get("Inexact Objective Function", false));
112  useInexact_.push_back(glist.get("Inexact Gradient", false));
113  useInexact_.push_back(glist.get("Inexact Hessian-Times-A-Vector", false));
114  // Trust-Region Inexactness Parameters
115  ParameterList &ilist = trlist.sublist("Inexact").sublist("Gradient");
116  scale0_ = ilist.get("Tolerance Scaling", static_cast<Real>(0.1));
117  scale1_ = ilist.get("Relative Tolerance", static_cast<Real>(2));
118  // Inexact Function Evaluation Information
119  ParameterList &vlist = trlist.sublist("Inexact").sublist("Value");
120  scale_ = vlist.get("Tolerance Scaling", static_cast<Real>(1.e-1));
121  omega_ = vlist.get("Exponent", static_cast<Real>(0.9));
122  force_ = vlist.get("Forcing Sequence Initial Value", static_cast<Real>(1.0));
123  updateIter_ = vlist.get("Forcing Sequence Update Frequency", static_cast<int>(10));
124  forceFactor_ = vlist.get("Forcing Sequence Reduction Factor", static_cast<Real>(0.1));
125  // Output Parameters
126  verbosity_ = list.sublist("General").get("Output Level",0);
127  writeHeader_ = verbosity_ > 2;
128  // Secant Information
129  useSecantPrecond_ = list.sublist("General").sublist("Secant").get("Use as Preconditioner", false);
130  useSecantHessVec_ = list.sublist("General").sublist("Secant").get("Use as Hessian", false);
132  if (useSecantPrecond_ && !useSecantHessVec_) mode = SECANTMODE_INVERSE;
133  else if (useSecantHessVec_ && !useSecantPrecond_) mode = SECANTMODE_FORWARD;
134  // Initialize trust region model
135  model_ = makePtr<TrustRegionModel_U<Real>>(list,secant,mode);
136  if (secant == nullPtr) {
137  esec_ = StringToESecant(list.sublist("General").sublist("Secant").get("Type","Limited-Memory BFGS"));
138  }
139 }
140 
141 template<typename Real>
143  const Vector<Real> &g,
144  Real ftol,
145  Objective<Real> &sobj,
146  Objective<Real> &nobj,
147  Vector<Real> &px,
148  Vector<Real> &dg,
149  std::ostream &outStream) {
150  // Initialize data
152  nhess_ = 0;
153  // Update approximate gradient and approximate objective function.
154  if (initProx_){
155  nobj.prox(*state_->iterateVec,x,t0_, ftol); state_->nprox++;
156  x.set(*state_->iterateVec);
157  }
158  sobj.update(x,UpdateType::Initial,state_->iter);
159  state_->svalue = sobj.value(x,ftol); state_->nsval++;
160  nobj.update(x, UpdateType::Initial,state_->iter);
161  state_->nvalue = nobj.value(x,ftol); state_->nnval++;
162  state_->value = state_->svalue + state_->nvalue;
163  computeGradient(x,*state_->gradientVec,px,dg,*state_->stepVec,state_->searchSize,sobj,nobj,true,gtol_,state_->gnorm,outStream);
164 
165  state_->snorm = ROL_INF<Real>();
166  // Normalize initial CP step length
167  if (normAlpha_) alpha_ /= state_->gradientVec->norm();//change here?
168  // Compute initial trust region radius if desired.
169  if ( state_->searchSize <= static_cast<Real>(0) )
170  state_->searchSize = state_->gradientVec->norm();
171  SPiter_ = 0;
172  SPflag_ = 0;
173 }
174 
175 template<typename Real>
177  Real &outTol,
178  Real pRed,
179  Real &fold,
180  int iter,
181  const Vector<Real> &x,
182  const Vector<Real> &xold,
183  Objective<Real> &sobj) {
184  outTol = std::sqrt(ROL_EPSILON<Real>());
185  if ( useInexact_[0] ) {
186  if (!(iter%updateIter_) && (iter!=0)) force_ *= forceFactor_;
187  const Real one(1);
188  Real eta = static_cast<Real>(0.999)*std::min(eta1_,one-eta2_);
189  outTol = scale_*std::pow(eta*std::min(pRed,force_),one/omega_);
190  if (inTol > outTol) {
191  fold = sobj.value(xold,outTol); state_->nsval++;
192  }
193  }
194  // Evaluate objective function at new iterate
195  sobj.update(x,UpdateType::Trial);
196  Real fval = sobj.value(x,outTol); state_->nsval++;
197  return fval;
198 }
199 
200 template<typename Real>
202  Vector<Real> &g,
203  Vector<Real> &px,
204  Vector<Real> &dg,
205  Vector<Real> &step,
206  Real del,
207  Objective<Real> &sobj,
208  Objective<Real> &nobj,
209  bool accept,
210  Real &gtol,
211  Real &gnorm,
212  std::ostream &outStream) const {
213  if ( useInexact_[1] ) {
214  Real gtol0 = scale0_*del;
215  if (accept) gtol = gtol0 + static_cast<Real>(1);
216  else gtol0 = scale0_*std::min(gnorm,del);
217  while ( gtol > gtol0 ) {
218  gtol = gtol0;
219  sobj.gradient(g,x,gtol); state_->ngrad++;
220  dg.set(g.dual());
221  pgstep(px, step, nobj, x, dg, t0_, gtol0); // change gtol? one or ocScale?
222  gnorm = step.norm() / t0_;
223  gtol0 = scale0_*std::min(gnorm,del);
224  }
225  }
226  else {
227  if (accept) {
228  gtol = std::sqrt(ROL_EPSILON<Real>());
229  sobj.gradient(g,x,gtol); state_->ngrad++;
230  dg.set(g.dual());
231  pgstep(px, step, nobj, x, dg, t0_, gtol);
232  gnorm = step.norm() / t0_;
233  }
234  }
235 }
236 
237 template<typename Real>
239  const Vector<Real> &g,
240  Objective<Real> &sobj,
241  Objective<Real> &nobj,
242  std::ostream &outStream ) {
243  const Real zero(0), one(1);
244  //Real tol0 = std::sqrt(ROL_EPSILON<Real>());
245  Real inTol = static_cast<Real>(0.1)*ROL_OVERFLOW<Real>(), outTol(inTol);
246  Real strial(0), ntrial(0), smodel(0), Ftrial(0), pRed(0), rho(1);
247  // Initialize trust-region data
248  std::vector<std::string> output;
249  Ptr<Vector<Real>> gmod = g.clone();
250  Ptr<Vector<Real>> px = x.clone();
251  Ptr<Vector<Real>> dg = x.clone();
252  // Initialize Algorithm
253  initialize(x,g,inTol,sobj,nobj, *px, *dg, outStream);
254  // Initialize storage vectors
255  Ptr<Vector<Real>> pwa1 = x.clone(), pwa2 = x.clone();
256  Ptr<Vector<Real>> pwa3 = x.clone(), pwa4 = x.clone();
257  Ptr<Vector<Real>> pwa5 = x.clone(), pwa6 = x.clone();
258  Ptr<Vector<Real>> pwa7 = x.clone();
259  Ptr<Vector<Real>> dwa1 = g.clone(), dwa2 = g.clone();
260  // Initialize nonmonotone data
261  Real rhoNM(0), sigmac(0), sigmar(0);
262  Real fr(state_->value), fc(state_->value), fmin(state_->value);
263  TRUtils::ETRFlag TRflagNM;
264  int L(0);
265  // Output
266  if (verbosity_ > 0) writeOutput(outStream,true);
267 
268  while (status_->check(*state_)) {
269  // Build trust-region model
270  model_->setData(sobj,*state_->iterateVec,*state_->gradientVec,gtol_);
271 
272  /**** SOLVE TRUST-REGION SUBPROBLEM ****/
273  //q = state_->svalue + state_->nvalue;//q is no longer used
274  gmod->set(*state_->gradientVec);
275  smodel = state_->svalue;
276  ntrial = state_->nvalue;
277  switch (algSelect_) {
278  case TRUSTREGION_P_SPG:
279  default:
280  // Compute Cauchy point (TRON notation: x = x[1])
281  dcauchy(*state_->stepVec,alpha_, smodel, ntrial,
282  *state_->iterateVec, *dg, state_->searchSize,
283  *model_, nobj, *px, *dwa1, *dwa2, outStream); // Solve 1D optimization problem for alpha
284  x.plus(*state_->stepVec); // Set x = x[0] + alpha*g
285  // Model gradient at s = x[1] - x[0]
286  gmod->plus(*dwa1); // hessVec from Cauchy point computation
287 
288  // Apply SPG starting from the Cauchy point->change input
289  dspg(x,smodel,ntrial,*gmod,*state_->iterateVec,state_->searchSize,
290  *model_,nobj,*pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*pwa7,*dwa1,
291  outStream);
292  pRed = state_->value - (smodel+ntrial);
293  break;
294  case TRUSTREGION_P_SPG2:
295  dspg2(x,smodel, ntrial, pRed, *gmod, *state_->iterateVec,
296  state_->searchSize, *model_, nobj,
297  *pwa1, *pwa2, *px, *dwa1, outStream);
298  break;
299  case TRUSTREGION_P_NCG:
300  dncg(x,smodel,ntrial,*gmod,*state_->iterateVec,state_->searchSize,
301  *model_,nobj,*pwa1,*pwa2,*pwa3,*pwa4,*pwa5,*pwa6,*dwa1,
302  outStream);
303  pRed = state_->value - (smodel+ntrial);
304  break;
305  }
306 
307  // Update storage and compute predicted reduction
308  //pRed = -q; // now updated in dcauchy/dspg
309  state_->stepVec->set(x); state_->stepVec->axpy(-one,*state_->iterateVec);
310  state_->snorm = state_->stepVec->norm();
311 
312  // Compute trial objective value
313  strial = computeValue(inTol,outTol,pRed,state_->svalue,state_->iter,x,*state_->iterateVec,sobj);
314  Ftrial = strial + ntrial;
315 
316  // Compute ratio of actual and predicted reduction
317  TRflag_ = TRUtils::SUCCESS;
318  TRUtils::analyzeRatio<Real>(rho,TRflag_,state_->value,Ftrial,pRed,eps_,outStream,verbosity_>1);
319  if (useNM_) {
320  TRUtils::analyzeRatio<Real>(rhoNM,TRflagNM,fr,Ftrial,pRed+sigmar,eps_,outStream,verbosity_>1);
321  TRflag_ = (rho < rhoNM ? TRflagNM : TRflag_);
322  rho = (rho < rhoNM ? rhoNM : rho );
323  }
324 
325  // Update algorithm state
326  state_->iter++;
327  // Accept/reject step and update trust region radius
328  if ((rho < eta0_ && TRflag_ == TRUtils::SUCCESS) || (TRflag_ >= 2)) { // Step Rejected
329  x.set(*state_->iterateVec);
330  sobj.update(x,UpdateType::Revert,state_->iter);
331  nobj.update(x,UpdateType::Revert,state_->iter);
332  if (interpRad_ && (rho < zero && TRflag_ != TRUtils::TRNAN)) {
333  // Negative reduction, interpolate to find new trust-region radius
334  state_->searchSize = TRUtils::interpolateRadius<Real>(*state_->gradientVec,*state_->stepVec,
335  state_->snorm,pRed,state_->value,Ftrial,state_->searchSize,gamma0_,gamma1_,eta2_,
336  outStream,verbosity_>1);
337  }
338  else { // Shrink trust-region radius
339  state_->searchSize = gamma1_*std::min(state_->snorm,state_->searchSize);
340  }
341  computeGradient(x,*state_->gradientVec,*px,*dg,*pwa1,state_->searchSize,sobj,nobj,false,gtol_,state_->gnorm,outStream);
342  }
343  else if ((rho >= eta0_ && TRflag_ != TRUtils::NPOSPREDNEG)
344  || (TRflag_ == TRUtils::POSPREDNEG)) { // Step Accepted
345  state_->value = Ftrial;
346  state_->svalue = strial;
347  state_->nvalue = ntrial;
348  sobj.update(x,UpdateType::Accept,state_->iter);
349  nobj.update(x,UpdateType::Accept,state_->iter);
350  inTol = outTol;
351  if (useNM_) {
352  sigmac += pRed; sigmar += pRed;
353  if (Ftrial < fmin) { fmin = Ftrial; fc = fmin; sigmac = zero; L = 0; }
354  else {
355  L++;
356  if (Ftrial > fc) { fc = Ftrial; sigmac = zero; }
357  if (L == storageNM_) { fr = fc; sigmar = sigmac; }
358  }
359  }
360  // Increase trust-region radius
361  if (rho >= eta2_) state_->searchSize = std::min(gamma2_*state_->searchSize, delMax_);
362  // Compute gradient at new iterate
363  dwa1->set(*state_->gradientVec);
364  computeGradient(x,*state_->gradientVec,*px,*dg,*pwa1,state_->searchSize,sobj,nobj,true,gtol_,state_->gnorm,outStream);
365  state_->iterateVec->set(x);
366  // Update secant information in trust-region model
367  model_->update(x,*state_->stepVec,*dwa1,*state_->gradientVec,
368  state_->snorm,state_->iter);
369  }
370 
371  // Update Output
372  if (verbosity_ > 0) writeOutput(outStream,writeHeader_);
373  }
374  if (verbosity_ > 0) TypeP::Algorithm<Real>::writeExitStatus(outStream);
375 }
376 
377 template<typename Real>
379  Real &alpha,
380  Real &sval,
381  Real &nval,
382  const Vector<Real> &x,
383  const Vector<Real> &g,
384  const Real del,
386  Objective<Real> &nobj,
387  Vector<Real> &px,
388  Vector<Real> &dwa,
389  Vector<Real> &dwa1,
390  std::ostream &outStream) {
391  const Real half(0.5), sold(sval), nold(nval);
392  Real tol = std::sqrt(ROL_EPSILON<Real>());
393  bool interp = false;
394  Real gs(0), snorm(0), Qk(0), pRed(0);
395  // Compute s = P(x[0] - alpha g[0]) - x[0]
396  pgstep(px, s, nobj, x, g, alpha, tol);
397  snorm = s.norm();
398  if (snorm > del) {
399  interp = true;
400  }
401  else {
402  model.hessVec(dwa,s,x,tol); nhess_++;
403  nobj.update(px, UpdateType::Trial);
404  nval = nobj.value(px, tol); state_->nnval++;
405  gs = s.dot(g);
406  sval = sold + gs + half * s.apply(dwa);
407  pRed = (sold + nold) - (sval + nval);
408  Qk = gs + nval - nold;
409  interp = (pRed < -mu0_*Qk);
410  }
411  // Either increase or decrease alpha to find approximate Cauchy point
412  int cnt = 0;
413  if (interp) {//decrease loop
414  bool search = true;
415  while (search) {
416  alpha *= interpf_;
417  pgstep(px, s, nobj, x, g, alpha, tol);
418  snorm = s.norm();
419  if (snorm <= del) {
420  model.hessVec(dwa,s,x,tol); nhess_++;
421  nobj.update(px, UpdateType::Trial);
422  nval = nobj.value(px, tol); state_->nnval++;
423  gs = s.dot(g);
424  sval = sold + gs + half * s.apply(dwa);
425  pRed = (sold + nold) - (sval + nval);
426  Qk = gs + nval - nold;
427  search = ((pRed < -mu0_*Qk) && (cnt < redlim_)) ;
428  }
429  cnt++;
430  }
431  }
432  else {
433  bool search = true;
434  Real alphas = alpha;
435  Real mvals = pRed;
436  Real svals = sval;
437  dwa1.set(dwa);
438  while (search) {
439  alpha *= extrapf_;
440  pgstep(px, s, nobj, x, g, alpha, tol);
441  snorm = s.norm();
442  if (snorm <= del && cnt < explim_){// && mnew < mold + mu0_*Qk) {
443  model.hessVec(dwa,s,x,tol); nhess_++;
444  nobj.update(px, UpdateType::Trial);
445  nval = nobj.value(px, tol); state_->nnval++;
446  gs = s.dot(g);
447  sval = sold + gs + half * s.apply(dwa);
448  pRed = (sold + nold) - (sval + nval);
449  Qk = gs + nval - nold;
450  if (pRed >= -mu0_*Qk && std::abs(pRed-mvals) > qtol_*std::abs(mvals)) {
451  dwa1.set(dwa);
452  alphas = alpha;
453  mvals = pRed;
454  svals = sval;
455  search = true;
456  }
457  else {
458  dwa.set(dwa1);
459  pRed = mvals;
460  sval = svals;
461  search = false;
462  }
463  }
464  else {
465  search = false;
466  }
467  cnt++;
468  }
469  alpha = alphas;
470  pgstep(px, s, nobj, x, g, alpha, tol);
471  snorm = s.norm();
472  }
473  if (verbosity_ > 1) {
474  outStream << " Cauchy point" << std::endl;
475  outStream << " Step length (alpha): " << alpha << std::endl;
476  outStream << " Step length (alpha*g): " << snorm << std::endl;
477  outStream << " Model decrease (pRed): " << pRed << std::endl;
478  if (!interp)
479  outStream << " Number of extrapolation steps: " << cnt << std::endl;
480  }
481  return snorm;
482 }
483 
484 template<typename Real>
486  Real &sval,
487  Real &nval,
488  Real &pRed,
489  Vector<Real> &gmod,
490  const Vector<Real> &x,
491  Real del,
493  Objective<Real> &nobj,
494  Vector<Real> &pwa,
495  Vector<Real> &pwa1,
496  Vector<Real> &pwa2,
497  Vector<Real> &dwa,
498  std::ostream &outStream) {
499  // Use SPG to approximately solve TR subproblem:
500  // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> subject to y\in C, ||y|| \le del
501  //
502  // Inpute:
503  // y = Primal vector
504  // x = Current iterate
505  // g = Current gradient
506  const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
507  const Real mprev(sval+nval);
508  Real tol(std::sqrt(ROL_EPSILON<Real>()));
509  Real coeff(1), alpha(1), alphaMax(1), lambda(1), lambdaTmp(1);
510  Real gs(0), ss(0), gnorm(0), s0s0(0), ss0(0), sHs(0), snorm(0), nold(nval);
511  pwa1.zero();
512 
513  // Set y = x
514  y.set(x);
515 
516  // Compute initial step
517  coeff = one / gmod.norm();
518  lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
519  pgstep(pwa2, pwa, nobj, y, gmod.dual(), lambda, tol);
520  gs = gmod.apply(pwa); // gs = <step, model gradient>
521  ss = pwa.dot(pwa); // Norm squared of step
522  snorm = std::sqrt(ss); // norm(step)
523  gnorm = snorm / lambda; // norm(step) / lambda
524 
525  // Compute initial projected gradient norm
526  const Real gtol = std::min(tol1_,tol2_*gnorm);
527 
528  if (verbosity_ > 1)
529  outStream << " Spectral Projected Gradient" << std::endl;
530 
531  SPiter_ = 0;
532  while (SPiter_ < maxit_) {
533  SPiter_++;
534 
535  // Evaluate model Hessian
536  model.hessVec(dwa,pwa,x,tol); nhess_++; // dwa = H step
537  sHs = dwa.apply(pwa); // sHs = <step, H step>
538 
539  // Evaluate nonsmooth term
540  nobj.update(pwa2,UpdateType::Trial);
541  nval = nobj.value(pwa2,tol); state_->nnval++;
542 
543  // Perform line search
544  alphaMax = one;
545  if (snorm >= del-safeguard) { // Trust-region constraint is violated
546  ss0 = pwa1.dot(pwa);
547  alphaMax = std::min(one, (-ss0 + std::sqrt(ss0*ss0 - ss*(s0s0-del*del)))/ss);
548  }
549  alpha = (sHs <= safeguard) ? alphaMax : std::min(alphaMax, -(gs + nval - nold)/sHs);
550 
551  // Update model quantities
552  if (alpha == one) {
553  y.set(pwa2);
554  sval += gs + half * sHs;
555  gmod.plus(dwa);
556  }
557  else {
558  y.axpy(alpha,pwa); // New iterate
559  nobj.update(y,UpdateType::Trial);
560  nval = nobj.value(y, tol); state_->nnval++;
561  sval += alpha * (gs + half * alpha * sHs);
562  gmod.axpy(alpha,dwa);
563  }
564  nold = nval;
565  pRed = mprev - (sval+nval);
566 
567  // Check trust-region constraint violation
568  pwa1.set(y); pwa1.axpy(-one,x);
569  s0s0 = pwa1.dot(pwa1);
570  snorm = std::sqrt(s0s0);
571 
572  if (verbosity_ > 1) {
573  outStream << std::endl;
574  outStream << " Iterate: " << SPiter_ << std::endl;
575  outStream << " Spectral step length (lambda): " << lambda << std::endl;
576  outStream << " Step length (alpha): " << alpha << std::endl;
577  outStream << " Model decrease (pRed): " << pRed << std::endl;
578  outStream << " Optimality criterion: " << gnorm << std::endl;
579  outStream << " Step norm: " << snorm << std::endl;
580  outStream << std::endl;
581  }
582 
583  if (snorm >= del - safeguard) { SPflag_ = 2; break; }
584 
585  // Compute new spectral step
586  lambdaTmp = (sHs <= safeguard) ? one/gmod.norm() : ss/sHs;
587  lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
588 
589  pgstep(pwa2, pwa, nobj, y, gmod.dual(), alpha, tol); // pass pwa by reference? *pwa?
590  gs = gmod.apply(pwa);
591  ss = pwa.dot(pwa);
592  gnorm = std::sqrt(ss) / lambda;
593 
594  if (gnorm <= gtol) { SPflag_ = 0; break; }
595  }
596  SPflag_ = (SPiter_==maxit_) ? 1 : SPflag_;
597 }
598 
599 template<typename Real>
601  Real &sval,
602  Real &nval,
603  Vector<Real> &gmod,
604  const Vector<Real> &x,
605  Real del,
607  Objective<Real> &nobj,
608  Vector<Real> &ymin,
609  Vector<Real> &pwa,
610  Vector<Real> &pwa1,
611  Vector<Real> &pwa2,
612  Vector<Real> &pwa3,
613  Vector<Real> &pwa4,
614  Vector<Real> &pwa5,
615  Vector<Real> &dwa,
616  std::ostream &outStream) {
617  // Use SPG to approximately solve TR subproblem:
618  // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> + phi(y) subject to ||y|| \le del
619  //
620  // Inpute:
621  // y = Cauchy step
622  // x = Current iterate
623  // g = Current gradient
624  const Real half(0.5), one(1), safeguard(1e2*ROL_EPSILON<Real>());
625  const Real mval(sval+nval);
626  Real tol(std::sqrt(ROL_EPSILON<Real>()));
627  Real mcomp(0), mval_min(0), sval_min(0), nval_min(0);
628  Real alpha(1), coeff(1), lambda(1), lambdaTmp(1);
629  Real snew(sval), nnew(nval), mnew(mval);
630  Real sold(sval), nold(nval), mold(mval);
631  Real sHs(0), ss(0), gs(0), Qk(0), gnorm(0);
632  std::deque<Real> mqueue; mqueue.push_back(mold);
633 
634  if (useNMSP_ && useMin_) {
635  mval_min = mval; sval_min = sval; nval_min = nval; ymin.set(y);
636  }
637 
638  // Compute initial proximal gradient norm
639  pwa1.set(gmod.dual());
640  pwa.set(y); pwa.axpy(-t0_,pwa1);
641  dprox(pwa,x,t0_,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream);
642  pwa.axpy(-one,y);
643  gnorm = pwa.norm() / t0_;
644  const Real gtol = std::min(tol1_,tol2_*gnorm);
645 
646  // Compute initial spectral step size
647  coeff = one / gmod.norm();
648  lambda = std::max(lambdaMin_,std::min(coeff,lambdaMax_));
649 
650  if (verbosity_ > 1)
651  outStream << " Spectral Projected Gradient" << std::endl;
652 
653  SPiter_ = 0;
654  while (SPiter_ < maxit_) {
655  SPiter_++;
656 
657  // Compuate SPG step
658  alpha = one;
659  pwa.set(y); pwa.axpy(-lambda,pwa1); // pwa = y - lambda gmod.dual()
660  dprox(pwa,x,lambda,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream); // pwa = P(y - lambda gmod.dual())
661  pwa2.set(pwa); // pwa2 = P(y - lambda gmod.dual())
662  pwa.axpy(-one,y); // pwa = P(y - lambda gmod.dual()) - y = step
663  ss = pwa.dot(pwa); // Norm squared of step
664 
665  // Evaluate model Hessian
666  model.hessVec(dwa,pwa,x,tol); nhess_++; // dwa = H step
667  nobj.update(pwa2, UpdateType::Trial);
668  nnew = nobj.value(pwa2, tol); state_->nnval++;
669  sHs = dwa.apply(pwa); // sHs = <step, H step>
670  gs = gmod.apply(pwa); // gs = <step, model gradient>
671  snew = half * sHs + gs + sold;
672  mnew = snew + nnew;
673  Qk = gs + nnew - nold;
674 
675  // Perform line search
676  //mcomp = useNMSP_ ? *std::max_element(mqueue.begin(),mqueue.end()) : mold;
677  //while( mnew > mcomp + mu0_*Qk ) {
678  // alpha *= interpf_;
679  // pwa2.set(y); pwa2.axpy(alpha,pwa);
680  // nobj.update(pwa2, UpdateType::Trial);
681  // nnew = nobj.value(pwa2, tol); state_->nnval++;
682  // snew = half * alpha * alpha * sHs + alpha * gs + sold;
683  // mnew = nnew + snew;
684  // Qk = alpha * gs + nnew - nold;
685  //}
686  if (useNMSP_) { // Nonmonotone
687  mcomp = *std::max_element(mqueue.begin(),mqueue.end());
688  while( mnew > mcomp + mu0_*Qk ) {
689  alpha *= interpf_;
690  pwa2.set(y); pwa2.axpy(alpha,pwa);
691  nobj.update(pwa2, UpdateType::Trial);
692  nnew = nobj.value(pwa2, tol); state_->nnval++;
693  snew = half * alpha * alpha * sHs + alpha * gs + sold;
694  mnew = nnew + snew;
695  Qk = alpha * gs + nnew - nold;
696  }
697  }
698  else {
699  alpha = (sHs <= safeguard) ? one : std::min(one,-(gs + nnew - nold)/sHs);
700  }
701 
702  // Update model quantities
703  y.set(pwa2);
704  sold = snew;
705  nold = nnew;
706  mold = mnew;
707  gmod.axpy(alpha,dwa); // Update model gradient
708  nobj.update(y, UpdateType::Accept);
709 
710  // Update nonmonotone line search information
711  if (useNMSP_) {
712  if (static_cast<int>(mqueue.size())==maxSize_) mqueue.pop_front();
713  mqueue.push_back(sval+nval);
714  if (useMin_ && mval <= mval_min) {
715  mval_min = mval; sval_min = sval; nval_min = nval; ymin.set(y);
716  }
717  }
718 
719  // Compute projected gradient norm
720  pwa1.set(gmod.dual());
721  pwa.set(y); pwa.axpy(-t0_,pwa1);
722  dprox(pwa,x,t0_,del,nobj,pwa2,pwa3,pwa4,pwa5,outStream);
723  pwa.axpy(-one,y);
724  gnorm = pwa.norm() / t0_;
725 
726  if (verbosity_ > 1) {
727  outStream << std::endl;
728  outStream << " Iterate: " << SPiter_ << std::endl;
729  outStream << " Spectral step length (lambda): " << lambda << std::endl;
730  outStream << " Step length (alpha): " << alpha << std::endl;
731  outStream << " Model decrease (pRed): " << mval-mold << std::endl;
732  outStream << " Optimality criterion: " << gnorm << std::endl;
733  outStream << std::endl;
734  }
735  if (gnorm < gtol) break;
736 
737  // Compute new spectral step
738  lambdaTmp = (sHs == 0 ? coeff : ss / sHs);
739  lambda = std::max(lambdaMin_, std::min(lambdaTmp, lambdaMax_));
740  }
741  if (useNMSP_ && useMin_) {
742  sval = sval_min; nval = nval_min; y.set(ymin);
743  }
744  else {
745  sval = sold; nval = nold;
746  }
747  SPflag_ = (SPiter_==maxit_) ? 1 : 0;
748 }
749 
750 template<typename Real>
752  const Vector<Real> &x0,
753  Real t,
754  Real del,
755  Objective<Real> &nobj,
756  Vector<Real> &y0,
757  Vector<Real> &y1,
758  Vector<Real> &yc,
759  Vector<Real> &pwa,
760  std::ostream &outStream) const {
761  // Solve ||P(t*x0 + (1-t)*(x-x0))-x0|| = del using Brent's method
762  const Real zero(0), half(0.5), one(1), two(2), three(3);
763  const Real eps(ROL_EPSILON<Real>()), tol0(1e1*eps), fudge(1.0-1e-2*sqrt(eps));
764  Real f0(0), f1(0), fc(0), t0(0), t1(1), tc(0), d1(1), d2(1), tol(1);
765  Real p(0), q(0), r(0), s(0), m(0);
766  int cnt(state_->nprox);
767  nobj.prox(y1, x, t, tol); state_->nprox++;
768  pwa.set(y1); pwa.axpy(-one,x0);
769  f1 = pwa.norm();
770  if (f1 <= del) {
771  x.set(y1);
772  return;
773  }
774  y0.set(x0);
775  tc = t0; fc = f0; yc.set(y0);
776  d1 = t1-t0; d2 = d1;
777  int code = 0;
778  while (true) {
779  if (std::abs(fc-del) < std::abs(f1-del)) {
780  t0 = t1; t1 = tc; tc = t0;
781  f0 = f1; f1 = fc; fc = f0;
782  y0.set(y1); y1.set(yc); yc.set(y0);
783  }
784  tol = two*eps*std::abs(t1) + half*tol0;
785  m = half*(tc - t1);
786  if (std::abs(m) <= tol) { code = 1; break; }
787  if ((f1 >= fudge*del && f1 <= del)) break;
788  if (std::abs(d1) < tol || std::abs(f0-del) <= std::abs(f1-del)) {
789  d1 = m; d2 = d1;
790  }
791  else {
792  s = (f1-del)/(f0-del);
793  if (t0 == tc) {
794  p = two*m*s;
795  q = one-s;
796  }
797  else {
798  q = (f0-del)/(fc-del);
799  r = (f1-del)/(fc-del);
800  p = s*(two*m*q*(q-r)-(t1-t0)*(r-one));
801  q = (q-one)*(r-one)*(s-one);
802  }
803  if (p > zero) q = -q;
804  else p = -p;
805  s = d1;
806  d1 = d2;
807  if (two*p < three*m*q-std::abs(tol*q) && p < std::abs(half*s*q)) {
808  d2 = p/q;
809  }
810  else {
811  d1 = m; d2 = d1;
812  }
813  }
814  t0 = t1; f0 = f1; y0.set(y1);
815  if (std::abs(d2) > tol) t1 += d2;
816  else if (m > zero) t1 += tol;
817  else t1 -= tol;
818  pwa.set(x); pwa.scale(t1); pwa.axpy(one-t1,x0);
819  nobj.prox(y1, pwa, t1*t, tol); state_->nprox++;
820  pwa.set(y1); pwa.axpy(-one,x0);
821  f1 = pwa.norm();
822  if ((f1 > del && fc > del) || (f1 <= del && fc <= del)) {
823  tc = t0; fc = f0; yc.set(y0);
824  d1 = t1-t0; d2 = d1;
825  }
826  }
827  if (code==1 && f1>del) x.set(yc);
828  else x.set(y1);
829  if (verbosity_ > 1) {
830  outStream << std::endl;
831  outStream << " Trust-Region Subproblem Proximity Operator" << std::endl;
832  outStream << " Number of proxes: " << state_->nprox-cnt << std::endl;
833  if (code == 1 && f1 > del) {
834  outStream << " Transformed Multiplier: " << tc << std::endl;
835  outStream << " Dual Residual: " << fc-del << std::endl;
836  }
837  else {
838  outStream << " Transformed Multiplier: " << t1 << std::endl;
839  outStream << " Dual Residual: " << f1-del << std::endl;
840  }
841  outStream << " Exit Code: " << code << std::endl;
842  outStream << std::endl;
843  }
844 }
845 
846 template<typename Real>
847 void TrustRegionAlgorithm<Real>::dbls(Real &alpha, Real &nval, Real &pred,
848  const Vector<Real> &y,
849  const Vector<Real> &s,
850  Real lambda, Real tmax,
851  Real kappa, Real gs,
852  Objective<Real> &nobj,
853  Vector<Real> &pwa) {
854  Real tol(std::sqrt(ROL_EPSILON<Real>()));
855  const Real eps(1e-2*std::sqrt(ROL_EPSILON<Real>())), tol0(1e4*tol);
856  const Real eps0(1e2*ROL_EPSILON<Real>());
857  const unsigned maxit(50);
858  const Real zero(0), half(0.5), one(1), two(2);
859  const Real lam(0.5*(3.0-std::sqrt(5.0)));
860  const Real nold(nval);
861  const Real mu(1e-4);
862 
863  // Evaluate model at initial left end point (t = 0)
864  Real tL(0), pL(0);
865  // Evaluate model at right end point (t = tmax)
866  Real tR = tmax;
867  pwa.set(y); pwa.axpy(tR, s);
868  nobj.update(pwa,UpdateType::Trial);
869  Real nR = nobj.value(pwa,tol); state_->nnval++;
870  Real pR = tR * (half * tR * kappa + gs) + nR - nold;
871 
872  // Compute minimizer of quadratic upper bound
873  Real t0 = tR, n0 = nR;
874  if (tmax > lambda) {
875  t0 = lambda;
876  pwa.set(y); pwa.axpy(t0, s);
877  nobj.update(pwa,UpdateType::Trial);
878  n0 = nobj.value(pwa,tol); state_->nnval++;
879  }
880  Real ts = (kappa > 0) ? std::min(tR,(((nold - n0) / kappa) / t0) - (gs / kappa)) : tR;
881  Real t = std::min(t0,ts);
882  bool useOptT = true;
883  if (t <= tL) {
884  t = (t0 < tR ? t0 : half*tR);
885  useOptT = false;
886  }
887 
888  // Evaluate model at t (t = minimizer of quadratic upper bound)
889  pwa.set(y); pwa.axpy(t, s);
890  nobj.update(pwa,UpdateType::Trial);
891  Real nt = nobj.value(pwa,tol); state_->nnval++;
892  Real Qt = t * gs + nt - nold, Qu = Qt;
893  Real pt = half * t * t * kappa + Qt;
894 
895  // If phi(x) = phi(x+tmax s) = phi(x+ts), then
896  // phi(x+ts) is constant for all t, so the TR
897  // model is quadratic---use the minimizer
898  if (useOptT && nt == nold && nR == nold) {
899  alpha = ts;
900  pred = ts * (half * ts * kappa + gs);
901  nval = nold;
902  return;
903  }
904 
905  // If pt >= max(pL, pR), then the model is concave
906  // and the minimum is obtained at the end points
907  if (pt >= std::max(pL, pR)) {
908  alpha = tR;
909  pred = pR;
910  nval = nR;
911  return;
912  }
913 
914  // Run Brent's method (quadratic interpolation + golden section)
915  // to minimize m_k(x+ts) with respect to t
916  Real w = t, v = t, pw = pt, pv = pt, d(0), pu(0), nu(0);
917  Real u(0), p(0), q(0), r(0), etmp(0), e(0), dL(0), dR(0);
918  Real tm = half * (tL + tR);
919  Real tol1 = tol0 * std::abs(t)+eps;
920  Real tol2 = two * tol1;
921  Real tol3 = eps;
922  for (unsigned it = 0u; it < maxit; ++it) {
923  dL = tL-t; dR = tR-t;
924  if (std::abs(e) > tol1) {
925  r = (t-w)*(pt-pv);
926  q = (t-v)*(pt-pw);
927  p = (t-v)*q-(t-w)*r;
928  q = two*(q-r);
929  if (q > zero) p = -p;
930  q = std::abs(q);
931  etmp = e;
932  e = d;
933  if ( std::abs(p) >= std::abs(half*q*etmp) || p <= q*dL || p >= q*dR ) {
934  e = (t > tm ? dL : dR);
935  d = lam * e;
936  }
937  else {
938  d = p/q;
939  u = t+d;
940  if (u-tL < tol2 || tR-u < tol2) d = (tm >= t ? tol1 : -tol1);
941  }
942  }
943  else {
944  e = (t > tm ? dL : dR);
945  d = lam * e;
946  }
947  u = t + (std::abs(d) >= tol1 ? d : (d >= zero ? tol1 : -tol1));
948  pwa.set(y); pwa.axpy(u, s);
949  nobj.update(pwa,UpdateType::Trial);
950  nu = nobj.value(pwa,tol); state_->nnval++;
951  Qu = u * gs + nu - nold;
952  pu = half * u * u * kappa + Qu;
953  if (pu <= pt) {
954  if (u >= t) tL = t;
955  else tR = t;
956  v = w; w = t; t = u;
957  pv = pw; pw = pt; pt = pu;
958  nt = nu; Qt = Qu;
959  }
960  else {
961  if (u < t) tL = u;
962  else tR = u;
963  if (pu <= pw || w == t) {
964  v = w; w = u;
965  pv = pw; pw = pu;
966  }
967  else if (pu <= pv || v == t || v == w) {
968  v = u; pv = pu;
969  }
970  }
971  tm = half * (tL+tR);
972  tol1 = tol0*std::abs(t)+eps;
973  tol2 = two*tol1;
974  tol3 = eps0 * std::max(std::abs(Qt),one);
975  if (pt <= (mu*std::min(zero,Qt)+tol3) && std::abs(t-tm) <= (tol2-half*(tR-tL))) break;
976  }
977  alpha = t;
978  pred = pt;
979  nval = nt;
980 }
981 
982 // NCG Subsolver
983 template<typename Real>
985  Real &sval,
986  Real &nval,
987  Vector<Real> &gmod,
988  const Vector<Real> &x,
989  Real del,
991  Objective<Real> &nobj,
992  Vector<Real> &s,
993  Vector<Real> &pwa1,
994  Vector<Real> &pwa2,
995  Vector<Real> &pwa3,
996  Vector<Real> &pwa4,
997  Vector<Real> &pwa5,
998  Vector<Real> &dwa,
999  std::ostream &outStream) {
1000  // Use NCG to approximately solve TR subproblem:
1001  // min 1/2 <H(y-x), (y-x)> + <g, (y-x)> + phi(y) subject to ||y-x|| \le del
1002  //
1003  // Inpute:
1004  // y = computed iterate
1005  // sval = smooth model value
1006  // nval = nonsmooth value
1007  // gmod = current gradient
1008  // x = current iterate
1009  // del = trust region radius
1010  // model = trust region model
1011  // nobj = nonsmooth objective function
1012  // s = the current step
1013  // pwa1 = the SPG iterate
1014  // pwa2 = the "negative gradient"
1015  // pwa3 = y - x
1016  // pwa4 = the previous "negative gradient"
1017  // pwa5 = temporary storage
1018  // dwa = the Hessian applied to the step
1019  const Real zero(0), half(0.5), one(1), two(2);
1020  const Real del2(del*del);
1021  Real tol(std::sqrt(ROL_EPSILON<Real>())), safeguard(tol);
1022  Real mold(sval+nval), nold(nval);
1023  Real snorm(0), snorm0(0), gnorm(0), gnorm0(0), gnorm2(0);
1024  Real alpha(1), beta(1), lambdaTmp(1), lambda(1), eta(etaNCG_);
1025  Real alphaMax(1), pred(0), lam1(1);
1026  Real sy(0), gg(0), sHs(0), gs(0), ds(0), ss(0), ss0(0);
1027  bool reset(true);
1028 
1029  // Set y = x
1030  y.set(x);
1031  pwa3.zero(); // Initially y - x = 0
1032 
1033  // Compute initial spectral step length
1034  lambdaTmp = t0_ / gmod.norm();
1035  lambda = std::max(lambdaMin_,std::min(lambdaTmp,lambdaMax_));
1036  lam1 = lambda;
1037 
1038  // Compute Cauchy point via SPG
1039  pgstep(pwa1, pwa2, nobj, y, gmod.dual(), lambda, tol); // pwa1 = prox(x-lambda g), pwa2 = pwa1 - x
1040  pwa2.scale(one/lambda); // pwa2 = (pwa1 - x) / lambda (for smooth: pwa2 = negative gradient)
1041  s.set(pwa2); // s = (pwa1 - x) / lambda
1042  gs = gmod.apply(s); // gs = <g, prox(x-lambda g)-x> / lambda
1043  snorm = s.norm(); // snorm = norm(prox(x-lambda g)-x) / lambda
1044  gnorm = snorm;
1045  const Real gtol = std::min(tol1_,tol2_*gnorm);
1046 
1047  if (verbosity_>1) outStream << " Nonlinear Conjugate Gradient" << std::endl;
1048 
1049  SPiter_ = 0;
1050  SPflag_ = 1;
1051  while (SPiter_ < maxit_) {
1052  SPiter_++;
1053 
1054  // Compute the model curvature
1055  model.hessVec(dwa,s,x,tol); nhess_++; // dwa = H step
1056  sHs = dwa.apply(s); // sHs = <step, H step>
1057 
1058  // Compute alpha as the 1D minimize in the s direction
1059  ss = snorm*snorm;
1060  ds = s.dot(pwa3);
1061  alphaMax = (-ds + std::sqrt(ds*ds + ss*(del2 - snorm0*snorm0)))/ss;
1062  dbls(alpha,nold,pred,y,s,lam1,alphaMax,sHs,gs,nobj,pwa5);
1063 
1064  //if (sHs <= safeguard) alpha = alphaMax;
1065  //else {
1066  // pwa5.set(y); pwa5.axpy(alphaMax, s);
1067  // nobj.update(pwa5,UpdateType::Trial);
1068  // nmax = nobj.value(pwa5,tol); state_->nnval++;
1069  // alpha = alphaMax * std::min(one, -(alphaMax * gs + nmax - nold)/(alphaMax * alphaMax * sHs));
1070  // if (alpha <= safeguard) alpha = std::min(one, -(gs + nval - nold)/sHs);
1071  //}
1072 
1073  // Update quantities to evaluate quadratic model value and gradient
1074  y.axpy(alpha, s);
1075  gmod.axpy(alpha, dwa); // need dual here?
1076  sval += alpha*(gs + half*alpha*sHs);
1077 
1078  // Check step size
1079  pwa3.set(y); pwa3.axpy(-one,x);
1080  ss0 += alpha*(alpha*ss + two*ds);
1081  snorm0 = std::sqrt(ss0); // pwa3.norm();
1082 
1083  if (snorm0 >= (one-safeguard)*del) { SPflag_ = 2; break; }
1084 
1085  // Update spectral step length
1086  lambdaTmp = (sHs <= safeguard) ? t0_/gmod.norm() : ss/sHs;
1087  lambda = std::max(lambdaMin_, std::min(lambdaMax_, lambdaTmp));
1088 
1089  // Compute SPG direction
1090  pwa4.set(pwa2); // store previous "negative gradient"
1091  pgstep(pwa1, pwa2, nobj, y, gmod.dual(), lambda, tol); // pwa1 = prox(x-lambda g), pwa2 = pwa1 - x
1092  pwa2.scale(one/lambda); // pwa2 = (pwa1 - x) / lambda (for smooth: pwa2 = negative gradient)
1093  gnorm0 = gnorm;
1094  gnorm = pwa2.norm();
1095  if (gnorm <= gtol) { SPflag_ = 0; break; }
1096 
1097  gnorm2 = gnorm * gnorm;
1098  switch (ncgType_) {
1099  case 0: // Fletcher-Reeves
1100  beta = gnorm2/(gnorm0 * gnorm0);
1101  break;
1102  default:
1103  case 1: // Polyak-Ribiere+
1104  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1105  beta = std::max(zero, -pwa5.dot(pwa2)/(gnorm0*gnorm0));
1106  break;
1107  case 2: // Hager-Zhang
1108  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1109  sy = s.dot(pwa5);
1110  gg = pwa2.dot(pwa4);
1111  eta = -one/(s.norm()*std::min(etaNCG_, gnorm0));
1112  beta = std::max(eta, (gnorm2-gg-two*pwa2.dot(s)*(gnorm2-two*gg+(gnorm0*gnorm0))/sy)/sy);
1113  break;
1114  case 3: // Hestenes-Stiefel+
1115  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1116  beta = std::max(zero, -pwa2.dot(pwa5)/s.dot(pwa5));
1117  break;
1118  case 4: // Dai-Yuan+
1119  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1120  beta = std::max(zero, gnorm2/s.dot(pwa5));
1121  break;
1122  case 5: // Fletcher-Reeves-Polyak-Ribiere
1123  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1124  beta = std::max(-gnorm2, std::min(gnorm2, -pwa5.dot(pwa2)))/(gnorm0*gnorm0);
1125  break;
1126  case 6: //Dai-Yuan-Hestenes-Stiefles
1127  pwa5.set(pwa4); pwa5.axpy(-one,pwa2);
1128  beta = std::max(zero, std::min(-pwa2.dot(pwa5), gnorm2)/s.dot(pwa5));
1129  break;
1130  }
1131 
1132  reset = true;
1133  if (beta != zero && beta < ROL_INF<Real>()){
1134  pwa5.set(pwa2); // pwa5 = pwa2 (SPG step)
1135  pwa5.axpy(beta, s); // pwa5 = pwa2 + beta*s
1136  pwa4.set(y); // pwa4 = y
1137  pwa4.plus(pwa5); // pwa4 = y + pwa5
1138  nobj.update(pwa4,UpdateType::Trial);
1139  nval = nobj.value(pwa4,tol); state_->nnval++;
1140  gs = gmod.apply(pwa5);
1141  if (gs + nval - nold <= -(one - desPar_)*gnorm2) {
1142  s.set(pwa5);
1143  lam1 = one;
1144  reset = false;
1145  }
1146  }
1147  if (reset){ // Reset because either beta=0 or step does not produce descent
1148  s.set(pwa2);
1149  gs = gmod.apply(s);
1150  lam1 = lambda;
1151  beta = zero;
1152  }
1153  snorm = s.norm();
1154 
1155  if (verbosity_ > 1) {
1156  outStream << std::endl;
1157  outStream << " Iterate: " << SPiter_ << std::endl;
1158  outStream << " Spectral step length (lambda): " << lambda << std::endl;
1159  outStream << " Step length (alpha): " << alpha << std::endl;
1160  outStream << " NCG parameter (beta): " << beta << std::endl;
1161  outStream << " Model decrease (pRed): " << mold-(sval+nold) << std::endl;
1162  outStream << " Step size: " << snorm0 << std::endl;
1163  outStream << " Optimality criterion: " << gnorm << std::endl;
1164  outStream << " Optimality tolerance: " << gtol << std::endl;
1165  outStream << std::endl;
1166  }
1167  }
1168  nval = nold;
1169 }
1170 
1171 template<typename Real>
1172 void TrustRegionAlgorithm<Real>::writeHeader( std::ostream& os ) const {
1173  std::ios_base::fmtflags osFlags(os.flags());
1174  if (verbosity_ > 1) {
1175  os << std::string(114,'-') << std::endl;
1176  switch (algSelect_) {
1177  default:
1178  case TRUSTREGION_P_SPG: os << " SPG "; break;
1179  case TRUSTREGION_P_SPG2: os << " Simplified SPG "; break;
1180  case TRUSTREGION_P_NCG: os << " NCG "; break;
1181  }
1182  os << "trust-region method status output definitions" << std::endl << std::endl;
1183  os << " iter - Number of iterates (steps taken)" << std::endl;
1184  os << " value - Objective function value" << std::endl;
1185  os << " gnorm - Norm of the gradient" << std::endl;
1186  os << " snorm - Norm of the step (update to optimization vector)" << std::endl;
1187  os << " delta - Trust-Region radius" << std::endl;
1188  os << " #sval - Number of times the smooth objective function was evaluated" << std::endl;
1189  os << " #nval - Number of times the nonsmooth objective function was evaluated" << std::endl;
1190  os << " #grad - Number of times the gradient was computed" << std::endl;
1191  os << " #hess - Number of times the Hessian was applied" << std::endl;
1192  os << " #prox - Number of times the proximal operator was computed" << std::endl;
1193  os << std::endl;
1194  os << " tr_flag - Trust-Region flag" << std::endl;
1195  for( int flag = TRUtils::SUCCESS; flag != TRUtils::UNDEFINED; ++flag ) {
1196  os << " " << NumberToString(flag) << " - "
1197  << TRUtils::ETRFlagToString(static_cast<TRUtils::ETRFlag>(flag)) << std::endl;
1198  }
1199  os << std::endl;
1200  os << " iterSP - Number of Spectral Projected Gradient iterations" << std::endl << std::endl;
1201  os << " flagSP - Trust-Region Spectral Projected Gradient flag" << std::endl;
1202  os << " 0 - Converged" << std::endl;
1203  os << " 1 - Iteration Limit Exceeded" << std::endl;
1204  os << std::string(114,'-') << std::endl;
1205  }
1206  os << " ";
1207  os << std::setw(6) << std::left << "iter";
1208  os << std::setw(15) << std::left << "value";
1209  os << std::setw(15) << std::left << "gnorm";
1210  os << std::setw(15) << std::left << "snorm";
1211  os << std::setw(15) << std::left << "delta";
1212  os << std::setw(10) << std::left << "#sval";
1213  os << std::setw(10) << std::left << "#nval";
1214  os << std::setw(10) << std::left << "#grad";
1215  os << std::setw(10) << std::left << "#hess";
1216  os << std::setw(10) << std::left << "#prox";
1217  os << std::setw(10) << std::left << "tr_flag";
1218  os << std::setw(10) << std::left << "iterSP";
1219  os << std::setw(10) << std::left << "flagSP";
1220  os << std::endl;
1221  os.flags(osFlags);
1222 }
1223 
1224 template<typename Real>
1225 void TrustRegionAlgorithm<Real>::writeName( std::ostream& os ) const {
1226  std::ios_base::fmtflags osFlags(os.flags());
1227  os << std::endl;
1228  switch (algSelect_) {
1229  default:
1230  case TRUSTREGION_P_SPG: os << "SPG "; break;
1231  case TRUSTREGION_P_SPG2: os << "Simplified SPG "; break;
1232  case TRUSTREGION_P_NCG: os << "NCG "; break;
1233  }
1234  os << "Trust-Region Method (Type P)" << std::endl;
1235  os.flags(osFlags);
1236 }
1237 
1238 template<typename Real>
1239 void TrustRegionAlgorithm<Real>::writeOutput( std::ostream& os, bool write_header ) const {
1240  std::ios_base::fmtflags osFlags(os.flags());
1241  os << std::scientific << std::setprecision(6);
1242  if ( state_->iter == 0 ) writeName(os);
1243  if ( write_header ) writeHeader(os);
1244  if ( state_->iter == 0 ) {
1245  os << " ";
1246  os << std::setw(6) << std::left << state_->iter;
1247  os << std::setw(15) << std::left << state_->value;
1248  os << std::setw(15) << std::left << state_->gnorm;
1249  os << std::setw(15) << std::left << "---";
1250  os << std::setw(15) << std::left << state_->searchSize;
1251  os << std::setw(10) << std::left << state_->nsval;
1252  os << std::setw(10) << std::left << state_->nnval;
1253  os << std::setw(10) << std::left << state_->ngrad;
1254  os << std::setw(10) << std::left << nhess_;
1255  os << std::setw(10) << std::left << state_->nprox;
1256  os << std::setw(10) << std::left << "---";
1257  os << std::setw(10) << std::left << "---";
1258  os << std::setw(10) << std::left << "---";
1259  os << std::endl;
1260  }
1261  else {
1262  os << " ";
1263  os << std::setw(6) << std::left << state_->iter;
1264  os << std::setw(15) << std::left << state_->value;
1265  os << std::setw(15) << std::left << state_->gnorm;
1266  os << std::setw(15) << std::left << state_->snorm;
1267  os << std::setw(15) << std::left << state_->searchSize;
1268  os << std::setw(10) << std::left << state_->nsval;
1269  os << std::setw(10) << std::left << state_->nnval;
1270  os << std::setw(10) << std::left << state_->ngrad;
1271  os << std::setw(10) << std::left << nhess_;
1272  os << std::setw(10) << std::left << state_->nprox;
1273  os << std::setw(10) << std::left << TRflag_;
1274  os << std::setw(10) << std::left << SPiter_;
1275  os << std::setw(10) << std::left << SPflag_;
1276  os << std::endl;
1277  }
1278  os.flags(osFlags);
1279 }
1280 
1281 } // namespace TypeP
1282 } // namespace ROL
1283 
1284 #endif
Provides the interface to evaluate objective functions.
void dspg2(Vector< Real > &y, Real &sval, Real &nval, Real &pRed, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &pwa, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &dwa, std::ostream &outStream=std::cout)
virtual const Vector & dual() const
Return dual representation of , for example, the result of applying a Riesz map, or change of basis...
Definition: ROL_Vector.hpp:226
Real computeValue(Real inTol, Real &outTol, Real pRed, Real &fold, int iter, const Vector< Real > &x, const Vector< Real > &xold, Objective< Real > &obj)
virtual void scale(const Real alpha)=0
Compute where .
void dspg(Vector< Real > &y, Real &sval, Real &nval, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &ymin, Vector< Real > &pwa, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &pwa3, Vector< Real > &pwa4, Vector< Real > &pwa5, Vector< Real > &dwa, std::ostream &outStream=std::cout)
virtual ROL::Ptr< Vector > clone() const =0
Clone to make a new (uninitialized) vector.
virtual Real apply(const Vector< Real > &x) const
Apply to a dual vector. This is equivalent to the call .
Definition: ROL_Vector.hpp:238
TrustRegionAlgorithm(ParameterList &list, const Ptr< Secant< Real >> &secant=nullPtr)
virtual void plus(const Vector &x)=0
Compute , where .
virtual void axpy(const Real alpha, const Vector &x)
Compute where .
Definition: ROL_Vector.hpp:153
void initialize(Vector< Real > &x, const Vector< Real > &g, Real ftol, Objective< Real > &sobj, Objective< Real > &nobj, Vector< Real > &px, Vector< Real > &dg, std::ostream &outStream=std::cout)
virtual Real value(const Vector< Real > &x, Real &tol)=0
Compute value.
virtual void prox(Vector< Real > &Pv, const Vector< Real > &v, Real t, Real &tol)
ESecant StringToESecant(std::string s)
Definition: ROL_Types.hpp:543
virtual void zero()
Set to zero vector.
Definition: ROL_Vector.hpp:167
Defines the linear algebra or vector space interface.
Definition: ROL_Vector.hpp:80
Real dcauchy(Vector< Real > &s, Real &alpha, Real &sval, Real &nval, const Vector< Real > &x, const Vector< Real > &g, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &px, Vector< Real > &dwa, Vector< Real > &dwa1, std::ostream &outStream=std::cout)
virtual void update(const Vector< Real > &x, UpdateType type, int iter=-1)
Update objective function.
virtual Real dot(const Vector &x) const =0
Compute where .
Objective_SerialSimOpt(const Ptr< Obj > &obj, const V &ui) z0_ zero()
void dbls(Real &alpha, Real &nval, Real &pred, const Vector< Real > &y, const Vector< Real > &s, Real lambda, Real tmax, Real kappa, Real gs, Objective< Real > &nobj, Vector< Real > &pwa)
virtual void hessVec(Vector< Real > &hv, const Vector< Real > &v, const Vector< Real > &s, Real &tol) override
Apply Hessian approximation to vector.
ETRFlag
Enumation of flags used by trust-region solvers.
ETrustRegionP StringToETrustRegionP(std::string s)
virtual void gradient(Vector< Real > &g, const Vector< Real > &x, Real &tol)
Compute gradient.
std::string NumberToString(T Number)
Definition: ROL_Types.hpp:81
Provides the interface to evaluate trust-region model functions.
ESecantMode
Definition: ROL_Secant.hpp:57
void run(Vector< Real > &x, const Vector< Real > &g, Objective< Real > &sobj, Objective< Real > &nobj, std::ostream &outStream=std::cout) override
Run algorithm on unconstrained problems (Type-U). This general interface supports the use of dual opt...
Provides interface for and implements limited-memory secant operators.
Definition: ROL_Secant.hpp:79
Provides an interface to check status of optimization algorithms.
std::string ETRFlagToString(ETRFlag trf)
void writeOutput(std::ostream &os, bool write_header=false) const override
Print iterate status.
void dprox(Vector< Real > &x, const Vector< Real > &x0, Real t, Real del, Objective< Real > &nobj, Vector< Real > &y0, Vector< Real > &y1, Vector< Real > &yc, Vector< Real > &pwa, std::ostream &outStream=std::cout) const
void dncg(Vector< Real > &y, Real &sval, Real &nval, Vector< Real > &gmod, const Vector< Real > &x, Real del, TrustRegionModel_U< Real > &model, Objective< Real > &nobj, Vector< Real > &s, Vector< Real > &pwa1, Vector< Real > &pwa2, Vector< Real > &pwa3, Vector< Real > &pwa4, Vector< Real > &pwa5, Vector< Real > &dwa, std::ostream &outStream=std::cout)
virtual void set(const Vector &x)
Set where .
Definition: ROL_Vector.hpp:209
virtual Real norm() const =0
Returns where .
virtual void writeExitStatus(std::ostream &os) const
void initialize(const Vector< Real > &x, const Vector< Real > &g)
void writeName(std::ostream &os) const override
Print step name.
void computeGradient(const Vector< Real > &x, Vector< Real > &g, Vector< Real > &px, Vector< Real > &dg, Vector< Real > &pwa, Real del, Objective< Real > &sobj, Objective< Real > &nobj, bool accept, Real &gtol, Real &gnorm, std::ostream &outStream=std::cout) const
void writeHeader(std::ostream &os) const override
Print iterate header.