MoochoPack : Framework for Large-Scale Optimization Algorithms  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
MoochoPack_BFGSUpdate_Strategy.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Moocho: Multi-functional Object-Oriented arCHitecture for Optimization
5 // Copyright (2003) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include <limits>
43 
44 #include "MoochoPack_BFGSUpdate_Strategy.hpp"
45 #include "MoochoPack_Exceptions.hpp"
46 #include "AbstractLinAlgPack_TestMatrixSymSecant.hpp"
47 #include "AbstractLinAlgPack_MatrixSymSecant.hpp"
48 #include "AbstractLinAlgPack_MatrixSymOp.hpp"
49 #include "AbstractLinAlgPack_MatrixOpOut.hpp"
50 #include "AbstractLinAlgPack_VectorSpace.hpp"
51 #include "AbstractLinAlgPack_VectorMutable.hpp"
52 #include "AbstractLinAlgPack_VectorOut.hpp"
53 #include "AbstractLinAlgPack_VectorStdOps.hpp"
54 #include "AbstractLinAlgPack_LinAlgOpPack.hpp"
55 #include "Teuchos_dyn_cast.hpp"
56 #include "Teuchos_Assert.hpp"
57 
58 namespace MoochoPack {
59 
61  bool rescale_init_identity
62  ,bool use_dampening
63  ,ESecantTesting secant_testing
64  ,value_type secant_warning_tol
65  ,value_type secant_error_tol
66  )
67  :rescale_init_identity_(rescale_init_identity)
68  ,use_dampening_(use_dampening)
69  ,secant_testing_(secant_testing)
70  ,secant_warning_tol_(secant_warning_tol)
71  ,secant_error_tol_(secant_error_tol)
72 {}
73 
75  VectorMutable *s_bfgs
76  ,VectorMutable *y_bfgs
77  ,bool first_update
78  ,std::ostream &out
79  ,EJournalOutputLevel olevel
80  ,bool check_results
81  ,MatrixSymOp *B
82  ,QuasiNewtonStats *quasi_newton_stats
83  )
84 {
85  namespace rcp = MemMngPack;
86  using Teuchos::dyn_cast;
90  using LinAlgOpPack::Vp_V;
91  using LinAlgOpPack::V_StV;
92  using LinAlgOpPack::V_VpV;
93  using LinAlgOpPack::V_VmV;
94  using LinAlgOpPack::V_MtV;
95 
96  const value_type
97  NOT_CALCULATED = std::numeric_limits<value_type>::max();
98  value_type
99  sTy = NOT_CALCULATED,
100  yTy = NOT_CALCULATED;
101  bool used_dampening = false;
102 
103  // Make sure the update is defined!
104  if( sTy == NOT_CALCULATED )
105  sTy = dot( *s_bfgs, *y_bfgs );
106  if( sTy == 0 ) {
107  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) {
108  out << "\n(y'*s) == 0, skipping the update ...\n";
109  }
110  quasi_newton_stats->set_updated_stats( QuasiNewtonStats::INDEF_SKIPED );
111  return;
112  }
113 
114  MatrixSymSecant
115  &B_updatable = dyn_cast<MatrixSymSecant>(*B);
116 
117  // /////////////////////////////////////////////////////////////
118  // Consider rescaling the initial identity hessian before
119  // the update.
120  //
121  // This was taken from Nocedal & Wright, 1999, p. 200
122  //
123  // Bo = (y'*y)/(y'*s) * I
124  //
125  if( first_update && rescale_init_identity() ) {
126  if( sTy == NOT_CALCULATED )
127  sTy = dot( *s_bfgs, *y_bfgs );
128  if( yTy == NOT_CALCULATED )
129  yTy = dot( *y_bfgs, *y_bfgs );
130  const value_type
131  Iscale = yTy/sTy;
132  const value_type
133  Iscale_too_small = 1e-5; // ToDo: Make this adjustable
134  if( Iscale >= Iscale_too_small ) {
135  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) {
136  out << "\nRescaling the initial identity matrix before the update as:\n"
137  << "Iscale = (y'*y)/(y'*s) = ("<<yTy<<")/("<<sTy<<") = "<<Iscale<<"\n"
138  << "B = Iscale * eye(n-r) ...\n";
139  }
140  B_updatable.init_identity( y_bfgs->space(), Iscale );
141  if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) {
142  out << "\nB after rescaling = \n" << *B;
143  }
144  }
145  else {
146  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) {
147  out << "\nSkipping rescaling of the initial identity matrix since:\n"
148  << "Iscale = (y'*y)/(y'*s) = ("<<yTy<<")/("<<sTy<<") = "<<Iscale
149  << " < Iscale_too_small = " << Iscale_too_small << std::endl;
150  }
151  }
152  }
153 
154  // ////////////////////////////////////////////////////
155  // Modify the s_bfgs and y_bfgs vectors for dampening?
156  VectorSpace::vec_mut_ptr_t
157  Bs = Teuchos::null;
158  if( use_dampening() ) {
159  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
160  {
161  out
162  << "\nConsidering the dampened update ...\n";
163  }
164  // Bs = Bm1 * s_bfgs
165  Bs = y_bfgs->space().create_member();
166  V_MtV( Bs.get(), *B, BLAS_Cpp::no_trans, *s_bfgs );
167  // sTy = s_bfgs' * y_bfgs
168  if( sTy == NOT_CALCULATED )
169  sTy = dot( *s_bfgs, *y_bfgs );
170  // sTBs = s_bfgs' * Bs
171  const value_type sTBs = dot( *s_bfgs, *Bs );
172  // Select dampening parameter theta
173  const value_type
174  theta = ( sTy >= 0.2 * sTBs )
175  ? 1.0
176  : (0.8 * sTBs ) / ( sTBs - sTy );
177  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
178  {
179  out
180  << "\ns_bfgs'*y_bfgs = " << sTy
181  << ( theta == 1.0 ? " >= " : " < " )
182  << " s_bfgs' * B * s_bfgs = " << sTBs << std::endl;
183  }
184  if( theta == 1.0 ) {
185  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
186  {
187  out
188  << "Perform the undamped update ...\n";
189  }
190  }
191  else {
192  // y_bfgs = theta*y_bfgs + (1-theta)*B*s_bfgs
193  Vt_S( y_bfgs, theta );
194  Vp_StV( y_bfgs, (1.0-theta), *Bs );
195 
196  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
197  {
198  out
199  << "Dampen the update ...\n"
200  << "theta = " << theta << std::endl
201  << "y_bfgs = theta*y_bfgs + (1-theta)*B*s_bfgs ...\n"
202  << "||y_bfgs||inf = " << y_bfgs->norm_inf() << std::endl;
203  }
204 
205  if( (int)olevel >= (int)PRINT_VECTORS )
206  {
207  out
208  << "y_bfgs =\n" << *y_bfgs;
209  }
210 
211  used_dampening = true;
212  }
213  }
214 
215  // Perform the update if it is defined (s_bfgs' * y_bfgs > 0.0)
216 
217  VectorSpace::vec_mut_ptr_t
218  s_bfgs_save = Teuchos::null,
219  y_bfgs_save = Teuchos::null;
220  if( check_results ) {
221  // Save s and y since they may be overwritten in the update.
222  s_bfgs_save = s_bfgs->clone();
223  y_bfgs_save = y_bfgs->clone();
224  }
225  try {
226  B_updatable.secant_update(
227  s_bfgs
228  ,y_bfgs
229  ,Bs.get()
230  );
231  }
232  catch( const MatrixSymSecant::UpdateFailedException& excpt ) {
233  if( (int)olevel >= (int)PRINT_BASIC_ALGORITHM_INFO ) {
234  out << "\nThe factorization of B failed in a major way! Rethrow!\n";
235  }
236  throw;
237  }
238  catch( const MatrixSymSecant::UpdateSkippedException& excpt ) {
239  if( (int)olevel >= (int)PRINT_BASIC_ALGORITHM_INFO ) {
240  out << excpt.what() << std::endl
241  << "\nSkipping BFGS update. B = B ...\n";
242  }
243  quasi_newton_stats->set_updated_stats(
244  QuasiNewtonStats::INDEF_SKIPED );
245  return;
246  }
247 
248  if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) {
249  out << "\nB after the BFGS update = \n" << *B;
250  }
251 
252  if( ( check_results && secant_testing() == SECANT_TEST_DEFAULT )
253  || secant_testing() == SECANT_TEST_ALWAYS )
254  {
255  const bool result =
256  AbstractLinAlgPack::TestMatrixSymSecant(
257  *B, *s_bfgs_save, *y_bfgs_save
258  , secant_warning_tol(), secant_error_tol()
259  , (int)olevel >= (int)PRINT_VECTORS
260  , (int)olevel > (int)PRINT_NOTHING ? &out : NULL
261  , (int)olevel >= (int)PRINT_ALGORITHM_STEPS
262  );
263  if( !result ) {
264  const char
265  msg[] = "Error, the secant property for the BFGS update failed\n"
266  "Stopping the algorithm ...\n";
267  out << msg;
269  true, TestFailed
270  ," BFGSUpdate_Strategy::perform_update(...) : " << msg );
271  }
272  }
273 
274  quasi_newton_stats->set_updated_stats(
275  used_dampening
276  ? QuasiNewtonStats::DAMPENED_UPDATED
277  : QuasiNewtonStats::UPDATED );
278 }
279 
280 void BFGSUpdate_Strategy::print_step( std::ostream& out, const std::string& L ) const
281 {
282  out
283  << L << "if use_dampening == true then\n"
284  << L << " if s'*y >= 0.2 * s'*B*s then\n"
285  << L << " theta = 1.0\n"
286  << L << " else\n"
287  << L << " theta = 0.8*s'*B*s / (s'*B*s - s'*y)\n"
288  << L << " end\n"
289  << L << " y = theta*y + (1-theta)*B*s\n"
290  << L << "end\n"
291  << L << "if first_update && rescale_init_identity and y'*s is sufficently positive then\n"
292  << L << " B = |(y'*y)/(y'*s)| * eye(size(s))\n"
293  << L << "end\n"
294  << L << "if s'*y is sufficently positive then\n"
295  << L << " *** Peform BFGS update\n"
296  << L << " (B, s, y ) -> B (through MatrixSymSecantUpdate interface)\n"
297  << L << " if ( check_results && secant_testing == SECANT_TEST_DEFAULT )\n"
298  << L << " or ( secant_testing == SECANT_TEST_ALWAYS ) then\n"
299  << L << " if B*s != y then\n"
300  << L << " *** The secant condition does not check out\n"
301  << L << " Throw TestFailed!\n"
302  << L << " end\n"
303  << L << " end\n"
304  << L << "end\n"
305  ;
306 }
307 
308 } // end namespace MoochoPack
BFGSUpdate_Strategy(bool rescale_init_identity=true, bool use_dampening=true, ESecantTesting secant_testing=SECANT_TEST_DEFAULT, value_type secant_warning_tol=1e-6, value_type secant_error_tol=1e-1)
void print_step(std::ostream &out, const std::string &leading_str) const
void Vt_S(VectorMutable *v_lhs, const value_type &alpha)
Thrown if a runtime test failed.
Class for storing statistics about the Quasi-Newton updating.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void Vp_StV(VectorMutable *v_lhs, const value_type &alpha, const Vector &v_rhs)
T_To & dyn_cast(T_From &from)
void V_StV(VectorMutable *v_lhs, value_type alpha, const V &V_rhs)
void perform_update(VectorMutable *s_bfgs, VectorMutable *y_bfgs, bool first_update, std::ostream &out, EJournalOutputLevel olevel, bool check_results, MatrixSymOp *B, QuasiNewtonStats *quasi_newton_stats)
Perform the BFGS update.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void V_VpV(VectorMutable *v_lhs, const V1 &V1_rhs1, const V2 &V2_rhs2)
value_type dot(const Vector &v_rhs1, const Vector &v_rhs2)
void set_updated_stats(EUpdate update)
Initialize the statistics.
void V_VmV(VectorMutable *v_lhs, const V1 &V1_rhs1, const V2 &V2_rhs2)
void Vp_V(VectorMutable *v_lhs, const V &V_rhs)