Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_TimeMonitor.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_TimeMonitor.hpp"
43 #include "Teuchos_CommHelpers.hpp"
44 #include "Teuchos_DefaultComm.hpp"
45 #include "Teuchos_TableColumn.hpp"
46 #include "Teuchos_TableFormat.hpp"
47 #include "Teuchos_StandardParameterEntryValidators.hpp"
48 #include "Teuchos_ScalarTraits.hpp"
49 #include "Teuchos_StackedTimer.hpp"
50 
51 #include <functional>
52 #include <iomanip>
53 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
54 #include <sstream>
55 #endif
56 
57 namespace Teuchos {
110  template<class Ordinal, class ScalarType, class IndexType>
111  class MaxLoc :
112  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
113  public:
114  void
115  reduce (const Ordinal count,
116  const std::pair<ScalarType, IndexType> inBuffer[],
117  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
118  };
119 
120  template<class Ordinal>
121  class MaxLoc<Ordinal, double, int> :
122  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
123  public:
124  void
125  reduce (const Ordinal count,
126  const std::pair<double, int> inBuffer[],
127  std::pair<double, int> inoutBuffer[]) const
128  {
129  for (Ordinal ind = 0; ind < count; ++ind) {
130  const std::pair<double, int>& in = inBuffer[ind];
131  std::pair<double, int>& inout = inoutBuffer[ind];
132 
133  if (in.first > inout.first) {
134  inout.first = in.first;
135  inout.second = in.second;
136  } else if (in.first < inout.first) {
137  // Don't need to do anything; inout has the values.
138  } else { // equal, or at least one is NaN.
139  inout.first = in.first;
140  inout.second = std::min (in.second, inout.second);
141  }
142  }
143  }
144  };
145 
172  template<class Ordinal, class ScalarType, class IndexType>
173  class MinLoc :
174  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
175  public:
176  void
177  reduce (const Ordinal count,
178  const std::pair<ScalarType, IndexType> inBuffer[],
179  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
180  };
181 
182  template<class Ordinal>
183  class MinLoc<Ordinal, double, int> :
184  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
185  public:
186  void
187  reduce (const Ordinal count,
188  const std::pair<double, int> inBuffer[],
189  std::pair<double, int> inoutBuffer[]) const
190  {
191  for (Ordinal ind = 0; ind < count; ++ind) {
192  const std::pair<double, int>& in = inBuffer[ind];
193  std::pair<double, int>& inout = inoutBuffer[ind];
194 
195  if (in.first < inout.first) {
196  inout.first = in.first;
197  inout.second = in.second;
198  } else if (in.first > inout.first) {
199  // Don't need to do anything; inout has the values.
200  } else { // equal, or at least one is NaN.
201  inout.first = in.first;
202  inout.second = std::min (in.second, inout.second);
203  }
204  }
205  }
206  };
207 
211  template<class Ordinal, class ScalarType, class IndexType>
213  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
214  public:
215  void
216  reduce (const Ordinal count,
217  const std::pair<ScalarType, IndexType> inBuffer[],
218  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
219  };
220 
221  template<class Ordinal>
222  class MinLocNonzero<Ordinal, double, int> :
223  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
224  public:
225  void
226  reduce (const Ordinal count,
227  const std::pair<double, int> inBuffer[],
228  std::pair<double, int> inoutBuffer[]) const
229  {
230  for (Ordinal ind = 0; ind < count; ++ind) {
231  const std::pair<double, int>& in = inBuffer[ind];
232  std::pair<double, int>& inout = inoutBuffer[ind];
233 
234  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
235  inout.first = in.first;
236  inout.second = in.second;
237  } else if (in.first > inout.first) {
238  // Don't need to do anything; inout has the values.
239  } else { // equal, or at least one is NaN.
240  inout.first = in.first;
241  inout.second = std::min (in.second, inout.second);
242  }
243  }
244  }
245  };
246 
247  // Typedef used internally by TimeMonitor::summarize() and its
248  // helper functions. The map is keyed on timer label (a string).
249  // Each value is a pair: (total number of seconds over all calls to
250  // that timer, total number of calls to that timer).
251  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
252 
253  // static initialization
255 
256  TimeMonitor::TimeMonitor (Time& timer, bool reset)
257  : PerformanceMonitorBase<Time>(timer, reset)
258  {
259  if (!isRecursiveCall()) {
260  counter().start(reset);
261 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
262  if (nonnull(stackedTimer_))
263  stackedTimer_->start(counter().name(),false);
264 #endif
265  }
266  }
267 
269  if (!isRecursiveCall()) {
270  counter().stop();
271 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
272  try {
273  if (nonnull(stackedTimer_))
274  stackedTimer_->stop(counter().name(),false);
275  }
276  catch (std::runtime_error&) {
277  std::ostringstream warning;
278  warning <<
279  "\n*********************************************************************\n"
280  "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
281  "A TimeMonitor timer was stopped before a nested subtimer was\n"
282  "stopped. This is not allowed by the StackedTimer. This corner case\n"
283  "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
284  "assigned to a new timer. To disable this warning, either fix the\n"
285  "ordering of timer creation and destuction or disable the StackedTimer\n"
286  "support in the TimeMonitor by setting the StackedTimer to null\n"
287  "Example:\n"
288  " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
289  "///code to time \n"
290  "MM = Teuchos::null;\n"
291  "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
292  "*********************************************************************\n";
293  std::cout << warning.str() << std::endl;
295  }
296 #endif
297  }
298  }
299 
300  void
301  TimeMonitor::disableTimer (const std::string& name)
302  {
303  RCP<Time> timer = lookupCounter (name);
305  timer == null, std::invalid_argument,
306  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
307  timer->disable ();
308  }
309 
310  void
311  TimeMonitor::enableTimer (const std::string& name)
312  {
313  RCP<Time> timer = lookupCounter (name);
315  timer == null, std::invalid_argument,
316  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
317  timer->enable ();
318  }
319 
320  void
322  {
323  typedef std::map<std::string, RCP<Time> > map_type;
324  typedef map_type::iterator iter_type;
325  map_type& ctrs = counters ();
326 
327  // In debug mode, loop first to check whether any of the timers
328  // are running, before resetting them. This ensures that this
329  // method satisfies the strong exception guarantee (either it
330  // completes normally, or there are no side effects).
331 #ifdef TEUCHOS_DEBUG
332  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
333  // We throw a runtime_error rather than a logic_error, because
334  // logic_error suggests a bug in the implementation of
335  // TimeMonitor. Calling zeroOutTimers() when a timer is running
336  // is not TimeMonitor's fault.
338  it->second->isRunning (), std::runtime_error,
339  "Timer \"" << it->second->name () << "\" is currently running. "
340  "You are not allowed to reset running timers.");
341  }
342 #endif // TEUCHOS_DEBUG
343 
344  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
345  it->second->reset ();
346  }
347  }
348 
349  // An anonymous namespace is the standard way of limiting linkage of
350  // its contained routines to file scope.
351  namespace {
352  // \brief Return an "empty" local timer datum.
353  //
354  // "Empty" means the datum has zero elapsed time and zero call
355  // count. This function does not actually create a timer.
356  //
357  // \param name The timer's name.
358  std::pair<std::string, std::pair<double, int> >
359  makeEmptyTimerDatum (const std::string& name)
360  {
361  return std::make_pair (name, std::make_pair (double(0), int(0)));
362  }
363 
364  // \fn collectLocalTimerData
365  // \brief Collect and sort local timer data by timer names.
366  //
367  // \param localData [out] Map whose keys are the timer names, and
368  // whose value for each key is the total elapsed time (in
369  // seconds) and the call count for the timer with that name.
370  //
371  // \param localCounters [in] Timers from which to extract data.
372  //
373  // \param filter [in] Filter for timer labels. If filter is not
374  // empty, this method will only collect data for local timers
375  // whose labels begin with this string.
376  //
377  // Extract the total elapsed time and call count from each timer
378  // in the given array. Merge results for timers with duplicate
379  // labels, by summing their total elapsed times and call counts
380  // pairwise.
381  void
382  collectLocalTimerData (timer_map_t& localData,
383  const std::map<std::string, RCP<Time> >& localCounters,
384  const std::string& filter="")
385  {
386  using std::make_pair;
387  typedef timer_map_t::iterator iter_t;
388 
389  timer_map_t theLocalData;
390  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
391  it != localCounters.end(); ++it) {
392  const std::string& name = it->second->name ();
393 
394  // Filter current timer name, if provided filter is nonempty.
395  // Filter string must _start_ the timer label, not just be in it.
396  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
397  if (! skipThisOne) {
398  const double timing = it->second->totalElapsedTime ();
399  const int numCalls = it->second->numCalls ();
400 
401  // Merge timers with duplicate labels, by summing their
402  // total elapsed times and call counts.
403  iter_t loc = theLocalData.find (name);
404  if (loc == theLocalData.end()) {
405  // Use loc as an insertion location hint.
406  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
407  }
408  else {
409  loc->second.first += timing;
410  loc->second.second += numCalls;
411  }
412  }
413  }
414  // This avoids copying the map, and also makes this method
415  // satisfy the strong exception guarantee.
416  localData.swap (theLocalData);
417  }
418 
419  // \brief Locally filter out timer data with zero call counts.
420  //
421  // \param timerData [in/out]
422  void
423  filterZeroData (timer_map_t& timerData)
424  {
425  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
426  // iterator hint, instead of rebuilding the map completely.
427  timer_map_t newTimerData;
428  for (timer_map_t::const_iterator it = timerData.begin();
429  it != timerData.end(); ++it) {
430  if (it->second.second > 0) {
431  newTimerData[it->first] = it->second;
432  }
433  }
434  timerData.swap (newTimerData);
435  }
436 
458  void
459  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
460  Array<std::string>& localTimerNames,
461  const std::map<std::string, RCP<Time> >& localTimers,
462  const bool writeZeroTimers,
463  const std::string& filter="")
464  {
465  // Collect and sort local timer data by timer names.
466  collectLocalTimerData (localTimerData, localTimers, filter);
467 
468  // Filter out zero data locally first. This ensures that if we
469  // are writing global stats, and if a timer name exists in the
470  // set of global names, then that timer has a nonzero call count
471  // on at least one MPI process.
472  if (! writeZeroTimers) {
473  filterZeroData (localTimerData);
474  }
475 
476  // Extract the set of local timer names. The std::map keeps
477  // them sorted alphabetically.
478  localTimerNames.reserve (localTimerData.size());
479  for (timer_map_t::const_iterator it = localTimerData.begin();
480  it != localTimerData.end(); ++it) {
481  localTimerNames.push_back (it->first);
482  }
483  }
484 
519  void
520  collectGlobalTimerData (timer_map_t& globalTimerData,
521  Array<std::string>& globalTimerNames,
522  timer_map_t& localTimerData,
523  Array<std::string>& localTimerNames,
524  Ptr<const Comm<int> > comm,
525  const bool alwaysWriteLocal,
526  const ECounterSetOp setOp)
527  {
528  // There may be some global timers that are not local timers on
529  // the calling MPI process(es). In that case, if
530  // alwaysWriteLocal is true, then we need to fill in the
531  // "missing" local timers. That will ensure that both global
532  // and local timer columns in the output table have the same
533  // number of rows. The collectLocalTimerDataAndNames() method
534  // may have already filtered out local timers with zero call
535  // counts (if its writeZeroTimers argument was false), but we
536  // won't be filtering again. Thus, any local timer data we
537  // insert here won't get filtered out.
538  //
539  // Note that calling summarize() with writeZeroTimers == false
540  // will still do what it says, even if we insert local timers
541  // with zero call counts here.
542 
543  // This does the correct and inexpensive thing (just copies the
544  // timer data) if numProcs == 1. Otherwise, it initiates a
545  // communication with \f$O(\log P)\f$ messages along the
546  // critical path, where \f$P\f$ is the number of participating
547  // processes.
548  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
549 
550 #ifdef TEUCHOS_DEBUG
551  {
552  // Sanity check that all processes have the name number of
553  // global timer names.
554  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
555  timer_map_t::size_type minNumGlobalNames = 0;
556  timer_map_t::size_type maxNumGlobalNames = 0;
557  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
558  outArg (minNumGlobalNames));
559  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
560  outArg (maxNumGlobalNames));
561  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
562  std::logic_error, "Min # global timer names = " << minNumGlobalNames
563  << " != max # global timer names = " << maxNumGlobalNames
564  << ". Please report this bug to the Teuchos developers.");
565  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
566  std::logic_error, "My # global timer names = " << myNumGlobalNames
567  << " != min # global timer names = " << minNumGlobalNames
568  << ". Please report this bug to the Teuchos developers.");
569  }
570 #endif // TEUCHOS_DEBUG
571 
572  // mergeCounterNames() just merges the counters' names, not
573  // their actual data. Now we need to fill globalTimerData with
574  // this process' timer data for the timers in globalTimerNames.
575  //
576  // All processes need the full list of global timers, since
577  // there may be some global timers that are not local timers.
578  // That's why mergeCounterNames() has to be an all-reduce, not
579  // just a reduction to Proc 0.
580  //
581  // Insertion optimization: if the iterator given to map::insert
582  // points right before where we want to insert, insertion is
583  // O(1). globalTimerNames is sorted, so feeding the iterator
584  // output of map::insert into the next invocation's input should
585  // make the whole insertion O(N) where N is the number of
586  // entries in globalTimerNames.
587  timer_map_t::iterator globalMapIter = globalTimerData.begin();
588  timer_map_t::iterator localMapIter;
589  for (Array<string>::const_iterator it = globalTimerNames.begin();
590  it != globalTimerNames.end(); ++it) {
591  const std::string& globalName = *it;
592  localMapIter = localTimerData.find (globalName);
593 
594  if (localMapIter == localTimerData.end()) {
595  if (alwaysWriteLocal) {
596  // If there are some global timers that are not local
597  // timers, and if we want to print local timers, we insert
598  // a local timer datum with zero elapsed time and zero
599  // call count into localTimerData as well. This will
600  // ensure that both global and local timer columns in the
601  // output table have the same number of rows.
602  //
603  // We really only need to do this on Proc 0, which is the
604  // only process that currently may print local timers.
605  // However, we do it on all processes, just in case
606  // someone later wants to modify this function to print
607  // out local timer data for some process other than Proc
608  // 0. This extra computation won't affect the cost along
609  // the critical path, for future computations in which
610  // Proc 0 participates.
611  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
612 
613  // Make sure the missing global name gets added to the
614  // list of local names. We'll re-sort the list of local
615  // names below.
616  localTimerNames.push_back (globalName);
617  }
618  // There's a global timer that's not a local timer. Add it
619  // to our pre-merge version of the global timer data so that
620  // we can safely merge the global timer data later.
621  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
622  }
623  else {
624  // We have this global timer name in our local timer list.
625  // Fill in our pre-merge version of the global timer data
626  // with our local data.
627  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
628  }
629  }
630 
631  if (alwaysWriteLocal) {
632  // Re-sort the list of local timer names, since we may have
633  // inserted "missing" names above.
634  std::sort (localTimerNames.begin(), localTimerNames.end());
635  }
636 
637 #ifdef TEUCHOS_DEBUG
638  {
639  // Sanity check that all processes have the name number of
640  // global timers.
641  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
642  timer_map_t::size_type minNumGlobalTimers = 0;
643  timer_map_t::size_type maxNumGlobalTimers = 0;
644  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
645  outArg (minNumGlobalTimers));
646  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
647  outArg (maxNumGlobalTimers));
648  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
649  std::logic_error, "Min # global timers = " << minNumGlobalTimers
650  << " != max # global timers = " << maxNumGlobalTimers
651  << ". Please report this bug to the Teuchos developers.");
652  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
653  std::logic_error, "My # global timers = " << myNumGlobalTimers
654  << " != min # global timers = " << minNumGlobalTimers
655  << ". Please report this bug to the Teuchos developers.");
656  }
657 #endif // TEUCHOS_DEBUG
658  }
659 
706  void
707  computeGlobalTimerStats (stat_map_type& statData,
708  std::vector<std::string>& statNames,
709  Ptr<const Comm<int> > comm,
710  const timer_map_t& globalTimerData,
711  const bool ignoreZeroTimers)
712  {
713  using Teuchos::ScalarTraits;
714 
715  const int numTimers = static_cast<int> (globalTimerData.size());
716  const int numProcs = comm->getSize();
717 
718  // Extract pre-reduction timings and call counts into a
719  // sequential array. This array will be in the same order as
720  // the global timer names are in the map.
721  Array<std::pair<double, int> > timingsAndCallCounts;
722  timingsAndCallCounts.reserve (numTimers);
723  for (timer_map_t::const_iterator it = globalTimerData.begin();
724  it != globalTimerData.end(); ++it) {
725  timingsAndCallCounts.push_back (it->second);
726  }
727 
728  // For each timer name, compute the min timing and its
729  // corresponding call count. If two processes have the same
730  // timing but different call counts, the minimum call count will
731  // be used.
732  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
733  if (numTimers > 0) {
734  if (ignoreZeroTimers)
735  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
736  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
737  else
738  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
739  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
740  }
741 
742  // For each timer name, compute the max timing and its
743  // corresponding call count. If two processes have the same
744  // timing but different call counts, the minimum call count will
745  // be used.
746  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
747  if (numTimers > 0) {
748  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
749  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
750  }
751 
752  // For each timer name, compute the mean-over-processes timing,
753  // the mean call count, and the mean-over-call-counts timing.
754  // The mean call count is reported as a double to allow a
755  // fractional value.
756  //
757  // Each local timing is really the total timing over all local
758  // invocations. The number of local invocations is the call
759  // count. Thus, the mean-over-call-counts timing is the sum of
760  // all the timings (over all processes), divided by the sum of
761  // all the call counts (over all processes). We compute it in a
762  // different way to over unnecessary overflow.
763  Array<double> meanOverCallCountsTimings (numTimers);
764  Array<double> meanOverProcsTimings (numTimers);
765  Array<double> meanCallCounts (numTimers);
766  Array<int> ICallThisTimer (numTimers);
767  Array<int> numProcsCallingEachTimer (numTimers);
768  {
769  // Figure out how many processors actually call each timer.
770  if (ignoreZeroTimers) {
771  for (int k = 0; k < numTimers; ++k) {
772  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
773  if (callCount > 0) ICallThisTimer[k] = 1;
774  else ICallThisTimer[k] = 0;
775  }
776  if (numTimers > 0) {
777  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
778  &numProcsCallingEachTimer[0]);
779  }
780  }
781 
782  // When summing, first scale by the number of processes. This
783  // avoids unnecessary overflow, and also gives us the mean
784  // call count automatically.
785  Array<double> scaledTimings (numTimers);
786  Array<double> scaledCallCounts (numTimers);
787  const double P = static_cast<double> (numProcs);
788 
789  if (ignoreZeroTimers) {
790  for (int k = 0; k < numTimers; ++k) {
791  const double timing = timingsAndCallCounts[k].first;
792  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
793 
794  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
795  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
796  }
797  }
798  else {
799  for (int k = 0; k < numTimers; ++k) {
800  const double timing = timingsAndCallCounts[k].first;
801  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
802 
803  scaledTimings[k] = timing / P;
804  scaledCallCounts[k] = callCount / P;
805  }
806  }
807 
808  if (numTimers > 0) {
809  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
810  &meanOverProcsTimings[0]);
811  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
812  &meanCallCounts[0]);
813  }
814  // We don't have to undo the scaling for the mean timings;
815  // just divide by the scaled call count.
816  for (int k = 0; k < numTimers; ++k) {
817  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
818  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
819  }
820  else {
821  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
822  }
823  }
824  }
825 
826  // Reformat the data into the map of statistics. Be sure that
827  // each value (the std::vector of (timing, call count) pairs,
828  // each entry of which is a different statistic) preserves the
829  // order of statNames.
830  statNames.resize (4);
831  statNames[0] = "MinOverProcs";
832  statNames[1] = "MeanOverProcs";
833  statNames[2] = "MaxOverProcs";
834  statNames[3] = "MeanOverCallCounts";
835 
836  stat_map_type::iterator statIter = statData.end();
837  timer_map_t::const_iterator it = globalTimerData.begin();
838  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
839  std::vector<std::pair<double, double> > curData (4);
840  curData[0] = minTimingsAndCallCounts[k];
841  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
842  curData[2] = maxTimingsAndCallCounts[k];
843  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
844 
845  // statIter gives an insertion location hint that makes each
846  // insertion O(1), since we remember the location of the last
847  // insertion.
848  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
849  }
850  }
851 
852 
869  RCP<const Comm<int> >
870  getDefaultComm ()
871  {
872  // The default communicator. If Trilinos was built with MPI
873  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
874  // been initialized, it's not valid to use the communicator!)
875  // Otherwise, this should be a "serial" (no MPI, one "process")
876  // communicator.
877  RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
878 
879 #ifdef HAVE_MPI
880  {
881  int mpiHasBeenStarted = 0;
882  MPI_Initialized (&mpiHasBeenStarted);
883  if (! mpiHasBeenStarted) {
884  // Make pComm a new "serial communicator."
885  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
886  }
887  }
888 #endif // HAVE_MPI
889  return comm;
890  }
891 
892  } // namespace (anonymous)
893 
894 
895  void
897  std::vector<std::string>& statNames,
898  Ptr<const Comm<int> > comm,
899  const ECounterSetOp setOp,
900  const std::string& filter)
901  {
902  // Collect local timer data and names. Filter out timers with
903  // zero call counts if writeZeroTimers is false. Also, apply the
904  // timer label filter at this point, so we don't have to compute
905  // statistics on timers we don't want to display anyway.
906  timer_map_t localTimerData;
907  Array<std::string> localTimerNames;
908  const bool writeZeroTimers = false;
909  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
910  counters(), writeZeroTimers, filter);
911  // Merge the local timer data and names into global timer data and
912  // names.
913  timer_map_t globalTimerData;
914  Array<std::string> globalTimerNames;
915  const bool alwaysWriteLocal = false;
916  collectGlobalTimerData (globalTimerData, globalTimerNames,
917  localTimerData, localTimerNames,
918  comm, alwaysWriteLocal, setOp);
919  // Compute statistics on the data.
920  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
921  }
922 
923 
924  void
926  std::ostream& out,
927  const bool alwaysWriteLocal,
928  const bool writeGlobalStats,
929  const bool writeZeroTimers,
930  const ECounterSetOp setOp,
931  const std::string& filter,
932  const bool ignoreZeroTimers)
933  {
934  //
935  // We can't just call computeGlobalTimerStatistics(), since
936  // summarize() has different options that affect whether global
937  // statistics are computed and printed.
938  //
939  const int numProcs = comm->getSize();
940  const int myRank = comm->getRank();
941 
942  // Collect local timer data and names. Filter out timers with
943  // zero call counts if writeZeroTimers is false. Also, apply the
944  // timer label filter at this point, so we don't have to compute
945  // statistics on timers we don't want to display anyway.
946  timer_map_t localTimerData;
947  Array<std::string> localTimerNames;
948  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
949  counters(), writeZeroTimers, filter);
950 
951  // If we're computing global statistics, merge the local timer
952  // data and names into global timer data and names, and compute
953  // global timer statistics. Otherwise, leave the global data
954  // empty.
955  timer_map_t globalTimerData;
956  Array<std::string> globalTimerNames;
957  stat_map_type statData;
958  std::vector<std::string> statNames;
959  if (writeGlobalStats) {
960  collectGlobalTimerData (globalTimerData, globalTimerNames,
961  localTimerData, localTimerNames,
962  comm, alwaysWriteLocal, setOp);
963  // Compute statistics on the data, but only if the communicator
964  // contains more than one process. Otherwise, statistics don't
965  // make sense and we don't print them (see below).
966  if (numProcs > 1) {
967  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
968  }
969  }
970 
971  // Precision of floating-point numbers in the table.
972  const int precision = format().precision();
973  const std::ios_base::fmtflags& flags = out.flags();
974 
975  // All columns of the table, in order.
976  Array<TableColumn> tableColumns;
977 
978  // Labels of all the columns of the table.
979  // We will append to this when we add each column.
980  Array<std::string> titles;
981 
982  // Widths (in number of characters) of each column.
983  // We will append to this when we add each column.
984  Array<int> columnWidths;
985 
986  // Table column containing all timer names. If writeGlobalStats
987  // is true, we use the global timer names, otherwise we use the
988  // local timer names. We build the table on all processes
989  // redundantly, but only print on Rank 0.
990  {
991  titles.append ("Timer Name");
992 
993  // The column labels depend on whether we are computing global statistics.
994  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
995  tableColumns.append (nameCol);
996 
997  // Each column is as wide as it needs to be to hold both its
998  // title and all of the column data. This column's title is the
999  // current last entry of the titles array.
1000  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
1001  }
1002 
1003  // Table column containing local timer stats, if applicable. We
1004  // only write local stats if asked, only on MPI Proc 0, and only
1005  // if there is more than one MPI process in the communicator
1006  // (otherwise local stats == global stats, so we just print the
1007  // global stats). In this case, we've padded the local data on
1008  // Proc 0 if necessary to match the global timer list, so that the
1009  // columns have the same number of rows.
1010  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
1011  titles.append ("Local time (num calls)");
1012 
1013  // Copy local timer data out of the array-of-structs into
1014  // separate arrays, for display in the table.
1015  Array<double> localTimings;
1016  Array<double> localNumCalls;
1017  for (timer_map_t::const_iterator it = localTimerData.begin();
1018  it != localTimerData.end(); ++it) {
1019  localTimings.push_back (it->second.first);
1020  localNumCalls.push_back (static_cast<double> (it->second.second));
1021  }
1022  TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
1023  tableColumns.append (timeAndCalls);
1024  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1025  }
1026 
1027  if (writeGlobalStats) {
1028  // If there's only 1 process in the communicator, don't display
1029  // statistics; statistics don't make sense in that case. Just
1030  // display the timings and call counts. If there's more than 1
1031  // process, do display statistics.
1032  if (numProcs == 1) {
1033  // Extract timings and the call counts from globalTimerData.
1034  Array<double> globalTimings;
1035  Array<double> globalNumCalls;
1036  for (timer_map_t::const_iterator it = globalTimerData.begin();
1037  it != globalTimerData.end(); ++it) {
1038  globalTimings.push_back (it->second.first);
1039  globalNumCalls.push_back (static_cast<double> (it->second.second));
1040  }
1041  // Print the table column.
1042  titles.append ("Global time (num calls)");
1043  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1044  tableColumns.append (timeAndCalls);
1045  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1046  }
1047  else { // numProcs > 1
1048  // Print a table column for each statistic. statNames and
1049  // each value in statData use the same ordering, so we can
1050  // iterate over valid indices of statNames to display the
1051  // statistics in the right order.
1052  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1053  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1054  // Extract lists of timings and their call counts for the
1055  // current statistic.
1056  Array<double> statTimings (numGlobalTimers);
1057  Array<double> statCallCounts (numGlobalTimers);
1058  stat_map_type::const_iterator it = statData.begin();
1059  for (int k = 0; it != statData.end(); ++it, ++k) {
1060  statTimings[k] = (it->second[statInd]).first;
1061  statCallCounts[k] = (it->second[statInd]).second;
1062  }
1063  // Print the table column.
1064  const std::string& statisticName = statNames[statInd];
1065  const std::string titleString = statisticName;
1066  titles.append (titleString);
1067  TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1068  tableColumns.append (timeAndCalls);
1069  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1070  }
1071  }
1072  }
1073 
1074  // Print the whole table to the given output stream on MPI Rank 0.
1075  format().setColumnWidths (columnWidths);
1076  if (myRank == 0) {
1077  std::ostringstream theTitle;
1078  theTitle << "TimeMonitor results over " << numProcs << " processor"
1079  << (numProcs > 1 ? "s" : "");
1080  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1081  }
1082  }
1083 
1084  void
1085  TimeMonitor::summarize (std::ostream &out,
1086  const bool alwaysWriteLocal,
1087  const bool writeGlobalStats,
1088  const bool writeZeroTimers,
1089  const ECounterSetOp setOp,
1090  const std::string& filter,
1091  const bool ignoreZeroTimers)
1092  {
1093  // The default communicator. If Trilinos was built with MPI
1094  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1095  // be a "serial" (no MPI, one "process") communicator.
1096  RCP<const Comm<int> > comm = getDefaultComm();
1097 
1098  summarize (comm.ptr(), out, alwaysWriteLocal,
1099  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1100  }
1101 
1102  void
1104  std::vector<std::string>& statNames,
1105  const ECounterSetOp setOp,
1106  const std::string& filter)
1107  {
1108  // The default communicator. If Trilinos was built with MPI
1109  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1110  // be a "serial" (no MPI, one "process") communicator.
1111  RCP<const Comm<int> > comm = getDefaultComm();
1112 
1113  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1114  }
1115 
1116  SyncTimeMonitor::SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset)
1117  : TimeMonitor(timer, reset), comm_(comm)
1118  { }
1119 
1121  comm_->barrier();
1122  }
1123 
1124 
1125  namespace {
1149  std::string
1150  quoteLabelForYaml (const std::string& label)
1151  {
1152  // YAML allows empty keys in key: value pairs. See Section 7.2
1153  // of the YAML 1.2 spec. We thus let an empty label pass
1154  // through without quoting or other special treatment.
1155  if (label.empty ()) {
1156  return label;
1157  }
1158 
1159  // Check whether the label is already quoted. If so, we don't
1160  // need to quote it again. However, we do need to quote any
1161  // quote symbols in the string inside the outer quotes.
1162  const bool alreadyQuoted = label.size () >= 2 &&
1163  label[0] == '"' && label[label.size() - 1] == '"';
1164 
1165  // We need to quote if there are any colons or (inner) quotes in
1166  // the string. We'll determine this as we read through the
1167  // string and escape any characters that need escaping.
1168  bool needToQuote = false;
1169 
1170  std::string out; // To fill with the return value
1171  out.reserve (label.size ());
1172 
1173  const size_t startPos = alreadyQuoted ? 1 : 0;
1174  const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
1175  for (size_t i = startPos; i < endPos; ++i) {
1176  const char c = label[i];
1177  if (c == '"' || c == '\\') {
1178  out.push_back ('\\'); // Escape the quote or backslash.
1179  needToQuote = true;
1180  }
1181  else if (c == ':') {
1182  needToQuote = true;
1183  }
1184  out.push_back (c);
1185  }
1186 
1187  if (needToQuote || alreadyQuoted) {
1188  // If the input string was already quoted, then out doesn't
1189  // include its quotes, so we have to add them back in.
1190  return "\"" + out + "\"";
1191  }
1192  else {
1193  return out;
1194  }
1195  }
1196 
1197  } // namespace (anonymous)
1198 
1199 
1200  void TimeMonitor::
1201  summarizeToYaml (Ptr<const Comm<int> > comm,
1202  std::ostream &out,
1203  const ETimeMonitorYamlFormat yamlStyle,
1204  const std::string& filter)
1205  {
1206  using Teuchos::FancyOStream;
1207  using Teuchos::fancyOStream;
1208  using Teuchos::getFancyOStream;
1209  using Teuchos::OSTab;
1210  using Teuchos::RCP;
1211  using Teuchos::rcpFromRef;
1212  using std::endl;
1213  typedef std::vector<std::string>::size_type size_type;
1214 
1215  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1216 
1217  // const bool writeGlobalStats = true;
1218  // const bool writeZeroTimers = true;
1219  // const bool alwaysWriteLocal = false;
1220  const ECounterSetOp setOp = Intersection;
1221 
1222  stat_map_type statData;
1223  std::vector<std::string> statNames;
1224  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1225 
1226  const int numProcs = comm->getSize();
1227 
1228  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1229  // " as the line prefix does not work, else I would prefer that
1230  // method for printing each line of a YAML block sequence (see
1231  // Section 8.2.1 of the YAML 1.2 spec).
1232  //
1233  // Also, I have to set the tab indent string here, rather than in
1234  // OSTab's constructor. This is because line prefix (which for
1235  // some reason is what OSTab's constructor takes, rather than tab
1236  // indent string) means something different from tab indent
1237  // string, and turning on the line prefix prints all sorts of
1238  // things including "|" for some reason.
1239  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1240  pfout->setTabIndentStr (" ");
1241  FancyOStream& fout = *pfout;
1242 
1243  fout << "# Teuchos::TimeMonitor report" << endl
1244  << "---" << endl;
1245 
1246  // mfh 19 Aug 2012: An important goal of our chosen output format
1247  // was to minimize the nesting depth. We have managed to keep the
1248  // nesting depth to 3, which is the limit that the current version
1249  // of PylotDB imposes for its YAML input.
1250 
1251  // Outermost level is a dictionary. (Individual entries of a
1252  // dictionary do _not_ begin with "- ".) We always print the
1253  // outermost level in standard style, not flow style, for better
1254  // readability. We begin the outermost level with metadata.
1255  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1256  << "Number of processes: " << numProcs << endl
1257  << "Time unit: s" << endl;
1258  // For a key: value pair where the value is a sequence or
1259  // dictionary on the following line, YAML requires a space after
1260  // the colon.
1261  fout << "Statistics collected: ";
1262  // Print list of the names of all the statistics we collected.
1263  if (compact) {
1264  fout << " [";
1265  for (size_type i = 0; i < statNames.size (); ++i) {
1266  fout << quoteLabelForYaml (statNames[i]);
1267  if (i + 1 < statNames.size ()) {
1268  fout << ", ";
1269  }
1270  }
1271  fout << "]" << endl;
1272  }
1273  else {
1274  fout << endl;
1275  OSTab tab1 (pfout);
1276  for (size_type i = 0; i < statNames.size (); ++i) {
1277  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1278  }
1279  }
1280 
1281  // Print the list of timer names.
1282  //
1283  // It might be nicer instead to print a map from timer name to all
1284  // of its data, but keeping the maximum nesting depth small
1285  // ensures better compatibility with different parsing tools.
1286  fout << "Timer names: ";
1287  if (compact) {
1288  fout << " [";
1289  size_type ind = 0;
1290  for (stat_map_type::const_iterator it = statData.begin();
1291  it != statData.end(); ++it, ++ind) {
1292  fout << quoteLabelForYaml (it->first);
1293  if (ind + 1 < statData.size ()) {
1294  fout << ", ";
1295  }
1296  }
1297  fout << "]" << endl;
1298  }
1299  else {
1300  fout << endl;
1301  OSTab tab1 (pfout);
1302  for (stat_map_type::const_iterator it = statData.begin();
1303  it != statData.end(); ++it) {
1304  fout << "- " << quoteLabelForYaml (it->first) << endl;
1305  }
1306  }
1307 
1308  // Print times for each timer, as a map from statistic name to its time.
1309  fout << "Total times: ";
1310  if (compact) {
1311  fout << " {";
1312  size_type outerInd = 0;
1313  for (stat_map_type::const_iterator outerIter = statData.begin();
1314  outerIter != statData.end(); ++outerIter, ++outerInd) {
1315  // Print timer name.
1316  fout << quoteLabelForYaml (outerIter->first) << ": ";
1317  // Print that timer's data.
1318  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1319  fout << "{";
1320  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1321  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1322  << curData[innerInd].first;
1323  if (innerInd + 1 < curData.size ()) {
1324  fout << ", ";
1325  }
1326  }
1327  fout << "}";
1328  if (outerInd + 1 < statData.size ()) {
1329  fout << ", ";
1330  }
1331  }
1332  fout << "}" << endl;
1333  }
1334  else {
1335  fout << endl;
1336  OSTab tab1 (pfout);
1337  size_type outerInd = 0;
1338  for (stat_map_type::const_iterator outerIter = statData.begin();
1339  outerIter != statData.end(); ++outerIter, ++outerInd) {
1340  // Print timer name.
1341  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1342  // Print that timer's data.
1343  OSTab tab2 (pfout);
1344  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1345  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1346  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1347  << curData[innerInd].first << endl;
1348  }
1349  }
1350  }
1351 
1352  // Print call counts for each timer, for each statistic name.
1353  fout << "Call counts:";
1354  if (compact) {
1355  fout << " {";
1356  size_type outerInd = 0;
1357  for (stat_map_type::const_iterator outerIter = statData.begin();
1358  outerIter != statData.end(); ++outerIter, ++outerInd) {
1359  // Print timer name.
1360  fout << quoteLabelForYaml (outerIter->first) << ": ";
1361  // Print that timer's data.
1362  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1363  fout << "{";
1364  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1365  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1366  << curData[innerInd].second;
1367  if (innerInd + 1 < curData.size ()) {
1368  fout << ", ";
1369  }
1370  }
1371  fout << "}";
1372  if (outerInd + 1 < statData.size ()) {
1373  fout << ", ";
1374  }
1375  }
1376  fout << "}" << endl;
1377  }
1378  else {
1379  fout << endl;
1380  OSTab tab1 (pfout);
1381  size_type outerInd = 0;
1382  for (stat_map_type::const_iterator outerIter = statData.begin();
1383  outerIter != statData.end(); ++outerIter, ++outerInd) {
1384  // Print timer name.
1385  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1386  // Print that timer's data.
1387  OSTab tab2 (pfout);
1388  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1389  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1390  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1391  << curData[innerInd].second << endl;
1392  }
1393  }
1394  }
1395  }
1396 
1397  void TimeMonitor::
1398  summarizeToYaml (std::ostream &out,
1399  const ETimeMonitorYamlFormat yamlStyle,
1400  const std::string& filter)
1401  {
1402  // The default communicator. If Trilinos was built with MPI
1403  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1404  // be a "serial" (no MPI, one "process") communicator.
1405  RCP<const Comm<int> > comm = getDefaultComm ();
1406 
1407  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1408  }
1409 
1410  // Default value is false. We'll set to true once
1411  // setReportParameters() completes successfully.
1412  bool TimeMonitor::setParams_ = false;
1413 
1414  // We have to declare all of these here in order to avoid linker errors.
1415  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1416  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1417  ECounterSetOp TimeMonitor::setOp_ = Intersection;
1418  bool TimeMonitor::alwaysWriteLocal_ = false;
1419  bool TimeMonitor::writeGlobalStats_ = true;
1420  bool TimeMonitor::writeZeroTimers_ = true;
1421 
1422  void
1423  TimeMonitor::setReportFormatParameter (ParameterList& plist)
1424  {
1425  const std::string name ("Report format");
1426  const std::string defaultValue ("Table");
1427  const std::string docString ("Output format for report of timer statistics");
1428  Array<std::string> strings;
1429  Array<std::string> docs;
1430  Array<ETimeMonitorReportFormat> values;
1431 
1432  strings.push_back ("YAML");
1433  docs.push_back ("YAML (see yaml.org) format");
1434  values.push_back (REPORT_FORMAT_YAML);
1435  strings.push_back ("Table");
1436  docs.push_back ("Tabular format via Teuchos::TableFormat");
1437  values.push_back (REPORT_FORMAT_TABLE);
1438 
1439  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1440  docString,
1441  strings (), docs (),
1442  values (), &plist);
1443  }
1444 
1445  void
1446  TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1447  {
1448  const std::string name ("YAML style");
1449  const std::string defaultValue ("spacious");
1450  const std::string docString ("YAML-specific output format");
1451  Array<std::string> strings;
1452  Array<std::string> docs;
1453  Array<ETimeMonitorYamlFormat> values;
1454 
1455  strings.push_back ("compact");
1456  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1457  "yaml.org) for most sequences except the outermost sequence");
1458  values.push_back (YAML_FORMAT_COMPACT);
1459 
1460  strings.push_back ("spacious");
1461  docs.push_back ("Spacious format: avoid flow style");
1462  values.push_back (YAML_FORMAT_SPACIOUS);
1463 
1464  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1465  docString,
1466  strings (), docs (),
1467  values (), &plist);
1468  }
1469 
1470  void
1471  TimeMonitor::setSetOpParameter (ParameterList& plist)
1472  {
1473  const std::string name ("How to merge timer sets");
1474  const std::string defaultValue ("Intersection");
1475  const std::string docString ("How to merge differing sets of timers "
1476  "across processes");
1477  Array<std::string> strings;
1478  Array<std::string> docs;
1479  Array<ECounterSetOp> values;
1480 
1481  strings.push_back ("Intersection");
1482  docs.push_back ("Compute intersection of timer sets over processes");
1483  values.push_back (Intersection);
1484  strings.push_back ("Union");
1485  docs.push_back ("Compute union of timer sets over processes");
1486  values.push_back (Union);
1487 
1488  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1489  strings (), docs (), values (),
1490  &plist);
1491  }
1492 
1493  void
1495  {
1496  stackedTimer_ = t;
1497  }
1498 
1501  {
1502  return stackedTimer_;
1503  }
1504 
1507  {
1508  // Our implementation favors recomputation over persistent
1509  // storage. That is, we simply recreate the list every time we
1510  // need it.
1511  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1512 
1513  const bool alwaysWriteLocal = false;
1514  const bool writeGlobalStats = true;
1515  const bool writeZeroTimers = true;
1516 
1517  setReportFormatParameter (*plist);
1518  setYamlFormatParameter (*plist);
1519  setSetOpParameter (*plist);
1520  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1521  "Always output local timers' values on Proc 0");
1522  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1523  "statistics, even if there is only one process in the "
1524  "communicator");
1525  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1526  "timers that have never been called");
1527 
1528  return rcp_const_cast<const ParameterList> (plist);
1529  }
1530 
1531  void
1532  TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1533  {
1534  ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1535  ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1536  ECounterSetOp setOp = Intersection;
1537  bool alwaysWriteLocal = false;
1538  bool writeGlobalStats = true;
1539  bool writeZeroTimers = true;
1540 
1541  if (params.is_null ()) {
1542  // If we've set parameters before, leave their current values.
1543  // Otherwise, set defaults (below).
1544  if (setParams_) {
1545  return;
1546  }
1547  }
1548  else { // params is nonnull. Let's read it!
1549  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1550 
1551  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1552  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1553  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1554  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1555  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1556  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1557  }
1558  // Defer setting state until here, to ensure the strong exception
1559  // guarantee for this method (either it throws with no externally
1560  // visible state changes, or it returns normally).
1561  reportFormat_ = reportFormat;
1562  yamlStyle_ = yamlStyle;
1563  setOp_ = setOp;
1564  alwaysWriteLocal_ = alwaysWriteLocal;
1565  writeGlobalStats_ = writeGlobalStats;
1566  writeZeroTimers_ = writeZeroTimers;
1567 
1568  setParams_ = true; // Yay, we successfully set parameters!
1569  }
1570 
1571  void
1573  std::ostream& out,
1574  const std::string& filter,
1575  const RCP<ParameterList>& params)
1576  {
1577  setReportParameters (params);
1578 
1579  if (reportFormat_ == REPORT_FORMAT_YAML) {
1580  summarizeToYaml (comm, out, yamlStyle_, filter);
1581  }
1582  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1583  summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1584  writeZeroTimers_, setOp_, filter);
1585  }
1586  else {
1587  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1588  "Invalid report format. This should never happen; ParameterList "
1589  "validation should have caught this. Please report this bug to the "
1590  "Teuchos developers.");
1591  }
1592  }
1593 
1594  void
1596  std::ostream& out,
1597  const RCP<ParameterList>& params)
1598  {
1599  report (comm, out, "", params);
1600  }
1601 
1602  void
1603  TimeMonitor::report (std::ostream& out,
1604  const std::string& filter,
1605  const RCP<ParameterList>& params)
1606  {
1607  RCP<const Comm<int> > comm = getDefaultComm ();
1608  report (comm.ptr (), out, filter, params);
1609  }
1610 
1611  void
1612  TimeMonitor::report (std::ostream& out,
1613  const RCP<ParameterList>& params)
1614  {
1615  RCP<const Comm<int> > comm = getDefaultComm ();
1616  report (comm.ptr (), out, "", params);
1617  }
1618 
1619 } // namespace Teuchos
SyncTimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
static Teuchos::RCP< Teuchos::StackedTimer > getStackedTimer()
The StackedTimer used by the TimeMonitor.
Array< T > & append(const T &x)
Add a new entry at the end of the array.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
basic_OSTab< char > OSTab
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
const Time & counter() const
Constant access to the instance&#39;s counter reference.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
basic_FancyOStream< char > FancyOStream
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
TimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
static void disableTimer(const std::string &name)
Disable the timer with the given name.
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
T * get() const
Get the raw C++ pointer to the underlying object.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
This structure defines some basic traits for a scalar field type.
Teuchos version of MPI_MAXLOC.
Base interface class for user-defined reduction operations for objects that use value semantics...
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injection of timing from TimeMonitor-enabled objects.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
~TimeMonitor() override
Destructor: stops the timer.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
A column of TableEntry objects.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
double stop()
Stop the timer, if the timer is enabled (see disable()).
Wall-clock timer.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
int precision() const
Get the precision for writing doubles. Default is 4.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer into which the TimeMonitor will insert timings.
Provides utilities for formatting tabular output.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
const std::string & name() const
The name of this timer.
A list of parameters of arbitrary type.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process...
reference back()
void push_back(const value_type &x)
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
size_type size() const
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
~SyncTimeMonitor() override
Destructor: stops the timer.
Scope guard for Teuchos::Time, with MPI collective timer reporting.
Smart reference counting pointer class for automatic garbage collection.
This class allows one to push and pop timers on and off a stack.
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Common capabilities for collecting and reporting performance data across processors.
same as MinLoc, but don&#39;t allow zero
Simple wrapper class for raw pointers to single objects where no persisting relationship exists...
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
Scope guard for Time, that can compute MPI collective timer statistics.
static void zeroOutTimers()
Reset all global timers to zero.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
bool is_null() const
Returns true if the underlying pointer is null.