Teuchos - Trilinos Tools Package  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_TimeMonitor.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_TimeMonitor.hpp"
11 #include "Teuchos_CommHelpers.hpp"
12 #include "Teuchos_DefaultComm.hpp"
13 #include "Teuchos_TableColumn.hpp"
14 #include "Teuchos_TableFormat.hpp"
15 #include "Teuchos_StandardParameterEntryValidators.hpp"
16 #include "Teuchos_ScalarTraits.hpp"
17 #include "Teuchos_StackedTimer.hpp"
18 
19 #include <functional>
20 #include <iomanip>
21 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
22 #include <sstream>
23 #endif
24 
25 namespace Teuchos {
78  template<class Ordinal, class ScalarType, class IndexType>
79  class MaxLoc :
80  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
81  public:
82  void
83  reduce (const Ordinal count,
84  const std::pair<ScalarType, IndexType> inBuffer[],
85  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
86  };
87 
88  template<class Ordinal>
89  class MaxLoc<Ordinal, double, int> :
90  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
91  public:
92  void
93  reduce (const Ordinal count,
94  const std::pair<double, int> inBuffer[],
95  std::pair<double, int> inoutBuffer[]) const
96  {
97  for (Ordinal ind = 0; ind < count; ++ind) {
98  const std::pair<double, int>& in = inBuffer[ind];
99  std::pair<double, int>& inout = inoutBuffer[ind];
100 
101  if (in.first > inout.first) {
102  inout.first = in.first;
103  inout.second = in.second;
104  } else if (in.first < inout.first) {
105  // Don't need to do anything; inout has the values.
106  } else { // equal, or at least one is NaN.
107  inout.first = in.first;
108  inout.second = std::min (in.second, inout.second);
109  }
110  }
111  }
112  };
113 
140  template<class Ordinal, class ScalarType, class IndexType>
141  class MinLoc :
142  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
143  public:
144  void
145  reduce (const Ordinal count,
146  const std::pair<ScalarType, IndexType> inBuffer[],
147  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
148  };
149 
150  template<class Ordinal>
151  class MinLoc<Ordinal, double, int> :
152  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
153  public:
154  void
155  reduce (const Ordinal count,
156  const std::pair<double, int> inBuffer[],
157  std::pair<double, int> inoutBuffer[]) const
158  {
159  for (Ordinal ind = 0; ind < count; ++ind) {
160  const std::pair<double, int>& in = inBuffer[ind];
161  std::pair<double, int>& inout = inoutBuffer[ind];
162 
163  if (in.first < inout.first) {
164  inout.first = in.first;
165  inout.second = in.second;
166  } else if (in.first > inout.first) {
167  // Don't need to do anything; inout has the values.
168  } else { // equal, or at least one is NaN.
169  inout.first = in.first;
170  inout.second = std::min (in.second, inout.second);
171  }
172  }
173  }
174  };
175 
179  template<class Ordinal, class ScalarType, class IndexType>
181  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
182  public:
183  void
184  reduce (const Ordinal count,
185  const std::pair<ScalarType, IndexType> inBuffer[],
186  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
187  };
188 
189  template<class Ordinal>
190  class MinLocNonzero<Ordinal, double, int> :
191  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
192  public:
193  void
194  reduce (const Ordinal count,
195  const std::pair<double, int> inBuffer[],
196  std::pair<double, int> inoutBuffer[]) const
197  {
198  for (Ordinal ind = 0; ind < count; ++ind) {
199  const std::pair<double, int>& in = inBuffer[ind];
200  std::pair<double, int>& inout = inoutBuffer[ind];
201 
202  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
203  inout.first = in.first;
204  inout.second = in.second;
205  } else if (in.first > inout.first) {
206  // Don't need to do anything; inout has the values.
207  } else { // equal, or at least one is NaN.
208  inout.first = in.first;
209  inout.second = std::min (in.second, inout.second);
210  }
211  }
212  }
213  };
214 
215  constexpr const std::string_view defaultStackedTimerName = "Teuchos::StackedTimer";
216 
217  // Typedef used internally by TimeMonitor::summarize() and its
218  // helper functions. The map is keyed on timer label (a string).
219  // Each value is a pair: (total number of seconds over all calls to
220  // that timer, total number of calls to that timer).
221  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
222 
223  // static initialization
224  Teuchos::RCP<Teuchos::StackedTimer> TimeMonitor::stackedTimer_ = Teuchos::rcp(new Teuchos::StackedTimer(std::string(defaultStackedTimerName).c_str()));
225 
226  TimeMonitor::TimeMonitor (Time& timer, bool reset)
227  : PerformanceMonitorBase<Time>(timer, reset)
228  {
229  if (!isRecursiveCall()) {
230  counter().start(reset);
231 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
232  if (nonnull(stackedTimer_))
233  stackedTimer_->start(counter().name(),false);
234 #endif
235  }
236  }
237 
239  if (!isRecursiveCall()) {
240  counter().stop();
241 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
242  try {
243  if (nonnull(stackedTimer_))
244  stackedTimer_->stop(counter().name(),false);
245  }
246  catch (std::runtime_error& e) {
247  std::ostringstream warning;
248  warning <<
249  "\n*********************************************************************\n"
250  "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
251  "A TimeMonitor timer was stopped before a nested subtimer was\n"
252  "stopped. This is not allowed by the StackedTimer. This corner case\n"
253  "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
254  "assigned to a new timer. To disable this warning, either fix the\n"
255  "ordering of timer creation and destuction or disable the StackedTimer\n"
256  "support in the TimeMonitor by setting the StackedTimer to null\n"
257  "Example:\n"
258  " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
259  "///code to time \n"
260  "MM = Teuchos::null;\n"
261  "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
262  "*********************************************************************\n";
263  std::cout << warning.str() << std::endl << e.what() << std::endl;
265  }
266 #endif
267  }
268  }
269 
270  void
271  TimeMonitor::disableTimer (const std::string& name)
272  {
273  RCP<Time> timer = lookupCounter (name);
275  timer == null, std::invalid_argument,
276  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
277  timer->disable ();
278  }
279 
280  void
281  TimeMonitor::enableTimer (const std::string& name)
282  {
283  RCP<Time> timer = lookupCounter (name);
285  timer == null, std::invalid_argument,
286  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
287  timer->enable ();
288  }
289 
290  void
292  {
293  typedef std::map<std::string, RCP<Time> > map_type;
294  typedef map_type::iterator iter_type;
295  map_type& ctrs = counters ();
296 
297  // In debug mode, loop first to check whether any of the timers
298  // are running, before resetting them. This ensures that this
299  // method satisfies the strong exception guarantee (either it
300  // completes normally, or there are no side effects).
301 #ifdef TEUCHOS_DEBUG
302  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
303  // We throw a runtime_error rather than a logic_error, because
304  // logic_error suggests a bug in the implementation of
305  // TimeMonitor. Calling zeroOutTimers() when a timer is running
306  // is not TimeMonitor's fault.
308  it->second->isRunning (), std::runtime_error,
309  "Timer \"" << it->second->name () << "\" is currently running. "
310  "You are not allowed to reset running timers.");
311  }
312 #endif // TEUCHOS_DEBUG
313 
314  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
315  it->second->reset ();
316  }
317  }
318 
319  // An anonymous namespace is the standard way of limiting linkage of
320  // its contained routines to file scope.
321  namespace {
322  // \brief Return an "empty" local timer datum.
323  //
324  // "Empty" means the datum has zero elapsed time and zero call
325  // count. This function does not actually create a timer.
326  //
327  // \param name The timer's name.
328  std::pair<std::string, std::pair<double, int> >
329  makeEmptyTimerDatum (const std::string& name)
330  {
331  return std::make_pair (name, std::make_pair (double(0), int(0)));
332  }
333 
334  // \fn collectLocalTimerData
335  // \brief Collect and sort local timer data by timer names.
336  //
337  // \param localData [out] Map whose keys are the timer names, and
338  // whose value for each key is the total elapsed time (in
339  // seconds) and the call count for the timer with that name.
340  //
341  // \param localCounters [in] Timers from which to extract data.
342  //
343  // \param filter [in] Filter for timer labels. If filter is not
344  // empty, this method will only collect data for local timers
345  // whose labels begin with this string.
346  //
347  // Extract the total elapsed time and call count from each timer
348  // in the given array. Merge results for timers with duplicate
349  // labels, by summing their total elapsed times and call counts
350  // pairwise.
351  void
352  collectLocalTimerData (timer_map_t& localData,
353  const std::map<std::string, RCP<Time> >& localCounters,
354  const std::string& filter="")
355  {
356  using std::make_pair;
357  typedef timer_map_t::iterator iter_t;
358 
359  timer_map_t theLocalData;
360  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
361  it != localCounters.end(); ++it) {
362  const std::string& name = it->second->name ();
363 
364  // Filter current timer name, if provided filter is nonempty.
365  // Filter string must _start_ the timer label, not just be in it.
366  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
367  if (! skipThisOne) {
368  const double timing = it->second->totalElapsedTime ();
369  const int numCalls = it->second->numCalls ();
370 
371  // Merge timers with duplicate labels, by summing their
372  // total elapsed times and call counts.
373  iter_t loc = theLocalData.find (name);
374  if (loc == theLocalData.end()) {
375  // Use loc as an insertion location hint.
376  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
377  }
378  else {
379  loc->second.first += timing;
380  loc->second.second += numCalls;
381  }
382  }
383  }
384  // This avoids copying the map, and also makes this method
385  // satisfy the strong exception guarantee.
386  localData.swap (theLocalData);
387  }
388 
389  // \brief Locally filter out timer data with zero call counts.
390  //
391  // \param timerData [in/out]
392  void
393  filterZeroData (timer_map_t& timerData)
394  {
395  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
396  // iterator hint, instead of rebuilding the map completely.
397  timer_map_t newTimerData;
398  for (timer_map_t::const_iterator it = timerData.begin();
399  it != timerData.end(); ++it) {
400  if (it->second.second > 0) {
401  newTimerData[it->first] = it->second;
402  }
403  }
404  timerData.swap (newTimerData);
405  }
406 
428  void
429  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
430  Array<std::string>& localTimerNames,
431  const std::map<std::string, RCP<Time> >& localTimers,
432  const bool writeZeroTimers,
433  const std::string& filter="")
434  {
435  // Collect and sort local timer data by timer names.
436  collectLocalTimerData (localTimerData, localTimers, filter);
437 
438  // Filter out zero data locally first. This ensures that if we
439  // are writing global stats, and if a timer name exists in the
440  // set of global names, then that timer has a nonzero call count
441  // on at least one MPI process.
442  if (! writeZeroTimers) {
443  filterZeroData (localTimerData);
444  }
445 
446  // Extract the set of local timer names. The std::map keeps
447  // them sorted alphabetically.
448  localTimerNames.reserve (localTimerData.size());
449  for (timer_map_t::const_iterator it = localTimerData.begin();
450  it != localTimerData.end(); ++it) {
451  localTimerNames.push_back (it->first);
452  }
453  }
454 
489  void
490  collectGlobalTimerData (timer_map_t& globalTimerData,
491  Array<std::string>& globalTimerNames,
492  timer_map_t& localTimerData,
493  Array<std::string>& localTimerNames,
494  Ptr<const Comm<int> > comm,
495  const bool alwaysWriteLocal,
496  const ECounterSetOp setOp)
497  {
498  // There may be some global timers that are not local timers on
499  // the calling MPI process(es). In that case, if
500  // alwaysWriteLocal is true, then we need to fill in the
501  // "missing" local timers. That will ensure that both global
502  // and local timer columns in the output table have the same
503  // number of rows. The collectLocalTimerDataAndNames() method
504  // may have already filtered out local timers with zero call
505  // counts (if its writeZeroTimers argument was false), but we
506  // won't be filtering again. Thus, any local timer data we
507  // insert here won't get filtered out.
508  //
509  // Note that calling summarize() with writeZeroTimers == false
510  // will still do what it says, even if we insert local timers
511  // with zero call counts here.
512 
513  // This does the correct and inexpensive thing (just copies the
514  // timer data) if numProcs == 1. Otherwise, it initiates a
515  // communication with \f$O(\log P)\f$ messages along the
516  // critical path, where \f$P\f$ is the number of participating
517  // processes.
518  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
519 
520 #ifdef TEUCHOS_DEBUG
521  {
522  // Sanity check that all processes have the name number of
523  // global timer names.
524  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
525  timer_map_t::size_type minNumGlobalNames = 0;
526  timer_map_t::size_type maxNumGlobalNames = 0;
527  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
528  outArg (minNumGlobalNames));
529  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
530  outArg (maxNumGlobalNames));
531  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
532  std::logic_error, "Min # global timer names = " << minNumGlobalNames
533  << " != max # global timer names = " << maxNumGlobalNames
534  << ". Please report this bug to the Teuchos developers.");
535  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
536  std::logic_error, "My # global timer names = " << myNumGlobalNames
537  << " != min # global timer names = " << minNumGlobalNames
538  << ". Please report this bug to the Teuchos developers.");
539  }
540 #endif // TEUCHOS_DEBUG
541 
542  // mergeCounterNames() just merges the counters' names, not
543  // their actual data. Now we need to fill globalTimerData with
544  // this process' timer data for the timers in globalTimerNames.
545  //
546  // All processes need the full list of global timers, since
547  // there may be some global timers that are not local timers.
548  // That's why mergeCounterNames() has to be an all-reduce, not
549  // just a reduction to Proc 0.
550  //
551  // Insertion optimization: if the iterator given to map::insert
552  // points right before where we want to insert, insertion is
553  // O(1). globalTimerNames is sorted, so feeding the iterator
554  // output of map::insert into the next invocation's input should
555  // make the whole insertion O(N) where N is the number of
556  // entries in globalTimerNames.
557  timer_map_t::iterator globalMapIter = globalTimerData.begin();
558  timer_map_t::iterator localMapIter;
559  for (Array<string>::const_iterator it = globalTimerNames.begin();
560  it != globalTimerNames.end(); ++it) {
561  const std::string& globalName = *it;
562  localMapIter = localTimerData.find (globalName);
563 
564  if (localMapIter == localTimerData.end()) {
565  if (alwaysWriteLocal) {
566  // If there are some global timers that are not local
567  // timers, and if we want to print local timers, we insert
568  // a local timer datum with zero elapsed time and zero
569  // call count into localTimerData as well. This will
570  // ensure that both global and local timer columns in the
571  // output table have the same number of rows.
572  //
573  // We really only need to do this on Proc 0, which is the
574  // only process that currently may print local timers.
575  // However, we do it on all processes, just in case
576  // someone later wants to modify this function to print
577  // out local timer data for some process other than Proc
578  // 0. This extra computation won't affect the cost along
579  // the critical path, for future computations in which
580  // Proc 0 participates.
581  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
582 
583  // Make sure the missing global name gets added to the
584  // list of local names. We'll re-sort the list of local
585  // names below.
586  localTimerNames.push_back (globalName);
587  }
588  // There's a global timer that's not a local timer. Add it
589  // to our pre-merge version of the global timer data so that
590  // we can safely merge the global timer data later.
591  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
592  }
593  else {
594  // We have this global timer name in our local timer list.
595  // Fill in our pre-merge version of the global timer data
596  // with our local data.
597  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
598  }
599  }
600 
601  if (alwaysWriteLocal) {
602  // Re-sort the list of local timer names, since we may have
603  // inserted "missing" names above.
604  std::sort (localTimerNames.begin(), localTimerNames.end());
605  }
606 
607 #ifdef TEUCHOS_DEBUG
608  {
609  // Sanity check that all processes have the name number of
610  // global timers.
611  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
612  timer_map_t::size_type minNumGlobalTimers = 0;
613  timer_map_t::size_type maxNumGlobalTimers = 0;
614  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
615  outArg (minNumGlobalTimers));
616  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
617  outArg (maxNumGlobalTimers));
618  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
619  std::logic_error, "Min # global timers = " << minNumGlobalTimers
620  << " != max # global timers = " << maxNumGlobalTimers
621  << ". Please report this bug to the Teuchos developers.");
622  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
623  std::logic_error, "My # global timers = " << myNumGlobalTimers
624  << " != min # global timers = " << minNumGlobalTimers
625  << ". Please report this bug to the Teuchos developers.");
626  }
627 #endif // TEUCHOS_DEBUG
628  }
629 
676  void
677  computeGlobalTimerStats (stat_map_type& statData,
678  std::vector<std::string>& statNames,
679  Ptr<const Comm<int> > comm,
680  const timer_map_t& globalTimerData,
681  const bool ignoreZeroTimers)
682  {
683  using Teuchos::ScalarTraits;
684 
685  const int numTimers = static_cast<int> (globalTimerData.size());
686  const int numProcs = comm->getSize();
687 
688  // Extract pre-reduction timings and call counts into a
689  // sequential array. This array will be in the same order as
690  // the global timer names are in the map.
691  Array<std::pair<double, int> > timingsAndCallCounts;
692  timingsAndCallCounts.reserve (numTimers);
693  for (timer_map_t::const_iterator it = globalTimerData.begin();
694  it != globalTimerData.end(); ++it) {
695  timingsAndCallCounts.push_back (it->second);
696  }
697 
698  // For each timer name, compute the min timing and its
699  // corresponding call count. If two processes have the same
700  // timing but different call counts, the minimum call count will
701  // be used.
702  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
703  if (numTimers > 0) {
704  if (ignoreZeroTimers)
705  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
706  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
707  else
708  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
709  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
710  }
711 
712  // For each timer name, compute the max timing and its
713  // corresponding call count. If two processes have the same
714  // timing but different call counts, the minimum call count will
715  // be used.
716  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
717  if (numTimers > 0) {
718  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
719  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
720  }
721 
722  // For each timer name, compute the mean-over-processes timing,
723  // the mean call count, and the mean-over-call-counts timing.
724  // The mean call count is reported as a double to allow a
725  // fractional value.
726  //
727  // Each local timing is really the total timing over all local
728  // invocations. The number of local invocations is the call
729  // count. Thus, the mean-over-call-counts timing is the sum of
730  // all the timings (over all processes), divided by the sum of
731  // all the call counts (over all processes). We compute it in a
732  // different way to over unnecessary overflow.
733  Array<double> meanOverCallCountsTimings (numTimers);
734  Array<double> meanOverProcsTimings (numTimers);
735  Array<double> meanCallCounts (numTimers);
736  Array<int> ICallThisTimer (numTimers);
737  Array<int> numProcsCallingEachTimer (numTimers);
738  {
739  // Figure out how many processors actually call each timer.
740  if (ignoreZeroTimers) {
741  for (int k = 0; k < numTimers; ++k) {
742  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
743  if (callCount > 0) ICallThisTimer[k] = 1;
744  else ICallThisTimer[k] = 0;
745  }
746  if (numTimers > 0) {
747  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
748  &numProcsCallingEachTimer[0]);
749  }
750  }
751 
752  // When summing, first scale by the number of processes. This
753  // avoids unnecessary overflow, and also gives us the mean
754  // call count automatically.
755  Array<double> scaledTimings (numTimers);
756  Array<double> scaledCallCounts (numTimers);
757  const double P = static_cast<double> (numProcs);
758 
759  if (ignoreZeroTimers) {
760  for (int k = 0; k < numTimers; ++k) {
761  const double timing = timingsAndCallCounts[k].first;
762  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
763 
764  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
765  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
766  }
767  }
768  else {
769  for (int k = 0; k < numTimers; ++k) {
770  const double timing = timingsAndCallCounts[k].first;
771  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
772 
773  scaledTimings[k] = timing / P;
774  scaledCallCounts[k] = callCount / P;
775  }
776  }
777 
778  if (numTimers > 0) {
779  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
780  &meanOverProcsTimings[0]);
781  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
782  &meanCallCounts[0]);
783  }
784  // We don't have to undo the scaling for the mean timings;
785  // just divide by the scaled call count.
786  for (int k = 0; k < numTimers; ++k) {
787  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
788  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
789  }
790  else {
791  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
792  }
793  }
794  }
795 
796  // Reformat the data into the map of statistics. Be sure that
797  // each value (the std::vector of (timing, call count) pairs,
798  // each entry of which is a different statistic) preserves the
799  // order of statNames.
800  statNames.resize (4);
801  statNames[0] = "MinOverProcs";
802  statNames[1] = "MeanOverProcs";
803  statNames[2] = "MaxOverProcs";
804  statNames[3] = "MeanOverCallCounts";
805 
806  stat_map_type::iterator statIter = statData.end();
807  timer_map_t::const_iterator it = globalTimerData.begin();
808  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
809  std::vector<std::pair<double, double> > curData (4);
810  curData[0] = minTimingsAndCallCounts[k];
811  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
812  curData[2] = maxTimingsAndCallCounts[k];
813  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
814 
815  // statIter gives an insertion location hint that makes each
816  // insertion O(1), since we remember the location of the last
817  // insertion.
818  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
819  }
820  }
821 
822 
839  RCP<const Comm<int> >
840  getDefaultComm ()
841  {
842  // The default communicator. If Trilinos was built with MPI
843  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
844  // been initialized, it's not valid to use the communicator!)
845  // Otherwise, this should be a "serial" (no MPI, one "process")
846  // communicator.
847  RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
848 
849 #ifdef HAVE_MPI
850  {
851  int mpiHasBeenStarted = 0;
852  MPI_Initialized (&mpiHasBeenStarted);
853  if (! mpiHasBeenStarted) {
854  // Make pComm a new "serial communicator."
855  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
856  }
857  }
858 #endif // HAVE_MPI
859  return comm;
860  }
861 
862  } // namespace (anonymous)
863 
864 
865  void
867  std::vector<std::string>& statNames,
868  Ptr<const Comm<int> > comm,
869  const ECounterSetOp setOp,
870  const std::string& filter)
871  {
872  // Collect local timer data and names. Filter out timers with
873  // zero call counts if writeZeroTimers is false. Also, apply the
874  // timer label filter at this point, so we don't have to compute
875  // statistics on timers we don't want to display anyway.
876  timer_map_t localTimerData;
877  Array<std::string> localTimerNames;
878  const bool writeZeroTimers = false;
879  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
880  counters(), writeZeroTimers, filter);
881  // Merge the local timer data and names into global timer data and
882  // names.
883  timer_map_t globalTimerData;
884  Array<std::string> globalTimerNames;
885  const bool alwaysWriteLocal = false;
886  collectGlobalTimerData (globalTimerData, globalTimerNames,
887  localTimerData, localTimerNames,
888  comm, alwaysWriteLocal, setOp);
889  // Compute statistics on the data.
890  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
891  }
892 
893 
894  void
896  std::ostream& out,
897  const bool alwaysWriteLocal,
898  const bool writeGlobalStats,
899  const bool writeZeroTimers,
900  const ECounterSetOp setOp,
901  const std::string& filter,
902  const bool ignoreZeroTimers)
903  {
904  //
905  // We can't just call computeGlobalTimerStatistics(), since
906  // summarize() has different options that affect whether global
907  // statistics are computed and printed.
908  //
909  const int numProcs = comm->getSize();
910  const int myRank = comm->getRank();
911 
912  // Collect local timer data and names. Filter out timers with
913  // zero call counts if writeZeroTimers is false. Also, apply the
914  // timer label filter at this point, so we don't have to compute
915  // statistics on timers we don't want to display anyway.
916  timer_map_t localTimerData;
917  Array<std::string> localTimerNames;
918  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
919  counters(), writeZeroTimers, filter);
920 
921  // If we're computing global statistics, merge the local timer
922  // data and names into global timer data and names, and compute
923  // global timer statistics. Otherwise, leave the global data
924  // empty.
925  timer_map_t globalTimerData;
926  Array<std::string> globalTimerNames;
927  stat_map_type statData;
928  std::vector<std::string> statNames;
929  if (writeGlobalStats) {
930  collectGlobalTimerData (globalTimerData, globalTimerNames,
931  localTimerData, localTimerNames,
932  comm, alwaysWriteLocal, setOp);
933  // Compute statistics on the data, but only if the communicator
934  // contains more than one process. Otherwise, statistics don't
935  // make sense and we don't print them (see below).
936  if (numProcs > 1) {
937  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
938  }
939  }
940 
941  // Precision of floating-point numbers in the table.
942  const int precision = format().precision();
943  const std::ios_base::fmtflags& flags = out.flags();
944 
945  // All columns of the table, in order.
946  Array<TableColumn> tableColumns;
947 
948  // Labels of all the columns of the table.
949  // We will append to this when we add each column.
950  Array<std::string> titles;
951 
952  // Widths (in number of characters) of each column.
953  // We will append to this when we add each column.
954  Array<int> columnWidths;
955 
956  // Table column containing all timer names. If writeGlobalStats
957  // is true, we use the global timer names, otherwise we use the
958  // local timer names. We build the table on all processes
959  // redundantly, but only print on Rank 0.
960  {
961  titles.append ("Timer Name");
962 
963  // The column labels depend on whether we are computing global statistics.
964  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
965  tableColumns.append (nameCol);
966 
967  // Each column is as wide as it needs to be to hold both its
968  // title and all of the column data. This column's title is the
969  // current last entry of the titles array.
970  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
971  }
972 
973  // Table column containing local timer stats, if applicable. We
974  // only write local stats if asked, only on MPI Proc 0, and only
975  // if there is more than one MPI process in the communicator
976  // (otherwise local stats == global stats, so we just print the
977  // global stats). In this case, we've padded the local data on
978  // Proc 0 if necessary to match the global timer list, so that the
979  // columns have the same number of rows.
980  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
981  titles.append ("Local time (num calls)");
982 
983  // Copy local timer data out of the array-of-structs into
984  // separate arrays, for display in the table.
985  Array<double> localTimings;
986  Array<double> localNumCalls;
987  for (timer_map_t::const_iterator it = localTimerData.begin();
988  it != localTimerData.end(); ++it) {
989  localTimings.push_back (it->second.first);
990  localNumCalls.push_back (static_cast<double> (it->second.second));
991  }
992  TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
993  tableColumns.append (timeAndCalls);
994  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
995  }
996 
997  if (writeGlobalStats) {
998  // If there's only 1 process in the communicator, don't display
999  // statistics; statistics don't make sense in that case. Just
1000  // display the timings and call counts. If there's more than 1
1001  // process, do display statistics.
1002  if (numProcs == 1) {
1003  // Extract timings and the call counts from globalTimerData.
1004  Array<double> globalTimings;
1005  Array<double> globalNumCalls;
1006  for (timer_map_t::const_iterator it = globalTimerData.begin();
1007  it != globalTimerData.end(); ++it) {
1008  globalTimings.push_back (it->second.first);
1009  globalNumCalls.push_back (static_cast<double> (it->second.second));
1010  }
1011  // Print the table column.
1012  titles.append ("Global time (num calls)");
1013  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1014  tableColumns.append (timeAndCalls);
1015  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1016  }
1017  else { // numProcs > 1
1018  // Print a table column for each statistic. statNames and
1019  // each value in statData use the same ordering, so we can
1020  // iterate over valid indices of statNames to display the
1021  // statistics in the right order.
1022  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1023  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1024  // Extract lists of timings and their call counts for the
1025  // current statistic.
1026  Array<double> statTimings (numGlobalTimers);
1027  Array<double> statCallCounts (numGlobalTimers);
1028  stat_map_type::const_iterator it = statData.begin();
1029  for (int k = 0; it != statData.end(); ++it, ++k) {
1030  statTimings[k] = (it->second[statInd]).first;
1031  statCallCounts[k] = (it->second[statInd]).second;
1032  }
1033  // Print the table column.
1034  const std::string& statisticName = statNames[statInd];
1035  const std::string titleString = statisticName;
1036  titles.append (titleString);
1037  TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1038  tableColumns.append (timeAndCalls);
1039  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1040  }
1041  }
1042  }
1043 
1044  // Print the whole table to the given output stream on MPI Rank 0.
1045  format().setColumnWidths (columnWidths);
1046  if (myRank == 0) {
1047  std::ostringstream theTitle;
1048  theTitle << "TimeMonitor results over " << numProcs << " processor"
1049  << (numProcs > 1 ? "s" : "");
1050  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1051  }
1052  }
1053 
1054  void
1055  TimeMonitor::summarize (std::ostream &out,
1056  const bool alwaysWriteLocal,
1057  const bool writeGlobalStats,
1058  const bool writeZeroTimers,
1059  const ECounterSetOp setOp,
1060  const std::string& filter,
1061  const bool ignoreZeroTimers)
1062  {
1063  // The default communicator. If Trilinos was built with MPI
1064  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1065  // be a "serial" (no MPI, one "process") communicator.
1066  RCP<const Comm<int> > comm = getDefaultComm();
1067 
1068  summarize (comm.ptr(), out, alwaysWriteLocal,
1069  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1070  }
1071 
1072  void
1074  std::vector<std::string>& statNames,
1075  const ECounterSetOp setOp,
1076  const std::string& filter)
1077  {
1078  // The default communicator. If Trilinos was built with MPI
1079  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1080  // be a "serial" (no MPI, one "process") communicator.
1081  RCP<const Comm<int> > comm = getDefaultComm();
1082 
1083  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1084  }
1085 
1086  SyncTimeMonitor::SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset)
1087  : TimeMonitor(timer, reset), comm_(comm)
1088  { }
1089 
1091  comm_->barrier();
1092  }
1093 
1094 
1095  namespace {
1119  std::string
1120  quoteLabelForYaml (const std::string& label)
1121  {
1122  // YAML allows empty keys in key: value pairs. See Section 7.2
1123  // of the YAML 1.2 spec. We thus let an empty label pass
1124  // through without quoting or other special treatment.
1125  if (label.empty ()) {
1126  return label;
1127  }
1128 
1129  // Check whether the label is already quoted. If so, we don't
1130  // need to quote it again. However, we do need to quote any
1131  // quote symbols in the string inside the outer quotes.
1132  const bool alreadyQuoted = label.size () >= 2 &&
1133  label[0] == '"' && label[label.size() - 1] == '"';
1134 
1135  // We need to quote if there are any colons or (inner) quotes in
1136  // the string. We'll determine this as we read through the
1137  // string and escape any characters that need escaping.
1138  bool needToQuote = false;
1139 
1140  std::string out; // To fill with the return value
1141  out.reserve (label.size ());
1142 
1143  const size_t startPos = alreadyQuoted ? 1 : 0;
1144  const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
1145  for (size_t i = startPos; i < endPos; ++i) {
1146  const char c = label[i];
1147  if (c == '"' || c == '\\') {
1148  out.push_back ('\\'); // Escape the quote or backslash.
1149  needToQuote = true;
1150  }
1151  else if (c == ':') {
1152  needToQuote = true;
1153  }
1154  out.push_back (c);
1155  }
1156 
1157  if (needToQuote || alreadyQuoted) {
1158  // If the input string was already quoted, then out doesn't
1159  // include its quotes, so we have to add them back in.
1160  return "\"" + out + "\"";
1161  }
1162  else {
1163  return out;
1164  }
1165  }
1166 
1167  } // namespace (anonymous)
1168 
1169 
1170  void TimeMonitor::
1171  summarizeToYaml (Ptr<const Comm<int> > comm,
1172  std::ostream &out,
1173  const ETimeMonitorYamlFormat yamlStyle,
1174  const std::string& filter)
1175  {
1176  using Teuchos::FancyOStream;
1177  using Teuchos::fancyOStream;
1178  using Teuchos::getFancyOStream;
1179  using Teuchos::OSTab;
1180  using Teuchos::RCP;
1181  using Teuchos::rcpFromRef;
1182  using std::endl;
1183  typedef std::vector<std::string>::size_type size_type;
1184 
1185  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1186 
1187  // const bool writeGlobalStats = true;
1188  // const bool writeZeroTimers = true;
1189  // const bool alwaysWriteLocal = false;
1190  const ECounterSetOp setOp = Intersection;
1191 
1192  stat_map_type statData;
1193  std::vector<std::string> statNames;
1194  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1195 
1196  const int numProcs = comm->getSize();
1197 
1198  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1199  // " as the line prefix does not work, else I would prefer that
1200  // method for printing each line of a YAML block sequence (see
1201  // Section 8.2.1 of the YAML 1.2 spec).
1202  //
1203  // Also, I have to set the tab indent string here, rather than in
1204  // OSTab's constructor. This is because line prefix (which for
1205  // some reason is what OSTab's constructor takes, rather than tab
1206  // indent string) means something different from tab indent
1207  // string, and turning on the line prefix prints all sorts of
1208  // things including "|" for some reason.
1209  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1210  pfout->setTabIndentStr (" ");
1211  FancyOStream& fout = *pfout;
1212 
1213  fout << "# Teuchos::TimeMonitor report" << endl
1214  << "---" << endl;
1215 
1216  // mfh 19 Aug 2012: An important goal of our chosen output format
1217  // was to minimize the nesting depth. We have managed to keep the
1218  // nesting depth to 3, which is the limit that the current version
1219  // of PylotDB imposes for its YAML input.
1220 
1221  // Outermost level is a dictionary. (Individual entries of a
1222  // dictionary do _not_ begin with "- ".) We always print the
1223  // outermost level in standard style, not flow style, for better
1224  // readability. We begin the outermost level with metadata.
1225  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1226  << "Number of processes: " << numProcs << endl
1227  << "Time unit: s" << endl;
1228  // For a key: value pair where the value is a sequence or
1229  // dictionary on the following line, YAML requires a space after
1230  // the colon.
1231  fout << "Statistics collected: ";
1232  // Print list of the names of all the statistics we collected.
1233  if (compact) {
1234  fout << " [";
1235  for (size_type i = 0; i < statNames.size (); ++i) {
1236  fout << quoteLabelForYaml (statNames[i]);
1237  if (i + 1 < statNames.size ()) {
1238  fout << ", ";
1239  }
1240  }
1241  fout << "]" << endl;
1242  }
1243  else {
1244  fout << endl;
1245  OSTab tab1 (pfout);
1246  for (size_type i = 0; i < statNames.size (); ++i) {
1247  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1248  }
1249  }
1250 
1251  // Print the list of timer names.
1252  //
1253  // It might be nicer instead to print a map from timer name to all
1254  // of its data, but keeping the maximum nesting depth small
1255  // ensures better compatibility with different parsing tools.
1256  fout << "Timer names: ";
1257  if (compact) {
1258  fout << " [";
1259  size_type ind = 0;
1260  for (stat_map_type::const_iterator it = statData.begin();
1261  it != statData.end(); ++it, ++ind) {
1262  fout << quoteLabelForYaml (it->first);
1263  if (ind + 1 < statData.size ()) {
1264  fout << ", ";
1265  }
1266  }
1267  fout << "]" << endl;
1268  }
1269  else {
1270  fout << endl;
1271  OSTab tab1 (pfout);
1272  for (stat_map_type::const_iterator it = statData.begin();
1273  it != statData.end(); ++it) {
1274  fout << "- " << quoteLabelForYaml (it->first) << endl;
1275  }
1276  }
1277 
1278  // Print times for each timer, as a map from statistic name to its time.
1279  fout << "Total times: ";
1280  if (compact) {
1281  fout << " {";
1282  size_type outerInd = 0;
1283  for (stat_map_type::const_iterator outerIter = statData.begin();
1284  outerIter != statData.end(); ++outerIter, ++outerInd) {
1285  // Print timer name.
1286  fout << quoteLabelForYaml (outerIter->first) << ": ";
1287  // Print that timer's data.
1288  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1289  fout << "{";
1290  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1291  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1292  << curData[innerInd].first;
1293  if (innerInd + 1 < curData.size ()) {
1294  fout << ", ";
1295  }
1296  }
1297  fout << "}";
1298  if (outerInd + 1 < statData.size ()) {
1299  fout << ", ";
1300  }
1301  }
1302  fout << "}" << endl;
1303  }
1304  else {
1305  fout << endl;
1306  OSTab tab1 (pfout);
1307  size_type outerInd = 0;
1308  for (stat_map_type::const_iterator outerIter = statData.begin();
1309  outerIter != statData.end(); ++outerIter, ++outerInd) {
1310  // Print timer name.
1311  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1312  // Print that timer's data.
1313  OSTab tab2 (pfout);
1314  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1315  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1316  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1317  << curData[innerInd].first << endl;
1318  }
1319  }
1320  }
1321 
1322  // Print call counts for each timer, for each statistic name.
1323  fout << "Call counts:";
1324  if (compact) {
1325  fout << " {";
1326  size_type outerInd = 0;
1327  for (stat_map_type::const_iterator outerIter = statData.begin();
1328  outerIter != statData.end(); ++outerIter, ++outerInd) {
1329  // Print timer name.
1330  fout << quoteLabelForYaml (outerIter->first) << ": ";
1331  // Print that timer's data.
1332  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1333  fout << "{";
1334  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1335  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1336  << curData[innerInd].second;
1337  if (innerInd + 1 < curData.size ()) {
1338  fout << ", ";
1339  }
1340  }
1341  fout << "}";
1342  if (outerInd + 1 < statData.size ()) {
1343  fout << ", ";
1344  }
1345  }
1346  fout << "}" << endl;
1347  }
1348  else {
1349  fout << endl;
1350  OSTab tab1 (pfout);
1351  size_type outerInd = 0;
1352  for (stat_map_type::const_iterator outerIter = statData.begin();
1353  outerIter != statData.end(); ++outerIter, ++outerInd) {
1354  // Print timer name.
1355  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1356  // Print that timer's data.
1357  OSTab tab2 (pfout);
1358  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1359  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1360  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1361  << curData[innerInd].second << endl;
1362  }
1363  }
1364  }
1365  }
1366 
1367  void TimeMonitor::
1368  summarizeToYaml (std::ostream &out,
1369  const ETimeMonitorYamlFormat yamlStyle,
1370  const std::string& filter)
1371  {
1372  // The default communicator. If Trilinos was built with MPI
1373  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1374  // be a "serial" (no MPI, one "process") communicator.
1375  RCP<const Comm<int> > comm = getDefaultComm ();
1376 
1377  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1378  }
1379 
1380  // Default value is false. We'll set to true once
1381  // setReportParameters() completes successfully.
1382  bool TimeMonitor::setParams_ = false;
1383 
1384  // We have to declare all of these here in order to avoid linker errors.
1385  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1386  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1387  ECounterSetOp TimeMonitor::setOp_ = Intersection;
1388  bool TimeMonitor::alwaysWriteLocal_ = false;
1389  bool TimeMonitor::writeGlobalStats_ = true;
1390  bool TimeMonitor::writeZeroTimers_ = true;
1391 
1392  void
1393  TimeMonitor::setReportFormatParameter (ParameterList& plist)
1394  {
1395  const std::string name ("Report format");
1396  const std::string defaultValue ("Table");
1397  const std::string docString ("Output format for report of timer statistics");
1398  Array<std::string> strings;
1399  Array<std::string> docs;
1400  Array<ETimeMonitorReportFormat> values;
1401 
1402  strings.push_back ("YAML");
1403  docs.push_back ("YAML (see yaml.org) format");
1404  values.push_back (REPORT_FORMAT_YAML);
1405  strings.push_back ("Table");
1406  docs.push_back ("Tabular format via Teuchos::TableFormat");
1407  values.push_back (REPORT_FORMAT_TABLE);
1408 
1409  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1410  docString,
1411  strings (), docs (),
1412  values (), &plist);
1413  }
1414 
1415  void
1416  TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1417  {
1418  const std::string name ("YAML style");
1419  const std::string defaultValue ("spacious");
1420  const std::string docString ("YAML-specific output format");
1421  Array<std::string> strings;
1422  Array<std::string> docs;
1423  Array<ETimeMonitorYamlFormat> values;
1424 
1425  strings.push_back ("compact");
1426  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1427  "yaml.org) for most sequences except the outermost sequence");
1428  values.push_back (YAML_FORMAT_COMPACT);
1429 
1430  strings.push_back ("spacious");
1431  docs.push_back ("Spacious format: avoid flow style");
1432  values.push_back (YAML_FORMAT_SPACIOUS);
1433 
1434  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1435  docString,
1436  strings (), docs (),
1437  values (), &plist);
1438  }
1439 
1440  void
1441  TimeMonitor::setSetOpParameter (ParameterList& plist)
1442  {
1443  const std::string name ("How to merge timer sets");
1444  const std::string defaultValue ("Intersection");
1445  const std::string docString ("How to merge differing sets of timers "
1446  "across processes");
1447  Array<std::string> strings;
1448  Array<std::string> docs;
1449  Array<ECounterSetOp> values;
1450 
1451  strings.push_back ("Intersection");
1452  docs.push_back ("Compute intersection of timer sets over processes");
1453  values.push_back (Intersection);
1454  strings.push_back ("Union");
1455  docs.push_back ("Compute union of timer sets over processes");
1456  values.push_back (Union);
1457 
1458  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1459  strings (), docs (), values (),
1460  &plist);
1461  }
1462 
1463  void
1465  {
1466  stackedTimer_ = t;
1467  }
1468 
1471  {
1472  return stackedTimer_;
1473  }
1474 
1475  bool
1476  TimeMonitor::stackedTimerNameIsDefault() {
1477  return stackedTimer_.is_null() || (stackedTimer_->name() == defaultStackedTimerName);
1478  }
1479 
1480  RCP<const ParameterList>
1482  {
1483  // Our implementation favors recomputation over persistent
1484  // storage. That is, we simply recreate the list every time we
1485  // need it.
1486  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1487 
1488  const bool alwaysWriteLocal = false;
1489  const bool writeGlobalStats = true;
1490  const bool writeZeroTimers = true;
1491 
1492  setReportFormatParameter (*plist);
1493  setYamlFormatParameter (*plist);
1494  setSetOpParameter (*plist);
1495  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1496  "Always output local timers' values on Proc 0");
1497  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1498  "statistics, even if there is only one process in the "
1499  "communicator");
1500  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1501  "timers that have never been called");
1502 
1503  return rcp_const_cast<const ParameterList> (plist);
1504  }
1505 
1506  void
1507  TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1508  {
1509  ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1510  ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1511  ECounterSetOp setOp = Intersection;
1512  bool alwaysWriteLocal = false;
1513  bool writeGlobalStats = true;
1514  bool writeZeroTimers = true;
1515 
1516  if (params.is_null ()) {
1517  // If we've set parameters before, leave their current values.
1518  // Otherwise, set defaults (below).
1519  if (setParams_) {
1520  return;
1521  }
1522  }
1523  else { // params is nonnull. Let's read it!
1524  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1525 
1526  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1527  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1528  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1529  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1530  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1531  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1532  }
1533  // Defer setting state until here, to ensure the strong exception
1534  // guarantee for this method (either it throws with no externally
1535  // visible state changes, or it returns normally).
1536  reportFormat_ = reportFormat;
1537  yamlStyle_ = yamlStyle;
1538  setOp_ = setOp;
1539  alwaysWriteLocal_ = alwaysWriteLocal;
1540  writeGlobalStats_ = writeGlobalStats;
1541  writeZeroTimers_ = writeZeroTimers;
1542 
1543  setParams_ = true; // Yay, we successfully set parameters!
1544  }
1545 
1546  void
1548  std::ostream& out,
1549  const std::string& filter,
1550  const RCP<ParameterList>& params)
1551  {
1552  setReportParameters (params);
1553 
1554  if (reportFormat_ == REPORT_FORMAT_YAML) {
1555  summarizeToYaml (comm, out, yamlStyle_, filter);
1556  }
1557  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1558  summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1559  writeZeroTimers_, setOp_, filter);
1560  }
1561  else {
1562  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1563  "Invalid report format. This should never happen; ParameterList "
1564  "validation should have caught this. Please report this bug to the "
1565  "Teuchos developers.");
1566  }
1567  }
1568 
1569  void
1571  std::ostream& out,
1572  const RCP<ParameterList>& params)
1573  {
1574  report (comm, out, "", params);
1575  }
1576 
1577  void
1578  TimeMonitor::report (std::ostream& out,
1579  const std::string& filter,
1580  const RCP<ParameterList>& params)
1581  {
1582  RCP<const Comm<int> > comm = getDefaultComm ();
1583  report (comm.ptr (), out, filter, params);
1584  }
1585 
1586  void
1587  TimeMonitor::report (std::ostream& out,
1588  const RCP<ParameterList>& params)
1589  {
1590  RCP<const Comm<int> > comm = getDefaultComm ();
1591  report (comm.ptr (), out, "", params);
1592  }
1593 
1594 } // namespace Teuchos
SyncTimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
static Teuchos::RCP< Teuchos::StackedTimer > getStackedTimer()
The StackedTimer used by the TimeMonitor.
Array< T > & append(const T &x)
Add a new entry at the end of the array.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
basic_OSTab< char > OSTab
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
const Time & counter() const
Constant access to the instance&#39;s counter reference.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
basic_FancyOStream< char > FancyOStream
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
TimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
static void disableTimer(const std::string &name)
Disable the timer with the given name.
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
T * get() const
Get the raw C++ pointer to the underlying object.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
This structure defines some basic traits for a scalar field type.
Teuchos version of MPI_MAXLOC.
Base interface class for user-defined reduction operations for objects that use value semantics...
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injection of timing from TimeMonitor-enabled objects.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
~TimeMonitor() override
Destructor: stops the timer.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
A column of TableEntry objects.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
double stop()
Stop the timer, if the timer is enabled (see disable()).
Wall-clock timer.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
int precision() const
Get the precision for writing doubles. Default is 4.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer into which the TimeMonitor will insert timings.
Provides utilities for formatting tabular output.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
const std::string & name() const
The name of this timer.
A list of parameters of arbitrary type.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process...
reference back()
void push_back(const value_type &x)
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
size_type size() const
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
~SyncTimeMonitor() override
Destructor: stops the timer.
Scope guard for Teuchos::Time, with MPI collective timer reporting.
Smart reference counting pointer class for automatic garbage collection.
This class allows one to push and pop timers on and off a stack.
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Common capabilities for collecting and reporting performance data across processors.
same as MinLoc, but don&#39;t allow zero
Simple wrapper class for raw pointers to single objects where no persisting relationship exists...
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
Scope guard for Time, that can compute MPI collective timer statistics.
static void zeroOutTimers()
Reset all global timers to zero.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
bool is_null() const
Returns true if the underlying pointer is null.