Teuchos - Trilinos Tools Package  Version of the Day
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_TimeMonitor.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Teuchos: Common Tools Package
4 //
5 // Copyright 2004 NTESS and the Teuchos contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #include "Teuchos_TimeMonitor.hpp"
11 #include "Teuchos_CommHelpers.hpp"
12 #include "Teuchos_DefaultComm.hpp"
13 #include "Teuchos_TableColumn.hpp"
14 #include "Teuchos_TableFormat.hpp"
15 #include "Teuchos_StandardParameterEntryValidators.hpp"
16 #include "Teuchos_ScalarTraits.hpp"
17 #include "Teuchos_StackedTimer.hpp"
18 
19 #include <functional>
20 #include <iomanip>
21 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
22 #include <sstream>
23 #endif
24 
25 namespace Teuchos {
78  template<class Ordinal, class ScalarType, class IndexType>
79  class MaxLoc :
80  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
81  public:
82  void
83  reduce (const Ordinal count,
84  const std::pair<ScalarType, IndexType> inBuffer[],
85  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
86  };
87 
88  template<class Ordinal>
89  class MaxLoc<Ordinal, double, int> :
90  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
91  public:
92  void
93  reduce (const Ordinal count,
94  const std::pair<double, int> inBuffer[],
95  std::pair<double, int> inoutBuffer[]) const
96  {
97  for (Ordinal ind = 0; ind < count; ++ind) {
98  const std::pair<double, int>& in = inBuffer[ind];
99  std::pair<double, int>& inout = inoutBuffer[ind];
100 
101  if (in.first > inout.first) {
102  inout.first = in.first;
103  inout.second = in.second;
104  } else if (in.first < inout.first) {
105  // Don't need to do anything; inout has the values.
106  } else { // equal, or at least one is NaN.
107  inout.first = in.first;
108  inout.second = std::min (in.second, inout.second);
109  }
110  }
111  }
112  };
113 
140  template<class Ordinal, class ScalarType, class IndexType>
141  class MinLoc :
142  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
143  public:
144  void
145  reduce (const Ordinal count,
146  const std::pair<ScalarType, IndexType> inBuffer[],
147  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
148  };
149 
150  template<class Ordinal>
151  class MinLoc<Ordinal, double, int> :
152  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
153  public:
154  void
155  reduce (const Ordinal count,
156  const std::pair<double, int> inBuffer[],
157  std::pair<double, int> inoutBuffer[]) const
158  {
159  for (Ordinal ind = 0; ind < count; ++ind) {
160  const std::pair<double, int>& in = inBuffer[ind];
161  std::pair<double, int>& inout = inoutBuffer[ind];
162 
163  if (in.first < inout.first) {
164  inout.first = in.first;
165  inout.second = in.second;
166  } else if (in.first > inout.first) {
167  // Don't need to do anything; inout has the values.
168  } else { // equal, or at least one is NaN.
169  inout.first = in.first;
170  inout.second = std::min (in.second, inout.second);
171  }
172  }
173  }
174  };
175 
179  template<class Ordinal, class ScalarType, class IndexType>
181  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
182  public:
183  void
184  reduce (const Ordinal count,
185  const std::pair<ScalarType, IndexType> inBuffer[],
186  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
187  };
188 
189  template<class Ordinal>
190  class MinLocNonzero<Ordinal, double, int> :
191  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
192  public:
193  void
194  reduce (const Ordinal count,
195  const std::pair<double, int> inBuffer[],
196  std::pair<double, int> inoutBuffer[]) const
197  {
198  for (Ordinal ind = 0; ind < count; ++ind) {
199  const std::pair<double, int>& in = inBuffer[ind];
200  std::pair<double, int>& inout = inoutBuffer[ind];
201 
202  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
203  inout.first = in.first;
204  inout.second = in.second;
205  } else if (in.first > inout.first) {
206  // Don't need to do anything; inout has the values.
207  } else { // equal, or at least one is NaN.
208  inout.first = in.first;
209  inout.second = std::min (in.second, inout.second);
210  }
211  }
212  }
213  };
214 
215  // Typedef used internally by TimeMonitor::summarize() and its
216  // helper functions. The map is keyed on timer label (a string).
217  // Each value is a pair: (total number of seconds over all calls to
218  // that timer, total number of calls to that timer).
219  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
220 
221  // static initialization
223 
224  TimeMonitor::TimeMonitor (Time& timer, bool reset)
225  : PerformanceMonitorBase<Time>(timer, reset)
226  {
227  if (!isRecursiveCall()) {
228  counter().start(reset);
229 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
230  if (nonnull(stackedTimer_))
231  stackedTimer_->start(counter().name(),false);
232 #endif
233  }
234  }
235 
237  if (!isRecursiveCall()) {
238  counter().stop();
239 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
240  try {
241  if (nonnull(stackedTimer_))
242  stackedTimer_->stop(counter().name(),false);
243  }
244  catch (std::runtime_error& e) {
245  std::ostringstream warning;
246  warning <<
247  "\n*********************************************************************\n"
248  "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
249  "A TimeMonitor timer was stopped before a nested subtimer was\n"
250  "stopped. This is not allowed by the StackedTimer. This corner case\n"
251  "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
252  "assigned to a new timer. To disable this warning, either fix the\n"
253  "ordering of timer creation and destuction or disable the StackedTimer\n"
254  "support in the TimeMonitor by setting the StackedTimer to null\n"
255  "Example:\n"
256  " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
257  "///code to time \n"
258  "MM = Teuchos::null;\n"
259  "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
260  "*********************************************************************\n";
261  std::cout << warning.str() << std::endl << e.what() << std::endl;
263  }
264 #endif
265  }
266  }
267 
268  void
269  TimeMonitor::disableTimer (const std::string& name)
270  {
271  RCP<Time> timer = lookupCounter (name);
273  timer == null, std::invalid_argument,
274  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
275  timer->disable ();
276  }
277 
278  void
279  TimeMonitor::enableTimer (const std::string& name)
280  {
281  RCP<Time> timer = lookupCounter (name);
283  timer == null, std::invalid_argument,
284  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
285  timer->enable ();
286  }
287 
288  void
290  {
291  typedef std::map<std::string, RCP<Time> > map_type;
292  typedef map_type::iterator iter_type;
293  map_type& ctrs = counters ();
294 
295  // In debug mode, loop first to check whether any of the timers
296  // are running, before resetting them. This ensures that this
297  // method satisfies the strong exception guarantee (either it
298  // completes normally, or there are no side effects).
299 #ifdef TEUCHOS_DEBUG
300  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
301  // We throw a runtime_error rather than a logic_error, because
302  // logic_error suggests a bug in the implementation of
303  // TimeMonitor. Calling zeroOutTimers() when a timer is running
304  // is not TimeMonitor's fault.
306  it->second->isRunning (), std::runtime_error,
307  "Timer \"" << it->second->name () << "\" is currently running. "
308  "You are not allowed to reset running timers.");
309  }
310 #endif // TEUCHOS_DEBUG
311 
312  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
313  it->second->reset ();
314  }
315  }
316 
317  // An anonymous namespace is the standard way of limiting linkage of
318  // its contained routines to file scope.
319  namespace {
320  // \brief Return an "empty" local timer datum.
321  //
322  // "Empty" means the datum has zero elapsed time and zero call
323  // count. This function does not actually create a timer.
324  //
325  // \param name The timer's name.
326  std::pair<std::string, std::pair<double, int> >
327  makeEmptyTimerDatum (const std::string& name)
328  {
329  return std::make_pair (name, std::make_pair (double(0), int(0)));
330  }
331 
332  // \fn collectLocalTimerData
333  // \brief Collect and sort local timer data by timer names.
334  //
335  // \param localData [out] Map whose keys are the timer names, and
336  // whose value for each key is the total elapsed time (in
337  // seconds) and the call count for the timer with that name.
338  //
339  // \param localCounters [in] Timers from which to extract data.
340  //
341  // \param filter [in] Filter for timer labels. If filter is not
342  // empty, this method will only collect data for local timers
343  // whose labels begin with this string.
344  //
345  // Extract the total elapsed time and call count from each timer
346  // in the given array. Merge results for timers with duplicate
347  // labels, by summing their total elapsed times and call counts
348  // pairwise.
349  void
350  collectLocalTimerData (timer_map_t& localData,
351  const std::map<std::string, RCP<Time> >& localCounters,
352  const std::string& filter="")
353  {
354  using std::make_pair;
355  typedef timer_map_t::iterator iter_t;
356 
357  timer_map_t theLocalData;
358  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
359  it != localCounters.end(); ++it) {
360  const std::string& name = it->second->name ();
361 
362  // Filter current timer name, if provided filter is nonempty.
363  // Filter string must _start_ the timer label, not just be in it.
364  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
365  if (! skipThisOne) {
366  const double timing = it->second->totalElapsedTime ();
367  const int numCalls = it->second->numCalls ();
368 
369  // Merge timers with duplicate labels, by summing their
370  // total elapsed times and call counts.
371  iter_t loc = theLocalData.find (name);
372  if (loc == theLocalData.end()) {
373  // Use loc as an insertion location hint.
374  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
375  }
376  else {
377  loc->second.first += timing;
378  loc->second.second += numCalls;
379  }
380  }
381  }
382  // This avoids copying the map, and also makes this method
383  // satisfy the strong exception guarantee.
384  localData.swap (theLocalData);
385  }
386 
387  // \brief Locally filter out timer data with zero call counts.
388  //
389  // \param timerData [in/out]
390  void
391  filterZeroData (timer_map_t& timerData)
392  {
393  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
394  // iterator hint, instead of rebuilding the map completely.
395  timer_map_t newTimerData;
396  for (timer_map_t::const_iterator it = timerData.begin();
397  it != timerData.end(); ++it) {
398  if (it->second.second > 0) {
399  newTimerData[it->first] = it->second;
400  }
401  }
402  timerData.swap (newTimerData);
403  }
404 
426  void
427  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
428  Array<std::string>& localTimerNames,
429  const std::map<std::string, RCP<Time> >& localTimers,
430  const bool writeZeroTimers,
431  const std::string& filter="")
432  {
433  // Collect and sort local timer data by timer names.
434  collectLocalTimerData (localTimerData, localTimers, filter);
435 
436  // Filter out zero data locally first. This ensures that if we
437  // are writing global stats, and if a timer name exists in the
438  // set of global names, then that timer has a nonzero call count
439  // on at least one MPI process.
440  if (! writeZeroTimers) {
441  filterZeroData (localTimerData);
442  }
443 
444  // Extract the set of local timer names. The std::map keeps
445  // them sorted alphabetically.
446  localTimerNames.reserve (localTimerData.size());
447  for (timer_map_t::const_iterator it = localTimerData.begin();
448  it != localTimerData.end(); ++it) {
449  localTimerNames.push_back (it->first);
450  }
451  }
452 
487  void
488  collectGlobalTimerData (timer_map_t& globalTimerData,
489  Array<std::string>& globalTimerNames,
490  timer_map_t& localTimerData,
491  Array<std::string>& localTimerNames,
492  Ptr<const Comm<int> > comm,
493  const bool alwaysWriteLocal,
494  const ECounterSetOp setOp)
495  {
496  // There may be some global timers that are not local timers on
497  // the calling MPI process(es). In that case, if
498  // alwaysWriteLocal is true, then we need to fill in the
499  // "missing" local timers. That will ensure that both global
500  // and local timer columns in the output table have the same
501  // number of rows. The collectLocalTimerDataAndNames() method
502  // may have already filtered out local timers with zero call
503  // counts (if its writeZeroTimers argument was false), but we
504  // won't be filtering again. Thus, any local timer data we
505  // insert here won't get filtered out.
506  //
507  // Note that calling summarize() with writeZeroTimers == false
508  // will still do what it says, even if we insert local timers
509  // with zero call counts here.
510 
511  // This does the correct and inexpensive thing (just copies the
512  // timer data) if numProcs == 1. Otherwise, it initiates a
513  // communication with \f$O(\log P)\f$ messages along the
514  // critical path, where \f$P\f$ is the number of participating
515  // processes.
516  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
517 
518 #ifdef TEUCHOS_DEBUG
519  {
520  // Sanity check that all processes have the name number of
521  // global timer names.
522  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
523  timer_map_t::size_type minNumGlobalNames = 0;
524  timer_map_t::size_type maxNumGlobalNames = 0;
525  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
526  outArg (minNumGlobalNames));
527  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
528  outArg (maxNumGlobalNames));
529  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
530  std::logic_error, "Min # global timer names = " << minNumGlobalNames
531  << " != max # global timer names = " << maxNumGlobalNames
532  << ". Please report this bug to the Teuchos developers.");
533  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
534  std::logic_error, "My # global timer names = " << myNumGlobalNames
535  << " != min # global timer names = " << minNumGlobalNames
536  << ". Please report this bug to the Teuchos developers.");
537  }
538 #endif // TEUCHOS_DEBUG
539 
540  // mergeCounterNames() just merges the counters' names, not
541  // their actual data. Now we need to fill globalTimerData with
542  // this process' timer data for the timers in globalTimerNames.
543  //
544  // All processes need the full list of global timers, since
545  // there may be some global timers that are not local timers.
546  // That's why mergeCounterNames() has to be an all-reduce, not
547  // just a reduction to Proc 0.
548  //
549  // Insertion optimization: if the iterator given to map::insert
550  // points right before where we want to insert, insertion is
551  // O(1). globalTimerNames is sorted, so feeding the iterator
552  // output of map::insert into the next invocation's input should
553  // make the whole insertion O(N) where N is the number of
554  // entries in globalTimerNames.
555  timer_map_t::iterator globalMapIter = globalTimerData.begin();
556  timer_map_t::iterator localMapIter;
557  for (Array<string>::const_iterator it = globalTimerNames.begin();
558  it != globalTimerNames.end(); ++it) {
559  const std::string& globalName = *it;
560  localMapIter = localTimerData.find (globalName);
561 
562  if (localMapIter == localTimerData.end()) {
563  if (alwaysWriteLocal) {
564  // If there are some global timers that are not local
565  // timers, and if we want to print local timers, we insert
566  // a local timer datum with zero elapsed time and zero
567  // call count into localTimerData as well. This will
568  // ensure that both global and local timer columns in the
569  // output table have the same number of rows.
570  //
571  // We really only need to do this on Proc 0, which is the
572  // only process that currently may print local timers.
573  // However, we do it on all processes, just in case
574  // someone later wants to modify this function to print
575  // out local timer data for some process other than Proc
576  // 0. This extra computation won't affect the cost along
577  // the critical path, for future computations in which
578  // Proc 0 participates.
579  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
580 
581  // Make sure the missing global name gets added to the
582  // list of local names. We'll re-sort the list of local
583  // names below.
584  localTimerNames.push_back (globalName);
585  }
586  // There's a global timer that's not a local timer. Add it
587  // to our pre-merge version of the global timer data so that
588  // we can safely merge the global timer data later.
589  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
590  }
591  else {
592  // We have this global timer name in our local timer list.
593  // Fill in our pre-merge version of the global timer data
594  // with our local data.
595  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
596  }
597  }
598 
599  if (alwaysWriteLocal) {
600  // Re-sort the list of local timer names, since we may have
601  // inserted "missing" names above.
602  std::sort (localTimerNames.begin(), localTimerNames.end());
603  }
604 
605 #ifdef TEUCHOS_DEBUG
606  {
607  // Sanity check that all processes have the name number of
608  // global timers.
609  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
610  timer_map_t::size_type minNumGlobalTimers = 0;
611  timer_map_t::size_type maxNumGlobalTimers = 0;
612  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
613  outArg (minNumGlobalTimers));
614  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
615  outArg (maxNumGlobalTimers));
616  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
617  std::logic_error, "Min # global timers = " << minNumGlobalTimers
618  << " != max # global timers = " << maxNumGlobalTimers
619  << ". Please report this bug to the Teuchos developers.");
620  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
621  std::logic_error, "My # global timers = " << myNumGlobalTimers
622  << " != min # global timers = " << minNumGlobalTimers
623  << ". Please report this bug to the Teuchos developers.");
624  }
625 #endif // TEUCHOS_DEBUG
626  }
627 
674  void
675  computeGlobalTimerStats (stat_map_type& statData,
676  std::vector<std::string>& statNames,
677  Ptr<const Comm<int> > comm,
678  const timer_map_t& globalTimerData,
679  const bool ignoreZeroTimers)
680  {
681  using Teuchos::ScalarTraits;
682 
683  const int numTimers = static_cast<int> (globalTimerData.size());
684  const int numProcs = comm->getSize();
685 
686  // Extract pre-reduction timings and call counts into a
687  // sequential array. This array will be in the same order as
688  // the global timer names are in the map.
689  Array<std::pair<double, int> > timingsAndCallCounts;
690  timingsAndCallCounts.reserve (numTimers);
691  for (timer_map_t::const_iterator it = globalTimerData.begin();
692  it != globalTimerData.end(); ++it) {
693  timingsAndCallCounts.push_back (it->second);
694  }
695 
696  // For each timer name, compute the min timing and its
697  // corresponding call count. If two processes have the same
698  // timing but different call counts, the minimum call count will
699  // be used.
700  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
701  if (numTimers > 0) {
702  if (ignoreZeroTimers)
703  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
704  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
705  else
706  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
707  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
708  }
709 
710  // For each timer name, compute the max timing and its
711  // corresponding call count. If two processes have the same
712  // timing but different call counts, the minimum call count will
713  // be used.
714  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
715  if (numTimers > 0) {
716  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
717  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
718  }
719 
720  // For each timer name, compute the mean-over-processes timing,
721  // the mean call count, and the mean-over-call-counts timing.
722  // The mean call count is reported as a double to allow a
723  // fractional value.
724  //
725  // Each local timing is really the total timing over all local
726  // invocations. The number of local invocations is the call
727  // count. Thus, the mean-over-call-counts timing is the sum of
728  // all the timings (over all processes), divided by the sum of
729  // all the call counts (over all processes). We compute it in a
730  // different way to over unnecessary overflow.
731  Array<double> meanOverCallCountsTimings (numTimers);
732  Array<double> meanOverProcsTimings (numTimers);
733  Array<double> meanCallCounts (numTimers);
734  Array<int> ICallThisTimer (numTimers);
735  Array<int> numProcsCallingEachTimer (numTimers);
736  {
737  // Figure out how many processors actually call each timer.
738  if (ignoreZeroTimers) {
739  for (int k = 0; k < numTimers; ++k) {
740  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
741  if (callCount > 0) ICallThisTimer[k] = 1;
742  else ICallThisTimer[k] = 0;
743  }
744  if (numTimers > 0) {
745  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
746  &numProcsCallingEachTimer[0]);
747  }
748  }
749 
750  // When summing, first scale by the number of processes. This
751  // avoids unnecessary overflow, and also gives us the mean
752  // call count automatically.
753  Array<double> scaledTimings (numTimers);
754  Array<double> scaledCallCounts (numTimers);
755  const double P = static_cast<double> (numProcs);
756 
757  if (ignoreZeroTimers) {
758  for (int k = 0; k < numTimers; ++k) {
759  const double timing = timingsAndCallCounts[k].first;
760  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
761 
762  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
763  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
764  }
765  }
766  else {
767  for (int k = 0; k < numTimers; ++k) {
768  const double timing = timingsAndCallCounts[k].first;
769  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
770 
771  scaledTimings[k] = timing / P;
772  scaledCallCounts[k] = callCount / P;
773  }
774  }
775 
776  if (numTimers > 0) {
777  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
778  &meanOverProcsTimings[0]);
779  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
780  &meanCallCounts[0]);
781  }
782  // We don't have to undo the scaling for the mean timings;
783  // just divide by the scaled call count.
784  for (int k = 0; k < numTimers; ++k) {
785  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
786  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
787  }
788  else {
789  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
790  }
791  }
792  }
793 
794  // Reformat the data into the map of statistics. Be sure that
795  // each value (the std::vector of (timing, call count) pairs,
796  // each entry of which is a different statistic) preserves the
797  // order of statNames.
798  statNames.resize (4);
799  statNames[0] = "MinOverProcs";
800  statNames[1] = "MeanOverProcs";
801  statNames[2] = "MaxOverProcs";
802  statNames[3] = "MeanOverCallCounts";
803 
804  stat_map_type::iterator statIter = statData.end();
805  timer_map_t::const_iterator it = globalTimerData.begin();
806  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
807  std::vector<std::pair<double, double> > curData (4);
808  curData[0] = minTimingsAndCallCounts[k];
809  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
810  curData[2] = maxTimingsAndCallCounts[k];
811  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
812 
813  // statIter gives an insertion location hint that makes each
814  // insertion O(1), since we remember the location of the last
815  // insertion.
816  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
817  }
818  }
819 
820 
837  RCP<const Comm<int> >
838  getDefaultComm ()
839  {
840  // The default communicator. If Trilinos was built with MPI
841  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
842  // been initialized, it's not valid to use the communicator!)
843  // Otherwise, this should be a "serial" (no MPI, one "process")
844  // communicator.
845  RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
846 
847 #ifdef HAVE_MPI
848  {
849  int mpiHasBeenStarted = 0;
850  MPI_Initialized (&mpiHasBeenStarted);
851  if (! mpiHasBeenStarted) {
852  // Make pComm a new "serial communicator."
853  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
854  }
855  }
856 #endif // HAVE_MPI
857  return comm;
858  }
859 
860  } // namespace (anonymous)
861 
862 
863  void
865  std::vector<std::string>& statNames,
866  Ptr<const Comm<int> > comm,
867  const ECounterSetOp setOp,
868  const std::string& filter)
869  {
870  // Collect local timer data and names. Filter out timers with
871  // zero call counts if writeZeroTimers is false. Also, apply the
872  // timer label filter at this point, so we don't have to compute
873  // statistics on timers we don't want to display anyway.
874  timer_map_t localTimerData;
875  Array<std::string> localTimerNames;
876  const bool writeZeroTimers = false;
877  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
878  counters(), writeZeroTimers, filter);
879  // Merge the local timer data and names into global timer data and
880  // names.
881  timer_map_t globalTimerData;
882  Array<std::string> globalTimerNames;
883  const bool alwaysWriteLocal = false;
884  collectGlobalTimerData (globalTimerData, globalTimerNames,
885  localTimerData, localTimerNames,
886  comm, alwaysWriteLocal, setOp);
887  // Compute statistics on the data.
888  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
889  }
890 
891 
892  void
894  std::ostream& out,
895  const bool alwaysWriteLocal,
896  const bool writeGlobalStats,
897  const bool writeZeroTimers,
898  const ECounterSetOp setOp,
899  const std::string& filter,
900  const bool ignoreZeroTimers)
901  {
902  //
903  // We can't just call computeGlobalTimerStatistics(), since
904  // summarize() has different options that affect whether global
905  // statistics are computed and printed.
906  //
907  const int numProcs = comm->getSize();
908  const int myRank = comm->getRank();
909 
910  // Collect local timer data and names. Filter out timers with
911  // zero call counts if writeZeroTimers is false. Also, apply the
912  // timer label filter at this point, so we don't have to compute
913  // statistics on timers we don't want to display anyway.
914  timer_map_t localTimerData;
915  Array<std::string> localTimerNames;
916  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
917  counters(), writeZeroTimers, filter);
918 
919  // If we're computing global statistics, merge the local timer
920  // data and names into global timer data and names, and compute
921  // global timer statistics. Otherwise, leave the global data
922  // empty.
923  timer_map_t globalTimerData;
924  Array<std::string> globalTimerNames;
925  stat_map_type statData;
926  std::vector<std::string> statNames;
927  if (writeGlobalStats) {
928  collectGlobalTimerData (globalTimerData, globalTimerNames,
929  localTimerData, localTimerNames,
930  comm, alwaysWriteLocal, setOp);
931  // Compute statistics on the data, but only if the communicator
932  // contains more than one process. Otherwise, statistics don't
933  // make sense and we don't print them (see below).
934  if (numProcs > 1) {
935  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
936  }
937  }
938 
939  // Precision of floating-point numbers in the table.
940  const int precision = format().precision();
941  const std::ios_base::fmtflags& flags = out.flags();
942 
943  // All columns of the table, in order.
944  Array<TableColumn> tableColumns;
945 
946  // Labels of all the columns of the table.
947  // We will append to this when we add each column.
948  Array<std::string> titles;
949 
950  // Widths (in number of characters) of each column.
951  // We will append to this when we add each column.
952  Array<int> columnWidths;
953 
954  // Table column containing all timer names. If writeGlobalStats
955  // is true, we use the global timer names, otherwise we use the
956  // local timer names. We build the table on all processes
957  // redundantly, but only print on Rank 0.
958  {
959  titles.append ("Timer Name");
960 
961  // The column labels depend on whether we are computing global statistics.
962  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
963  tableColumns.append (nameCol);
964 
965  // Each column is as wide as it needs to be to hold both its
966  // title and all of the column data. This column's title is the
967  // current last entry of the titles array.
968  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
969  }
970 
971  // Table column containing local timer stats, if applicable. We
972  // only write local stats if asked, only on MPI Proc 0, and only
973  // if there is more than one MPI process in the communicator
974  // (otherwise local stats == global stats, so we just print the
975  // global stats). In this case, we've padded the local data on
976  // Proc 0 if necessary to match the global timer list, so that the
977  // columns have the same number of rows.
978  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
979  titles.append ("Local time (num calls)");
980 
981  // Copy local timer data out of the array-of-structs into
982  // separate arrays, for display in the table.
983  Array<double> localTimings;
984  Array<double> localNumCalls;
985  for (timer_map_t::const_iterator it = localTimerData.begin();
986  it != localTimerData.end(); ++it) {
987  localTimings.push_back (it->second.first);
988  localNumCalls.push_back (static_cast<double> (it->second.second));
989  }
990  TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
991  tableColumns.append (timeAndCalls);
992  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
993  }
994 
995  if (writeGlobalStats) {
996  // If there's only 1 process in the communicator, don't display
997  // statistics; statistics don't make sense in that case. Just
998  // display the timings and call counts. If there's more than 1
999  // process, do display statistics.
1000  if (numProcs == 1) {
1001  // Extract timings and the call counts from globalTimerData.
1002  Array<double> globalTimings;
1003  Array<double> globalNumCalls;
1004  for (timer_map_t::const_iterator it = globalTimerData.begin();
1005  it != globalTimerData.end(); ++it) {
1006  globalTimings.push_back (it->second.first);
1007  globalNumCalls.push_back (static_cast<double> (it->second.second));
1008  }
1009  // Print the table column.
1010  titles.append ("Global time (num calls)");
1011  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1012  tableColumns.append (timeAndCalls);
1013  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1014  }
1015  else { // numProcs > 1
1016  // Print a table column for each statistic. statNames and
1017  // each value in statData use the same ordering, so we can
1018  // iterate over valid indices of statNames to display the
1019  // statistics in the right order.
1020  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1021  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1022  // Extract lists of timings and their call counts for the
1023  // current statistic.
1024  Array<double> statTimings (numGlobalTimers);
1025  Array<double> statCallCounts (numGlobalTimers);
1026  stat_map_type::const_iterator it = statData.begin();
1027  for (int k = 0; it != statData.end(); ++it, ++k) {
1028  statTimings[k] = (it->second[statInd]).first;
1029  statCallCounts[k] = (it->second[statInd]).second;
1030  }
1031  // Print the table column.
1032  const std::string& statisticName = statNames[statInd];
1033  const std::string titleString = statisticName;
1034  titles.append (titleString);
1035  TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1036  tableColumns.append (timeAndCalls);
1037  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1038  }
1039  }
1040  }
1041 
1042  // Print the whole table to the given output stream on MPI Rank 0.
1043  format().setColumnWidths (columnWidths);
1044  if (myRank == 0) {
1045  std::ostringstream theTitle;
1046  theTitle << "TimeMonitor results over " << numProcs << " processor"
1047  << (numProcs > 1 ? "s" : "");
1048  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1049  }
1050  }
1051 
1052  void
1053  TimeMonitor::summarize (std::ostream &out,
1054  const bool alwaysWriteLocal,
1055  const bool writeGlobalStats,
1056  const bool writeZeroTimers,
1057  const ECounterSetOp setOp,
1058  const std::string& filter,
1059  const bool ignoreZeroTimers)
1060  {
1061  // The default communicator. If Trilinos was built with MPI
1062  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1063  // be a "serial" (no MPI, one "process") communicator.
1064  RCP<const Comm<int> > comm = getDefaultComm();
1065 
1066  summarize (comm.ptr(), out, alwaysWriteLocal,
1067  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1068  }
1069 
1070  void
1072  std::vector<std::string>& statNames,
1073  const ECounterSetOp setOp,
1074  const std::string& filter)
1075  {
1076  // The default communicator. If Trilinos was built with MPI
1077  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1078  // be a "serial" (no MPI, one "process") communicator.
1079  RCP<const Comm<int> > comm = getDefaultComm();
1080 
1081  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1082  }
1083 
1084  SyncTimeMonitor::SyncTimeMonitor(Time& timer, Ptr<const Comm<int> > comm, bool reset)
1085  : TimeMonitor(timer, reset), comm_(comm)
1086  { }
1087 
1089  comm_->barrier();
1090  }
1091 
1092 
1093  namespace {
1117  std::string
1118  quoteLabelForYaml (const std::string& label)
1119  {
1120  // YAML allows empty keys in key: value pairs. See Section 7.2
1121  // of the YAML 1.2 spec. We thus let an empty label pass
1122  // through without quoting or other special treatment.
1123  if (label.empty ()) {
1124  return label;
1125  }
1126 
1127  // Check whether the label is already quoted. If so, we don't
1128  // need to quote it again. However, we do need to quote any
1129  // quote symbols in the string inside the outer quotes.
1130  const bool alreadyQuoted = label.size () >= 2 &&
1131  label[0] == '"' && label[label.size() - 1] == '"';
1132 
1133  // We need to quote if there are any colons or (inner) quotes in
1134  // the string. We'll determine this as we read through the
1135  // string and escape any characters that need escaping.
1136  bool needToQuote = false;
1137 
1138  std::string out; // To fill with the return value
1139  out.reserve (label.size ());
1140 
1141  const size_t startPos = alreadyQuoted ? 1 : 0;
1142  const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
1143  for (size_t i = startPos; i < endPos; ++i) {
1144  const char c = label[i];
1145  if (c == '"' || c == '\\') {
1146  out.push_back ('\\'); // Escape the quote or backslash.
1147  needToQuote = true;
1148  }
1149  else if (c == ':') {
1150  needToQuote = true;
1151  }
1152  out.push_back (c);
1153  }
1154 
1155  if (needToQuote || alreadyQuoted) {
1156  // If the input string was already quoted, then out doesn't
1157  // include its quotes, so we have to add them back in.
1158  return "\"" + out + "\"";
1159  }
1160  else {
1161  return out;
1162  }
1163  }
1164 
1165  } // namespace (anonymous)
1166 
1167 
1168  void TimeMonitor::
1169  summarizeToYaml (Ptr<const Comm<int> > comm,
1170  std::ostream &out,
1171  const ETimeMonitorYamlFormat yamlStyle,
1172  const std::string& filter)
1173  {
1174  using Teuchos::FancyOStream;
1175  using Teuchos::fancyOStream;
1176  using Teuchos::getFancyOStream;
1177  using Teuchos::OSTab;
1178  using Teuchos::RCP;
1179  using Teuchos::rcpFromRef;
1180  using std::endl;
1181  typedef std::vector<std::string>::size_type size_type;
1182 
1183  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1184 
1185  // const bool writeGlobalStats = true;
1186  // const bool writeZeroTimers = true;
1187  // const bool alwaysWriteLocal = false;
1188  const ECounterSetOp setOp = Intersection;
1189 
1190  stat_map_type statData;
1191  std::vector<std::string> statNames;
1192  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1193 
1194  const int numProcs = comm->getSize();
1195 
1196  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1197  // " as the line prefix does not work, else I would prefer that
1198  // method for printing each line of a YAML block sequence (see
1199  // Section 8.2.1 of the YAML 1.2 spec).
1200  //
1201  // Also, I have to set the tab indent string here, rather than in
1202  // OSTab's constructor. This is because line prefix (which for
1203  // some reason is what OSTab's constructor takes, rather than tab
1204  // indent string) means something different from tab indent
1205  // string, and turning on the line prefix prints all sorts of
1206  // things including "|" for some reason.
1207  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1208  pfout->setTabIndentStr (" ");
1209  FancyOStream& fout = *pfout;
1210 
1211  fout << "# Teuchos::TimeMonitor report" << endl
1212  << "---" << endl;
1213 
1214  // mfh 19 Aug 2012: An important goal of our chosen output format
1215  // was to minimize the nesting depth. We have managed to keep the
1216  // nesting depth to 3, which is the limit that the current version
1217  // of PylotDB imposes for its YAML input.
1218 
1219  // Outermost level is a dictionary. (Individual entries of a
1220  // dictionary do _not_ begin with "- ".) We always print the
1221  // outermost level in standard style, not flow style, for better
1222  // readability. We begin the outermost level with metadata.
1223  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1224  << "Number of processes: " << numProcs << endl
1225  << "Time unit: s" << endl;
1226  // For a key: value pair where the value is a sequence or
1227  // dictionary on the following line, YAML requires a space after
1228  // the colon.
1229  fout << "Statistics collected: ";
1230  // Print list of the names of all the statistics we collected.
1231  if (compact) {
1232  fout << " [";
1233  for (size_type i = 0; i < statNames.size (); ++i) {
1234  fout << quoteLabelForYaml (statNames[i]);
1235  if (i + 1 < statNames.size ()) {
1236  fout << ", ";
1237  }
1238  }
1239  fout << "]" << endl;
1240  }
1241  else {
1242  fout << endl;
1243  OSTab tab1 (pfout);
1244  for (size_type i = 0; i < statNames.size (); ++i) {
1245  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1246  }
1247  }
1248 
1249  // Print the list of timer names.
1250  //
1251  // It might be nicer instead to print a map from timer name to all
1252  // of its data, but keeping the maximum nesting depth small
1253  // ensures better compatibility with different parsing tools.
1254  fout << "Timer names: ";
1255  if (compact) {
1256  fout << " [";
1257  size_type ind = 0;
1258  for (stat_map_type::const_iterator it = statData.begin();
1259  it != statData.end(); ++it, ++ind) {
1260  fout << quoteLabelForYaml (it->first);
1261  if (ind + 1 < statData.size ()) {
1262  fout << ", ";
1263  }
1264  }
1265  fout << "]" << endl;
1266  }
1267  else {
1268  fout << endl;
1269  OSTab tab1 (pfout);
1270  for (stat_map_type::const_iterator it = statData.begin();
1271  it != statData.end(); ++it) {
1272  fout << "- " << quoteLabelForYaml (it->first) << endl;
1273  }
1274  }
1275 
1276  // Print times for each timer, as a map from statistic name to its time.
1277  fout << "Total times: ";
1278  if (compact) {
1279  fout << " {";
1280  size_type outerInd = 0;
1281  for (stat_map_type::const_iterator outerIter = statData.begin();
1282  outerIter != statData.end(); ++outerIter, ++outerInd) {
1283  // Print timer name.
1284  fout << quoteLabelForYaml (outerIter->first) << ": ";
1285  // Print that timer's data.
1286  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1287  fout << "{";
1288  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1289  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1290  << curData[innerInd].first;
1291  if (innerInd + 1 < curData.size ()) {
1292  fout << ", ";
1293  }
1294  }
1295  fout << "}";
1296  if (outerInd + 1 < statData.size ()) {
1297  fout << ", ";
1298  }
1299  }
1300  fout << "}" << endl;
1301  }
1302  else {
1303  fout << endl;
1304  OSTab tab1 (pfout);
1305  size_type outerInd = 0;
1306  for (stat_map_type::const_iterator outerIter = statData.begin();
1307  outerIter != statData.end(); ++outerIter, ++outerInd) {
1308  // Print timer name.
1309  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1310  // Print that timer's data.
1311  OSTab tab2 (pfout);
1312  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1313  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1314  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1315  << curData[innerInd].first << endl;
1316  }
1317  }
1318  }
1319 
1320  // Print call counts for each timer, for each statistic name.
1321  fout << "Call counts:";
1322  if (compact) {
1323  fout << " {";
1324  size_type outerInd = 0;
1325  for (stat_map_type::const_iterator outerIter = statData.begin();
1326  outerIter != statData.end(); ++outerIter, ++outerInd) {
1327  // Print timer name.
1328  fout << quoteLabelForYaml (outerIter->first) << ": ";
1329  // Print that timer's data.
1330  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1331  fout << "{";
1332  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1333  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1334  << curData[innerInd].second;
1335  if (innerInd + 1 < curData.size ()) {
1336  fout << ", ";
1337  }
1338  }
1339  fout << "}";
1340  if (outerInd + 1 < statData.size ()) {
1341  fout << ", ";
1342  }
1343  }
1344  fout << "}" << endl;
1345  }
1346  else {
1347  fout << endl;
1348  OSTab tab1 (pfout);
1349  size_type outerInd = 0;
1350  for (stat_map_type::const_iterator outerIter = statData.begin();
1351  outerIter != statData.end(); ++outerIter, ++outerInd) {
1352  // Print timer name.
1353  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1354  // Print that timer's data.
1355  OSTab tab2 (pfout);
1356  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1357  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1358  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1359  << curData[innerInd].second << endl;
1360  }
1361  }
1362  }
1363  }
1364 
1365  void TimeMonitor::
1366  summarizeToYaml (std::ostream &out,
1367  const ETimeMonitorYamlFormat yamlStyle,
1368  const std::string& filter)
1369  {
1370  // The default communicator. If Trilinos was built with MPI
1371  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1372  // be a "serial" (no MPI, one "process") communicator.
1373  RCP<const Comm<int> > comm = getDefaultComm ();
1374 
1375  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1376  }
1377 
1378  // Default value is false. We'll set to true once
1379  // setReportParameters() completes successfully.
1380  bool TimeMonitor::setParams_ = false;
1381 
1382  // We have to declare all of these here in order to avoid linker errors.
1383  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1384  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1385  ECounterSetOp TimeMonitor::setOp_ = Intersection;
1386  bool TimeMonitor::alwaysWriteLocal_ = false;
1387  bool TimeMonitor::writeGlobalStats_ = true;
1388  bool TimeMonitor::writeZeroTimers_ = true;
1389 
1390  void
1391  TimeMonitor::setReportFormatParameter (ParameterList& plist)
1392  {
1393  const std::string name ("Report format");
1394  const std::string defaultValue ("Table");
1395  const std::string docString ("Output format for report of timer statistics");
1396  Array<std::string> strings;
1397  Array<std::string> docs;
1398  Array<ETimeMonitorReportFormat> values;
1399 
1400  strings.push_back ("YAML");
1401  docs.push_back ("YAML (see yaml.org) format");
1402  values.push_back (REPORT_FORMAT_YAML);
1403  strings.push_back ("Table");
1404  docs.push_back ("Tabular format via Teuchos::TableFormat");
1405  values.push_back (REPORT_FORMAT_TABLE);
1406 
1407  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1408  docString,
1409  strings (), docs (),
1410  values (), &plist);
1411  }
1412 
1413  void
1414  TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1415  {
1416  const std::string name ("YAML style");
1417  const std::string defaultValue ("spacious");
1418  const std::string docString ("YAML-specific output format");
1419  Array<std::string> strings;
1420  Array<std::string> docs;
1421  Array<ETimeMonitorYamlFormat> values;
1422 
1423  strings.push_back ("compact");
1424  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1425  "yaml.org) for most sequences except the outermost sequence");
1426  values.push_back (YAML_FORMAT_COMPACT);
1427 
1428  strings.push_back ("spacious");
1429  docs.push_back ("Spacious format: avoid flow style");
1430  values.push_back (YAML_FORMAT_SPACIOUS);
1431 
1432  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1433  docString,
1434  strings (), docs (),
1435  values (), &plist);
1436  }
1437 
1438  void
1439  TimeMonitor::setSetOpParameter (ParameterList& plist)
1440  {
1441  const std::string name ("How to merge timer sets");
1442  const std::string defaultValue ("Intersection");
1443  const std::string docString ("How to merge differing sets of timers "
1444  "across processes");
1445  Array<std::string> strings;
1446  Array<std::string> docs;
1447  Array<ECounterSetOp> values;
1448 
1449  strings.push_back ("Intersection");
1450  docs.push_back ("Compute intersection of timer sets over processes");
1451  values.push_back (Intersection);
1452  strings.push_back ("Union");
1453  docs.push_back ("Compute union of timer sets over processes");
1454  values.push_back (Union);
1455 
1456  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1457  strings (), docs (), values (),
1458  &plist);
1459  }
1460 
1461  void
1463  {
1464  stackedTimer_ = t;
1465  }
1466 
1469  {
1470  return stackedTimer_;
1471  }
1472 
1475  {
1476  // Our implementation favors recomputation over persistent
1477  // storage. That is, we simply recreate the list every time we
1478  // need it.
1479  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1480 
1481  const bool alwaysWriteLocal = false;
1482  const bool writeGlobalStats = true;
1483  const bool writeZeroTimers = true;
1484 
1485  setReportFormatParameter (*plist);
1486  setYamlFormatParameter (*plist);
1487  setSetOpParameter (*plist);
1488  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1489  "Always output local timers' values on Proc 0");
1490  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1491  "statistics, even if there is only one process in the "
1492  "communicator");
1493  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1494  "timers that have never been called");
1495 
1496  return rcp_const_cast<const ParameterList> (plist);
1497  }
1498 
1499  void
1500  TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1501  {
1502  ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1503  ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1504  ECounterSetOp setOp = Intersection;
1505  bool alwaysWriteLocal = false;
1506  bool writeGlobalStats = true;
1507  bool writeZeroTimers = true;
1508 
1509  if (params.is_null ()) {
1510  // If we've set parameters before, leave their current values.
1511  // Otherwise, set defaults (below).
1512  if (setParams_) {
1513  return;
1514  }
1515  }
1516  else { // params is nonnull. Let's read it!
1517  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1518 
1519  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1520  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1521  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1522  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1523  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1524  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1525  }
1526  // Defer setting state until here, to ensure the strong exception
1527  // guarantee for this method (either it throws with no externally
1528  // visible state changes, or it returns normally).
1529  reportFormat_ = reportFormat;
1530  yamlStyle_ = yamlStyle;
1531  setOp_ = setOp;
1532  alwaysWriteLocal_ = alwaysWriteLocal;
1533  writeGlobalStats_ = writeGlobalStats;
1534  writeZeroTimers_ = writeZeroTimers;
1535 
1536  setParams_ = true; // Yay, we successfully set parameters!
1537  }
1538 
1539  void
1541  std::ostream& out,
1542  const std::string& filter,
1543  const RCP<ParameterList>& params)
1544  {
1545  setReportParameters (params);
1546 
1547  if (reportFormat_ == REPORT_FORMAT_YAML) {
1548  summarizeToYaml (comm, out, yamlStyle_, filter);
1549  }
1550  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1551  summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1552  writeZeroTimers_, setOp_, filter);
1553  }
1554  else {
1555  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1556  "Invalid report format. This should never happen; ParameterList "
1557  "validation should have caught this. Please report this bug to the "
1558  "Teuchos developers.");
1559  }
1560  }
1561 
1562  void
1564  std::ostream& out,
1565  const RCP<ParameterList>& params)
1566  {
1567  report (comm, out, "", params);
1568  }
1569 
1570  void
1571  TimeMonitor::report (std::ostream& out,
1572  const std::string& filter,
1573  const RCP<ParameterList>& params)
1574  {
1575  RCP<const Comm<int> > comm = getDefaultComm ();
1576  report (comm.ptr (), out, filter, params);
1577  }
1578 
1579  void
1580  TimeMonitor::report (std::ostream& out,
1581  const RCP<ParameterList>& params)
1582  {
1583  RCP<const Comm<int> > comm = getDefaultComm ();
1584  report (comm.ptr (), out, "", params);
1585  }
1586 
1587 } // namespace Teuchos
SyncTimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
static Teuchos::RCP< Teuchos::StackedTimer > getStackedTimer()
The StackedTimer used by the TimeMonitor.
Array< T > & append(const T &x)
Add a new entry at the end of the array.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
basic_OSTab< char > OSTab
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
const Time & counter() const
Constant access to the instance&#39;s counter reference.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
basic_FancyOStream< char > FancyOStream
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
TimeMonitor()=delete
Default constructor is deleted, since it would be unsafe.
static void disableTimer(const std::string &name)
Disable the timer with the given name.
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
T * get() const
Get the raw C++ pointer to the underlying object.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
This structure defines some basic traits for a scalar field type.
Teuchos version of MPI_MAXLOC.
Base interface class for user-defined reduction operations for objects that use value semantics...
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injection of timing from TimeMonitor-enabled objects.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
~TimeMonitor() override
Destructor: stops the timer.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
A column of TableEntry objects.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
double stop()
Stop the timer, if the timer is enabled (see disable()).
Wall-clock timer.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
int precision() const
Get the precision for writing doubles. Default is 4.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer into which the TimeMonitor will insert timings.
Provides utilities for formatting tabular output.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
const std::string & name() const
The name of this timer.
A list of parameters of arbitrary type.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process...
reference back()
void push_back(const value_type &x)
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
size_type size() const
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
~SyncTimeMonitor() override
Destructor: stops the timer.
Scope guard for Teuchos::Time, with MPI collective timer reporting.
Smart reference counting pointer class for automatic garbage collection.
This class allows one to push and pop timers on and off a stack.
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Common capabilities for collecting and reporting performance data across processors.
same as MinLoc, but don&#39;t allow zero
Simple wrapper class for raw pointers to single objects where no persisting relationship exists...
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
Scope guard for Time, that can compute MPI collective timer statistics.
static void zeroOutTimers()
Reset all global timers to zero.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
bool is_null() const
Returns true if the underlying pointer is null.