Teuchos Package Browser (Single Doxygen Collection)  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Teuchos_TimeMonitor.cpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_TimeMonitor.hpp"
43 #include "Teuchos_CommHelpers.hpp"
44 #include "Teuchos_DefaultComm.hpp"
45 #include "Teuchos_TableColumn.hpp"
46 #include "Teuchos_TableFormat.hpp"
48 #include "Teuchos_ScalarTraits.hpp"
49 #include "Teuchos_StackedTimer.hpp"
50 
51 #include <functional>
52 #include <iomanip>
53 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
54 #include <sstream>
55 #endif
56 
57 namespace Teuchos {
110  template<class Ordinal, class ScalarType, class IndexType>
111  class MaxLoc :
112  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
113  public:
114  void
115  reduce (const Ordinal count,
116  const std::pair<ScalarType, IndexType> inBuffer[],
117  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
118  };
119 
120  template<class Ordinal>
121  class MaxLoc<Ordinal, double, int> :
122  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
123  public:
124  void
125  reduce (const Ordinal count,
126  const std::pair<double, int> inBuffer[],
127  std::pair<double, int> inoutBuffer[]) const
128  {
129  for (Ordinal ind = 0; ind < count; ++ind) {
130  const std::pair<double, int>& in = inBuffer[ind];
131  std::pair<double, int>& inout = inoutBuffer[ind];
132 
133  if (in.first > inout.first) {
134  inout.first = in.first;
135  inout.second = in.second;
136  } else if (in.first < inout.first) {
137  // Don't need to do anything; inout has the values.
138  } else { // equal, or at least one is NaN.
139  inout.first = in.first;
140  inout.second = std::min (in.second, inout.second);
141  }
142  }
143  }
144  };
145 
172  template<class Ordinal, class ScalarType, class IndexType>
173  class MinLoc :
174  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
175  public:
176  void
177  reduce (const Ordinal count,
178  const std::pair<ScalarType, IndexType> inBuffer[],
179  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
180  };
181 
182  template<class Ordinal>
183  class MinLoc<Ordinal, double, int> :
184  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
185  public:
186  void
187  reduce (const Ordinal count,
188  const std::pair<double, int> inBuffer[],
189  std::pair<double, int> inoutBuffer[]) const
190  {
191  for (Ordinal ind = 0; ind < count; ++ind) {
192  const std::pair<double, int>& in = inBuffer[ind];
193  std::pair<double, int>& inout = inoutBuffer[ind];
194 
195  if (in.first < inout.first) {
196  inout.first = in.first;
197  inout.second = in.second;
198  } else if (in.first > inout.first) {
199  // Don't need to do anything; inout has the values.
200  } else { // equal, or at least one is NaN.
201  inout.first = in.first;
202  inout.second = std::min (in.second, inout.second);
203  }
204  }
205  }
206  };
207 
211  template<class Ordinal, class ScalarType, class IndexType>
213  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
214  public:
215  void
216  reduce (const Ordinal count,
217  const std::pair<ScalarType, IndexType> inBuffer[],
218  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
219  };
220 
221  template<class Ordinal>
222  class MinLocNonzero<Ordinal, double, int> :
223  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
224  public:
225  void
226  reduce (const Ordinal count,
227  const std::pair<double, int> inBuffer[],
228  std::pair<double, int> inoutBuffer[]) const
229  {
230  for (Ordinal ind = 0; ind < count; ++ind) {
231  const std::pair<double, int>& in = inBuffer[ind];
232  std::pair<double, int>& inout = inoutBuffer[ind];
233 
234  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
235  inout.first = in.first;
236  inout.second = in.second;
237  } else if (in.first > inout.first) {
238  // Don't need to do anything; inout has the values.
239  } else { // equal, or at least one is NaN.
240  inout.first = in.first;
241  inout.second = std::min (in.second, inout.second);
242  }
243  }
244  }
245  };
246 
247  // Typedef used internally by TimeMonitor::summarize() and its
248  // helper functions. The map is keyed on timer label (a string).
249  // Each value is a pair: (total number of seconds over all calls to
250  // that timer, total number of calls to that timer).
251  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
252 
253  // static initialization
255 
256  TimeMonitor::TimeMonitor (Time& timer, bool reset)
257  : PerformanceMonitorBase<Time>(timer, reset)
258  {
259  if (!isRecursiveCall()) {
260  counter().start(reset);
261 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
262  if (nonnull(stackedTimer_))
263  stackedTimer_->start(counter().name(),false);
264 #endif
265  }
266  }
267 
269  if (!isRecursiveCall()) {
270  counter().stop();
271 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
272  try {
273  if (nonnull(stackedTimer_))
274  stackedTimer_->stop(counter().name(),false);
275  }
276  catch (std::runtime_error&) {
277  std::ostringstream warning;
278  warning <<
279  "\n*********************************************************************\n"
280  "WARNING: Overlapping timers detected! Near: " <<counter().name()<<"\n"
281  "A TimeMonitor timer was stopped before a nested subtimer was\n"
282  "stopped. This is not allowed by the StackedTimer. This corner case\n"
283  "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
284  "assigned to a new timer. To disable this warning, either fix the\n"
285  "ordering of timer creation and destuction or disable the StackedTimer\n"
286  "support in the TimeMonitor by setting the StackedTimer to null\n"
287  "Example:\n"
288  " RCP<TimeMonitor> MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"Junk\"))));\n"
289  "///code to time \n"
290  "MM = Teuchos::null;\n"
291  "MM = rcp(new TimeMonitor(*(TimeMonitor::getNewTimer(\"SecondJunk\"))));\n"
292  "*********************************************************************\n";
293  std::cout << warning.str() << std::endl;
295  }
296 #endif
297  }
298  }
299 
300  void
301  TimeMonitor::disableTimer (const std::string& name)
302  {
303  RCP<Time> timer = lookupCounter (name);
305  timer == null, std::invalid_argument,
306  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
307  timer->disable ();
308  }
309 
310  void
311  TimeMonitor::enableTimer (const std::string& name)
312  {
313  RCP<Time> timer = lookupCounter (name);
315  timer == null, std::invalid_argument,
316  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
317  timer->enable ();
318  }
319 
320  void
322  {
323  typedef std::map<std::string, RCP<Time> > map_type;
324  typedef map_type::iterator iter_type;
325  map_type& ctrs = counters ();
326 
327  // In debug mode, loop first to check whether any of the timers
328  // are running, before resetting them. This ensures that this
329  // method satisfies the strong exception guarantee (either it
330  // completes normally, or there are no side effects).
331 #ifdef TEUCHOS_DEBUG
332  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
333  // We throw a runtime_error rather than a logic_error, because
334  // logic_error suggests a bug in the implementation of
335  // TimeMonitor. Calling zeroOutTimers() when a timer is running
336  // is not TimeMonitor's fault.
338  it->second->isRunning (), std::runtime_error,
339  "Timer \"" << it->second->name () << "\" is currently running. "
340  "You are not allowed to reset running timers.");
341  }
342 #endif // TEUCHOS_DEBUG
343 
344  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
345  it->second->reset ();
346  }
347  }
348 
349  // An anonymous namespace is the standard way of limiting linkage of
350  // its contained routines to file scope.
351  namespace {
352  // \brief Return an "empty" local timer datum.
353  //
354  // "Empty" means the datum has zero elapsed time and zero call
355  // count. This function does not actually create a timer.
356  //
357  // \param name The timer's name.
358  std::pair<std::string, std::pair<double, int> >
359  makeEmptyTimerDatum (const std::string& name)
360  {
361  return std::make_pair (name, std::make_pair (double(0), int(0)));
362  }
363 
364  // \fn collectLocalTimerData
365  // \brief Collect and sort local timer data by timer names.
366  //
367  // \param localData [out] Map whose keys are the timer names, and
368  // whose value for each key is the total elapsed time (in
369  // seconds) and the call count for the timer with that name.
370  //
371  // \param localCounters [in] Timers from which to extract data.
372  //
373  // \param filter [in] Filter for timer labels. If filter is not
374  // empty, this method will only collect data for local timers
375  // whose labels begin with this string.
376  //
377  // Extract the total elapsed time and call count from each timer
378  // in the given array. Merge results for timers with duplicate
379  // labels, by summing their total elapsed times and call counts
380  // pairwise.
381  void
382  collectLocalTimerData (timer_map_t& localData,
383  const std::map<std::string, RCP<Time> >& localCounters,
384  const std::string& filter="")
385  {
386  using std::make_pair;
387  typedef timer_map_t::iterator iter_t;
388 
389  timer_map_t theLocalData;
390  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
391  it != localCounters.end(); ++it) {
392  const std::string& name = it->second->name ();
393 
394  // Filter current timer name, if provided filter is nonempty.
395  // Filter string must _start_ the timer label, not just be in it.
396  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
397  if (! skipThisOne) {
398  const double timing = it->second->totalElapsedTime ();
399  const int numCalls = it->second->numCalls ();
400 
401  // Merge timers with duplicate labels, by summing their
402  // total elapsed times and call counts.
403  iter_t loc = theLocalData.find (name);
404  if (loc == theLocalData.end()) {
405  // Use loc as an insertion location hint.
406  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
407  }
408  else {
409  loc->second.first += timing;
410  loc->second.second += numCalls;
411  }
412  }
413  }
414  // This avoids copying the map, and also makes this method
415  // satisfy the strong exception guarantee.
416  localData.swap (theLocalData);
417  }
418 
419  // \brief Locally filter out timer data with zero call counts.
420  //
421  // \param timerData [in/out]
422  void
423  filterZeroData (timer_map_t& timerData)
424  {
425  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
426  // iterator hint, instead of rebuilding the map completely.
427  timer_map_t newTimerData;
428  for (timer_map_t::const_iterator it = timerData.begin();
429  it != timerData.end(); ++it) {
430  if (it->second.second > 0) {
431  newTimerData[it->first] = it->second;
432  }
433  }
434  timerData.swap (newTimerData);
435  }
436 
458  void
459  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
460  Array<std::string>& localTimerNames,
461  const std::map<std::string, RCP<Time> >& localTimers,
462  const bool writeZeroTimers,
463  const std::string& filter="")
464  {
465  // Collect and sort local timer data by timer names.
466  collectLocalTimerData (localTimerData, localTimers, filter);
467 
468  // Filter out zero data locally first. This ensures that if we
469  // are writing global stats, and if a timer name exists in the
470  // set of global names, then that timer has a nonzero call count
471  // on at least one MPI process.
472  if (! writeZeroTimers) {
473  filterZeroData (localTimerData);
474  }
475 
476  // Extract the set of local timer names. The std::map keeps
477  // them sorted alphabetically.
478  localTimerNames.reserve (localTimerData.size());
479  for (timer_map_t::const_iterator it = localTimerData.begin();
480  it != localTimerData.end(); ++it) {
481  localTimerNames.push_back (it->first);
482  }
483  }
484 
519  void
520  collectGlobalTimerData (timer_map_t& globalTimerData,
521  Array<std::string>& globalTimerNames,
522  timer_map_t& localTimerData,
523  Array<std::string>& localTimerNames,
524  Ptr<const Comm<int> > comm,
525  const bool alwaysWriteLocal,
526  const ECounterSetOp setOp)
527  {
528  // There may be some global timers that are not local timers on
529  // the calling MPI process(es). In that case, if
530  // alwaysWriteLocal is true, then we need to fill in the
531  // "missing" local timers. That will ensure that both global
532  // and local timer columns in the output table have the same
533  // number of rows. The collectLocalTimerDataAndNames() method
534  // may have already filtered out local timers with zero call
535  // counts (if its writeZeroTimers argument was false), but we
536  // won't be filtering again. Thus, any local timer data we
537  // insert here won't get filtered out.
538  //
539  // Note that calling summarize() with writeZeroTimers == false
540  // will still do what it says, even if we insert local timers
541  // with zero call counts here.
542 
543  // This does the correct and inexpensive thing (just copies the
544  // timer data) if numProcs == 1. Otherwise, it initiates a
545  // communication with \f$O(\log P)\f$ messages along the
546  // critical path, where \f$P\f$ is the number of participating
547  // processes.
548  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
549 
550 #ifdef TEUCHOS_DEBUG
551  {
552  // Sanity check that all processes have the name number of
553  // global timer names.
554  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
555  timer_map_t::size_type minNumGlobalNames = 0;
556  timer_map_t::size_type maxNumGlobalNames = 0;
557  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
558  outArg (minNumGlobalNames));
559  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
560  outArg (maxNumGlobalNames));
561  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
562  std::logic_error, "Min # global timer names = " << minNumGlobalNames
563  << " != max # global timer names = " << maxNumGlobalNames
564  << ". Please report this bug to the Teuchos developers.");
565  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
566  std::logic_error, "My # global timer names = " << myNumGlobalNames
567  << " != min # global timer names = " << minNumGlobalNames
568  << ". Please report this bug to the Teuchos developers.");
569  }
570 #endif // TEUCHOS_DEBUG
571 
572  // mergeCounterNames() just merges the counters' names, not
573  // their actual data. Now we need to fill globalTimerData with
574  // this process' timer data for the timers in globalTimerNames.
575  //
576  // All processes need the full list of global timers, since
577  // there may be some global timers that are not local timers.
578  // That's why mergeCounterNames() has to be an all-reduce, not
579  // just a reduction to Proc 0.
580  //
581  // Insertion optimization: if the iterator given to map::insert
582  // points right before where we want to insert, insertion is
583  // O(1). globalTimerNames is sorted, so feeding the iterator
584  // output of map::insert into the next invocation's input should
585  // make the whole insertion O(N) where N is the number of
586  // entries in globalTimerNames.
587  timer_map_t::iterator globalMapIter = globalTimerData.begin();
588  timer_map_t::iterator localMapIter;
589  for (Array<string>::const_iterator it = globalTimerNames.begin();
590  it != globalTimerNames.end(); ++it) {
591  const std::string& globalName = *it;
592  localMapIter = localTimerData.find (globalName);
593 
594  if (localMapIter == localTimerData.end()) {
595  if (alwaysWriteLocal) {
596  // If there are some global timers that are not local
597  // timers, and if we want to print local timers, we insert
598  // a local timer datum with zero elapsed time and zero
599  // call count into localTimerData as well. This will
600  // ensure that both global and local timer columns in the
601  // output table have the same number of rows.
602  //
603  // We really only need to do this on Proc 0, which is the
604  // only process that currently may print local timers.
605  // However, we do it on all processes, just in case
606  // someone later wants to modify this function to print
607  // out local timer data for some process other than Proc
608  // 0. This extra computation won't affect the cost along
609  // the critical path, for future computations in which
610  // Proc 0 participates.
611  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
612 
613  // Make sure the missing global name gets added to the
614  // list of local names. We'll re-sort the list of local
615  // names below.
616  localTimerNames.push_back (globalName);
617  }
618  // There's a global timer that's not a local timer. Add it
619  // to our pre-merge version of the global timer data so that
620  // we can safely merge the global timer data later.
621  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
622  }
623  else {
624  // We have this global timer name in our local timer list.
625  // Fill in our pre-merge version of the global timer data
626  // with our local data.
627  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
628  }
629  }
630 
631  if (alwaysWriteLocal) {
632  // Re-sort the list of local timer names, since we may have
633  // inserted "missing" names above.
634  std::sort (localTimerNames.begin(), localTimerNames.end());
635  }
636 
637 #ifdef TEUCHOS_DEBUG
638  {
639  // Sanity check that all processes have the name number of
640  // global timers.
641  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
642  timer_map_t::size_type minNumGlobalTimers = 0;
643  timer_map_t::size_type maxNumGlobalTimers = 0;
644  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
645  outArg (minNumGlobalTimers));
646  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
647  outArg (maxNumGlobalTimers));
648  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
649  std::logic_error, "Min # global timers = " << minNumGlobalTimers
650  << " != max # global timers = " << maxNumGlobalTimers
651  << ". Please report this bug to the Teuchos developers.");
652  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
653  std::logic_error, "My # global timers = " << myNumGlobalTimers
654  << " != min # global timers = " << minNumGlobalTimers
655  << ". Please report this bug to the Teuchos developers.");
656  }
657 #endif // TEUCHOS_DEBUG
658  }
659 
706  void
707  computeGlobalTimerStats (stat_map_type& statData,
708  std::vector<std::string>& statNames,
709  Ptr<const Comm<int> > comm,
710  const timer_map_t& globalTimerData,
711  const bool ignoreZeroTimers)
712  {
713  using Teuchos::ScalarTraits;
714 
715  const int numTimers = static_cast<int> (globalTimerData.size());
716  const int numProcs = comm->getSize();
717 
718  // Extract pre-reduction timings and call counts into a
719  // sequential array. This array will be in the same order as
720  // the global timer names are in the map.
721  Array<std::pair<double, int> > timingsAndCallCounts;
722  timingsAndCallCounts.reserve (numTimers);
723  for (timer_map_t::const_iterator it = globalTimerData.begin();
724  it != globalTimerData.end(); ++it) {
725  timingsAndCallCounts.push_back (it->second);
726  }
727 
728  // For each timer name, compute the min timing and its
729  // corresponding call count. If two processes have the same
730  // timing but different call counts, the minimum call count will
731  // be used.
732  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
733  if (numTimers > 0) {
734  if (ignoreZeroTimers)
735  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
736  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
737  else
738  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
739  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
740  }
741 
742  // For each timer name, compute the max timing and its
743  // corresponding call count. If two processes have the same
744  // timing but different call counts, the minimum call count will
745  // be used.
746  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
747  if (numTimers > 0) {
748  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
749  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
750  }
751 
752  // For each timer name, compute the mean-over-processes timing,
753  // the mean call count, and the mean-over-call-counts timing.
754  // The mean call count is reported as a double to allow a
755  // fractional value.
756  //
757  // Each local timing is really the total timing over all local
758  // invocations. The number of local invocations is the call
759  // count. Thus, the mean-over-call-counts timing is the sum of
760  // all the timings (over all processes), divided by the sum of
761  // all the call counts (over all processes). We compute it in a
762  // different way to over unnecessary overflow.
763  Array<double> meanOverCallCountsTimings (numTimers);
764  Array<double> meanOverProcsTimings (numTimers);
765  Array<double> meanCallCounts (numTimers);
766  Array<int> ICallThisTimer (numTimers);
767  Array<int> numProcsCallingEachTimer (numTimers);
768  {
769  // Figure out how many processors actually call each timer.
770  if (ignoreZeroTimers) {
771  for (int k = 0; k < numTimers; ++k) {
772  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
773  if (callCount > 0) ICallThisTimer[k] = 1;
774  else ICallThisTimer[k] = 0;
775  }
776  if (numTimers > 0) {
777  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
778  &numProcsCallingEachTimer[0]);
779  }
780  }
781 
782  // When summing, first scale by the number of processes. This
783  // avoids unnecessary overflow, and also gives us the mean
784  // call count automatically.
785  Array<double> scaledTimings (numTimers);
786  Array<double> scaledCallCounts (numTimers);
787  const double P = static_cast<double> (numProcs);
788 
789  if (ignoreZeroTimers) {
790  for (int k = 0; k < numTimers; ++k) {
791  const double timing = timingsAndCallCounts[k].first;
792  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
793 
794  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
795  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
796  }
797  }
798  else {
799  for (int k = 0; k < numTimers; ++k) {
800  const double timing = timingsAndCallCounts[k].first;
801  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
802 
803  scaledTimings[k] = timing / P;
804  scaledCallCounts[k] = callCount / P;
805  }
806  }
807 
808  if (numTimers > 0) {
809  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
810  &meanOverProcsTimings[0]);
811  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
812  &meanCallCounts[0]);
813  }
814  // We don't have to undo the scaling for the mean timings;
815  // just divide by the scaled call count.
816  for (int k = 0; k < numTimers; ++k) {
817  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
818  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
819  }
820  else {
821  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
822  }
823  }
824  }
825 
826  // Reformat the data into the map of statistics. Be sure that
827  // each value (the std::vector of (timing, call count) pairs,
828  // each entry of which is a different statistic) preserves the
829  // order of statNames.
830  statNames.resize (4);
831  statNames[0] = "MinOverProcs";
832  statNames[1] = "MeanOverProcs";
833  statNames[2] = "MaxOverProcs";
834  statNames[3] = "MeanOverCallCounts";
835 
836  stat_map_type::iterator statIter = statData.end();
837  timer_map_t::const_iterator it = globalTimerData.begin();
838  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
839  std::vector<std::pair<double, double> > curData (4);
840  curData[0] = minTimingsAndCallCounts[k];
841  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
842  curData[2] = maxTimingsAndCallCounts[k];
843  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
844 
845  // statIter gives an insertion location hint that makes each
846  // insertion O(1), since we remember the location of the last
847  // insertion.
848  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
849  }
850  }
851 
852 
869  RCP<const Comm<int> >
870  getDefaultComm ()
871  {
872  // The default communicator. If Trilinos was built with MPI
873  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
874  // been initialized, it's not valid to use the communicator!)
875  // Otherwise, this should be a "serial" (no MPI, one "process")
876  // communicator.
877  RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
878 
879 #ifdef HAVE_MPI
880  {
881  int mpiHasBeenStarted = 0;
882  MPI_Initialized (&mpiHasBeenStarted);
883  if (! mpiHasBeenStarted) {
884  // Make pComm a new "serial communicator."
885  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
886  }
887  }
888 #endif // HAVE_MPI
889  return comm;
890  }
891 
892  } // namespace (anonymous)
893 
894 
895  void
897  std::vector<std::string>& statNames,
898  Ptr<const Comm<int> > comm,
899  const ECounterSetOp setOp,
900  const std::string& filter)
901  {
902  // Collect local timer data and names. Filter out timers with
903  // zero call counts if writeZeroTimers is false. Also, apply the
904  // timer label filter at this point, so we don't have to compute
905  // statistics on timers we don't want to display anyway.
906  timer_map_t localTimerData;
907  Array<std::string> localTimerNames;
908  const bool writeZeroTimers = false;
909  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
910  counters(), writeZeroTimers, filter);
911  // Merge the local timer data and names into global timer data and
912  // names.
913  timer_map_t globalTimerData;
914  Array<std::string> globalTimerNames;
915  const bool alwaysWriteLocal = false;
916  collectGlobalTimerData (globalTimerData, globalTimerNames,
917  localTimerData, localTimerNames,
918  comm, alwaysWriteLocal, setOp);
919  // Compute statistics on the data.
920  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
921  }
922 
923 
924  void
926  std::ostream& out,
927  const bool alwaysWriteLocal,
928  const bool writeGlobalStats,
929  const bool writeZeroTimers,
930  const ECounterSetOp setOp,
931  const std::string& filter,
932  const bool ignoreZeroTimers)
933  {
934  //
935  // We can't just call computeGlobalTimerStatistics(), since
936  // summarize() has different options that affect whether global
937  // statistics are computed and printed.
938  //
939  const int numProcs = comm->getSize();
940  const int myRank = comm->getRank();
941 
942  // Collect local timer data and names. Filter out timers with
943  // zero call counts if writeZeroTimers is false. Also, apply the
944  // timer label filter at this point, so we don't have to compute
945  // statistics on timers we don't want to display anyway.
946  timer_map_t localTimerData;
947  Array<std::string> localTimerNames;
948  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
949  counters(), writeZeroTimers, filter);
950 
951  // If we're computing global statistics, merge the local timer
952  // data and names into global timer data and names, and compute
953  // global timer statistics. Otherwise, leave the global data
954  // empty.
955  timer_map_t globalTimerData;
956  Array<std::string> globalTimerNames;
957  stat_map_type statData;
958  std::vector<std::string> statNames;
959  if (writeGlobalStats) {
960  collectGlobalTimerData (globalTimerData, globalTimerNames,
961  localTimerData, localTimerNames,
962  comm, alwaysWriteLocal, setOp);
963  // Compute statistics on the data, but only if the communicator
964  // contains more than one process. Otherwise, statistics don't
965  // make sense and we don't print them (see below).
966  if (numProcs > 1) {
967  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
968  }
969  }
970 
971  // Precision of floating-point numbers in the table.
972  const int precision = format().precision();
973  const std::ios_base::fmtflags& flags = out.flags();
974 
975  // All columns of the table, in order.
976  Array<TableColumn> tableColumns;
977 
978  // Labels of all the columns of the table.
979  // We will append to this when we add each column.
980  Array<std::string> titles;
981 
982  // Widths (in number of characters) of each column.
983  // We will append to this when we add each column.
984  Array<int> columnWidths;
985 
986  // Table column containing all timer names. If writeGlobalStats
987  // is true, we use the global timer names, otherwise we use the
988  // local timer names. We build the table on all processes
989  // redundantly, but only print on Rank 0.
990  {
991  titles.append ("Timer Name");
992 
993  // The column labels depend on whether we are computing global statistics.
994  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
995  tableColumns.append (nameCol);
996 
997  // Each column is as wide as it needs to be to hold both its
998  // title and all of the column data. This column's title is the
999  // current last entry of the titles array.
1000  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
1001  }
1002 
1003  // Table column containing local timer stats, if applicable. We
1004  // only write local stats if asked, only on MPI Proc 0, and only
1005  // if there is more than one MPI process in the communicator
1006  // (otherwise local stats == global stats, so we just print the
1007  // global stats). In this case, we've padded the local data on
1008  // Proc 0 if necessary to match the global timer list, so that the
1009  // columns have the same number of rows.
1010  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
1011  titles.append ("Local time (num calls)");
1012 
1013  // Copy local timer data out of the array-of-structs into
1014  // separate arrays, for display in the table.
1015  Array<double> localTimings;
1016  Array<double> localNumCalls;
1017  for (timer_map_t::const_iterator it = localTimerData.begin();
1018  it != localTimerData.end(); ++it) {
1019  localTimings.push_back (it->second.first);
1020  localNumCalls.push_back (static_cast<double> (it->second.second));
1021  }
1022  TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
1023  tableColumns.append (timeAndCalls);
1024  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1025  }
1026 
1027  if (writeGlobalStats) {
1028  // If there's only 1 process in the communicator, don't display
1029  // statistics; statistics don't make sense in that case. Just
1030  // display the timings and call counts. If there's more than 1
1031  // process, do display statistics.
1032  if (numProcs == 1) {
1033  // Extract timings and the call counts from globalTimerData.
1034  Array<double> globalTimings;
1035  Array<double> globalNumCalls;
1036  for (timer_map_t::const_iterator it = globalTimerData.begin();
1037  it != globalTimerData.end(); ++it) {
1038  globalTimings.push_back (it->second.first);
1039  globalNumCalls.push_back (static_cast<double> (it->second.second));
1040  }
1041  // Print the table column.
1042  titles.append ("Global time (num calls)");
1043  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1044  tableColumns.append (timeAndCalls);
1045  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1046  }
1047  else { // numProcs > 1
1048  // Print a table column for each statistic. statNames and
1049  // each value in statData use the same ordering, so we can
1050  // iterate over valid indices of statNames to display the
1051  // statistics in the right order.
1052  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1053  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1054  // Extract lists of timings and their call counts for the
1055  // current statistic.
1056  Array<double> statTimings (numGlobalTimers);
1057  Array<double> statCallCounts (numGlobalTimers);
1058  stat_map_type::const_iterator it = statData.begin();
1059  for (int k = 0; it != statData.end(); ++it, ++k) {
1060  statTimings[k] = (it->second[statInd]).first;
1061  statCallCounts[k] = (it->second[statInd]).second;
1062  }
1063  // Print the table column.
1064  const std::string& statisticName = statNames[statInd];
1065  const std::string titleString = statisticName;
1066  titles.append (titleString);
1067  TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1068  tableColumns.append (timeAndCalls);
1069  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1070  }
1071  }
1072  }
1073 
1074  // Print the whole table to the given output stream on MPI Rank 0.
1075  format().setColumnWidths (columnWidths);
1076  if (myRank == 0) {
1077  std::ostringstream theTitle;
1078  theTitle << "TimeMonitor results over " << numProcs << " processor"
1079  << (numProcs > 1 ? "s" : "");
1080  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1081  }
1082  }
1083 
1084  void
1085  TimeMonitor::summarize (std::ostream &out,
1086  const bool alwaysWriteLocal,
1087  const bool writeGlobalStats,
1088  const bool writeZeroTimers,
1089  const ECounterSetOp setOp,
1090  const std::string& filter,
1091  const bool ignoreZeroTimers)
1092  {
1093  // The default communicator. If Trilinos was built with MPI
1094  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1095  // be a "serial" (no MPI, one "process") communicator.
1096  RCP<const Comm<int> > comm = getDefaultComm();
1097 
1098  summarize (comm.ptr(), out, alwaysWriteLocal,
1099  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1100  }
1101 
1102  void
1104  std::vector<std::string>& statNames,
1105  const ECounterSetOp setOp,
1106  const std::string& filter)
1107  {
1108  // The default communicator. If Trilinos was built with MPI
1109  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1110  // be a "serial" (no MPI, one "process") communicator.
1111  RCP<const Comm<int> > comm = getDefaultComm();
1112 
1113  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1114  }
1115 
1116 
1117  namespace {
1141  std::string
1142  quoteLabelForYaml (const std::string& label)
1143  {
1144  // YAML allows empty keys in key: value pairs. See Section 7.2
1145  // of the YAML 1.2 spec. We thus let an empty label pass
1146  // through without quoting or other special treatment.
1147  if (label.empty ()) {
1148  return label;
1149  }
1150 
1151  // Check whether the label is already quoted. If so, we don't
1152  // need to quote it again. However, we do need to quote any
1153  // quote symbols in the string inside the outer quotes.
1154  const bool alreadyQuoted = label.size () >= 2 &&
1155  label[0] == '"' && label[label.size() - 1] == '"';
1156 
1157  // We need to quote if there are any colons or (inner) quotes in
1158  // the string. We'll determine this as we read through the
1159  // string and escape any characters that need escaping.
1160  bool needToQuote = false;
1161 
1162  std::string out; // To fill with the return value
1163  out.reserve (label.size ());
1164 
1165  const size_t startPos = alreadyQuoted ? 1 : 0;
1166  const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
1167  for (size_t i = startPos; i < endPos; ++i) {
1168  const char c = label[i];
1169  if (c == '"' || c == '\\') {
1170  out.push_back ('\\'); // Escape the quote or backslash.
1171  needToQuote = true;
1172  }
1173  else if (c == ':') {
1174  needToQuote = true;
1175  }
1176  out.push_back (c);
1177  }
1178 
1179  if (needToQuote || alreadyQuoted) {
1180  // If the input string was already quoted, then out doesn't
1181  // include its quotes, so we have to add them back in.
1182  return "\"" + out + "\"";
1183  }
1184  else {
1185  return out;
1186  }
1187  }
1188 
1189  } // namespace (anonymous)
1190 
1191 
1192  void TimeMonitor::
1194  std::ostream &out,
1195  const ETimeMonitorYamlFormat yamlStyle,
1196  const std::string& filter)
1197  {
1198  using Teuchos::FancyOStream;
1199  using Teuchos::fancyOStream;
1200  using Teuchos::getFancyOStream;
1201  using Teuchos::OSTab;
1202  using Teuchos::RCP;
1203  using Teuchos::rcpFromRef;
1204  using std::endl;
1205  typedef std::vector<std::string>::size_type size_type;
1206 
1207  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1208 
1209  // const bool writeGlobalStats = true;
1210  // const bool writeZeroTimers = true;
1211  // const bool alwaysWriteLocal = false;
1212  const ECounterSetOp setOp = Intersection;
1213 
1214  stat_map_type statData;
1215  std::vector<std::string> statNames;
1216  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1217 
1218  const int numProcs = comm->getSize();
1219 
1220  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1221  // " as the line prefix does not work, else I would prefer that
1222  // method for printing each line of a YAML block sequence (see
1223  // Section 8.2.1 of the YAML 1.2 spec).
1224  //
1225  // Also, I have to set the tab indent string here, rather than in
1226  // OSTab's constructor. This is because line prefix (which for
1227  // some reason is what OSTab's constructor takes, rather than tab
1228  // indent string) means something different from tab indent
1229  // string, and turning on the line prefix prints all sorts of
1230  // things including "|" for some reason.
1231  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1232  pfout->setTabIndentStr (" ");
1233  FancyOStream& fout = *pfout;
1234 
1235  fout << "# Teuchos::TimeMonitor report" << endl
1236  << "---" << endl;
1237 
1238  // mfh 19 Aug 2012: An important goal of our chosen output format
1239  // was to minimize the nesting depth. We have managed to keep the
1240  // nesting depth to 3, which is the limit that the current version
1241  // of PylotDB imposes for its YAML input.
1242 
1243  // Outermost level is a dictionary. (Individual entries of a
1244  // dictionary do _not_ begin with "- ".) We always print the
1245  // outermost level in standard style, not flow style, for better
1246  // readability. We begin the outermost level with metadata.
1247  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1248  << "Number of processes: " << numProcs << endl
1249  << "Time unit: s" << endl;
1250  // For a key: value pair where the value is a sequence or
1251  // dictionary on the following line, YAML requires a space after
1252  // the colon.
1253  fout << "Statistics collected: ";
1254  // Print list of the names of all the statistics we collected.
1255  if (compact) {
1256  fout << " [";
1257  for (size_type i = 0; i < statNames.size (); ++i) {
1258  fout << quoteLabelForYaml (statNames[i]);
1259  if (i + 1 < statNames.size ()) {
1260  fout << ", ";
1261  }
1262  }
1263  fout << "]" << endl;
1264  }
1265  else {
1266  fout << endl;
1267  OSTab tab1 (pfout);
1268  for (size_type i = 0; i < statNames.size (); ++i) {
1269  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1270  }
1271  }
1272 
1273  // Print the list of timer names.
1274  //
1275  // It might be nicer instead to print a map from timer name to all
1276  // of its data, but keeping the maximum nesting depth small
1277  // ensures better compatibility with different parsing tools.
1278  fout << "Timer names: ";
1279  if (compact) {
1280  fout << " [";
1281  size_type ind = 0;
1282  for (stat_map_type::const_iterator it = statData.begin();
1283  it != statData.end(); ++it, ++ind) {
1284  fout << quoteLabelForYaml (it->first);
1285  if (ind + 1 < statData.size ()) {
1286  fout << ", ";
1287  }
1288  }
1289  fout << "]" << endl;
1290  }
1291  else {
1292  fout << endl;
1293  OSTab tab1 (pfout);
1294  for (stat_map_type::const_iterator it = statData.begin();
1295  it != statData.end(); ++it) {
1296  fout << "- " << quoteLabelForYaml (it->first) << endl;
1297  }
1298  }
1299 
1300  // Print times for each timer, as a map from statistic name to its time.
1301  fout << "Total times: ";
1302  if (compact) {
1303  fout << " {";
1304  size_type outerInd = 0;
1305  for (stat_map_type::const_iterator outerIter = statData.begin();
1306  outerIter != statData.end(); ++outerIter, ++outerInd) {
1307  // Print timer name.
1308  fout << quoteLabelForYaml (outerIter->first) << ": ";
1309  // Print that timer's data.
1310  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1311  fout << "{";
1312  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1313  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1314  << curData[innerInd].first;
1315  if (innerInd + 1 < curData.size ()) {
1316  fout << ", ";
1317  }
1318  }
1319  fout << "}";
1320  if (outerInd + 1 < statData.size ()) {
1321  fout << ", ";
1322  }
1323  }
1324  fout << "}" << endl;
1325  }
1326  else {
1327  fout << endl;
1328  OSTab tab1 (pfout);
1329  size_type outerInd = 0;
1330  for (stat_map_type::const_iterator outerIter = statData.begin();
1331  outerIter != statData.end(); ++outerIter, ++outerInd) {
1332  // Print timer name.
1333  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1334  // Print that timer's data.
1335  OSTab tab2 (pfout);
1336  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1337  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1338  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1339  << curData[innerInd].first << endl;
1340  }
1341  }
1342  }
1343 
1344  // Print call counts for each timer, for each statistic name.
1345  fout << "Call counts:";
1346  if (compact) {
1347  fout << " {";
1348  size_type outerInd = 0;
1349  for (stat_map_type::const_iterator outerIter = statData.begin();
1350  outerIter != statData.end(); ++outerIter, ++outerInd) {
1351  // Print timer name.
1352  fout << quoteLabelForYaml (outerIter->first) << ": ";
1353  // Print that timer's data.
1354  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1355  fout << "{";
1356  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1357  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1358  << curData[innerInd].second;
1359  if (innerInd + 1 < curData.size ()) {
1360  fout << ", ";
1361  }
1362  }
1363  fout << "}";
1364  if (outerInd + 1 < statData.size ()) {
1365  fout << ", ";
1366  }
1367  }
1368  fout << "}" << endl;
1369  }
1370  else {
1371  fout << endl;
1372  OSTab tab1 (pfout);
1373  size_type outerInd = 0;
1374  for (stat_map_type::const_iterator outerIter = statData.begin();
1375  outerIter != statData.end(); ++outerIter, ++outerInd) {
1376  // Print timer name.
1377  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1378  // Print that timer's data.
1379  OSTab tab2 (pfout);
1380  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1381  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1382  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1383  << curData[innerInd].second << endl;
1384  }
1385  }
1386  }
1387  }
1388 
1389  void TimeMonitor::
1390  summarizeToYaml (std::ostream &out,
1391  const ETimeMonitorYamlFormat yamlStyle,
1392  const std::string& filter)
1393  {
1394  // The default communicator. If Trilinos was built with MPI
1395  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1396  // be a "serial" (no MPI, one "process") communicator.
1397  RCP<const Comm<int> > comm = getDefaultComm ();
1398 
1399  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1400  }
1401 
1402  // Default value is false. We'll set to true once
1403  // setReportParameters() completes successfully.
1404  bool TimeMonitor::setParams_ = false;
1405 
1406  // We have to declare all of these here in order to avoid linker errors.
1410  bool TimeMonitor::alwaysWriteLocal_ = false;
1411  bool TimeMonitor::writeGlobalStats_ = true;
1412  bool TimeMonitor::writeZeroTimers_ = true;
1413 
1414  void
1416  {
1417  const std::string name ("Report format");
1418  const std::string defaultValue ("Table");
1419  const std::string docString ("Output format for report of timer statistics");
1420  Array<std::string> strings;
1421  Array<std::string> docs;
1423 
1424  strings.push_back ("YAML");
1425  docs.push_back ("YAML (see yaml.org) format");
1426  values.push_back (REPORT_FORMAT_YAML);
1427  strings.push_back ("Table");
1428  docs.push_back ("Tabular format via Teuchos::TableFormat");
1429  values.push_back (REPORT_FORMAT_TABLE);
1430 
1431  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1432  docString,
1433  strings (), docs (),
1434  values (), &plist);
1435  }
1436 
1437  void
1439  {
1440  const std::string name ("YAML style");
1441  const std::string defaultValue ("spacious");
1442  const std::string docString ("YAML-specific output format");
1443  Array<std::string> strings;
1444  Array<std::string> docs;
1446 
1447  strings.push_back ("compact");
1448  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1449  "yaml.org) for most sequences except the outermost sequence");
1450  values.push_back (YAML_FORMAT_COMPACT);
1451 
1452  strings.push_back ("spacious");
1453  docs.push_back ("Spacious format: avoid flow style");
1455 
1456  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1457  docString,
1458  strings (), docs (),
1459  values (), &plist);
1460  }
1461 
1462  void
1464  {
1465  const std::string name ("How to merge timer sets");
1466  const std::string defaultValue ("Intersection");
1467  const std::string docString ("How to merge differing sets of timers "
1468  "across processes");
1469  Array<std::string> strings;
1470  Array<std::string> docs;
1471  Array<ECounterSetOp> values;
1472 
1473  strings.push_back ("Intersection");
1474  docs.push_back ("Compute intersection of timer sets over processes");
1475  values.push_back (Intersection);
1476  strings.push_back ("Union");
1477  docs.push_back ("Compute union of timer sets over processes");
1478  values.push_back (Union);
1479 
1480  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1481  strings (), docs (), values (),
1482  &plist);
1483  }
1484 
1485  void
1487  {
1488  stackedTimer_ = t;
1489  }
1490 
1493  {
1494  return stackedTimer_;
1495  }
1496 
1499  {
1500  // Our implementation favors recomputation over persistent
1501  // storage. That is, we simply recreate the list every time we
1502  // need it.
1503  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1504 
1505  const bool alwaysWriteLocal = false;
1506  const bool writeGlobalStats = true;
1507  const bool writeZeroTimers = true;
1508 
1509  setReportFormatParameter (*plist);
1510  setYamlFormatParameter (*plist);
1511  setSetOpParameter (*plist);
1512  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1513  "Always output local timers' values on Proc 0");
1514  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1515  "statistics, even if there is only one process in the "
1516  "communicator");
1517  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1518  "timers that have never been called");
1519 
1520  return rcp_const_cast<const ParameterList> (plist);
1521  }
1522 
1523  void
1525  {
1528  ECounterSetOp setOp = Intersection;
1529  bool alwaysWriteLocal = false;
1530  bool writeGlobalStats = true;
1531  bool writeZeroTimers = true;
1532 
1533  if (params.is_null ()) {
1534  // If we've set parameters before, leave their current values.
1535  // Otherwise, set defaults (below).
1536  if (setParams_) {
1537  return;
1538  }
1539  }
1540  else { // params is nonnull. Let's read it!
1541  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1542 
1543  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1544  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1545  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1546  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1547  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1548  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1549  }
1550  // Defer setting state until here, to ensure the strong exception
1551  // guarantee for this method (either it throws with no externally
1552  // visible state changes, or it returns normally).
1553  reportFormat_ = reportFormat;
1554  yamlStyle_ = yamlStyle;
1555  setOp_ = setOp;
1556  alwaysWriteLocal_ = alwaysWriteLocal;
1557  writeGlobalStats_ = writeGlobalStats;
1558  writeZeroTimers_ = writeZeroTimers;
1559 
1560  setParams_ = true; // Yay, we successfully set parameters!
1561  }
1562 
1563  void
1565  std::ostream& out,
1566  const std::string& filter,
1567  const RCP<ParameterList>& params)
1568  {
1569  setReportParameters (params);
1570 
1572  summarizeToYaml (comm, out, yamlStyle_, filter);
1573  }
1574  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1576  writeZeroTimers_, setOp_, filter);
1577  }
1578  else {
1579  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1580  "Invalid report format. This should never happen; ParameterList "
1581  "validation should have caught this. Please report this bug to the "
1582  "Teuchos developers.");
1583  }
1584  }
1585 
1586  void
1588  std::ostream& out,
1589  const RCP<ParameterList>& params)
1590  {
1591  report (comm, out, "", params);
1592  }
1593 
1594  void
1595  TimeMonitor::report (std::ostream& out,
1596  const std::string& filter,
1597  const RCP<ParameterList>& params)
1598  {
1599  RCP<const Comm<int> > comm = getDefaultComm ();
1600  report (comm.ptr (), out, filter, params);
1601  }
1602 
1603  void
1604  TimeMonitor::report (std::ostream& out,
1605  const RCP<ParameterList>& params)
1606  {
1607  RCP<const Comm<int> > comm = getDefaultComm ();
1608  report (comm.ptr (), out, "", params);
1609  }
1610 
1611 } // namespace Teuchos
static bool alwaysWriteLocal_
Whether report() should always report Proc 0&#39;s local timer results.
static void setSetOpParameter(ParameterList &plist)
Add the &quot;How to merge timer sets&quot; parameter to plist.
Array< T > & append(const T &x)
Add a new entry at the end of the array.
static ETimeMonitorYamlFormat yamlStyle_
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
static void setReportFormatParameter(ParameterList &plist)
Add the &quot;Report format&quot; parameter to plist.
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
static ECounterSetOp setOp_
Whether report() should use the intersection or union of timers over processes.
const Time & counter() const
Constant access to the instance&#39;s counter reference.
TimeMonitor(Time &timer, bool reset=false)
Constructor: starts the timer.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
~TimeMonitor()
Destructor: stops the timer.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
static void disableTimer(const std::string &name)
Disable the timer with the given name.
static bool writeZeroTimers_
Whether report() should report timers with zero call counts.
void reduce(const Ordinal count, const std::pair< double, int > inBuffer[], std::pair< double, int > inoutBuffer[]) const
Teuchos version of MPI_MINLOC.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
std::map< std::string, std::pair< double, int > > timer_map_t
T * get() const
Get the raw C++ pointer to the underlying object.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
Tabbing class for helping to create formated, indented output for a basic_FancyOStream object...
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
basic_OSTab< char > OSTab
This structure defines some basic traits for a scalar field type.
Teuchos version of MPI_MAXLOC.
Base interface class for user-defined reduction operations for objects that use value semantics...
void reduce(const Ordinal count, const std::pair< double, int > inBuffer[], std::pair< double, int > inoutBuffer[]) const
static Teuchos::RCP< Teuchos::StackedTimer > stackedTimer_
Stacked timer for optional injetion of timing from TimeMontior enabled objects.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
A column of TableEntry objects.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
double stop()
Stop the timer, if the timer is enabled (see disable()).
std::ostream subclass that performs the magic of indenting data sent to an std::ostream object among ...
Wall-clock timer.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
int precision() const
Get the precision for writing doubles. Default is 4.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
static void summarizeToYaml(Ptr< const Comm< int > > comm, std::ostream &out, const ETimeMonitorYamlFormat yamlStyle, const std::string &filter="")
Like summarize(), but with YAML-format output.
static ETimeMonitorReportFormat reportFormat_
Parameters for the report() class method.
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer that the TimeMonitor will use to insert timings into.
Provides utilities for formatting tabular output.
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
Deprecated .
void reduce(const Ordinal count, const std::pair< double, int > inBuffer[], std::pair< double, int > inoutBuffer[]) const
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
const std::string & name() const
The name of this timer.
A list of parameters of arbitrary type.
static void setReportParameters(const RCP< ParameterList > &params)
Set parameters for report(). Call only from report().
ETimeMonitorReportFormat
Valid output formats for report().
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process...
reference back()
Abstract interface for distributed-memory communication.
basic_FancyOStream< char > FancyOStream
void push_back(const value_type &x)
ETimeMonitorYamlFormat
Valid YAML output formats for report().
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
static const Teuchos::RCP< Teuchos::StackedTimer > & getStackedTimer()
Returns the StackedTimer used by the TimeMonitor.
size_type size() const
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
Scope protection wrapper for Teuchos::Time, with timer reporting functionality.
Smart reference counting pointer class for automatic garbage collection.
This class allows one to push and pop timers on and off a stack.
static bool setParams_
Whether setReportParameters() completed successfully.
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Common capabilities for collecting and reporting performance data across processors.
static void setYamlFormatParameter(ParameterList &plist)
Add the &quot;YAML style&quot; parameter to plist.
same as MinLoc, but don&#39;t allow zero
Simple wrapper class for raw pointers to single objects where no persisting relationship exists...
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
static void zeroOutTimers()
Reset all global timers to zero.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
bool is_null() const
Returns true if the underlying pointer is null.