Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_KokkosTeuchosTimerInjection.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
#include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp"
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_Time.hpp"
#include "Teuchos_RCP.hpp"
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
#include "Teuchos_StackedTimer.hpp"
#include <sstream>
#endif
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
22 
24 
25 
26 
27 namespace {
28  // Get a useful label from the deviceId
29  // NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
30  std::string deviceIdToString(const uint32_t deviceId) {
31  using namespace Kokkos::Tools::Experimental;
32  std::string device_label("(");
33  ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
34  if (eid.type == DeviceType::Serial) device_label+="Serial";
35  else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
36  else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
37  else if (eid.type == DeviceType::HIP) device_label+="HIP";
38  else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
39  else if (eid.type == DeviceType::HPX) device_label+="HPX";
40  else if (eid.type == DeviceType::Threads) device_label+="Threads";
41  else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
42  else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
43  else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
44  else device_label+="Unknown to Tpetra";
45 #if KOKKOS_VERSION >= 40499
46  if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
47  device_label += " All Instances)";
48  else if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
49  device_label += " DeepCopyResource)";
50 #else
51  if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
52  device_label += " All Instances)";
53  else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
54  device_label += " DeepCopyResource)";
55 #endif
56  else
57  device_label += " Instance " + std::to_string(eid.instance_id) + ")";
58 
59  return device_label;
60  }
61 
// Print a banner to std::cout explaining that a timer was stopped while a
// nested subtimer was still running, which the StackedTimer does not allow.
// The text is streamed directly, so no <sstream> is required. The function is
// marked [[maybe_unused]] because its only callers are compiled under
// HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER.
[[maybe_unused]] void overlappingWarning() {
  std::cout <<
    "\n*********************************************************************\n"
    "WARNING: Overlapping timers detected!\n"
    "A TimeMonitor timer was stopped before a nested subtimer was\n"
    "stopped. This is not allowed by the StackedTimer. This corner case\n"
    "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
    "assigned to a new timer. To disable this warning, either fix the\n"
    "ordering of timer creation and destruction or disable the StackedTimer\n"
    << std::endl;
}
74 
75 }// anonymous space
76 
77 
78 namespace Tpetra {
79 namespace Details {
80 
81  namespace DeepCopyTimerInjection {
82  Teuchos::RCP<Teuchos::Time> timer_;
83  std::string label_;
84  bool initialized_ = false;
85 
86  void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
87  Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
88  uint64_t size) {
89  // In verbose mode, we add the src/dst names as well
90  std::string extra_label;
92  extra_label = std::string(" {") + src_name + "=>" + dst_name + "}";
94  extra_label = std::string(" {") + src_name + "=>" + dst_name + "," + std::to_string(size)+"}";
95  }
96 
97  if(timer_ != Teuchos::null)
98  std::cout << "WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
99 
100  // If the src_name is "Scalar" or "(none)" then we're doing a "Fill" style copy from host to devices, which we want to record separately.
101  if(!strcmp(src_name,"Scalar") || !strcmp(src_name,"(none)"))
102  label_ = std::string("Kokkos::deep_copy_scalar [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label;
103  // If the size is under 65 bytes, we're going to flag this as "small" to make it easier to watch the big stuff
104  else if(size <= 64)
105  label_ = std::string("Kokkos::deep_copy_small [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label;
106  else
107  label_ = std::string("Kokkos::deep_copy [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label;
108 
109 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
110  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
111  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
112  stackedTimer->start(label_);
113  } else
114 #endif
115  {
116  timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
117  timer_->start();
118  timer_->incrementNumCalls();
119  }
120  }
121 
122  void kokkosp_end_deep_copy() {
123 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
124  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
125  try {
126  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
127  stackedTimer->stop(label_);
128  return;
129  }
130  catch (std::runtime_error&) {
131  overlappingWarning();
132  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
133  }
134  } else
135 #endif
136  {
137  if (timer_ != Teuchos::null) {
138  timer_->stop();
139  }
140  timer_ = Teuchos::null;
141  }
142  }
143 
144  }// end DeepCopyTimerInjection
145 
146  void AddKokkosDeepCopyToTimeMonitor(bool force) {
147  if (!DeepCopyTimerInjection::initialized_) {
150  Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
151  Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
152  DeepCopyTimerInjection::initialized_=true;
153  }
154  }
155  }
156 
157 
158  namespace FenceTimerInjection {
159  Teuchos::RCP<Teuchos::Time> timer_;
160  bool initialized_ = false;
161  uint64_t active_handle;
162  std::string label_ = "";
163 
164  void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
165  uint64_t* handle) {
166 
167  // Nested fences are not allowed
168  if(!label_.empty())
169  return;
170 
171  active_handle = (active_handle+1) % 1024;
172  *handle = active_handle;
173 
174  std::string device_label = deviceIdToString(deviceId);
175 
176  label_ = std::string("Kokkos::fence ")+name + " " + device_label;
177 
178 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
179  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
180  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
181  stackedTimer->start(label_);
182  } else
183 #endif
184  {
185  timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
186  timer_->start();
187  timer_->incrementNumCalls();
188  }
189  }
190 
191 
192  void kokkosp_end_fence(const uint64_t handle) {
193  if(handle == active_handle) {
194 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
195  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
196  try {
197  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
198  stackedTimer->stop(label_);
199  label_ = "";
200  return;
201  }
202  catch (std::runtime_error&) {
203  overlappingWarning();
204  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
205  }
206  } else
207 #endif
208  {
209  if (timer_ != Teuchos::null) {
210  timer_->stop();
211  }
212  timer_ = Teuchos::null;
213  }
214  }
215  // Else: We've nested our fences, and we need to ignore the inner fences
216  }
217 
218 
219  }//end FenceTimerInjection
220 
221  void AddKokkosFenceToTimeMonitor(bool force) {
222  if (!FenceTimerInjection::initialized_) {
224  Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
225  Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
226  FenceTimerInjection::initialized_=true;
227  }
228  }
229  }
230 
231 
232  namespace FunctionsTimerInjection {
233  Teuchos::RCP<Teuchos::Time> timer_;
234  bool initialized_ = false;
235  std::string label_;
236 
237  void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
238  uint64_t* kernelID) {
239  // Nested fences are not allowed
240  if(timer_ != Teuchos::null)
241  return;
242  std::string device_label = deviceIdToString(devID);
243 
244  label_ = std::string("Kokkos::")+ kernelName + " " +kernelPrefix + " " + device_label;
245 
246 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
247  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
248  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
249  stackedTimer->start(label_);
250  } else
251 #endif
252  {
253  timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
254  timer_->start();
255  timer_->incrementNumCalls();
256  }
257  }
258 
259  void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
260  kokkosp_begin_kernel("parallel_for",kernelPrefix,devID,kernelID);
261  }
262 
263  void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
264  kokkosp_begin_kernel("parallel_scan",kernelPrefix,devID,kernelID);
265  }
266 
267  void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
268  kokkosp_begin_kernel("parallel_reduce",kernelPrefix,devID,kernelID);
269  }
270 
271  void kokkosp_end_kernel(const uint64_t handle) {
272 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
273  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
274  try {
275  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
276  stackedTimer->stop(label_);
277  return;
278  }
279  catch (std::runtime_error&) {
280  overlappingWarning();
281  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
282  }
283  } else
284 #endif
285  {
286  if (timer_ != Teuchos::null) {
287  timer_->stop();
288  }
289  timer_ = Teuchos::null;
290  }
291  }
292  }//end FunctionsInjection
293 
294  void AddKokkosFunctionsToTimeMonitor(bool force) {
295  if (!FunctionsTimerInjection::initialized_) {
297  Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
298  Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
299  Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
300 
301  // The end-call is generic, even though the start-call is not.
302  Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
303  Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
304  Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
305  FunctionsTimerInjection::initialized_=true;
306  }
307  }
308  }
309 
310 
311 
312 } // namespace Details
313 } // namespace Tpetra
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.