Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_KokkosTeuchosTimerInjection.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_Time.hpp"
#include "Teuchos_RCP.hpp"
#ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
#include "Teuchos_StackedTimer.hpp"
#include <sstream>
#endif
#include <cstdint>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <string>
22 
24 
25 namespace {
26 // Get a useful label from the deviceId
27 // NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
28 std::string deviceIdToString(const uint32_t deviceId) {
29  using namespace Kokkos::Tools::Experimental;
30  std::string device_label("(");
31  ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
32  if (eid.type == DeviceType::Serial)
33  device_label += "Serial";
34  else if (eid.type == DeviceType::OpenMP)
35  device_label += "OpenMP";
36  else if (eid.type == DeviceType::Cuda)
37  device_label += "Cuda";
38  else if (eid.type == DeviceType::HIP)
39  device_label += "HIP";
40  else if (eid.type == DeviceType::OpenMPTarget)
41  device_label += "OpenMPTarget";
42  else if (eid.type == DeviceType::HPX)
43  device_label += "HPX";
44  else if (eid.type == DeviceType::Threads)
45  device_label += "Threads";
46  else if (eid.type == DeviceType::SYCL)
47  device_label += "SYCL";
48  else if (eid.type == DeviceType::OpenACC)
49  device_label += "OpenACC";
50  else if (eid.type == DeviceType::Unknown)
51  device_label += "Unknown";
52  else
53  device_label += "Unknown to Tpetra";
54  if (eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
55  device_label += " All Instances)";
56  else if (eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
57  device_label += " DeepCopyResource)";
58  else
59  device_label += " Instance " + std::to_string(eid.instance_id) + ")";
60 
61  return device_label;
62 }
63 
// Print a banner to std::cout explaining that a timer was stopped while one of
// its nested sub-timers was still running. Called by the end-callbacks in this
// file when StackedTimer::stop() throws std::runtime_error.
//
// The text is streamed directly to std::cout instead of going through a
// std::ostringstream: <sstream> is only included in this file when
// HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined, while this
// function is compiled unconditionally.
void overlappingWarning() {
  std::cout << "\n*********************************************************************\n"
               "WARNING: Overlapping timers detected!\n"
               "A TimeMonitor timer was stopped before a nested subtimer was\n"
               "stopped. This is not allowed by the StackedTimer. This corner case\n"
               "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
               "assigned to a new timer. To disable this warning, either fix the\n"
               "ordering of timer creation and destruction or disable the StackedTimer\n"
            << std::endl;  // endl on purpose: flush so the warning shows up immediately
}
75 
76 } // namespace
77 
78 namespace Tpetra {
79 namespace Details {
80 
81 namespace DeepCopyTimerInjection {
82 Teuchos::RCP<Teuchos::Time> timer_;
83 std::string label_;
84 bool initialized_ = false;
85 
86 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
87  Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
88  uint64_t size) {
89  // In verbose mode, we add the src/dst names as well
90  std::string extra_label;
92  extra_label = std::string(" {") + src_name + "=>" + dst_name + "}";
94  extra_label = std::string(" {") + src_name + "=>" + dst_name + "," + std::to_string(size) + "}";
95  }
96 
97  if (timer_ != Teuchos::null)
98  std::cout << "WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error" << std::endl;
99 
100  // If the src_name is "Scalar" or "(none)" then we're doing a "Fill" style copy from host to devices, which we want to record separately.
101  if (!strcmp(src_name, "Scalar") || !strcmp(src_name, "(none)"))
102  label_ = std::string("Kokkos::deep_copy_scalar [") + src_handle.name + "=>" + dst_handle.name + "]" + extra_label;
103  // If the size is under 65 bytes, we're going to flag this as "small" to make it easier to watch the big stuff
104  else if (size <= 64)
105  label_ = std::string("Kokkos::deep_copy_small [") + src_handle.name + "=>" + dst_handle.name + "]" + extra_label;
106  else
107  label_ = std::string("Kokkos::deep_copy [") + src_handle.name + "=>" + dst_handle.name + "]" + extra_label;
108 
109 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
110  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
111  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
112  stackedTimer->start(label_);
113  } else
114 #endif
115  {
116  timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
117  timer_->start();
118  timer_->incrementNumCalls();
119  }
120 }
121 
122 void kokkosp_end_deep_copy() {
123 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
124  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
125  try {
126  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
127  stackedTimer->stop(label_);
128  return;
129  } catch (std::runtime_error&) {
130  overlappingWarning();
131  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
132  }
133  } else
134 #endif
135  {
136  if (timer_ != Teuchos::null) {
137  timer_->stop();
138  }
139  timer_ = Teuchos::null;
140  }
141 }
142 
143 } // namespace DeepCopyTimerInjection
144 
145 void AddKokkosDeepCopyToTimeMonitor(bool force) {
146  if (!DeepCopyTimerInjection::initialized_) {
148  Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
149  Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
150  DeepCopyTimerInjection::initialized_ = true;
151  }
152  }
153 }
154 
155 namespace FenceTimerInjection {
156 Teuchos::RCP<Teuchos::Time> timer_;
157 bool initialized_ = false;
158 uint64_t active_handle;
159 std::string label_ = "";
160 
161 void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
162  uint64_t* handle) {
163  // Nested fences are not allowed
164  if (!label_.empty())
165  return;
166 
167  active_handle = (active_handle + 1) % 1024;
168  *handle = active_handle;
169 
170  std::string device_label = deviceIdToString(deviceId);
171 
172  label_ = std::string("Kokkos::fence ") + name + " " + device_label;
173 
174 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
175  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
176  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
177  stackedTimer->start(label_);
178  } else
179 #endif
180  {
181  timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
182  timer_->start();
183  timer_->incrementNumCalls();
184  }
185 }
186 
187 void kokkosp_end_fence(const uint64_t handle) {
188  if (handle == active_handle) {
189 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
190  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
191  try {
192  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
193  stackedTimer->stop(label_);
194  label_ = "";
195  return;
196  } catch (std::runtime_error&) {
197  overlappingWarning();
198  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
199  }
200  } else
201 #endif
202  {
203  if (timer_ != Teuchos::null) {
204  timer_->stop();
205  }
206  timer_ = Teuchos::null;
207  }
208  }
209  // Else: We've nested our fences, and we need to ignore the inner fences
210 }
211 
212 } // namespace FenceTimerInjection
213 
214 void AddKokkosFenceToTimeMonitor(bool force) {
215  if (!FenceTimerInjection::initialized_) {
217  Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
218  Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
219  FenceTimerInjection::initialized_ = true;
220  }
221  }
222 }
223 
224 namespace FunctionsTimerInjection {
225 Teuchos::RCP<Teuchos::Time> timer_;
226 bool initialized_ = false;
227 std::string label_;
228 
229 void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
230  uint64_t* kernelID) {
231  // Nested fences are not allowed
232  if (timer_ != Teuchos::null)
233  return;
234  std::string device_label = deviceIdToString(devID);
235 
236  label_ = std::string("Kokkos::") + kernelName + " " + kernelPrefix + " " + device_label;
237 
238 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
239  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
240  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
241  stackedTimer->start(label_);
242  } else
243 #endif
244  {
245  timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
246  timer_->start();
247  timer_->incrementNumCalls();
248  }
249 }
250 
251 void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
252  kokkosp_begin_kernel("parallel_for", kernelPrefix, devID, kernelID);
253 }
254 
255 void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
256  kokkosp_begin_kernel("parallel_scan", kernelPrefix, devID, kernelID);
257 }
258 
259 void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
260  kokkosp_begin_kernel("parallel_reduce", kernelPrefix, devID, kernelID);
261 }
262 
263 void kokkosp_end_kernel(const uint64_t handle) {
264 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
265  if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
266  try {
267  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
268  stackedTimer->stop(label_);
269  return;
270  } catch (std::runtime_error&) {
271  overlappingWarning();
272  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
273  }
274  } else
275 #endif
276  {
277  if (timer_ != Teuchos::null) {
278  timer_->stop();
279  }
280  timer_ = Teuchos::null;
281  }
282 }
283 } // namespace FunctionsTimerInjection
284 
285 void AddKokkosFunctionsToTimeMonitor(bool force) {
286  if (!FunctionsTimerInjection::initialized_) {
288  Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
289  Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
290  Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
291 
292  // The end-call is generic, even though the start-call is not.
293  Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
294  Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
295  Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
296  FunctionsTimerInjection::initialized_ = true;
297  }
298  }
299 }
300 
301 } // namespace Details
302 } // namespace Tpetra
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.