Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_KokkosTeuchosTimerInjection.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
11 #include "TpetraCore_config.h"
13 #include "Kokkos_Core.hpp"
14 #include "Teuchos_TimeMonitor.hpp"
15 #include "Teuchos_Time.hpp"
16 #include "Teuchos_RCP.hpp"
17 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
18 #include "Teuchos_StackedTimer.hpp"
19 #include <sstream>
20 #endif
21 #include <string>
22 
24 
25 
26 
27 namespace {
28  // Get a useful label from the deviceId
29  // NOTE: Relevant code is in: kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp
30  std::string deviceIdToString(const uint32_t deviceId) {
31  using namespace Kokkos::Tools::Experimental;
32  std::string device_label("(");
33  ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
34  if (eid.type == DeviceType::Serial) device_label+="Serial";
35  else if (eid.type == DeviceType::OpenMP) device_label+="OpenMP";
36  else if (eid.type == DeviceType::Cuda) device_label+="Cuda";
37  else if (eid.type == DeviceType::HIP) device_label+="HIP";
38  else if (eid.type == DeviceType::OpenMPTarget) device_label+="OpenMPTarget";
39  else if (eid.type == DeviceType::HPX) device_label+="HPX";
40  else if (eid.type == DeviceType::Threads) device_label+="Threads";
41  else if (eid.type == DeviceType::SYCL) device_label+="SYCL";
42  else if (eid.type == DeviceType::OpenACC) device_label+="OpenACC";
43  else if (eid.type == DeviceType::Unknown) device_label+="Unknown";
44  else device_label+="Unknown to Tpetra";
45 #if KOKKOS_VERSION >= 40499
46  if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
47  device_label += " All Instances)";
48  else if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
49  device_label += " DeepCopyResource)";
50 #else
51  if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
52  device_label += " All Instances)";
53  else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
54  device_label += " DeepCopyResource)";
55 #endif
56  else
57  device_label += " Instance " + std::to_string(eid.instance_id) + ")";
58 
59  return device_label;
60  }
61 
// Print a warning to std::cout explaining that timers overlapped, i.e. a
// timer was stopped while one of its nested subtimers was still running.
// The callbacks in this file call it when StackedTimer::stop() throws.
void overlappingWarning() {
  // The text is streamed straight to std::cout instead of going through a
  // std::ostringstream: this function is compiled unconditionally, whereas
  // <sstream> is only included when
  // HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined.
  std::cout <<
    "\n*********************************************************************\n"
    "WARNING: Overlapping timers detected!\n"
    "A TimeMonitor timer was stopped before a nested subtimer was\n"
    "stopped. This is not allowed by the StackedTimer. This corner case\n"
    "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
    "assigned to a new timer. To disable this warning, either fix the\n"
    "ordering of timer creation and destruction or disable the StackedTimer\n"
    << std::endl;
}
74 
75 }// anonymous space
76 
77 
78 namespace Tpetra {
79 namespace Details {
80 
81  namespace DeepCopyTimerInjection {
// Timer of the deep copy currently being measured. kokkosp_begin_deep_copy()
// assigns it and kokkosp_end_deep_copy() resets it to Teuchos::null.
82  Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosDeepCopyToTimeMonitor() after it registers the callbacks.
83  bool initialized_ = false;
84 
// Begin-of-deep-copy callback (registered via set_begin_deep_copy_callback in
// AddKokkosDeepCopyToTimeMonitor). Picks a timer name from the source name
// and the copy size, obtains that timer from Teuchos::TimeMonitor, starts it
// and increments its call count. When the StackedTimer integration is
// compiled in and a stacked timer is installed, the same name is started
// there as well.
//  dst_handle / src_handle: memory-space handles; their .name goes into the timer name.
//  dst_name / src_name:     names of the destination / source; src_name also selects the category.
//  dst_ptr / src_ptr:       not used by the visible code.
//  size:                    compared against 64 to pick the "small" category.
85  void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
86  Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
87  uint64_t size) {
88  // In verbose mode, we add the src/dst names as well
89  std::string extra_label;
// NOTE(review): the conditions that select between the two assignments below
// (names only vs. names plus size) are missing from this listing, which is
// also why a lone closing brace follows them. Confirm against the real file.
91  extra_label = std::string(" {") + src_name + "=>" + dst_name + "}";
93  extra_label = std::string(" {") + src_name + "=>" + dst_name + "," + std::to_string(size)+"}";
94  }
95 
// A non-null timer_ here means no kokkosp_end_deep_copy() ran since the last
// begin. Only a warning is printed; timer_ is overwritten below without
// stopping the earlier timer.
96  if(timer_ != Teuchos::null)
97  std::cout << "WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
98 
99  // If the src_name is "Scalar" or "(none)" then we're doing a "Fill" style copy from host to devices, which we want to record separately.
100  if(!strcmp(src_name,"Scalar") || !strcmp(src_name,"(none)"))
101  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy_scalar [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
102  // If the size is under 65 bytes, we're going to flag this as "small" to make it easier to watch the big stuff
103  else if(size <= 64)
104  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy_small [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
105  else
106  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::deep_copy [")+src_handle.name+"=>"+dst_handle.name+"]" + extra_label);
107  timer_->start();
108  timer_->incrementNumCalls();
109 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
// Mirror the start on the stacked timer, if one is installed, under the same name.
110  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
111  if (nonnull(stackedTimer))
112  stackedTimer->start(timer_->name());
113 #endif
114  }
115 
116  void kokkosp_end_deep_copy() {
117  if (timer_ != Teuchos::null) {
118  timer_->stop();
119 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
120  try {
121  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
122  if (nonnull(stackedTimer))
123  stackedTimer->stop(timer_->name());
124  }
125  catch (std::runtime_error&) {
126  overlappingWarning();
127  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
128  }
129 #endif
130  }
131  timer_ = Teuchos::null;
132  }
133 
134  }// end DeepCopyTimerInjection
135 
// Register the deep-copy begin/end callbacks of DeepCopyTimerInjection with
// Kokkos Tools. Once DeepCopyTimerInjection::initialized_ is true, later
// calls do nothing.
// NOTE(review): this listing omits the lines between the initialized_ check
// and the registration calls, hence the extra closing brace. Presumably they
// test `force` and the Tpetra::Details::Behavior deep-copy timing options;
// confirm against the real file.
136  void AddKokkosDeepCopyToTimeMonitor(bool force) {
137  if (!DeepCopyTimerInjection::initialized_) {
140  Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
141  Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
142  DeepCopyTimerInjection::initialized_=true;
143  }
144  }
145  }
146 
147 
148  namespace FenceTimerInjection {
149  Teuchos::RCP<Teuchos::Time> timer_;
150  bool initialized_ = false;
151  uint64_t active_handle;
152 
153  void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
154  uint64_t* handle) {
155 
156  // Nested fences are not allowed
157  if(timer_ != Teuchos::null)
158  return;
159  active_handle = (active_handle+1) % 1024;
160  *handle = active_handle;
161 
162  std::string device_label = deviceIdToString(deviceId);
163 
164  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::fence ")+name + " " + device_label);
165  timer_->start();
166  timer_->incrementNumCalls();
167 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
168  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
169  if (nonnull(stackedTimer))
170  stackedTimer->start(timer_->name());
171 #endif
172 
173  }
174 
175 
176  void kokkosp_end_fence(const uint64_t handle) {
177  if(handle == active_handle) {
178  if (timer_ != Teuchos::null) {
179  timer_->stop();
180 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
181  try {
182  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
183  if (nonnull(stackedTimer))
184  stackedTimer->stop(timer_->name());
185  }
186  catch (std::runtime_error&) {
187  overlappingWarning();
188  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
189  }
190 #endif
191  }
192  timer_ = Teuchos::null;
193  }
194  // Else: We've nested our fences, and we need to ignore the inner fences
195  }
196 
197 
198  }//end FenceTimerInjection
199 
// Register the fence begin/end callbacks of FenceTimerInjection with Kokkos
// Tools. Once FenceTimerInjection::initialized_ is true, later calls do nothing.
// NOTE(review): this listing omits a line between the initialized_ check and
// the registration calls, hence the extra closing brace. Presumably it tests
// `force` and a Tpetra::Details::Behavior fence timing option; confirm
// against the real file.
200  void AddKokkosFenceToTimeMonitor(bool force) {
201  if (!FenceTimerInjection::initialized_) {
203  Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
204  Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
205  FenceTimerInjection::initialized_=true;
206  }
207  }
208  }
209 
210 
211  namespace FunctionsTimerInjection {
212  Teuchos::RCP<Teuchos::Time> timer_;
213  bool initialized_ = false;
214 
215  void kokkosp_begin_kernel(const char* kernelName, const char* kernelPrefix, const uint32_t devID,
216  uint64_t* kernelID) {
217  // Nested fences are not allowed
218  if(timer_ != Teuchos::null)
219  return;
220  std::string device_label = deviceIdToString(devID);
221 
222  timer_ = Teuchos::TimeMonitor::getNewTimer(std::string("Kokkos::")+ kernelName + " " +kernelPrefix + " " + device_label);
223  timer_->start();
224  timer_->incrementNumCalls();
225 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
226  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
227  if (nonnull(stackedTimer))
228  stackedTimer->start(timer_->name());
229 #endif
230 
231  }
232 
233  void kokkosp_begin_for(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
234  kokkosp_begin_kernel("parallel_for",kernelPrefix,devID,kernelID);
235  }
236 
237  void kokkosp_begin_scan(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
238  kokkosp_begin_kernel("parallel_scan",kernelPrefix,devID,kernelID);
239  }
240 
241  void kokkosp_begin_reduce(const char* kernelPrefix, const uint32_t devID, uint64_t* kernelID) {
242  kokkosp_begin_kernel("parallel_reduce",kernelPrefix,devID,kernelID);
243  }
244 
245  void kokkosp_end_kernel(const uint64_t handle) {
246  if (timer_ != Teuchos::null) {
247  timer_->stop();
248 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
249  try {
250  const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
251  if (nonnull(stackedTimer))
252  stackedTimer->stop(timer_->name());
253  }
254  catch (std::runtime_error&) {
255  overlappingWarning();
256  Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
257  }
258 #endif
259  }
260 
261  timer_ = Teuchos::null;
262  }
263  }//end FunctionsInjection
264 
// Register the parallel_for / parallel_reduce / parallel_scan begin and end
// callbacks of FunctionsTimerInjection with Kokkos Tools. Once
// FunctionsTimerInjection::initialized_ is true, later calls do nothing.
// NOTE(review): this listing omits a line between the initialized_ check and
// the registration calls, hence the extra closing brace. Presumably it tests
// `force` and a Tpetra::Details::Behavior kernel timing option; confirm
// against the real file.
265  void AddKokkosFunctionsToTimeMonitor(bool force) {
266  if (!FunctionsTimerInjection::initialized_) {
268  Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
269  Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
270  Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
271 
272  // The end-call is generic, even though the start-call is not.
273  Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
274  Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
275  Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
276  FunctionsTimerInjection::initialized_=true;
277  }
278  }
279  }
280 
281 
282 
283 } // namespace Details
284 } // namespace Tpetra
285 
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.