11 #include "TpetraCore_config.h"
13 #include "Kokkos_Core.hpp"
14 #include "Teuchos_TimeMonitor.hpp"
15 #include "Teuchos_Time.hpp"
16 #include "Teuchos_RCP.hpp"
17 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
18 #include "Teuchos_StackedTimer.hpp"
// Build a parenthesised, human-readable label for a Kokkos profiling device id.
// The pieces appended below give it the form "(<device type><instance suffix>".
//
// deviceId: id handed to the profiling callbacks; decoded below with
//           identifier_from_devid().
//
// NOTE(review): this listing carries line-number prefixes and skips numbers
// 50, 55-56 and 58-61, so the #else/#endif, the `else` in front of the final
// append and the return statement are presumably among the lines not shown
// here -- confirm against the original file.
30 std::string deviceIdToString(
const uint32_t deviceId) {
31 using namespace Kokkos::Tools::Experimental;
32 std::string device_label(
"(");
// Decode the id; the code below reads its `type` and `instance_id` members.
33 ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
// Append the name of the matching DeviceType enumerator. Anything that matches
// none of the listed enumerators is reported as "Unknown to Tpetra".
34 if (eid.type == DeviceType::Serial) device_label+=
"Serial";
35 else if (eid.type == DeviceType::OpenMP) device_label+=
"OpenMP";
36 else if (eid.type == DeviceType::Cuda) device_label+=
"Cuda";
37 else if (eid.type == DeviceType::HIP) device_label+=
"HIP";
38 else if (eid.type == DeviceType::OpenMPTarget) device_label+=
"OpenMPTarget";
39 else if (eid.type == DeviceType::HPX) device_label+=
"HPX";
40 else if (eid.type == DeviceType::Threads) device_label+=
"Threads";
41 else if (eid.type == DeviceType::SYCL) device_label+=
"SYCL";
42 else if (eid.type == DeviceType::OpenACC) device_label+=
"OpenACC";
43 else if (eid.type == DeviceType::Unknown) device_label+=
"Unknown";
44 else device_label+=
"Unknown to Tpetra";
// Instance suffix. Two special instance ids get fixed text. When
// KOKKOS_VERSION >= 40499 the helper int_for_synchronization_reason is called
// unqualified; the second pair of tests calls it through Impl::
// (presumably the #else branch for older Kokkos -- TODO confirm).
45 #if KOKKOS_VERSION >= 40499
46 if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
47 device_label +=
" All Instances)";
48 else if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
49 device_label +=
" DeepCopyResource)";
51 if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
52 device_label +=
" All Instances)";
53 else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
54 device_label +=
" DeepCopyResource)";
// Ordinary instances are reported by their numeric instance id.
57 device_label +=
" Instance " + std::to_string(eid.instance_id) +
")";
// Print a fixed multi-line warning about overlapping timers to std::cout.
// It is called from the catch handlers of the end-of-region callbacks in
// this file.
//
// NOTE(review): listing number 64 is skipped, so the statement that streams
// the literals below into `warning` is presumably not shown -- confirm.
// NOTE(review): "destuction" in the message is a typo for "destruction"; it is
// left untouched here because it is runtime text.
62 void overlappingWarning() {
63 std::ostringstream warning;
65 "\n*********************************************************************\n"
66 "WARNING: Overlapping timers detected!\n"
67 "A TimeMonitor timer was stopped before a nested subtimer was\n"
68 "stopped. This is not allowed by the StackedTimer. This corner case\n"
69 "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
70 "assigned to a new timer. To disable this warning, either fix the\n"
71 "ordering of timer creation and destuction or disable the StackedTimer\n";
// Emit the assembled text followed by a newline and a flush.
72 std::cout << warning.str() << std::endl;
// State shared by the deep-copy profiling callbacks in this namespace.
81 namespace DeepCopyTimerInjection {
// Timer of the deep copy currently being timed. kokkosp_begin_deep_copy
// assigns it and kokkosp_end_deep_copy resets it to Teuchos::null.
82 Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosDeepCopyToTimeMonitor once the callbacks are registered.
83 bool initialized_ =
false;
// Begin-of-deep-copy callback: picks a Teuchos timer whose name describes the
// copy and stores it in timer_.
//
// dst_handle / src_handle: space handles; their `name` member goes into the
//                          timer name as "[src=>dst]".
// dst_name / src_name:     C strings; used for the optional extra label and to
//                          detect scalar copies.
// dst_ptr / src_ptr:       not used in the lines shown here.
//
// NOTE(review): listing numbers 87-88, 90, 92, 94-95, 98-99, 102-103, 105, 107
// and 113-115 are skipped. The `size` parameter used below, the conditions that
// choose between the label and timer variants, and the closing lines are
// therefore not visible -- confirm against the original file.
85 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
const char* dst_name,
const void* dst_ptr,
86 Kokkos::Tools::SpaceHandle src_handle,
const char* src_name,
const void* src_ptr,
// Optional suffix appended to the timer name. Two forms are assigned below:
// " {src=>dst}" and " {src=>dst,size}". The selecting conditions are not shown
// (presumably the timeKokkosDeepCopyVerbose1/2 behaviour switches -- TODO confirm).
89 std::string extra_label;
91 extra_label = std::string(
" {") + src_name +
"=>" + dst_name +
"}";
93 extra_label = std::string(
" {") + src_name +
"=>" + dst_name +
"," + std::to_string(size)+
"}";
// A non-null timer_ means an earlier begin callback has not been matched by an
// end callback yet; only a warning is printed here.
96 if(timer_ != Teuchos::null)
97 std::cout <<
"WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
// strcmp returns 0 on equality, so this branch is taken when src_name is
// exactly "Scalar" or "(none)"; such copies get the "deep_copy_scalar" timer.
100 if(!strcmp(src_name,
"Scalar") || !strcmp(src_name,
"(none)"))
101 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::deep_copy_scalar [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label);
// Timer for "small" copies; the condition guarding it is not shown in this listing.
104 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::deep_copy_small [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label);
// Timer for the remaining copies; the `else` introducing it is not shown.
106 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::deep_copy [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label);
// Record one more call on the selected timer.
108 timer_->incrementNumCalls();
// When TimeMonitor is tied to a StackedTimer, start the same-named entry
// there as well (only if a stacked timer is currently set).
109 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
110 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
111 if (nonnull(stackedTimer))
112 stackedTimer->start(timer_->name());
// End-of-deep-copy callback: closes the region opened by
// kokkosp_begin_deep_copy and clears timer_. It does nothing when timer_ is null.
//
// NOTE(review): listing numbers 118, 120, 124 and 128-130 are skipped, so the
// `try` matching the catch below and any direct operation on timer_ are
// presumably among the lines not shown -- confirm.
116 void kokkosp_end_deep_copy() {
117 if (timer_ != Teuchos::null) {
119 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
// Stop the StackedTimer entry that carries this timer's name, if one is set.
121 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
122 if (nonnull(stackedTimer))
123 stackedTimer->stop(timer_->name());
// On std::runtime_error, print the overlapping-timer warning and detach the
// StackedTimer from TimeMonitor by setting it to null.
125 catch (std::runtime_error&) {
126 overlappingWarning();
127 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Mark that no deep copy is being timed any more.
131 timer_ = Teuchos::null;
// Register the deep-copy begin/end callbacks with Kokkos Tools. The
// initialized_ flag makes the registration happen at most once.
//
// force: not referenced in the lines shown here. Listing numbers 138-139 are
//        skipped, so it is presumably tested there -- TODO confirm.
136 void AddKokkosDeepCopyToTimeMonitor(
bool force) {
137 if (!DeepCopyTimerInjection::initialized_) {
// Install the two callbacks defined in DeepCopyTimerInjection.
140 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
141 Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
// Remember the registration so later calls skip this block.
142 DeepCopyTimerInjection::initialized_=
true;
// State shared by the fence profiling callbacks in this namespace.
148 namespace FenceTimerInjection {
// Timer of the fence currently being timed. kokkosp_begin_fence assigns it
// and kokkosp_end_fence resets it to Teuchos::null.
149 Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosFenceToTimeMonitor once the callbacks are registered.
150 bool initialized_ =
false;
// Handle value most recently written out by kokkosp_begin_fence, which
// advances it modulo 1024. kokkosp_end_fence compares its argument to it.
151 uint64_t active_handle;
// Begin-of-fence callback: issues a handle, picks a Teuchos timer named after
// the fence and its device, and stores it in timer_.
//
// name:     fence label; becomes part of the timer name.
// deviceId: device id; turned into text by deviceIdToString().
//
// NOTE(review): listing numbers 154-156, 158, 161, 163, 165 and 171-175 are
// skipped. The `handle` output parameter written below and the statement
// governed by the first `if` are therefore not visible (presumably an early
// exit for nested fences -- TODO confirm).
153 void kokkosp_begin_fence(
const char* name,
const uint32_t deviceId,
157 if(timer_ != Teuchos::null)
// Advance the handle counter, wrapping at 1024, and report it to the caller.
159 active_handle = (active_handle+1) % 1024;
160 *handle = active_handle;
162 std::string device_label = deviceIdToString(deviceId);
// Timer name: "Kokkos::fence <name> <device label>".
164 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::fence ")+name +
" " + device_label);
// Record one more call on the selected timer.
166 timer_->incrementNumCalls();
// When TimeMonitor is tied to a StackedTimer, start the same-named entry
// there as well (only if a stacked timer is currently set).
167 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
168 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
169 if (nonnull(stackedTimer))
170 stackedTimer->start(timer_->name());
// End-of-fence callback: closes the region opened by kokkosp_begin_fence.
//
// handle: value the begin callback wrote out. The body below runs only when
//         it equals active_handle and timer_ is non-null.
//
// NOTE(review): listing numbers 179, 181, 185, 189-191 and 193-199 are
// skipped, so the `try` matching the catch below and any direct operation on
// timer_ are presumably among the lines not shown -- confirm.
176 void kokkosp_end_fence(
const uint64_t handle) {
177 if(handle == active_handle) {
178 if (timer_ != Teuchos::null) {
180 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
// Stop the StackedTimer entry that carries this timer's name, if one is set.
182 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
183 if (nonnull(stackedTimer))
184 stackedTimer->stop(timer_->name());
// On std::runtime_error, print the overlapping-timer warning and detach the
// StackedTimer from TimeMonitor by setting it to null.
186 catch (std::runtime_error&) {
187 overlappingWarning();
188 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Mark that no fence is being timed any more.
192 timer_ = Teuchos::null;
// Register the fence begin/end callbacks with Kokkos Tools. The initialized_
// flag makes the registration happen at most once.
//
// force: not referenced in the lines shown here. Listing number 202 is
//        skipped, so it is presumably tested there -- TODO confirm.
200 void AddKokkosFenceToTimeMonitor(
bool force) {
201 if (!FenceTimerInjection::initialized_) {
// Install the two callbacks defined in FenceTimerInjection.
203 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
204 Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
// Remember the registration so later calls skip this block.
205 FenceTimerInjection::initialized_=
true;
// State shared by the parallel-kernel profiling callbacks in this namespace.
211 namespace FunctionsTimerInjection {
// Timer of the kernel currently being timed. kokkosp_begin_kernel assigns it
// and kokkosp_end_kernel resets it to Teuchos::null.
212 Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosFunctionsToTimeMonitor once the callbacks are registered.
213 bool initialized_ =
false;
// Shared begin-of-kernel helper used by the parallel_for / parallel_scan /
// parallel_reduce callbacks. It picks a Teuchos timer named after the kernel
// and stores it in timer_.
//
// kernelName:   kernel kind, e.g. "parallel_for"; supplied by the wrappers.
// kernelPrefix: label passed in by the profiling hook.
// devID:        device id; turned into text by deviceIdToString().
// kernelID:     not written in the lines shown here.
//
// NOTE(review): listing numbers 217, 219, 221, 223 and 229-232 are skipped,
// so the statement governed by the first `if` is not visible (presumably an
// early exit while another kernel is being timed -- TODO confirm).
215 void kokkosp_begin_kernel(
const char* kernelName,
const char* kernelPrefix,
const uint32_t devID,
216 uint64_t* kernelID) {
218 if(timer_ != Teuchos::null)
220 std::string device_label = deviceIdToString(devID);
// Timer name: "Kokkos::<kernelName> <kernelPrefix> <device label>".
222 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::")+ kernelName +
" " +kernelPrefix +
" " + device_label);
// Record one more call on the selected timer.
224 timer_->incrementNumCalls();
// When TimeMonitor is tied to a StackedTimer, start the same-named entry
// there as well (only if a stacked timer is currently set).
225 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
226 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
227 if (nonnull(stackedTimer))
228 stackedTimer->start(timer_->name());
// Begin callback for parallel_for: forwards its arguments to
// kokkosp_begin_kernel with the kernel kind fixed to "parallel_for".
233 void kokkosp_begin_for(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
234 kokkosp_begin_kernel(
"parallel_for",kernelPrefix,devID,kernelID);
// Begin callback for parallel_scan: forwards its arguments to
// kokkosp_begin_kernel with the kernel kind fixed to "parallel_scan".
237 void kokkosp_begin_scan(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
238 kokkosp_begin_kernel(
"parallel_scan",kernelPrefix,devID,kernelID);
// Begin callback for parallel_reduce: forwards its arguments to
// kokkosp_begin_kernel with the kernel kind fixed to "parallel_reduce".
241 void kokkosp_begin_reduce(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
242 kokkosp_begin_kernel(
"parallel_reduce",kernelPrefix,devID,kernelID);
// End-of-kernel callback shared by parallel_for / parallel_reduce /
// parallel_scan. It closes the region opened by kokkosp_begin_kernel and
// clears timer_, and does nothing when timer_ is null.
//
// handle: not referenced in the lines shown here.
//
// NOTE(review): listing numbers 247, 249, 253, 257-260 and 262-264 are
// skipped, so the `try` matching the catch below and any direct operation on
// timer_ are presumably among the lines not shown -- confirm.
245 void kokkosp_end_kernel(
const uint64_t handle) {
246 if (timer_ != Teuchos::null) {
248 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
// Stop the StackedTimer entry that carries this timer's name, if one is set.
250 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
251 if (nonnull(stackedTimer))
252 stackedTimer->stop(timer_->name());
// On std::runtime_error, print the overlapping-timer warning and detach the
// StackedTimer from TimeMonitor by setting it to null.
254 catch (std::runtime_error&) {
255 overlappingWarning();
256 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Mark that no kernel is being timed any more.
261 timer_ = Teuchos::null;
// Register the parallel_for / parallel_reduce / parallel_scan begin and end
// callbacks with Kokkos Tools. The initialized_ flag makes the registration
// happen at most once.
//
// force: not referenced in the lines shown here. Listing number 267 is
//        skipped, so it is presumably tested there -- TODO confirm.
265 void AddKokkosFunctionsToTimeMonitor(
bool force) {
266 if (!FunctionsTimerInjection::initialized_) {
// Each kernel kind has its own begin callback, so the kind ends up in the timer name.
268 Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
269 Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
270 Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
// All three kernel kinds share a single end callback.
273 Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
274 Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
275 Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
// Remember the registration so later calls skip this block.
276 FunctionsTimerInjection::initialized_=
true;
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos::parallel_scan().
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifying host/device data transfers.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.