42 #include "TpetraCore_config.h"
44 #include "Kokkos_Core.hpp"
45 #include "Teuchos_TimeMonitor.hpp"
46 #include "Teuchos_Time.hpp"
47 #include "Teuchos_RCP.hpp"
48 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
49 #include "Teuchos_StackedTimer.hpp"
// Builds a human-readable label for a Kokkos Tools device id.
//
// The label is an opening parenthesis, then a backend name chosen by comparing
// eid.type against the DeviceType enumerators, then a suffix derived from
// eid.instance_id that also closes the parenthesis.
//
// NOTE(review): the embedded listing numbers skip 76, 81 and 83-86, so some
// lines are not visible here -- presumably an `else` ahead of the final
// " Instance " append, the return of device_label and the closing brace.
// TODO confirm against the real file.
61 std::string deviceIdToString(
const uint32_t deviceId) {
62 using namespace Kokkos::Tools::Experimental;
63 std::string device_label(
"(");
// Translate deviceId into an ExecutionSpaceIdentifier; only its `type` and
// `instance_id` members are read below.
64 ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
// Append the backend name. A type that matches none of the enumerators listed
// here is reported as "Unknown to Tpetra", which is distinct from the explicit
// DeviceType::Unknown case.
65 if (eid.type == DeviceType::Serial) device_label+=
"Serial";
66 else if (eid.type == DeviceType::OpenMP) device_label+=
"OpenMP";
67 else if (eid.type == DeviceType::Cuda) device_label+=
"Cuda";
68 else if (eid.type == DeviceType::HIP) device_label+=
"HIP";
69 else if (eid.type == DeviceType::OpenMPTarget) device_label+=
"OpenMPTarget";
70 else if (eid.type == DeviceType::HPX) device_label+=
"HPX";
71 else if (eid.type == DeviceType::Threads) device_label+=
"Threads";
72 else if (eid.type == DeviceType::SYCL) device_label+=
"SYCL";
73 else if (eid.type == DeviceType::OpenACC) device_label+=
"OpenACC";
74 else if (eid.type == DeviceType::Unknown) device_label+=
"Unknown";
75 else device_label+=
"Unknown to Tpetra";
// Two instance ids, obtained from Impl::int_for_synchronization_reason for the
// GlobalDeviceSynchronization and DeepCopyResourceSynchronization cases, get a
// descriptive suffix instead of a number.
77 if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
78 device_label +=
" All Instances)";
79 else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
80 device_label +=
" DeepCopyResource)";
// Numeric instance id plus the closing parenthesis.
// NOTE(review): presumably the `else` branch of the chain above (listing line
// 81 is not shown) -- TODO confirm.
82 device_label +=
" Instance " + std::to_string(eid.instance_id) +
")";
// Prints the overlapping-timers warning banner to std::cout.
//
// The callers in this file invoke it from their std::runtime_error handlers
// around the StackedTimer stop call. The banner explains that a TimeMonitor
// timer was stopped before one of its nested subtimers and how to avoid the
// warning. Takes no arguments and returns nothing; the only effect is the
// write to std::cout.
void overlappingWarning() {
  // Adjacent string literals are concatenated at compile time, so the whole
  // banner goes out in a single insertion and no intermediate stream is
  // needed. std::endl is kept on purpose: it supplies the trailing newline and
  // flushes, so the warning is visible immediately.
  std::cout <<
    "\n*********************************************************************\n"
    "WARNING: Overlapping timers detected!\n"
    "A TimeMonitor timer was stopped before a nested subtimer was\n"
    "stopped. This is not allowed by the StackedTimer. This corner case\n"
    "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
    "assigned to a new timer. To disable this warning, either fix the\n"
    "ordering of timer creation and destruction or disable the StackedTimer\n"
    << std::endl;
}
// State and Kokkos Tools callbacks that wrap each Kokkos::deep_copy in a
// Teuchos timer.
//
// NOTE(review): the embedded listing numbers have gaps throughout this
// namespace (for example 112-113, 115, 117, 127-128, 130, 132, 143, 145), so
// closing braces, `else`/`try` lines, the last parameter of the begin callback
// and the conditions that choose between the alternatives below are not
// visible here. Comments describe only what is shown.
106 namespace DeepCopyTimerInjection {
// Timer for the deep copy currently in flight; compared against Teuchos::null
// to mean "no deep copy is being timed".
107 Teuchos::RCP<Teuchos::Time> timer_;
// Checked and set to true by AddKokkosDeepCopyToTimeMonitor so the callbacks
// are registered only once.
108 bool initialized_ =
false;
// Begin-of-deep-copy callback. Builds a timer name from the source and
// destination space handles (plus an optional extra label), stores the timer in
// timer_ and increments its call count. dst_ptr and src_ptr are not used in the
// visible code.
// NOTE(review): `size` is used below but its declaration is not shown;
// presumably it is the trailing parameter on the missing listing line 112.
110 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
const char* dst_name,
const void* dst_ptr,
111 Kokkos::Tools::SpaceHandle src_handle,
const char* src_name,
const void* src_ptr,
// Optional suffix appended to the timer name. Two forms are assigned below:
// " {src=>dst}" and " {src=>dst,size}".
// NOTE(review): the conditions selecting between them are on lines not shown;
// presumably they depend on the Behavior verbose deep-copy settings -- TODO
// confirm.
114 std::string extra_label;
116 extra_label = std::string(
" {") + src_name +
"=>" + dst_name +
"}";
118 extra_label = std::string(
" {") + src_name +
"=>" + dst_name +
"," + std::to_string(size)+
"}";
// A non-null timer_ means an earlier deep copy has not reached its end
// callback yet; a warning is printed (no other handling is visible here).
121 if(timer_ != Teuchos::null)
122 std::cout <<
"WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
// strcmp returns 0 on equality, so this branch is taken when src_name is
// "Scalar" or "(none)"; such copies use the deep_copy_scalar timer name.
125 if(!strcmp(src_name,
"Scalar") || !strcmp(src_name,
"(none)"))
126 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::deep_copy_scalar [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label);
// NOTE(review): the conditions guarding the deep_copy_small and plain deep_copy
// names (listing lines 128 and 130) are not shown -- TODO confirm.
129 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::deep_copy_small [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label);
131 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::deep_copy [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label);
// NOTE(review): listing line 132 is missing just above; presumably it starts
// the timer -- TODO confirm.
133 timer_->incrementNumCalls();
// When HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined, also start an
// entry with the same name on the TimeMonitor stacked timer, if one is set.
134 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
135 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
136 if (nonnull(stackedTimer))
137 stackedTimer->start(timer_->name());
// End-of-deep-copy callback. If a timer is active, stops the matching
// stacked-timer entry (when that feature is compiled in) and then clears
// timer_.
// NOTE(review): listing line 143 is missing; presumably it stops timer_ itself
// -- TODO confirm.
141 void kokkosp_end_deep_copy() {
142 if (timer_ != Teuchos::null) {
144 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
146 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
147 if (nonnull(stackedTimer))
148 stackedTimer->stop(timer_->name());
// A std::runtime_error from the stop call is handled by printing the
// overlapping-timers warning and removing the stacked timer from the
// TimeMonitor. The matching `try` is on a line not shown in this listing.
150 catch (std::runtime_error&) {
151 overlappingWarning();
152 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Mark that no deep copy is being timed any more.
156 timer_ = Teuchos::null;
// Registers the deep-copy begin/end callbacks from DeepCopyTimerInjection with
// Kokkos Tools. The registration is skipped when
// DeepCopyTimerInjection::initialized_ is already true, and the flag is set
// after registering.
//
// NOTE(review): listing lines 163-164 are missing, so how `force` is used is
// not visible here; presumably a condition involving `force` guards the
// registration -- TODO confirm.
161 void AddKokkosDeepCopyToTimeMonitor(
bool force) {
162 if (!DeepCopyTimerInjection::initialized_) {
165 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
166 Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
167 DeepCopyTimerInjection::initialized_=
true;
// State and Kokkos Tools callbacks that wrap each Kokkos::fence in a Teuchos
// timer.
//
// NOTE(review): the embedded listing numbers have gaps in this namespace (for
// example 179-181, 183, 196-200, 204, 206, 210), so a parameter line, closing
// braces and `try`/`#endif` lines are not visible. Comments describe only what
// is shown.
173 namespace FenceTimerInjection {
// Timer for the fence currently being timed; Teuchos::null when there is none.
174 Teuchos::RCP<Teuchos::Time> timer_;
// Checked and set to true by AddKokkosFenceToTimeMonitor so the callbacks are
// registered only once.
175 bool initialized_ =
false;
// Handle handed out by the most recent kokkosp_begin_fence that got this far;
// kokkosp_end_fence compares its argument against it. Declared at namespace
// scope without an initializer, so it starts at zero.
176 uint64_t active_handle;
// Begin-of-fence callback. Picks the next handle, builds a timer named
// "Kokkos::fence <name> <device label>", stores it in timer_ and increments its
// call count.
// NOTE(review): `handle` is written below but its declaration is on a line not
// shown; presumably a uint64_t* output parameter -- TODO confirm.
178 void kokkosp_begin_fence(
const char* name,
const uint32_t deviceId,
// NOTE(review): the statement controlled by this `if` (listing line 183) is
// missing; presumably an early return so that a fence nested inside a timed
// fence is ignored -- TODO confirm.
182 if(timer_ != Teuchos::null)
// Advance the handle, wrapping at 1024, and report it through *handle.
184 active_handle = (active_handle+1) % 1024;
185 *handle = active_handle;
187 std::string device_label = deviceIdToString(deviceId);
189 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::fence ")+name +
" " + device_label);
// NOTE(review): listing line 190 is missing just above; presumably it starts
// the timer -- TODO confirm.
191 timer_->incrementNumCalls();
// When HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined, also start an
// entry with the same name on the TimeMonitor stacked timer, if one is set.
192 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
193 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
194 if (nonnull(stackedTimer))
195 stackedTimer->start(timer_->name());
// End-of-fence callback. Acts only when `handle` equals active_handle and a
// timer is active; then it stops the matching stacked-timer entry when that
// feature is compiled in.
// NOTE(review): listing line 204 is missing; presumably it stops timer_ itself
// -- TODO confirm.
201 void kokkosp_end_fence(
const uint64_t handle) {
202 if(handle == active_handle) {
203 if (timer_ != Teuchos::null) {
205 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
207 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
208 if (nonnull(stackedTimer))
209 stackedTimer->stop(timer_->name());
// A std::runtime_error from the stop call is handled by printing the
// overlapping-timers warning and removing the stacked timer from the
// TimeMonitor. The matching `try` is on a line not shown in this listing.
211 catch (std::runtime_error&) {
212 overlappingWarning();
213 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Clear the active timer.
// NOTE(review): the braces are not shown, so whether this sits inside the
// handle check cannot be told from this listing.
217 timer_ = Teuchos::null;
// Registers the fence begin/end callbacks from FenceTimerInjection with Kokkos
// Tools. The registration is skipped when FenceTimerInjection::initialized_ is
// already true, and the flag is set after registering.
//
// NOTE(review): listing line 227 is missing, so how `force` is used is not
// visible here; presumably a condition involving `force` guards the
// registration -- TODO confirm.
225 void AddKokkosFenceToTimeMonitor(
bool force) {
226 if (!FenceTimerInjection::initialized_) {
228 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
229 Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
230 FenceTimerInjection::initialized_=
true;
// State and Kokkos Tools callbacks that wrap parallel_for, parallel_reduce and
// parallel_scan launches in Teuchos timers.
//
// NOTE(review): the embedded listing numbers have gaps in this namespace (for
// example 242, 244, 248, 254-257, 272, 274, 278), so closing braces and
// `try`/`#endif` lines are not visible. Comments describe only what is shown.
236 namespace FunctionsTimerInjection {
// Timer for the kernel currently being timed; Teuchos::null when there is none.
237 Teuchos::RCP<Teuchos::Time> timer_;
// Checked and set to true by AddKokkosFunctionsToTimeMonitor so the callbacks
// are registered only once.
238 bool initialized_ =
false;
// Shared begin-of-kernel implementation. Builds a timer named
// "Kokkos::<kernelName> <kernelPrefix> <device label>", stores it in timer_ and
// increments its call count. kernelID is not written in the visible code.
240 void kokkosp_begin_kernel(
const char* kernelName,
const char* kernelPrefix,
const uint32_t devID,
241 uint64_t* kernelID) {
// NOTE(review): the statement controlled by this `if` (listing line 244) is
// missing; presumably an early return so that a kernel launched while another
// is being timed is ignored -- TODO confirm.
243 if(timer_ != Teuchos::null)
245 std::string device_label = deviceIdToString(devID);
247 timer_ = Teuchos::TimeMonitor::getNewTimer(std::string(
"Kokkos::")+ kernelName +
" " +kernelPrefix +
" " + device_label);
// NOTE(review): listing line 248 is missing just above; presumably it starts
// the timer -- TODO confirm.
249 timer_->incrementNumCalls();
// When HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER is defined, also start an
// entry with the same name on the TimeMonitor stacked timer, if one is set.
250 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
251 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
252 if (nonnull(stackedTimer))
253 stackedTimer->start(timer_->name());
// parallel_for begin callback: forwards to kokkosp_begin_kernel with the kernel
// name "parallel_for".
258 void kokkosp_begin_for(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
259 kokkosp_begin_kernel(
"parallel_for",kernelPrefix,devID,kernelID);
// parallel_scan begin callback: forwards to kokkosp_begin_kernel with the
// kernel name "parallel_scan".
262 void kokkosp_begin_scan(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
263 kokkosp_begin_kernel(
"parallel_scan",kernelPrefix,devID,kernelID);
// parallel_reduce begin callback: forwards to kokkosp_begin_kernel with the
// kernel name "parallel_reduce".
266 void kokkosp_begin_reduce(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
267 kokkosp_begin_kernel(
"parallel_reduce",kernelPrefix,devID,kernelID);
// End-of-kernel callback, registered for all three kernel kinds. If a timer is
// active, stops the matching stacked-timer entry (when that feature is compiled
// in) and then clears timer_. `handle` is not read in the visible code.
// NOTE(review): listing line 272 is missing; presumably it stops timer_ itself
// -- TODO confirm.
270 void kokkosp_end_kernel(
const uint64_t handle) {
271 if (timer_ != Teuchos::null) {
273 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
275 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
276 if (nonnull(stackedTimer))
277 stackedTimer->stop(timer_->name());
// A std::runtime_error from the stop call is handled by printing the
// overlapping-timers warning and removing the stacked timer from the
// TimeMonitor. The matching `try` is on a line not shown in this listing.
279 catch (std::runtime_error&) {
280 overlappingWarning();
281 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Mark that no kernel is being timed any more.
286 timer_ = Teuchos::null;
// Registers the parallel_for / parallel_reduce / parallel_scan callbacks from
// FunctionsTimerInjection with Kokkos Tools. Each kernel kind has its own begin
// callback; all three share kokkosp_end_kernel as the end callback. The
// registration is skipped when FunctionsTimerInjection::initialized_ is already
// true, and the flag is set after registering.
//
// NOTE(review): listing line 292 is missing, so how `force` is used is not
// visible here; presumably a condition involving `force` guards the
// registration -- TODO confirm.
290 void AddKokkosFunctionsToTimeMonitor(
bool force) {
291 if (!FunctionsTimerInjection::initialized_) {
293 Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
294 Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
295 Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
298 Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
299 Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
300 Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
301 FunctionsTimerInjection::initialized_=
true;
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
Declarations of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.