11 #include "TpetraCore_config.h"
13 #include "Kokkos_Core.hpp"
14 #include "Teuchos_TimeMonitor.hpp"
15 #include "Teuchos_Time.hpp"
16 #include "Teuchos_RCP.hpp"
17 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
18 #include "Teuchos_StackedTimer.hpp"
// Builds a human-readable, parenthesised label for a Kokkos Tools device id.
// The label starts with "(", then the execution-space type name selected by
// eid.type (or "Unknown to Tpetra" when no branch matches), then an instance
// suffix that also supplies the closing ")".
// NOTE(review): this listing has dropped lines. The #else/#endif that pair with
// the KOKKOS_VERSION check, the `else` that presumably guards the final
// " Instance N)" append, and the return statement are not visible here --
// confirm against the real file before editing the logic.
30 std::string deviceIdToString(
const uint32_t deviceId) {
31 using namespace Kokkos::Tools::Experimental;
32 std::string device_label(
"(");
// Decode the id into an ExecutionSpaceIdentifier; its `type` and `instance_id`
// fields are the only parts read below.
33 ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
// Map the execution-space type to its printable name.
34 if (eid.type == DeviceType::Serial) device_label+=
"Serial";
35 else if (eid.type == DeviceType::OpenMP) device_label+=
"OpenMP";
36 else if (eid.type == DeviceType::Cuda) device_label+=
"Cuda";
37 else if (eid.type == DeviceType::HIP) device_label+=
"HIP";
38 else if (eid.type == DeviceType::OpenMPTarget) device_label+=
"OpenMPTarget";
39 else if (eid.type == DeviceType::HPX) device_label+=
"HPX";
40 else if (eid.type == DeviceType::Threads) device_label+=
"Threads";
41 else if (eid.type == DeviceType::SYCL) device_label+=
"SYCL";
42 else if (eid.type == DeviceType::OpenACC) device_label+=
"OpenACC";
43 else if (eid.type == DeviceType::Unknown) device_label+=
"Unknown";
// Fallback for any DeviceType value not listed above.
44 else device_label+=
"Unknown to Tpetra";
// When KOKKOS_VERSION >= 40499 the helper int_for_synchronization_reason is
// called unqualified; the second pair of checks further down uses the
// Impl::-qualified spelling (presumably the #else branch -- the directive
// itself is not visible in this listing).
45 #if KOKKOS_VERSION >= 40499
// Instance ids equal to the special synchronization values get a descriptive
// suffix instead of a number.
46 if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
47 device_label +=
" All Instances)";
48 else if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
49 device_label +=
" DeepCopyResource)";
// Same two checks, Impl::-qualified spelling of the helper.
51 if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
52 device_label +=
" All Instances)";
53 else if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::DeepCopyResourceSynchronization))
54 device_label +=
" DeepCopyResource)";
// Numeric instance suffix, e.g. " Instance 3)".
57 device_label +=
" Instance " + std::to_string(eid.instance_id) +
")";
// Prints a banner to std::cout explaining that a TimeMonitor timer was stopped
// before one of its nested sub-timers, which the StackedTimer does not allow.
// Takes no arguments and returns nothing; the only effect is the console output.
void overlappingWarning() {
  // Adjacent string literals are concatenated by the compiler into one message.
  static constexpr const char* kOverlapMessage =
      "\n*********************************************************************\n"
      "WARNING: Overlapping timers detected!\n"
      "A TimeMonitor timer was stopped before a nested subtimer was\n"
      "stopped. This is not allowed by the StackedTimer. This corner case\n"
      "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
      "assigned to a new timer. To disable this warning, either fix the\n"
      "ordering of timer creation and destruction or disable the StackedTimer\n";
  // std::endl is kept on purpose: it appends the trailing newline and flushes,
  // so the warning is visible immediately.
  std::cout << kOverlapMessage << std::endl;
}
// State shared by the Kokkos deep_copy begin/end callbacks defined in this namespace.
// NOTE(review): the callbacks also use a variable `label_` whose declaration is
// not visible in this listing (an original line appears to be missing here).
81 namespace DeepCopyTimerInjection {
// Timer obtained by the begin callback; compared against Teuchos::null to detect
// an unfinished deep_copy and reset to Teuchos::null by the end callback.
82 Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosDeepCopyToTimeMonitor once the callbacks are registered.
84 bool initialized_ =
false;
// Kokkos Tools begin-deep-copy callback.
// Builds label_ from the source/destination space handle names (plus an optional
// verbose suffix), optionally starts a StackedTimer entry with that label, then
// obtains a Teuchos timer of the same name and increments its call count.
//   dst_handle / src_handle : space handles; only their `.name` is read.
//   dst_name / src_name     : C strings used in the verbose suffix; src_name also
//                             selects the "scalar" label.
//   dst_ptr / src_ptr       : not used in the visible lines.
// NOTE(review): this listing is missing lines. The end of the parameter list
// (where `size`, used below, is presumably declared), the conditions selecting
// the verbose suffix and the small/regular label, and the closing braces/#endif
// are not visible -- confirm against the real file.
86 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle,
const char* dst_name,
const void* dst_ptr,
87 Kokkos::Tools::SpaceHandle src_handle,
const char* src_name,
const void* src_ptr,
// Optional suffix appended to every label built below; stays empty unless one
// of the two assignments runs.
90 std::string extra_label;
// Verbose variant 1: " {src=>dst}". (Guarding condition not visible here.)
92 extra_label = std::string(
" {") + src_name +
"=>" + dst_name +
"}";
// Verbose variant 2: " {src=>dst,size}". (Guarding condition not visible here.)
94 extra_label = std::string(
" {") + src_name +
"=>" + dst_name +
"," + std::to_string(size)+
"}";
// A non-null timer_ means the previous begin was never matched by an end callback.
97 if(timer_ != Teuchos::null)
98 std::cout <<
"WARNING: Kokkos::deep_copy() started within another Kokkos::deep_copy(). Timers will be in error"<<std::endl;
// Copies whose source is named "Scalar" or "(none)" get their own label.
101 if(!strcmp(src_name,
"Scalar") || !strcmp(src_name,
"(none)"))
102 label_ = std::string(
"Kokkos::deep_copy_scalar [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label;
// "Small" copy label. NOTE(review): the condition defining "small" is elided.
105 label_ = std::string(
"Kokkos::deep_copy_small [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label;
// Label for all remaining copies.
107 label_ = std::string(
"Kokkos::deep_copy [")+src_handle.name+
"=>"+dst_handle.name+
"]" + extra_label;
// Only when TimeMonitor is built to feed the StackedTimer: start an entry named
// label_, provided the stacked timer name is not the default one.
109 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
110 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
111 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
112 stackedTimer->start(label_);
// Obtain the flat TimeMonitor timer for this label and count the call.
116 timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
118 timer_->incrementNumCalls();
// Kokkos Tools end-deep-copy callback.
// Stops the StackedTimer entry named label_ (when that path is compiled in and
// the stacked timer name is not the default) and resets timer_ to Teuchos::null.
// NOTE(review): the `try {` matching the catch below and the statements inside
// the final `if` (before timer_ is cleared) are not visible in this listing.
122 void kokkosp_end_deep_copy() {
123 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
124 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
126 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
127 stackedTimer->stop(label_);
// On std::runtime_error (presumably raised by stop() for overlapping timers):
// print the overlap warning and detach the stacked timer from TimeMonitor.
130 catch (std::runtime_error&) {
131 overlappingWarning();
132 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Release the flat timer so the next begin callback sees no timer in flight.
137 if (timer_ != Teuchos::null) {
140 timer_ = Teuchos::null;
// Registers the deep_copy begin/end callbacks with Kokkos Tools, at most once
// (guarded by DeepCopyTimerInjection::initialized_).
// NOTE(review): `force` is not used in any visible line; original lines between
// the guard and the registration are missing from this listing and presumably
// hold the check that consults it -- confirm against the real file.
146 void AddKokkosDeepCopyToTimeMonitor(
bool force) {
147 if (!DeepCopyTimerInjection::initialized_) {
150 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyTimerInjection::kokkosp_begin_deep_copy);
151 Kokkos::Tools::Experimental::set_end_deep_copy_callback(DeepCopyTimerInjection::kokkosp_end_deep_copy);
// Remember that registration happened so later calls skip it.
152 DeepCopyTimerInjection::initialized_=
true;
// State shared by the Kokkos fence begin/end callbacks defined in this namespace.
158 namespace FenceTimerInjection {
// Timer obtained by the begin callback; reset to Teuchos::null by the end callback.
159 Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosFenceToTimeMonitor once the callbacks are registered.
160 bool initialized_ =
false;
// Handle value issued by the most recent begin callback; the end callback only
// acts when it is passed this same value.
161 uint64_t active_handle;
// Timer label built by the begin callback and reused by the end callback.
162 std::string label_ =
"";
// Kokkos Tools begin-fence callback.
// Issues a new handle, builds label_ as "Kokkos::fence <name> <device label>",
// optionally starts a StackedTimer entry with that label, then obtains a Teuchos
// timer of the same name and increments its call count.
//   name     : fence name supplied by Kokkos, copied into the label.
//   deviceId : device id, rendered by deviceIdToString().
// NOTE(review): the rest of the parameter list (where `handle`, written below,
// is presumably declared) and the original lines before the handle update are
// missing from this listing -- confirm against the real file.
164 void kokkosp_begin_fence(
const char* name,
const uint32_t deviceId,
// Advance the handle, wrapping at 1024, and report it through *handle.
171 active_handle = (active_handle+1) % 1024;
172 *handle = active_handle;
174 std::string device_label = deviceIdToString(deviceId);
176 label_ = std::string(
"Kokkos::fence ")+name +
" " + device_label;
// Only when TimeMonitor is built to feed the StackedTimer: start an entry named
// label_, provided the stacked timer name is not the default one.
178 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
179 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
180 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
181 stackedTimer->start(label_);
// Obtain the flat TimeMonitor timer for this label and count the call.
185 timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
187 timer_->incrementNumCalls();
// Kokkos Tools end-fence callback.
// Acts only when `handle` equals the handle issued by the latest begin callback:
// stops the StackedTimer entry named label_ (when that path is compiled in and
// the stacked timer name is not the default) and resets timer_ to Teuchos::null.
// NOTE(review): the `try {` matching the catch below and the statements inside
// the final `if` (before timer_ is cleared) are not visible in this listing.
192 void kokkosp_end_fence(
const uint64_t handle) {
// Ignore end events that do not belong to the fence currently being timed.
193 if(handle == active_handle) {
194 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
195 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
197 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
198 stackedTimer->stop(label_);
// On std::runtime_error (presumably raised by stop() for overlapping timers):
// print the overlap warning and detach the stacked timer from TimeMonitor.
202 catch (std::runtime_error&) {
203 overlappingWarning();
204 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Release the flat timer.
209 if (timer_ != Teuchos::null) {
212 timer_ = Teuchos::null;
// Registers the fence begin/end callbacks with Kokkos Tools, at most once
// (guarded by FenceTimerInjection::initialized_).
// NOTE(review): `force` is not used in any visible line; an original line between
// the guard and the registration is missing from this listing and presumably
// holds the check that consults it -- confirm against the real file.
221 void AddKokkosFenceToTimeMonitor(
bool force) {
222 if (!FenceTimerInjection::initialized_) {
224 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceTimerInjection::kokkosp_begin_fence);
225 Kokkos::Tools::Experimental::set_end_fence_callback(FenceTimerInjection::kokkosp_end_fence);
// Remember that registration happened so later calls skip it.
226 FenceTimerInjection::initialized_=
true;
// State shared by the Kokkos parallel_for/reduce/scan begin/end callbacks
// defined in this namespace.
// NOTE(review): the callbacks also use a variable `label_` whose declaration is
// not visible in this listing (original lines appear to be missing here).
232 namespace FunctionsTimerInjection {
// Timer obtained by the kernel-begin callback; reset to Teuchos::null by the
// kernel-end callback.
233 Teuchos::RCP<Teuchos::Time> timer_;
// Set to true by AddKokkosFunctionsToTimeMonitor once the callbacks are registered.
234 bool initialized_ =
false;
// Shared begin handler for the parallel_for/reduce/scan callbacks.
// Builds label_ as "Kokkos::<kernelName> <kernelPrefix> <device label>",
// optionally starts a StackedTimer entry with that label, then obtains a Teuchos
// timer of the same name and increments its call count.
//   kernelName   : kernel kind; the wrappers in this file pass "parallel_for",
//                  "parallel_scan" or "parallel_reduce".
//   kernelPrefix : string forwarded from the Kokkos callback.
//   devID        : device id, rendered by deviceIdToString().
//   kernelID     : not used in the visible lines.
237 void kokkosp_begin_kernel(
const char* kernelName,
const char* kernelPrefix,
const uint32_t devID,
238 uint64_t* kernelID) {
// A non-null timer_ means a kernel is already being timed.
// NOTE(review): the statement this check guards is missing from the listing
// (presumably an early return) -- confirm against the real file.
240 if(timer_ != Teuchos::null)
242 std::string device_label = deviceIdToString(devID);
244 label_ = std::string(
"Kokkos::")+ kernelName +
" " +kernelPrefix +
" " + device_label;
// Only when TimeMonitor is built to feed the StackedTimer: start an entry named
// label_, provided the stacked timer name is not the default one.
246 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
247 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
248 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
249 stackedTimer->start(label_);
// Obtain the flat TimeMonitor timer for this label and count the call.
253 timer_ = Teuchos::TimeMonitor::getNewTimer(label_);
255 timer_->incrementNumCalls();
259 void kokkosp_begin_for(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
260 kokkosp_begin_kernel(
"parallel_for",kernelPrefix,devID,kernelID);
263 void kokkosp_begin_scan(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
264 kokkosp_begin_kernel(
"parallel_scan",kernelPrefix,devID,kernelID);
267 void kokkosp_begin_reduce(
const char* kernelPrefix,
const uint32_t devID, uint64_t* kernelID) {
268 kokkosp_begin_kernel(
"parallel_reduce",kernelPrefix,devID,kernelID);
// Shared end callback for parallel_for/reduce/scan.
// Stops the StackedTimer entry named label_ (when that path is compiled in and
// the stacked timer name is not the default) and resets timer_ to Teuchos::null.
//   handle : not used in the visible lines.
// NOTE(review): the `try {` matching the catch below and the statements inside
// the final `if` (before timer_ is cleared) are not visible in this listing.
271 void kokkosp_end_kernel(
const uint64_t handle) {
272 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
273 if (!Teuchos::TimeMonitor::stackedTimerNameIsDefault()) {
275 const auto stackedTimer = Teuchos::TimeMonitor::getStackedTimer();
276 stackedTimer->stop(label_);
// On std::runtime_error (presumably raised by stop() for overlapping timers):
// print the overlap warning and detach the stacked timer from TimeMonitor.
279 catch (std::runtime_error&) {
280 overlappingWarning();
281 Teuchos::TimeMonitor::setStackedTimer(Teuchos::null);
// Release the flat timer so the next kernel-begin callback sees none in flight.
286 if (timer_ != Teuchos::null) {
289 timer_ = Teuchos::null;
// Registers the parallel_for/reduce/scan begin and end callbacks with Kokkos
// Tools, at most once (guarded by FunctionsTimerInjection::initialized_).
// NOTE(review): `force` is not used in any visible line; an original line between
// the guard and the registration is missing from this listing and presumably
// holds the check that consults it -- confirm against the real file.
294 void AddKokkosFunctionsToTimeMonitor(
bool force) {
295 if (!FunctionsTimerInjection::initialized_) {
// Each kernel kind has its own begin wrapper, which fixes the kernel-kind string.
297 Kokkos::Tools::Experimental::set_begin_parallel_for_callback(FunctionsTimerInjection::kokkosp_begin_for);
298 Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_begin_reduce);
299 Kokkos::Tools::Experimental::set_begin_parallel_scan_callback(FunctionsTimerInjection::kokkosp_begin_scan);
// All three kernel kinds share one end callback.
302 Kokkos::Tools::Experimental::set_end_parallel_for_callback(FunctionsTimerInjection::kokkosp_end_kernel);
303 Kokkos::Tools::Experimental::set_end_parallel_reduce_callback(FunctionsTimerInjection::kokkosp_end_kernel);
304 Kokkos::Tools::Experimental::set_end_parallel_scan_callback(FunctionsTimerInjection::kokkosp_end_kernel);
// Remember that registration happened so later calls skip it.
305 FunctionsTimerInjection::initialized_=
true;
static bool timeKokkosFunctions()
Add Teuchos timers for all host calls to Kokkos::parallel_for(), Kokkos::parallel_reduce() and Kokkos...
Declaration of functions that use Kokkos' profiling library to add deep copies between memory spaces...
static bool timeKokkosDeepCopyVerbose2()
Adds verbose output to Kokkos deep_copy timers by appending source, destination, and size...
static bool timeKokkosDeepCopyVerbose1()
Adds verbose output to Kokkos deep_copy timers by appending source and destination. This is especially useful for identifying host/device data transfers.
static bool timeKokkosFence()
Add Teuchos timers for all host calls to Kokkos::fence().
static bool timeKokkosDeepCopy()
Add Teuchos timers for all host calls to Kokkos::deep_copy(). This is especially useful for identifyi...
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.