Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_KokkosCounter.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 // clang-format off
12 #include "TpetraCore_config.h"
13 #include "Kokkos_Core.hpp"
14 #include "Teuchos_TestForException.hpp"
15 #include <cstring>
16 #include <string>
17 
18 namespace Tpetra {
19 namespace Details {
20 
21 
22  /***************************** Deep Copy *****************************/
23  namespace DeepCopyCounterDetails {
24  // Static variables
25  bool is_initialized=true;
26  size_t count_same=0;
27  size_t count_different=0;
28  bool count_active=false;
29 
30  void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
31  Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
32  uint64_t size) {
33 
34  if(count_active) {
35  if(strcmp(dst_handle.name,src_handle.name))
36  count_different++;
37  else
38  count_same++;
39  }
40  }
41 
42  }// end DeepCopyCounterDetails
43 
44 
46  DeepCopyCounterDetails::count_active=true;
47  Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyCounterDetails::kokkosp_begin_deep_copy);
48  }
49 
51  DeepCopyCounterDetails::count_same=0;
52  DeepCopyCounterDetails::count_different=0;
53  }
54 
56  DeepCopyCounterDetails::count_active=false;
57  }
58 
60  return DeepCopyCounterDetails::count_same;
61  }
62 
64  return DeepCopyCounterDetails::count_different;
65  }
66 
67 
68 
69  /***************************** Fence *****************************/
70 
71 
72  namespace FenceCounterDetails {
73 
74  // Static variables
75  bool is_initialized=false;
76  bool count_active=false;
77  std::vector<size_t> count_instance;
78  std::vector<size_t> count_global;
79  int num_devices=0;
80 
81 
82  void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
83  uint64_t* handle) {
84 
85  if(count_active) {
86  using namespace Kokkos::Tools::Experimental;
87  ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
88 
89  // Figure out what count bin to stick this in
90  int idx = (int) eid.type;
91 #if KOKKOS_VERSION >= 40499
92  if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
93 #else
94  if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
95 #endif
96  count_global[idx]++;
97  else
98  count_instance[idx]++;
99  }
100  }
101 
102 
103  std::string get_label(int i) {
104  using namespace Kokkos::Tools::Experimental;
105  DeviceType i_type = devicetype_from_uint32t(i);
106  std::string device_label;
107  if (i_type == DeviceType::Serial) device_label="Serial";
108  else if (i_type == DeviceType::OpenMP) device_label="OpenMP";
109  else if (i_type == DeviceType::Cuda) device_label="Cuda";
110  else if (i_type == DeviceType::HIP) device_label="HIP";
111  else if (i_type == DeviceType::OpenMPTarget) device_label="OpenMPTarget";
112  else if (i_type == DeviceType::HPX) device_label="HPX";
113  else if (i_type == DeviceType::Threads) device_label="Threats";
114  else if (i_type == DeviceType::SYCL) device_label="SYCL";
115  else if (i_type == DeviceType::OpenACC) device_label="OpenACC";
116  else if (i_type == DeviceType::Unknown) device_label="Unknown";
117 
118  return device_label;
119  }
120 
121  void initialize() {
122  using namespace Kokkos::Tools::Experimental;
123  num_devices = (int) DeviceType::Unknown;
124  count_instance.resize(num_devices);
125  count_instance.assign(num_devices,0);
126  count_global.resize(num_devices);
127  count_global.assign(num_devices,0);
128  is_initialized=true;
129  }
130 
131  }// end FenceCounterDetails
132 
133 
134 
135 
137  if(!FenceCounterDetails::is_initialized)
138  FenceCounterDetails::initialize();
139  FenceCounterDetails::count_active=true;
140  Kokkos::Tools::Experimental::set_begin_fence_callback(FenceCounterDetails::kokkosp_begin_fence);
141  }
142 
144  FenceCounterDetails::count_instance.assign(FenceCounterDetails::num_devices,0);
145  FenceCounterDetails::count_global.assign(FenceCounterDetails::num_devices,0);
146  }
147 
149  FenceCounterDetails::count_active=false;
150  }
151 
152  size_t FenceCounter::get_count_global(const std::string & device) {
153  using namespace Kokkos::Tools::Experimental;
154  for(int i=0;i<FenceCounterDetails::num_devices; i++) {
155  std::string device_label = FenceCounterDetails::get_label(i);
156 
157  if(device == device_label)
158  return FenceCounterDetails::count_global[i];
159  }
160 
161  // Haven't found a device by this name
162  TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
163  }
164 
165 
166  size_t FenceCounter::get_count_instance(const std::string & device) {
167  using namespace Kokkos::Tools::Experimental;
168  for(int i=0;i<FenceCounterDetails::num_devices; i++) {
169  std::string device_label = FenceCounterDetails::get_label(i);
170 
171  if(device == device_label)
172  return FenceCounterDetails::count_instance[i];
173  }
174 
175  // Haven't found a device by this name
176  TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
177  }
178 
179 // clang-format on
180 namespace KokkosRegionCounterDetails {
181 std::vector<std::string> regions;
182 
183 void push_region_callback(const char *label) { regions.push_back(label); }
184 static_assert(std::is_same_v<decltype(&push_region_callback),
185  Kokkos_Profiling_pushFunction>,
186  "Unexpected Kokkos profiling interface API. This is an internal "
187  "Tpetra developer error, please report this.");
188 
189 } // namespace KokkosRegionCounterDetails
190 
192  Kokkos::Tools::Experimental::set_push_region_callback(
193  KokkosRegionCounterDetails::push_region_callback);
194 }
195 
197  KokkosRegionCounterDetails::regions.clear();
198 }
199 
201  Kokkos::Tools::Experimental::set_push_region_callback(nullptr);
202 }
203 
204 size_t
206  size_t count = 0;
207  for (const auto &region : KokkosRegionCounterDetails::regions) {
208  count += (region.find(needle) != std::string::npos);
209  }
210  return count;
211 }
212 
213 void KokkosRegionCounter::dump_regions(Teuchos::FancyOStream &os) {
214  for (const auto &region : KokkosRegionCounterDetails::regions) {
215  os << region << "\n";
216  }
217 }
218 
219 void KokkosRegionCounter::dump_regions(std::ostream &os) {
220  for (const auto &region : KokkosRegionCounterDetails::regions) {
221  os << region << "\n";
222  }
223 }
224 
225 
226 // clang-format off
227 
228 
229 } // namespace Details
230 } // namespace Tpetra
231 
void initialize(int *argc, char ***argv)
Initialize Tpetra.
void reset()
Reset the deep_copy counter.
Declaration of various tools for counting Kokkos calls of various types using the Kokkos Profiling Li...
void dump_regions(std::ostream &os)
Print all observed region labels, separated by newline.
size_t get_count_same_space()
Query the deep_copy counter for copies in the same space.
size_t get_count_global(const std::string &device)
Query the fence counter for given device, for a Kokkos::fence()
size_t get_count_region_contains(const std::string &substr)
How many regions containing substr have been seen.
void start()
Start the fence counter.
size_t get_count_different_space()
Query the deep_copy counter for copies between different spaces.
void reset()
Reset the fence counter.
void start()
Start the deep_copy counter.
size_t get_count_instance(const std::string &device)
Query the fence counter for given device, for an exec_space_instance.fence()
void stop()
Stop the deep_copy counter.