Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_KokkosCounter.cpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
11 #include "TpetraCore_config.h"
12 #include "Kokkos_Core.hpp"
13 #include "Teuchos_TestForException.hpp"
14 #include <cstring>
15 #include <string>
16 
17 namespace Tpetra {
18 namespace Details {
19 
20 /***************************** Deep Copy *****************************/
21 namespace DeepCopyCounterDetails {
22 // Static variables
23 bool is_initialized = true;
24 size_t count_same = 0;
25 size_t count_different = 0;
26 bool count_active = false;
27 
28 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char *dst_name, const void *dst_ptr,
29  Kokkos::Tools::SpaceHandle src_handle, const char *src_name, const void *src_ptr,
30  uint64_t size) {
31  if (count_active) {
32  if (strcmp(dst_handle.name, src_handle.name))
33  count_different++;
34  else
35  count_same++;
36  }
37 }
38 
39 } // namespace DeepCopyCounterDetails
40 
42  DeepCopyCounterDetails::count_active = true;
43  Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyCounterDetails::kokkosp_begin_deep_copy);
44 }
45 
47  DeepCopyCounterDetails::count_same = 0;
48  DeepCopyCounterDetails::count_different = 0;
49 }
50 
52  DeepCopyCounterDetails::count_active = false;
53 }
54 
56  return DeepCopyCounterDetails::count_same;
57 }
58 
60  return DeepCopyCounterDetails::count_different;
61 }
62 
63 /***************************** Fence *****************************/
64 
65 namespace FenceCounterDetails {
66 
67 // Static variables
// Set once initialize() has sized the counter vectors.
bool is_initialized{false};
// The fence callback only counts while this is true.
bool count_active{false};
// Per-device-type fence counts, indexed by the integer value of the
// Kokkos device type: one vector for fences on an execution-space instance,
// one for global device synchronizations.
std::vector<size_t> count_instance{};
std::vector<size_t> count_global{};
// Number of slots in each of the two vectors above (set by initialize()).
int num_devices{0};
73 
74 void kokkosp_begin_fence(const char *name, const uint32_t deviceId,
75  uint64_t *handle) {
76  if (count_active) {
77  using namespace Kokkos::Tools::Experimental;
78  ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
79 
80  // Figure out what count bin to stick this in
81  int idx = (int)eid.type;
82  if (eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
83  count_global[idx]++;
84  else
85  count_instance[idx]++;
86  }
87 }
88 
89 std::string get_label(int i) {
90  using namespace Kokkos::Tools::Experimental;
91  DeviceType i_type = devicetype_from_uint32t(i);
92  std::string device_label;
93  if (i_type == DeviceType::Serial)
94  device_label = "Serial";
95  else if (i_type == DeviceType::OpenMP)
96  device_label = "OpenMP";
97  else if (i_type == DeviceType::Cuda)
98  device_label = "Cuda";
99  else if (i_type == DeviceType::HIP)
100  device_label = "HIP";
101  else if (i_type == DeviceType::OpenMPTarget)
102  device_label = "OpenMPTarget";
103  else if (i_type == DeviceType::HPX)
104  device_label = "HPX";
105  else if (i_type == DeviceType::Threads)
106  device_label = "Threats";
107  else if (i_type == DeviceType::SYCL)
108  device_label = "SYCL";
109  else if (i_type == DeviceType::OpenACC)
110  device_label = "OpenACC";
111  else if (i_type == DeviceType::Unknown)
112  device_label = "Unknown";
113 
114  return device_label;
115 }
116 
117 void initialize() {
118  using namespace Kokkos::Tools::Experimental;
119  num_devices = (int)DeviceType::Unknown;
120  count_instance.resize(num_devices);
121  count_instance.assign(num_devices, 0);
122  count_global.resize(num_devices);
123  count_global.assign(num_devices, 0);
124  is_initialized = true;
125 }
126 
127 } // namespace FenceCounterDetails
128 
130  if (!FenceCounterDetails::is_initialized)
131  FenceCounterDetails::initialize();
132  FenceCounterDetails::count_active = true;
133  Kokkos::Tools::Experimental::set_begin_fence_callback(FenceCounterDetails::kokkosp_begin_fence);
134 }
135 
137  FenceCounterDetails::count_instance.assign(FenceCounterDetails::num_devices, 0);
138  FenceCounterDetails::count_global.assign(FenceCounterDetails::num_devices, 0);
139 }
140 
142  FenceCounterDetails::count_active = false;
143 }
144 
145 size_t FenceCounter::get_count_global(const std::string &device) {
146  using namespace Kokkos::Tools::Experimental;
147  for (int i = 0; i < FenceCounterDetails::num_devices; i++) {
148  std::string device_label = FenceCounterDetails::get_label(i);
149 
150  if (device == device_label)
151  return FenceCounterDetails::count_global[i];
152  }
153 
154  // Haven't found a device by this name
155  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
156 }
157 
158 size_t FenceCounter::get_count_instance(const std::string &device) {
159  using namespace Kokkos::Tools::Experimental;
160  for (int i = 0; i < FenceCounterDetails::num_devices; i++) {
161  std::string device_label = FenceCounterDetails::get_label(i);
162 
163  if (device == device_label)
164  return FenceCounterDetails::count_instance[i];
165  }
166 
167  // Haven't found a device by this name
168  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
169 }
170 
171 namespace KokkosRegionCounterDetails {
172 std::vector<std::string> regions;
173 
174 void push_region_callback(const char *label) { regions.push_back(label); }
175 static_assert(std::is_same_v<decltype(&push_region_callback),
176  Kokkos_Profiling_pushFunction>,
177  "Unexpected Kokkos profiling interface API. This is an internal "
178  "Tpetra developer error, please report this.");
179 
180 } // namespace KokkosRegionCounterDetails
181 
183  Kokkos::Tools::Experimental::set_push_region_callback(
184  KokkosRegionCounterDetails::push_region_callback);
185 }
186 
188  KokkosRegionCounterDetails::regions.clear();
189 }
190 
192  Kokkos::Tools::Experimental::set_push_region_callback(nullptr);
193 }
194 
195 size_t
197  size_t count = 0;
198  for (const auto &region : KokkosRegionCounterDetails::regions) {
199  count += (region.find(needle) != std::string::npos);
200  }
201  return count;
202 }
203 
204 void KokkosRegionCounter::dump_regions(Teuchos::FancyOStream &os) {
205  for (const auto &region : KokkosRegionCounterDetails::regions) {
206  os << region << "\n";
207  }
208 }
209 
210 void KokkosRegionCounter::dump_regions(std::ostream &os) {
211  for (const auto &region : KokkosRegionCounterDetails::regions) {
212  os << region << "\n";
213  }
214 }
215 
216 } // namespace Details
217 } // namespace Tpetra
void initialize(int *argc, char ***argv)
Initialize Tpetra.
void reset()
Reset the deep_copy counter.
Declaration of various tools for counting Kokkos calls of various types using the Kokkos Profiling Li...
void dump_regions(std::ostream &os)
Print all observed region labels, separated by newline.
size_t get_count_same_space()
Query the deep_copy counter for copies in the same space.
size_t get_count_global(const std::string &device)
Query the fence counter for given device, for a Kokkos::fence()
size_t get_count_region_contains(const std::string &substr)
How many regions containing substr have been seen.
void start()
Start the fence counter.
size_t get_count_different_space()
Query the deep_copy counter for copies between different spaces.
void reset()
Reset the fence counter.
void start()
Start the deep_copy counter.
size_t get_count_instance(const std::string &device)
Query the fence counter for given device, for an exec_space_instance.fence()
void stop()
Stop the deep_copy counter.