Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1 #ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
2 #define TPETRA_DETAILS_EXECUTIONSPACES_HPP
3 
4 #include <iostream>
5 #include <sstream>
6 #include <vector>
7 
8 #include <Kokkos_Core.hpp>
9 
10 #include <Teuchos_RCP.hpp>
11 
14 
31 #define TPETRA_DETAILS_SPACES_THROW(x) \
32  { \
33  std::stringstream ss; \
34  ss << __FILE__ << ":" << __LINE__ << ": " << x; \
35  throw std::runtime_error(ss.str()); \
36  }
37 
38 namespace Tpetra {
39 namespace Details {
40 namespace Spaces {
41 
47 enum class Priority {
48  low = 0,
49  medium = 1,
50  high = 2,
51  NUM_LEVELS = 3 // not to be used as a priority
52 };
53 
54 #if defined(KOKKOS_ENABLE_CUDA)
55 inline void success_or_throw(cudaError_t err, const char *file,
56  const int line) {
57  if (err != cudaSuccess) {
58  std::stringstream ss;
59  ss << file << ":" << line << ": ";
60  ss << cudaGetErrorString(err);
61  throw std::runtime_error(ss.str());
62  }
63 }
64 #define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
65  Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
66 #endif // KOKKOS_ENABLE_CUDA
67 
74 void lazy_init();
75 
76 #if defined(KOKKOS_ENABLE_CUDA)
77 struct CudaInfo {
78  bool initialized_;
79  int lowPrio_;
80  int mediumPrio_; // same as CUDA default
81  int highPrio_;
82  cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
83 
84  CudaInfo();
85  ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
86  CudaInfo(const CudaInfo &other) = delete;
87  CudaInfo(CudaInfo &&other) = delete;
88 };
89 extern CudaInfo cudaInfo;
90 #endif // KOKKOS_ENABLE_CUDA
91 
92 // Tpetra's managed spaces
93 #if defined(KOKKOS_ENABLE_CUDA)
94 template <typename Space>
95 using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
96 template <typename Space>
97 using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
98 template <typename S1, typename S2>
99 using BothCuda = std::enable_if_t<
100  std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
101 template <typename S1, typename S2>
102 using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
103  !std::is_same_v<S2, Kokkos::Cuda>,
104  bool>;
105 #endif // KOKKOS_ENABLE_CUDA
106 
107 #if defined(KOKKOS_ENABLE_SERIAL)
108 template <typename Space>
110 using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
111 #endif // KOKKOS_ENABLE_SERIAL
112 
113 #if defined(KOKKOS_ENABLE_OPENMP)
114 template <typename Space>
116 using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
117 #endif // KOKKOS_ENABLE_OPENMP
118 
119 #if defined(KOKKOS_ENABLE_HIP)
120 template <typename Space>
122 using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
123 #endif // KOKKOS_ENABLE_HIP
124 
125 #if defined(KOKKOS_ENABLE_SYCL)
126 template <typename Space>
128 using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
129 #endif // KOKKOS_ENABLE_SYCL
130 
136 template <typename ExecSpace, Priority priority = Priority::medium
137 #if defined(KOKKOS_ENABLE_CUDA)
138  ,
139  NotCuda<ExecSpace> = true
140 #endif // KOKKOS_ENABLE_CUDA
141  >
142 ExecSpace make_instance() {
143  return ExecSpace();
144 }
145 
152 #if defined(KOKKOS_ENABLE_CUDA)
153 template <typename ExecSpace, Priority priority = Priority::medium,
154  IsCuda<ExecSpace> = true>
155 Kokkos::Cuda make_instance() {
156  lazy_init(); // CUDA priorities
157  cudaStream_t stream;
158  int prio;
159  switch (priority) {
160  case Priority::high:
161  prio = cudaInfo.highPrio_;
162  break;
163  case Priority::medium:
164  prio = cudaInfo.mediumPrio_;
165  break;
166  case Priority::low:
167  prio = cudaInfo.lowPrio_;
168  break;
169  default:
170  throw std::runtime_error("unexpected static Tpetra Space priority");
171  }
172  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
173  cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, prio));
174  return Kokkos::Cuda(stream, true /*Kokkos will manage this stream*/);
175 }
176 #endif // KOKKOS_ENABLE_CUDA
177 
183 template <typename ExecSpace> ExecSpace make_instance(const Priority &prio) {
184  switch (prio) {
185  case Priority::high:
186  return make_instance<ExecSpace, Priority::high>();
187  case Priority::medium:
188  return make_instance<ExecSpace, Priority::medium>();
189  case Priority::low:
190  return make_instance<ExecSpace, Priority::low>();
191  default:
192  throw std::runtime_error("unexpected dynamic Tpetra Space priority");
193  }
194 }
195 
207 template <typename ExecSpace> class InstanceLifetimeManager {
208 public:
209  using execution_space = ExecSpace;
210  using rcp_type = Teuchos::RCP<const execution_space>;
211 
218  template <Priority priority = Priority::medium>
219  rcp_type space_instance(int i = 0) {
221  "Tpetra::Details::Spaces::space_instance");
222 
223  constexpr int p = static_cast<int>(priority);
224  static_assert(p < sizeof(instances) / sizeof(instances[0]),
225  "Spaces::Priority enum error");
226 
227  if (i < 0) {
228  TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
229  }
231  TPETRA_DETAILS_SPACES_THROW(
232  "requested instance id "
234  << ") set by TPETRA_SPACES_ID_WARN_LIMIT");
235  }
236 
237  // make sure we can store an exec space at index i for priority
238  // not sure what happens in RCP(), so let's explicitly make it null
239  while (size_t(i) >= instances[p].size()) {
240  instances[p].push_back(Teuchos::ENull());
241  }
242 
243  /* no exec space instance i of priority p exists.
244  It may have never existed, or all Tpetra objects referencing it have been
245  destructed.
246 
247  Create a new RCP<ExecSpace> and internally store a weak
248  reference, so this space will be destructed when all strong references to
249  it are gone, but we can still refer to it as long as it lives to prevent
250  recreating
251  */
252  if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
253 
254  // create a strong RCP to a space
255  rcp_type r = Teuchos::RCP<const execution_space>(
256  new ExecSpace(make_instance<ExecSpace, priority>()));
257 
258  // store a weak RCP to the space
259  instances[p][i] = r.create_weak();
260 
261  return r; // allow strong rcp to escape so internal weak one does not
262  // immediately go away
263  }
264 
265  auto r = instances[p][i].create_strong();
266  return r;
267  }
268 
273  for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
274  for (const rcp_type &rcp : instances[i]) {
275  if (rcp.is_valid_ptr() && !rcp.is_null()) {
276  // avoid throwing in dtor
277  std::cerr << __FILE__ << ":" << __LINE__
278  << " execution space instance survived to "
279  "~InstanceLifetimeManager. strong_count() = "
280  << rcp.strong_count()
281  << ". Did a Tpetra object live past Kokkos::finalize()?"
282  << std::endl;
283  }
284  }
285  }
286  }
287 
288 private:
289  // one vector of instances for each priority level
290  std::vector<rcp_type>
291  instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
292 };
293 
294 #if defined(KOKKOS_ENABLE_CUDA)
295 extern InstanceLifetimeManager<Kokkos::Cuda> cudaSpaces;
296 #endif
297 #if defined(KOKKOS_ENABLE_SERIAL)
298 extern InstanceLifetimeManager<Kokkos::Serial> serialSpaces;
299 #endif
300 #if defined(KOKKOS_ENABLE_OPENMP)
301 extern InstanceLifetimeManager<Kokkos::OpenMP> openMPSpaces;
302 #endif
303 #if defined(KOKKOS_ENABLE_HIP)
304 extern InstanceLifetimeManager<Kokkos::HIP> HIPSpaces;
305 #endif
306 #if defined(KOKKOS_ENABLE_SYCL)
307 extern InstanceLifetimeManager<Kokkos::Experimental::SYCL> SYCLSpaces;
308 #endif
309 
310 #if defined(KOKKOS_ENABLE_CUDA)
311 
315 template <typename ExecSpace, Priority priority = Priority::medium,
316  IsCuda<ExecSpace> = true>
317 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
318  return cudaSpaces.space_instance<priority>(i);
319 }
320 #endif
321 
322 #if defined(KOKKOS_ENABLE_SERIAL)
323 
326 template <typename ExecSpace, Priority priority = Priority::medium,
327  IsSerial<ExecSpace> = true>
328 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
329  return serialSpaces.space_instance<priority>(i);
330 }
331 #endif
332 
333 #if defined(KOKKOS_ENABLE_OPENMP)
334 
337 template <typename ExecSpace, Priority priority = Priority::medium,
338  IsOpenMP<ExecSpace> = true>
339 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
340  return openMPSpaces.space_instance<priority>(i);
341 }
342 #endif
343 
344 #if defined(KOKKOS_ENABLE_HIP)
345 
347 template <typename ExecSpace, Priority priority = Priority::medium,
348  IsHIP<ExecSpace> = true>
349 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
350  return HIPSpaces.space_instance<priority>(i);
351 }
352 #endif
353 #if defined(KOKKOS_ENABLE_SYCL)
354 
357 template <typename ExecSpace, Priority priority = Priority::medium,
358  IsSYCL<ExecSpace> = true>
359 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
360  return SYCLSpaces.space_instance<priority>(i);
361 }
362 #endif
363 
369 template <typename ExecSpace>
370 Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
371  int i = 0) {
372  switch (priority) {
373  case Priority::high:
374  return space_instance<ExecSpace, Priority::high>(i);
375  case Priority::medium:
376  return space_instance<ExecSpace, Priority::medium>(i);
377  case Priority::low:
378  return space_instance<ExecSpace, Priority::low>(i);
379  default:
380  throw std::runtime_error(
381  "unexpected dynamic Tpetra Space priority in space_instance");
382  }
383 }
384 
398 template <typename S1, typename S2
399 #if defined(KOKKOS_ENABLE_CUDA)
400  ,
401  NotBothCuda<S1, S2> = true
402 #endif
403  >
404 void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
406  "Tpetra::Details::Spaces::exec_space_wait");
407  lazy_init();
408  waitee.fence(msg);
409 }
410 
411 #if defined(KOKKOS_ENABLE_CUDA)
412 template <typename S1, typename S2, BothCuda<S1, S2> = true>
413 void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
415  "Tpetra::Details::Spaces::exec_space_wait");
416  lazy_init();
417 
418  // if they are the same instance, no sync needed
419  if (waitee.impl_instance_id() !=
420  waiter
421  .impl_instance_id()) { // TODO: use instance operator== once available
422  /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
423  even if it overwrites the state of a shared event this means we only need
424  one event even if many exec_space_waits are in flight at the same time
425  */
426  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
427  cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
428  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(cudaStreamWaitEvent(
429  waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
430  }
431 }
432 #endif
433 
434 template <typename S1, typename S2>
435 void exec_space_wait(const S1 &waitee, const S2 &waiter) {
437  "Tpetra::Details::Spaces::exec_space_wait");
438  lazy_init();
439  exec_space_wait("anonymous", waitee, waiter);
440 }
441 
442 template <typename ExecutionSpace>
443 constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
444  return false;
445 }
446 
447 #if defined(KOKKOS_ENABLE_CUDA)
448 template <>
449 constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
450  return true;
451 }
452 #endif
453 
454 #if defined(KOKKOS_ENABLE_HIP)
455 template <>
456 constexpr KOKKOS_INLINE_FUNCTION bool
457 is_gpu_exec_space<Kokkos::HIP>() {
458  return true;
459 }
460 #endif
461 
462 #if defined(KOKKOS_ENABLE_SYCL)
463 template <>
464 constexpr KOKKOS_INLINE_FUNCTION bool
465 is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
466  return true;
467 }
468 #endif
469 
470 } // namespace Spaces
471 } // namespace Details
472 } // namespace Tpetra
473 
474 #undef TPETRA_DETAILS_SPACES_THROW
475 
476 #endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP&lt;const ExecSpace&gt; to instance i
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
Provides reusable Kokkos execution space instances.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.