Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
11 #define TPETRA_DETAILS_EXECUTIONSPACES_HPP
12 
13 #include <iostream>
14 #include <sstream>
15 #include <vector>
16 
17 #include <Kokkos_Core.hpp>
18 
19 #include <Teuchos_RCP.hpp>
20 
23 
40 #define TPETRA_DETAILS_SPACES_THROW(x) \
41  { \
42  std::stringstream ss; \
43  ss << __FILE__ << ":" << __LINE__ << ": " << x; \
44  throw std::runtime_error(ss.str()); \
45  }
46 
47 namespace Tpetra {
48 namespace Details {
49 namespace Spaces {
50 
56 enum class Priority {
57  low = 0,
58  medium = 1,
59  high = 2,
60  NUM_LEVELS = 3 // not to be used as a priority
61 };
62 
63 #if defined(KOKKOS_ENABLE_CUDA)
64 inline void success_or_throw(cudaError_t err, const char *file,
65  const int line) {
66  if (err != cudaSuccess) {
67  std::stringstream ss;
68  ss << file << ":" << line << ": ";
69  ss << cudaGetErrorString(err);
70  throw std::runtime_error(ss.str());
71  }
72 }
73 #define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
74  Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
75 #endif // KOKKOS_ENABLE_CUDA
76 
83 void lazy_init();
84 
85 #if defined(KOKKOS_ENABLE_CUDA)
86 struct CudaInfo {
87  bool initialized_;
88  int lowPrio_;
89  int mediumPrio_; // same as CUDA default
90  int highPrio_;
91  cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
92 
93  CudaInfo();
94  ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
95  CudaInfo(const CudaInfo &other) = delete;
96  CudaInfo(CudaInfo &&other) = delete;
97 };
98 extern CudaInfo cudaInfo;
99 #endif // KOKKOS_ENABLE_CUDA
100 
101 // Tpetra's managed spaces
102 #if defined(KOKKOS_ENABLE_CUDA)
103 template <typename Space>
104 using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
105 template <typename Space>
106 using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
107 template <typename S1, typename S2>
108 using BothCuda = std::enable_if_t<
109  std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
110 template <typename S1, typename S2>
111 using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
112  !std::is_same_v<S2, Kokkos::Cuda>,
113  bool>;
114 #endif // KOKKOS_ENABLE_CUDA
115 
116 #if defined(KOKKOS_ENABLE_SERIAL)
117 template <typename Space>
119 using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
120 #endif // KOKKOS_ENABLE_SERIAL
121 
122 #if defined(KOKKOS_ENABLE_OPENMP)
123 template <typename Space>
125 using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
126 #endif // KOKKOS_ENABLE_OPENMP
127 
128 #if defined(KOKKOS_ENABLE_HIP)
129 template <typename Space>
131 using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
132 #endif // KOKKOS_ENABLE_HIP
133 
134 #if defined(KOKKOS_ENABLE_SYCL)
135 template <typename Space>
137 using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
138 #endif // KOKKOS_ENABLE_SYCL
139 
145 template <typename ExecSpace, Priority priority = Priority::medium
146 #if defined(KOKKOS_ENABLE_CUDA)
147  ,
148  NotCuda<ExecSpace> = true
149 #endif // KOKKOS_ENABLE_CUDA
150  >
151 ExecSpace make_instance() {
152  return ExecSpace();
153 }
154 
161 #if defined(KOKKOS_ENABLE_CUDA)
162 template <typename ExecSpace, Priority priority = Priority::medium,
163  IsCuda<ExecSpace> = true>
164 Kokkos::Cuda make_instance() {
165  lazy_init(); // CUDA priorities
166  cudaStream_t stream;
167  int prio;
168  switch (priority) {
169  case Priority::high:
170  prio = cudaInfo.highPrio_;
171  break;
172  case Priority::medium:
173  prio = cudaInfo.mediumPrio_;
174  break;
175  case Priority::low:
176  prio = cudaInfo.lowPrio_;
177  break;
178  default:
179  throw std::runtime_error("unexpected static Tpetra Space priority");
180  }
181  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
182  cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, prio));
183  return Kokkos::Cuda(stream, true /*Kokkos will manage this stream*/);
184 }
185 #endif // KOKKOS_ENABLE_CUDA
186 
192 template <typename ExecSpace> ExecSpace make_instance(const Priority &prio) {
193  switch (prio) {
194  case Priority::high:
195  return make_instance<ExecSpace, Priority::high>();
196  case Priority::medium:
197  return make_instance<ExecSpace, Priority::medium>();
198  case Priority::low:
199  return make_instance<ExecSpace, Priority::low>();
200  default:
201  throw std::runtime_error("unexpected dynamic Tpetra Space priority");
202  }
203 }
204 
216 template <typename ExecSpace> class InstanceLifetimeManager {
217 public:
218  using execution_space = ExecSpace;
219  using rcp_type = Teuchos::RCP<const execution_space>;
220 
227  template <Priority priority = Priority::medium>
228  rcp_type space_instance(int i = 0) {
230  "Tpetra::Details::Spaces::space_instance");
231 
232  constexpr int p = static_cast<int>(priority);
233  static_assert(p < sizeof(instances) / sizeof(instances[0]),
234  "Spaces::Priority enum error");
235 
236  if (i < 0) {
237  TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
238  }
240  TPETRA_DETAILS_SPACES_THROW(
241  "requested instance id "
243  << ") set by TPETRA_SPACES_ID_WARN_LIMIT");
244  }
245 
246  // make sure we can store an exec space at index i for priority
247  // not sure what happens in RCP(), so let's explicitly make it null
248  while (size_t(i) >= instances[p].size()) {
249  instances[p].push_back(Teuchos::ENull());
250  }
251 
252  /* no exec space instance i of priority p exists.
253  It may have never existed, or all Tpetra objects referencing it have been
254  destructed.
255 
256  Create a new RCP<ExecSpace> and internally store a weak
257  reference, so this space will be destructed when all strong references to
258  it are gone, but we can still refer to it as long as it lives to prevent
259  recreating
260  */
261  if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
262 
263  // create a strong RCP to a space
264  rcp_type r = Teuchos::RCP<const execution_space>(
265  new ExecSpace(make_instance<ExecSpace, priority>()));
266 
267  // store a weak RCP to the space
268  instances[p][i] = r.create_weak();
269 
270  return r; // allow strong rcp to escape so internal weak one does not
271  // immediately go away
272  }
273 
274  auto r = instances[p][i].create_strong();
275  return r;
276  }
277 
282  for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
283  for (const rcp_type &rcp : instances[i]) {
284  if (rcp.is_valid_ptr() && !rcp.is_null()) {
285  // avoid throwing in dtor
286  std::cerr << __FILE__ << ":" << __LINE__
287  << " execution space instance survived to "
288  "~InstanceLifetimeManager. strong_count() = "
289  << rcp.strong_count()
290  << ". Did a Tpetra object live past Kokkos::finalize()?"
291  << std::endl;
292  }
293  }
294  }
295  }
296 
297 private:
298  // one vector of instances for each priority level
299  std::vector<rcp_type>
300  instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
301 };
302 
303 #if defined(KOKKOS_ENABLE_CUDA)
304 extern InstanceLifetimeManager<Kokkos::Cuda> cudaSpaces;
305 #endif
306 #if defined(KOKKOS_ENABLE_SERIAL)
307 extern InstanceLifetimeManager<Kokkos::Serial> serialSpaces;
308 #endif
309 #if defined(KOKKOS_ENABLE_OPENMP)
310 extern InstanceLifetimeManager<Kokkos::OpenMP> openMPSpaces;
311 #endif
312 #if defined(KOKKOS_ENABLE_HIP)
313 extern InstanceLifetimeManager<Kokkos::HIP> HIPSpaces;
314 #endif
315 #if defined(KOKKOS_ENABLE_SYCL)
316 extern InstanceLifetimeManager<Kokkos::Experimental::SYCL> SYCLSpaces;
317 #endif
318 
319 #if defined(KOKKOS_ENABLE_CUDA)
320 
324 template <typename ExecSpace, Priority priority = Priority::medium,
325  IsCuda<ExecSpace> = true>
326 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
327  return cudaSpaces.space_instance<priority>(i);
328 }
329 #endif
330 
331 #if defined(KOKKOS_ENABLE_SERIAL)
332 
335 template <typename ExecSpace, Priority priority = Priority::medium,
336  IsSerial<ExecSpace> = true>
337 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
338  return serialSpaces.space_instance<priority>(i);
339 }
340 #endif
341 
342 #if defined(KOKKOS_ENABLE_OPENMP)
343 
346 template <typename ExecSpace, Priority priority = Priority::medium,
347  IsOpenMP<ExecSpace> = true>
348 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
349  return openMPSpaces.space_instance<priority>(i);
350 }
351 #endif
352 
353 #if defined(KOKKOS_ENABLE_HIP)
354 
356 template <typename ExecSpace, Priority priority = Priority::medium,
357  IsHIP<ExecSpace> = true>
358 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
359  return HIPSpaces.space_instance<priority>(i);
360 }
361 #endif
362 #if defined(KOKKOS_ENABLE_SYCL)
363 
366 template <typename ExecSpace, Priority priority = Priority::medium,
367  IsSYCL<ExecSpace> = true>
368 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
369  return SYCLSpaces.space_instance<priority>(i);
370 }
371 #endif
372 
378 template <typename ExecSpace>
379 Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
380  int i = 0) {
381  switch (priority) {
382  case Priority::high:
383  return space_instance<ExecSpace, Priority::high>(i);
384  case Priority::medium:
385  return space_instance<ExecSpace, Priority::medium>(i);
386  case Priority::low:
387  return space_instance<ExecSpace, Priority::low>(i);
388  default:
389  throw std::runtime_error(
390  "unexpected dynamic Tpetra Space priority in space_instance");
391  }
392 }
393 
407 template <typename S1, typename S2
408 #if defined(KOKKOS_ENABLE_CUDA)
409  ,
410  NotBothCuda<S1, S2> = true
411 #endif
412  >
413 void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
415  "Tpetra::Details::Spaces::exec_space_wait");
416  lazy_init();
417  waitee.fence(msg);
418 }
419 
420 #if defined(KOKKOS_ENABLE_CUDA)
421 template <typename S1, typename S2, BothCuda<S1, S2> = true>
422 void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
424  "Tpetra::Details::Spaces::exec_space_wait");
425  lazy_init();
426 
427  // if they are the same instance, no sync needed
428  if (waitee.impl_instance_id() !=
429  waiter
430  .impl_instance_id()) { // TODO: use instance operator== once available
431  /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
432  even if it overwrites the state of a shared event this means we only need
433  one event even if many exec_space_waits are in flight at the same time
434  */
435  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
436  cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
437  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(cudaStreamWaitEvent(
438  waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
439  }
440 }
441 #endif
442 
443 template <typename S1, typename S2>
444 void exec_space_wait(const S1 &waitee, const S2 &waiter) {
446  "Tpetra::Details::Spaces::exec_space_wait");
447  lazy_init();
448  exec_space_wait("anonymous", waitee, waiter);
449 }
450 
451 template <typename ExecutionSpace>
452 constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
453  return false;
454 }
455 
456 #if defined(KOKKOS_ENABLE_CUDA)
457 template <>
458 constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
459  return true;
460 }
461 #endif
462 
463 #if defined(KOKKOS_ENABLE_HIP)
464 template <>
465 constexpr KOKKOS_INLINE_FUNCTION bool
466 is_gpu_exec_space<Kokkos::HIP>() {
467  return true;
468 }
469 #endif
470 
471 #if defined(KOKKOS_ENABLE_SYCL)
472 template <>
473 constexpr KOKKOS_INLINE_FUNCTION bool
474 is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
475  return true;
476 }
477 #endif
478 
479 } // namespace Spaces
480 } // namespace Details
481 } // namespace Tpetra
482 
483 #undef TPETRA_DETAILS_SPACES_THROW
484 
485 #endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP&lt;const ExecSpace&gt; to instance i
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
Provides reusable Kokkos execution space instances.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.