Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_ExecutionSpaces.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_EXECUTIONSPACES_HPP
11 #define TPETRA_DETAILS_EXECUTIONSPACES_HPP
12 
13 #include <iostream>
14 #include <sstream>
15 #include <vector>
16 
17 #include <Kokkos_Core.hpp>
18 
19 #include <Teuchos_RCP.hpp>
20 
23 
40 #define TPETRA_DETAILS_SPACES_THROW(x) \
41  { \
42  std::stringstream ss; \
43  ss << __FILE__ << ":" << __LINE__ << ": " << x; \
44  throw std::runtime_error(ss.str()); \
45  }
46 
47 namespace Tpetra {
48 namespace Details {
49 namespace Spaces {
50 
56 enum class Priority {
57  low = 0,
58  medium = 1,
59  high = 2,
60  NUM_LEVELS = 3 // not to be used as a priority
61 };
62 
63 #if defined(KOKKOS_ENABLE_CUDA)
64 inline void success_or_throw(cudaError_t err, const char *file,
65  const int line) {
66  if (err != cudaSuccess) {
67  std::stringstream ss;
68  ss << file << ":" << line << ": ";
69  ss << cudaGetErrorString(err);
70  throw std::runtime_error(ss.str());
71  }
72 }
73 #define TPETRA_DETAILS_SPACES_CUDA_RUNTIME(x) \
74  Tpetra::Details::Spaces::success_or_throw((x), __FILE__, __LINE__)
75 #endif // KOKKOS_ENABLE_CUDA
76 
83 void lazy_init();
84 
85 #if defined(KOKKOS_ENABLE_CUDA)
86 struct CudaInfo {
87  bool initialized_;
88  int lowPrio_;
89  int mediumPrio_; // same as CUDA default
90  int highPrio_;
91  cudaEvent_t execSpaceWaitEvent_; // see exec_space_wait
92 
93  CudaInfo();
94  ~CudaInfo() = default; // execSpaceWaitEvent_ cleaned up by CUDA deinit
95  CudaInfo(const CudaInfo &other) = delete;
96  CudaInfo(CudaInfo &&other) = delete;
97 };
98 extern CudaInfo cudaInfo;
99 #endif // KOKKOS_ENABLE_CUDA
100 
101 // Tpetra's managed spaces
102 #if defined(KOKKOS_ENABLE_CUDA)
103 template <typename Space>
104 using IsCuda = std::enable_if_t<std::is_same_v<Space, Kokkos::Cuda>, bool>;
105 template <typename Space>
106 using NotCuda = std::enable_if_t<!std::is_same_v<Space, Kokkos::Cuda>, bool>;
107 template <typename S1, typename S2>
108 using BothCuda = std::enable_if_t<
109  std::is_same_v<S1, Kokkos::Cuda> && std::is_same_v<S2, Kokkos::Cuda>, bool>;
110 template <typename S1, typename S2>
111 using NotBothCuda = std::enable_if_t<!std::is_same_v<S1, Kokkos::Cuda> ||
112  !std::is_same_v<S2, Kokkos::Cuda>,
113  bool>;
114 #endif // KOKKOS_ENABLE_CUDA
115 
116 #if defined(KOKKOS_ENABLE_SERIAL)
117 template <typename Space>
119 using IsSerial = std::enable_if_t<std::is_same_v<Space, Kokkos::Serial>, bool>;
120 #endif // KOKKOS_ENABLE_SERIAL
121 
122 #if defined(KOKKOS_ENABLE_OPENMP)
123 template <typename Space>
125 using IsOpenMP = std::enable_if_t<std::is_same_v<Space, Kokkos::OpenMP>, bool>;
126 #endif // KOKKOS_ENABLE_OPENMP
127 
128 #if defined(KOKKOS_ENABLE_HIP)
129 template <typename Space>
131 using IsHIP = std::enable_if_t<std::is_same_v<Space, Kokkos::HIP>, bool>;
132 #endif // KOKKOS_ENABLE_HIP
133 
134 #if defined(KOKKOS_ENABLE_SYCL)
135 template <typename Space>
137 using IsSYCL = std::enable_if_t<std::is_same_v<Space, Kokkos::Experimental::SYCL>, bool>;
138 #endif // KOKKOS_ENABLE_SYCL
139 
145 template <typename ExecSpace, Priority priority = Priority::medium
146 #if defined(KOKKOS_ENABLE_CUDA)
147  ,
148  NotCuda<ExecSpace> = true
149 #endif // KOKKOS_ENABLE_CUDA
150  >
151 ExecSpace make_instance() {
152  return ExecSpace();
153 }
154 
161 #if defined(KOKKOS_ENABLE_CUDA)
162 template <typename ExecSpace, Priority priority = Priority::medium,
163  IsCuda<ExecSpace> = true>
164 Kokkos::Cuda make_instance() {
165  lazy_init(); // CUDA priorities
166  cudaStream_t stream;
167  int prio;
168  switch (priority) {
169  case Priority::high:
170  prio = cudaInfo.highPrio_;
171  break;
172  case Priority::medium:
173  prio = cudaInfo.mediumPrio_;
174  break;
175  case Priority::low:
176  prio = cudaInfo.lowPrio_;
177  break;
178  default:
179  throw std::runtime_error("unexpected static Tpetra Space priority");
180  }
181  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
182  cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, prio));
183  return Kokkos::Cuda(stream, true /*Kokkos will manage this stream*/);
184 }
185 #endif // KOKKOS_ENABLE_CUDA
186 
192 template <typename ExecSpace>
193 ExecSpace make_instance(const Priority &prio) {
194  switch (prio) {
195  case Priority::high:
196  return make_instance<ExecSpace, Priority::high>();
197  case Priority::medium:
198  return make_instance<ExecSpace, Priority::medium>();
199  case Priority::low:
200  return make_instance<ExecSpace, Priority::low>();
201  default:
202  throw std::runtime_error("unexpected dynamic Tpetra Space priority");
203  }
204 }
205 
217 template <typename ExecSpace>
219  public:
220  using execution_space = ExecSpace;
221  using rcp_type = Teuchos::RCP<const execution_space>;
222 
229  template <Priority priority = Priority::medium>
230  rcp_type space_instance(int i = 0) {
232  "Tpetra::Details::Spaces::space_instance");
233 
234  constexpr int p = static_cast<int>(priority);
235  static_assert(p < sizeof(instances) / sizeof(instances[0]),
236  "Spaces::Priority enum error");
237 
238  if (i < 0) {
239  TPETRA_DETAILS_SPACES_THROW("requested instance id " << i << " (< 0)");
240  }
242  TPETRA_DETAILS_SPACES_THROW(
243  "requested instance id "
245  << ") set by TPETRA_SPACES_ID_WARN_LIMIT");
246  }
247 
248  // make sure we can store an exec space at index i for priority
249  // not sure what happens in RCP(), so let's explicitly make it null
250  while (size_t(i) >= instances[p].size()) {
251  instances[p].push_back(Teuchos::ENull());
252  }
253 
254  /* no exec space instance i of priority p exists.
255  It may have never existed, or all Tpetra objects referencing it have been
256  destructed.
257 
258  Create a new RCP<ExecSpace> and internally store a weak
259  reference, so this space will be destructed when all strong references to
260  it are gone, but we can still refer to it as long as it lives to prevent
261  recreating
262  */
263  if (instances[p][i].is_null() || !instances[p][i].is_valid_ptr()) {
264  // create a strong RCP to a space
265  rcp_type r = Teuchos::RCP<const execution_space>(
266  new ExecSpace(make_instance<ExecSpace, priority>()));
267 
268  // store a weak RCP to the space
269  instances[p][i] = r.create_weak();
270 
271  return r; // allow strong rcp to escape so internal weak one does not
272  // immediately go away
273  }
274 
275  auto r = instances[p][i].create_strong();
276  return r;
277  }
278 
283  for (int i = 0; i < static_cast<int>(Spaces::Priority::NUM_LEVELS); ++i) {
284  for (const rcp_type &rcp : instances[i]) {
285  if (rcp.is_valid_ptr() && !rcp.is_null()) {
286  // avoid throwing in dtor
287  std::cerr << __FILE__ << ":" << __LINE__
288  << " execution space instance survived to "
289  "~InstanceLifetimeManager. strong_count() = "
290  << rcp.strong_count()
291  << ". Did a Tpetra object live past Kokkos::finalize()?"
292  << std::endl;
293  }
294  }
295  }
296  }
297 
298  private:
299  // one vector of instances for each priority level
300  std::vector<rcp_type>
301  instances[static_cast<int>(Spaces::Priority::NUM_LEVELS)];
302 };
303 
304 #if defined(KOKKOS_ENABLE_CUDA)
305 extern InstanceLifetimeManager<Kokkos::Cuda> cudaSpaces;
306 #endif
307 #if defined(KOKKOS_ENABLE_SERIAL)
308 extern InstanceLifetimeManager<Kokkos::Serial> serialSpaces;
309 #endif
310 #if defined(KOKKOS_ENABLE_OPENMP)
311 extern InstanceLifetimeManager<Kokkos::OpenMP> openMPSpaces;
312 #endif
313 #if defined(KOKKOS_ENABLE_HIP)
314 extern InstanceLifetimeManager<Kokkos::HIP> HIPSpaces;
315 #endif
316 #if defined(KOKKOS_ENABLE_SYCL)
317 extern InstanceLifetimeManager<Kokkos::Experimental::SYCL> SYCLSpaces;
318 #endif
319 
320 #if defined(KOKKOS_ENABLE_CUDA)
321 
325 template <typename ExecSpace, Priority priority = Priority::medium,
326  IsCuda<ExecSpace> = true>
327 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
328  return cudaSpaces.space_instance<priority>(i);
329 }
330 #endif
331 
332 #if defined(KOKKOS_ENABLE_SERIAL)
333 
336 template <typename ExecSpace, Priority priority = Priority::medium,
337  IsSerial<ExecSpace> = true>
338 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
339  return serialSpaces.space_instance<priority>(i);
340 }
341 #endif
342 
343 #if defined(KOKKOS_ENABLE_OPENMP)
344 
347 template <typename ExecSpace, Priority priority = Priority::medium,
348  IsOpenMP<ExecSpace> = true>
349 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
350  return openMPSpaces.space_instance<priority>(i);
351 }
352 #endif
353 
354 #if defined(KOKKOS_ENABLE_HIP)
355 
357 template <typename ExecSpace, Priority priority = Priority::medium,
358  IsHIP<ExecSpace> = true>
359 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
360  return HIPSpaces.space_instance<priority>(i);
361 }
362 #endif
363 #if defined(KOKKOS_ENABLE_SYCL)
364 
367 template <typename ExecSpace, Priority priority = Priority::medium,
368  IsSYCL<ExecSpace> = true>
369 Teuchos::RCP<const ExecSpace> space_instance(int i = 0) {
370  return SYCLSpaces.space_instance<priority>(i);
371 }
372 #endif
373 
379 template <typename ExecSpace>
380 Teuchos::RCP<const ExecSpace> space_instance(const Priority &priority,
381  int i = 0) {
382  switch (priority) {
383  case Priority::high:
384  return space_instance<ExecSpace, Priority::high>(i);
385  case Priority::medium:
386  return space_instance<ExecSpace, Priority::medium>(i);
387  case Priority::low:
388  return space_instance<ExecSpace, Priority::low>(i);
389  default:
390  throw std::runtime_error(
391  "unexpected dynamic Tpetra Space priority in space_instance");
392  }
393 }
394 
408 template <typename S1, typename S2
409 #if defined(KOKKOS_ENABLE_CUDA)
410  ,
411  NotBothCuda<S1, S2> = true
412 #endif
413  >
414 void exec_space_wait(const char *msg, const S1 &waitee, const S2 & /*waiter*/) {
416  "Tpetra::Details::Spaces::exec_space_wait");
417  lazy_init();
418  waitee.fence(msg);
419 }
420 
421 #if defined(KOKKOS_ENABLE_CUDA)
422 template <typename S1, typename S2, BothCuda<S1, S2> = true>
423 void exec_space_wait(const char *msg, const S1 &waitee, const S2 &waiter) {
425  "Tpetra::Details::Spaces::exec_space_wait");
426  lazy_init();
427 
428  // if they are the same instance, no sync needed
429  if (waitee.impl_instance_id() !=
430  waiter
431  .impl_instance_id()) { // TODO: use instance operator== once available
432  /* cudaStreamWaitEvent is not affected by later calls to cudaEventRecord,
433  even if it overwrites the state of a shared event this means we only need
434  one event even if many exec_space_waits are in flight at the same time
435  */
436  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(
437  cudaEventRecord(cudaInfo.execSpaceWaitEvent_, waitee.cuda_stream()));
438  TPETRA_DETAILS_SPACES_CUDA_RUNTIME(cudaStreamWaitEvent(
439  waiter.cuda_stream(), cudaInfo.execSpaceWaitEvent_, 0 /*flags*/));
440  }
441 }
442 #endif
443 
444 template <typename S1, typename S2>
445 void exec_space_wait(const S1 &waitee, const S2 &waiter) {
447  "Tpetra::Details::Spaces::exec_space_wait");
448  lazy_init();
449  exec_space_wait("anonymous", waitee, waiter);
450 }
451 
452 template <typename ExecutionSpace>
453 constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space() {
454  return false;
455 }
456 
457 #if defined(KOKKOS_ENABLE_CUDA)
458 template <>
459 constexpr KOKKOS_INLINE_FUNCTION bool is_gpu_exec_space<Kokkos::Cuda>() {
460  return true;
461 }
462 #endif
463 
464 #if defined(KOKKOS_ENABLE_HIP)
465 template <>
466 constexpr KOKKOS_INLINE_FUNCTION bool
467 is_gpu_exec_space<Kokkos::HIP>() {
468  return true;
469 }
470 #endif
471 
472 #if defined(KOKKOS_ENABLE_SYCL)
473 template <>
474 constexpr KOKKOS_INLINE_FUNCTION bool
475 is_gpu_exec_space<Kokkos::Experimental::SYCL>() {
476  return true;
477 }
478 #endif
479 
480 } // namespace Spaces
481 } // namespace Details
482 } // namespace Tpetra
483 
484 #undef TPETRA_DETAILS_SPACES_THROW
485 
486 #endif // TPETRA_DETAILS_EXECUTIONSPACES_HPP
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
static size_t spacesIdWarnLimit()
Warn if more than this many Kokkos spaces are accessed.
rcp_type space_instance(int i=0)
Retrieve a strong Teuchos::RCP&lt;const ExecSpace&gt; to instance i
~InstanceLifetimeManager()
Issue a warning if any Tpetra-managed execution space instances survive to the end of static lifetime...
Provides reusable Kokkos execution space instances.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.