Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_BlockHelper.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_BLOCKHELPER_IMPL_HPP
11 #define IFPACK2_BLOCKHELPER_IMPL_HPP
12 
13 #include "Ifpack2_BlockHelper_Timers.hpp"
14 
15 namespace Ifpack2 {
16 
17 namespace BlockHelperDetails {
18 
19 namespace KB = KokkosBatched;
20 
24 using do_not_initialize_tag = Kokkos::ViewAllocateWithoutInitializing;
25 
26 template <typename MemoryTraitsType, Kokkos::MemoryTraitsFlags flag>
27 using MemoryTraits = Kokkos::MemoryTraits<MemoryTraitsType::is_unmanaged |
28  MemoryTraitsType::is_random_access |
29  flag>;
30 
31 template <typename ViewType>
32 using Unmanaged = Kokkos::View<typename ViewType::data_type,
33  typename ViewType::array_layout,
34  typename ViewType::device_type,
35  MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
36 template <typename ViewType>
37 using Atomic = Kokkos::View<typename ViewType::data_type,
38  typename ViewType::array_layout,
39  typename ViewType::device_type,
40  MemoryTraits<typename ViewType::memory_traits, Kokkos::Atomic> >;
41 template <typename ViewType>
42 using Const = Kokkos::View<typename ViewType::const_data_type,
43  typename ViewType::array_layout,
44  typename ViewType::device_type,
45  typename ViewType::memory_traits>;
46 template <typename ViewType>
47 using ConstUnmanaged = Const<Unmanaged<ViewType> >;
48 
49 template <typename ViewType>
50 using AtomicUnmanaged = Atomic<Unmanaged<ViewType> >;
51 
52 template <typename ViewType>
53 using Unmanaged = Kokkos::View<typename ViewType::data_type,
54  typename ViewType::array_layout,
55  typename ViewType::device_type,
56  MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
57 
58 template <typename ViewType>
59 using Scratch = Kokkos::View<typename ViewType::data_type,
60  typename ViewType::array_layout,
61  typename ViewType::execution_space::scratch_memory_space,
62  MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
63 
67 template <typename LayoutType>
69 template <>
70 struct TpetraLittleBlock<Kokkos::LayoutLeft> {
71  template <typename T>
72  KOKKOS_INLINE_FUNCTION static T getFlatIndex(const T i, const T j, const T blksize) { return i + j * blksize; }
73 };
74 template <>
75 struct TpetraLittleBlock<Kokkos::LayoutRight> {
76  template <typename T>
77  KOKKOS_INLINE_FUNCTION static T getFlatIndex(const T i, const T j, const T blksize) { return i * blksize + j; }
78 };
79 
83 template <typename T>
84 struct BlockTridiagScalarType { typedef T type; };
85 #if defined(IFPACK2_BLOCKHELPER_USE_SMALL_SCALAR_FOR_BLOCKTRIDIAG)
86 template <>
87 struct BlockTridiagScalarType<double> { typedef float type; };
88 // template<> struct SmallScalarType<Kokkos::complex<double> > { typedef Kokkos::complex<float> type; };
89 #endif
90 
94 template <typename T>
95 struct is_cuda {
96  enum : bool { value = false };
97 };
98 #if defined(KOKKOS_ENABLE_CUDA)
99 template <>
100 struct is_cuda<Kokkos::Cuda> {
101  enum : bool { value = true };
102 };
103 #endif
104 
108 template <typename T>
109 struct is_hip {
110  enum : bool { value = false };
111 };
112 #if defined(KOKKOS_ENABLE_HIP)
113 template <>
114 struct is_hip<Kokkos::HIP> {
115  enum : bool { value = true };
116 };
117 #endif
118 
122 template <typename T>
123 struct is_sycl {
124  enum : bool { value = false };
125 };
126 #if defined(KOKKOS_ENABLE_SYCL)
127 template <>
128 struct is_sycl<Kokkos::Experimental::SYCL> {
129  enum : bool { value = true };
130 };
131 #endif
132 
133 template <typename T>
134 struct is_device {
135  enum : bool { value = is_cuda<T>::value || is_hip<T>::value || is_sycl<T>::value };
136 };
137 
141 template <typename T>
143  static void createInstance(T &exec_instance) {
144  exec_instance = T();
145  }
146 #if defined(KOKKOS_ENABLE_CUDA)
147  static void createInstance(const cudaStream_t &s, T &exec_instance) {
148  exec_instance = T();
149  }
150 #endif
151 };
152 
153 #if defined(KOKKOS_ENABLE_CUDA)
154 template <>
155 struct ExecutionSpaceFactory<Kokkos::Cuda> {
156  static void createInstance(Kokkos::Cuda &exec_instance) {
157  exec_instance = Kokkos::Cuda();
158  }
159  static void createInstance(const cudaStream_t &s, Kokkos::Cuda &exec_instance) {
160  exec_instance = Kokkos::Cuda(s);
161  }
162 };
163 #endif
164 
165 #if defined(KOKKOS_ENABLE_HIP)
166 template <>
167 struct ExecutionSpaceFactory<Kokkos::HIP> {
168  static void createInstance(Kokkos::HIP &exec_instance) {
169  exec_instance = Kokkos::HIP();
170  }
171 };
172 #endif
173 
174 #if defined(KOKKOS_ENABLE_SYCL)
175 template <>
176 struct ExecutionSpaceFactory<Kokkos::Experimental::SYCL> {
177  static void createInstance(Kokkos::Experimental::SYCL &exec_instance) {
178  exec_instance = Kokkos::Experimental::SYCL();
179  }
180 };
181 #endif
182 
183 #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_ENABLE_PROFILE)
184 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN \
185  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart());
186 
187 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_END \
188  { KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStop()); }
189 #else
190 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN
192 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_END
193 #endif
194 
198 template <typename CommPtrType>
199 std::string get_msg_prefix(const CommPtrType &comm) {
200  const auto rank = comm->getRank();
201  const auto nranks = comm->getSize();
202  std::stringstream ss;
203  ss << "Rank " << rank << " of " << nranks << ": ";
204  return ss.str();
205 }
206 
210 template <typename T, int N>
212  T v[N];
213  KOKKOS_INLINE_FUNCTION
214  ArrayValueType() {
215  for (int i = 0; i < N; ++i)
216  this->v[i] = 0;
217  }
218  KOKKOS_INLINE_FUNCTION
219  ArrayValueType(const ArrayValueType &b) {
220  for (int i = 0; i < N; ++i)
221  this->v[i] = b.v[i];
222  }
223 };
224 template <typename T, int N>
225 static KOKKOS_INLINE_FUNCTION void
226 operator+=(ArrayValueType<T, N> &a,
227  const ArrayValueType<T, N> &b) {
228  for (int i = 0; i < N; ++i)
229  a.v[i] += b.v[i];
230 }
231 
235 template <typename T, int N, typename ExecSpace>
236 struct SumReducer {
237  typedef SumReducer reducer;
239  typedef Kokkos::View<value_type, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
240  value_type *value;
241 
242  KOKKOS_INLINE_FUNCTION
243  SumReducer(value_type &val)
244  : value(&val) {}
245 
246  KOKKOS_INLINE_FUNCTION
247  void join(value_type &dst, value_type const &src) const {
248  for (int i = 0; i < N; ++i)
249  dst.v[i] += src.v[i];
250  }
251  KOKKOS_INLINE_FUNCTION
252  void init(value_type &val) const {
253  for (int i = 0; i < N; ++i)
254  val.v[i] = 0;
255  }
256  KOKKOS_INLINE_FUNCTION
257  value_type &reference() {
258  return *value;
259  }
260  KOKKOS_INLINE_FUNCTION
261  result_view_type view() const {
262  return result_view_type(value);
263  }
264 };
265 
269 template <typename MatrixType>
270 struct ImplType {
274  typedef size_t size_type;
275  typedef typename MatrixType::scalar_type scalar_type;
276  typedef typename MatrixType::local_ordinal_type local_ordinal_type;
277  typedef typename MatrixType::global_ordinal_type global_ordinal_type;
278  typedef typename MatrixType::node_type node_type;
279 
283 #if KOKKOS_VERSION >= 40799
284  typedef typename KokkosKernels::ArithTraits<scalar_type>::val_type impl_scalar_type;
285 #else
286  typedef typename Kokkos::ArithTraits<scalar_type>::val_type impl_scalar_type;
287 #endif
288 #if KOKKOS_VERSION >= 40799
289  typedef typename KokkosKernels::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
290 #else
291  typedef typename Kokkos::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
292 #endif
293 
294  typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
295 #if KOKKOS_VERSION >= 40799
296  typedef typename KokkosKernels::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
297 #else
298  typedef typename Kokkos::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
299 #endif
300 
304  typedef Kokkos::DefaultHostExecutionSpace host_execution_space;
305 
309  typedef typename node_type::device_type node_device_type;
310  typedef typename node_device_type::execution_space node_execution_space;
311  typedef typename node_device_type::memory_space node_memory_space;
312 
313 #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
314  typedef node_execution_space execution_space;
316  typedef typename std::conditional<std::is_same<node_memory_space, Kokkos::CudaUVMSpace>::value,
317  Kokkos::CudaSpace,
318  node_memory_space>::type memory_space;
319  typedef Kokkos::Device<execution_space, memory_space> device_type;
320 #else
321  typedef node_device_type device_type;
322  typedef node_execution_space execution_space;
323  typedef node_memory_space memory_space;
324 #endif
325 
326  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_multivector_type;
327  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> tpetra_map_type;
328  typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> tpetra_import_type;
329  typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_row_matrix_type;
330  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_matrix_type;
331  typedef Tpetra::CrsGraph<local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_graph_type;
332  typedef Tpetra::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_crs_matrix_type;
333  typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
334  typedef Tpetra::BlockMultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_multivector_type;
335  typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;
336 
340  template <typename T, int l>
341  using Vector = KB::Vector<T, l>;
342  template <typename T>
343  using SIMD = KB::SIMD<T>;
344  template <typename T, typename M>
345  using DefaultVectorLength = KB::DefaultVectorLength<T, M>;
346  template <typename T, typename M>
347  using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T, M>;
348 
349  static constexpr int vector_length = DefaultVectorLength<btdm_scalar_type, memory_space>::value;
350  static constexpr int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type, memory_space>::value;
351  static constexpr int half_vector_length = (vector_length > 1) ? (vector_length / 2) : 1;
352  typedef Vector<SIMD<btdm_scalar_type>, vector_length> vector_type;
353  typedef Vector<SIMD<btdm_scalar_type>, internal_vector_length> internal_vector_type;
354 
358  typedef Kokkos::View<size_type *, device_type> size_type_1d_view;
359  typedef Kokkos::View<size_type **, device_type> size_type_2d_view;
360  typedef Kokkos::View<int64_t ***, Kokkos::LayoutRight, device_type> i64_3d_view;
361  typedef Kokkos::View<local_ordinal_type *, device_type> local_ordinal_type_1d_view;
362  typedef Kokkos::View<local_ordinal_type **, device_type> local_ordinal_type_2d_view;
363  // tpetra block crs values
364  typedef Kokkos::View<impl_scalar_type *, device_type> impl_scalar_type_1d_view;
365  typedef Kokkos::View<impl_scalar_type *, node_device_type> impl_scalar_type_1d_view_tpetra;
366 
367  // tpetra multivector values (layout left): may need to change the typename more explicitly
368  typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, device_type> impl_scalar_type_2d_view;
369  typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> impl_scalar_type_2d_view_tpetra;
370  typedef Kokkos::View<const impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> const_impl_scalar_type_2d_view_tpetra;
371 
372  // packed data always use layout right
373  typedef Kokkos::View<vector_type *, device_type> vector_type_1d_view;
374  typedef Kokkos::View<vector_type ***, Kokkos::LayoutRight, device_type> vector_type_3d_view;
375  typedef Kokkos::View<vector_type ****, Kokkos::LayoutRight, device_type> vector_type_4d_view;
376  typedef Kokkos::View<internal_vector_type ***, Kokkos::LayoutRight, device_type> internal_vector_type_3d_view;
377  typedef Kokkos::View<internal_vector_type ****, Kokkos::LayoutRight, device_type> internal_vector_type_4d_view;
378  typedef Kokkos::View<internal_vector_type *****, Kokkos::LayoutRight, device_type> internal_vector_type_5d_view;
379  typedef Kokkos::View<btdm_scalar_type **, Kokkos::LayoutRight, device_type> btdm_scalar_type_2d_view;
380  typedef Kokkos::View<btdm_scalar_type ***, Kokkos::LayoutRight, device_type> btdm_scalar_type_3d_view;
381  typedef Kokkos::View<btdm_scalar_type ****, Kokkos::LayoutRight, device_type> btdm_scalar_type_4d_view;
382  typedef Kokkos::View<btdm_scalar_type *****, Kokkos::LayoutRight, device_type> btdm_scalar_type_5d_view;
383 };
384 
388 template <typename MatrixType>
389 struct NormManager {
390  public:
392  using host_execution_space = typename impl_type::host_execution_space;
393  using magnitude_type = typename impl_type::magnitude_type;
394 
395  private:
396  bool collective_;
397  int sweep_step_, sweep_step_upper_bound_;
398 #ifdef HAVE_IFPACK2_MPI
399  MPI_Request mpi_request_;
400  MPI_Comm comm_;
401 #endif
402  magnitude_type work_[3];
403 
404  public:
405  NormManager() = default;
406  NormManager(const NormManager &b) = default;
407  NormManager(const Teuchos::RCP<const Teuchos::Comm<int> > &comm) {
408  sweep_step_ = 1;
409  sweep_step_upper_bound_ = 1;
410  collective_ = comm->getSize() > 1;
411  if (collective_) {
412 #ifdef HAVE_IFPACK2_MPI
413  const auto mpi_comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
414  TEUCHOS_ASSERT(!mpi_comm.is_null());
415  comm_ = *mpi_comm->getRawMpiComm();
416 #endif
417  }
418  const magnitude_type zero(0), minus_one(-1);
419  work_[0] = zero;
420  work_[1] = zero;
421  work_[2] = minus_one;
422  }
423 
424  // Check the norm every sweep_step sweeps.
425  void setCheckFrequency(const int sweep_step) {
426  TEUCHOS_TEST_FOR_EXCEPT_MSG(sweep_step < 1, "sweep step must be >= 1");
427  sweep_step_upper_bound_ = sweep_step;
428  sweep_step_ = 1;
429  }
430 
431  // Get the buffer into which to store rank-local squared norms.
432  magnitude_type *getBuffer() { return &work_[0]; }
433 
434  // Call MPI_Iallreduce to find the global squared norms.
435  void ireduce(const int sweep, const bool force = false) {
436  if (!force && sweep % sweep_step_) return;
437 
438  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce", Ireduce);
439 
440  work_[1] = work_[0];
441 #ifdef HAVE_IFPACK2_MPI
442  auto send_data = &work_[1];
443  auto recv_data = &work_[0];
444  if (collective_) {
445 #if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
446  MPI_Iallreduce(send_data, recv_data, 1,
447  Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
448  MPI_SUM, comm_, &mpi_request_);
449 #else
450  MPI_Allreduce(send_data, recv_data, 1,
451  Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
452  MPI_SUM, comm_);
453 #endif
454  }
455 #endif
456  }
457 
458  // Check if the norm-based termination criterion is met. tol2 is the
459  // tolerance squared. Sweep is the sweep index. If not every iteration is
460  // being checked, this function immediately returns false. If a check must
461  // be done at this iteration, it waits for the reduction triggered by
462  // ireduce to complete, then checks the global norm against the tolerance.
463  bool checkDone(const int sweep, const magnitude_type tol2, const bool force = false) {
464  // early return
465  if (sweep <= 0) return false;
466 
467  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone", CheckDone);
468 
469  TEUCHOS_ASSERT(sweep >= 1);
470  if (!force && (sweep - 1) % sweep_step_) return false;
471  if (collective_) {
472 #ifdef HAVE_IFPACK2_MPI
473 #if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
474  MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
475 #else
476  // Do nothing.
477 #endif
478 #endif
479  }
480  bool r_val = false;
481  if (sweep == 1) {
482  work_[2] = work_[0];
483  } else {
484  r_val = (work_[0] < tol2 * work_[2]);
485  }
486 
487  // adjust sweep step
488  const auto adjusted_sweep_step = 2 * sweep_step_;
489  if (adjusted_sweep_step < sweep_step_upper_bound_) {
490  sweep_step_ = adjusted_sweep_step;
491  } else {
492  sweep_step_ = sweep_step_upper_bound_;
493  }
494  return r_val;
495  }
496 
497  // After termination has occurred, finalize the norms for use in
498  // get_norms{0,final}.
499  void finalize() {
500  work_[0] = std::sqrt(work_[0]); // after converged
501  if (work_[2] >= 0)
502  work_[2] = std::sqrt(work_[2]); // first norm
503  // if work_[2] is minus one, then norm is not requested.
504  }
505 
506  // Report norms to the caller.
507  const magnitude_type getNorms0() const { return work_[2]; }
508  const magnitude_type getNormsFinal() const { return work_[0]; }
509 };
510 
511 template <typename MatrixType>
512 void reduceVector(const ConstUnmanaged<typename BlockHelperDetails::ImplType<MatrixType>::impl_scalar_type_1d_view> zz,
513  /* */ typename BlockHelperDetails::ImplType<MatrixType>::magnitude_type *vals) {
514  IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN;
515  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector", ReduceVector);
516 
517  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
518  using local_ordinal_type = typename impl_type::local_ordinal_type;
519  using impl_scalar_type = typename impl_type::impl_scalar_type;
520 #if 0
521  const auto norm2 = KokkosBlas::nrm1(zz);
522 #else
523  impl_scalar_type norm2(0);
524  Kokkos::parallel_reduce(
525  "ReduceMultiVector::Device",
526  Kokkos::RangePolicy<typename impl_type::execution_space>(0, zz.extent(0)),
527  KOKKOS_LAMBDA(const local_ordinal_type &i, impl_scalar_type &update) {
528  update += zz(i);
529  },
530  norm2);
531 #endif
532 #if KOKKOS_VERSION >= 40799
533  vals[0] = KokkosKernels::ArithTraits<impl_scalar_type>::abs(norm2);
534 #else
535  vals[0] = Kokkos::ArithTraits<impl_scalar_type>::abs(norm2);
536 #endif
537 
538  IFPACK2_BLOCKHELPER_PROFILER_REGION_END;
539  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename ImplType<MatrixType>::execution_space)
540 }
541 
542 } // namespace BlockHelperDetails
543 
544 } // namespace Ifpack2
545 
546 #endif
node_type::device_type node_device_type
Definition: Ifpack2_BlockHelper.hpp:309
Definition: Ifpack2_BlockHelper.hpp:84
Kokkos::DefaultHostExecutionSpace host_execution_space
Definition: Ifpack2_BlockHelper.hpp:304
size_t size_type
Definition: Ifpack2_BlockHelper.hpp:274
Kokkos::ArithTraits< scalar_type >::val_type impl_scalar_type
Definition: Ifpack2_BlockHelper.hpp:286
KB::Vector< T, l > Vector
Definition: Ifpack2_BlockHelper.hpp:341
Definition: Ifpack2_BlockHelper.hpp:142
Definition: Ifpack2_BlockHelper.hpp:95
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Kokkos::View< size_type *, device_type > size_type_1d_view
Definition: Ifpack2_BlockHelper.hpp:358
Definition: Ifpack2_BlockHelper.hpp:109
Definition: Ifpack2_BlockHelper.hpp:389
Definition: Ifpack2_BlockHelper.hpp:211
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:236
Definition: Ifpack2_BlockHelper.hpp:270
Definition: Ifpack2_BlockHelper.hpp:123
Definition: Ifpack2_BlockHelper.hpp:68