Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_BlockHelper.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_BLOCKHELPER_IMPL_HPP
11 #define IFPACK2_BLOCKHELPER_IMPL_HPP
12 
13 #include "Ifpack2_BlockHelper_Timers.hpp"
14 
15 namespace Ifpack2 {
16 
17  namespace BlockHelperDetails {
18 
19  namespace KB = KokkosBatched;
20 
24  using do_not_initialize_tag = Kokkos::ViewAllocateWithoutInitializing;
25 
26  template <typename MemoryTraitsType, Kokkos::MemoryTraitsFlags flag>
27  using MemoryTraits = Kokkos::MemoryTraits<MemoryTraitsType::is_unmanaged |
28  MemoryTraitsType::is_random_access |
29  flag>;
30 
31  template <typename ViewType>
32  using Unmanaged = Kokkos::View<typename ViewType::data_type,
33  typename ViewType::array_layout,
34  typename ViewType::device_type,
35  MemoryTraits<typename ViewType::memory_traits,Kokkos::Unmanaged> >;
36  template <typename ViewType>
37  using Atomic = Kokkos::View<typename ViewType::data_type,
38  typename ViewType::array_layout,
39  typename ViewType::device_type,
40  MemoryTraits<typename ViewType::memory_traits,Kokkos::Atomic> >;
41  template <typename ViewType>
42  using Const = Kokkos::View<typename ViewType::const_data_type,
43  typename ViewType::array_layout,
44  typename ViewType::device_type,
45  typename ViewType::memory_traits>;
46  template <typename ViewType>
47  using ConstUnmanaged = Const<Unmanaged<ViewType> >;
48 
49  template <typename ViewType>
50  using AtomicUnmanaged = Atomic<Unmanaged<ViewType> >;
51 
52  template <typename ViewType>
53  using Unmanaged = Kokkos::View<typename ViewType::data_type,
54  typename ViewType::array_layout,
55  typename ViewType::device_type,
56  MemoryTraits<typename ViewType::memory_traits,Kokkos::Unmanaged> >;
57 
58 
59  template <typename ViewType>
60  using Scratch = Kokkos::View<typename ViewType::data_type,
61  typename ViewType::array_layout,
62  typename ViewType::execution_space::scratch_memory_space,
63  MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
64 
68  template<typename LayoutType> struct TpetraLittleBlock;
69  template<> struct TpetraLittleBlock<Kokkos::LayoutLeft> {
70  template<typename T> KOKKOS_INLINE_FUNCTION
71  static T getFlatIndex(const T i, const T j, const T blksize) { return i+j*blksize; }
72  };
73  template<> struct TpetraLittleBlock<Kokkos::LayoutRight> {
74  template<typename T> KOKKOS_INLINE_FUNCTION
75  static T getFlatIndex(const T i, const T j, const T blksize) { return i*blksize+j; }
76  };
77 
81  template<typename T> struct BlockTridiagScalarType { typedef T type; };
82 #if defined(IFPACK2_BLOCKHELPER_USE_SMALL_SCALAR_FOR_BLOCKTRIDIAG)
83  template<> struct BlockTridiagScalarType<double> { typedef float type; };
84  //template<> struct SmallScalarType<Kokkos::complex<double> > { typedef Kokkos::complex<float> type; };
85 #endif
86 
90  template<typename T> struct is_cuda { enum : bool { value = false }; };
91 #if defined(KOKKOS_ENABLE_CUDA)
92  template<> struct is_cuda<Kokkos::Cuda> { enum : bool { value = true }; };
93 #endif
94 
98  template<typename T> struct is_hip { enum : bool { value = false }; };
99 #if defined(KOKKOS_ENABLE_HIP)
100  template<> struct is_hip<Kokkos::HIP> { enum : bool { value = true }; };
101 #endif
102 
106  template<typename T> struct is_sycl { enum : bool { value = false }; };
107 #if defined(KOKKOS_ENABLE_SYCL)
108  template<> struct is_sycl<Kokkos::Experimental::SYCL> { enum : bool { value = true }; };
109 #endif
110 
111  template<typename T> struct is_device { enum : bool { value = is_cuda<T>::value || is_hip<T>::value || is_sycl<T>::value }; };
112 
113 
117  template<typename T>
119  static void createInstance(T &exec_instance) {
120  exec_instance = T();
121  }
122 #if defined(KOKKOS_ENABLE_CUDA)
123  static void createInstance(const cudaStream_t &s, T &exec_instance) {
124  exec_instance = T();
125  }
126 #endif
127  };
128 
129 #if defined(KOKKOS_ENABLE_CUDA)
130  template<>
131  struct ExecutionSpaceFactory<Kokkos::Cuda> {
132  static void createInstance(Kokkos::Cuda &exec_instance) {
133  exec_instance = Kokkos::Cuda();
134  }
135  static void createInstance(const cudaStream_t &s, Kokkos::Cuda &exec_instance) {
136  exec_instance = Kokkos::Cuda(s);
137  }
138  };
139 #endif
140 
141 #if defined(KOKKOS_ENABLE_HIP)
142  template<>
143  struct ExecutionSpaceFactory<Kokkos::HIP> {
144  static void createInstance(Kokkos::HIP &exec_instance) {
145  exec_instance = Kokkos::HIP();
146  }
147  };
148 #endif
149 
150 #if defined(KOKKOS_ENABLE_SYCL)
151  template<>
152  struct ExecutionSpaceFactory<Kokkos::Experimental::SYCL> {
153  static void createInstance(Kokkos::Experimental::SYCL &exec_instance) {
154  exec_instance = Kokkos::Experimental::SYCL();
155  }
156  };
157 #endif
158 
159 #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_ENABLE_PROFILE)
160 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN \
161  KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart());
162 
163 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_END \
164  { KOKKOS_IMPL_CUDA_SAFE_CALL( cudaProfilerStop() ); }
165 #else
166 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN
168 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_END
169 #endif
170 
171 
175  template<typename CommPtrType>
176  std::string get_msg_prefix (const CommPtrType &comm) {
177  const auto rank = comm->getRank();
178  const auto nranks = comm->getSize();
179  std::stringstream ss;
180  ss << "Rank " << rank << " of " << nranks << ": ";
181  return ss.str();
182  }
183 
187  template<typename T, int N>
188  struct ArrayValueType {
189  T v[N];
190  KOKKOS_INLINE_FUNCTION
191  ArrayValueType() {
192  for (int i=0;i<N;++i)
193  this->v[i] = 0;
194  }
195  KOKKOS_INLINE_FUNCTION
196  ArrayValueType(const ArrayValueType &b) {
197  for (int i=0;i<N;++i)
198  this->v[i] = b.v[i];
199  }
200  };
201  template<typename T, int N>
202  static
203  KOKKOS_INLINE_FUNCTION
204  void
205  operator+=(ArrayValueType<T,N> &a,
206  const ArrayValueType<T,N> &b) {
207  for (int i=0;i<N;++i)
208  a.v[i] += b.v[i];
209  }
210 
214  template<typename T, int N, typename ExecSpace>
215  struct SumReducer {
216  typedef SumReducer reducer;
218  typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
219  value_type *value;
220 
221  KOKKOS_INLINE_FUNCTION
222  SumReducer(value_type &val) : value(&val) {}
223 
224  KOKKOS_INLINE_FUNCTION
225  void join(value_type &dst, value_type const &src) const {
226  for (int i=0;i<N;++i)
227  dst.v[i] += src.v[i];
228  }
229  KOKKOS_INLINE_FUNCTION
230  void init(value_type &val) const {
231  for (int i=0;i<N;++i)
232  val.v[i] = Kokkos::reduction_identity<T>::sum();
233  }
234  KOKKOS_INLINE_FUNCTION
235  value_type& reference() {
236  return *value;
237  }
238  KOKKOS_INLINE_FUNCTION
239  result_view_type view() const {
240  return result_view_type(value);
241  }
242  };
243 
244 
248  template <typename MatrixType>
249  struct ImplType {
253  typedef size_t size_type;
254  typedef typename MatrixType::scalar_type scalar_type;
255  typedef typename MatrixType::local_ordinal_type local_ordinal_type;
256  typedef typename MatrixType::global_ordinal_type global_ordinal_type;
257  typedef typename MatrixType::node_type node_type;
258 
262  typedef typename Kokkos::Details::ArithTraits<scalar_type>::val_type impl_scalar_type;
263  typedef typename Kokkos::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
264 
265  typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
266  typedef typename Kokkos::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
267 
271  typedef Kokkos::DefaultHostExecutionSpace host_execution_space;
272 
276  typedef typename node_type::device_type node_device_type;
277  typedef typename node_device_type::execution_space node_execution_space;
278  typedef typename node_device_type::memory_space node_memory_space;
279 
280 #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
281  typedef node_execution_space execution_space;
283  typedef typename std::conditional<std::is_same<node_memory_space,Kokkos::CudaUVMSpace>::value,
284  Kokkos::CudaSpace,
285  node_memory_space>::type memory_space;
286  typedef Kokkos::Device<execution_space,memory_space> device_type;
287 #else
288  typedef node_device_type device_type;
289  typedef node_execution_space execution_space;
290  typedef node_memory_space memory_space;
291 #endif
292 
293  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_multivector_type;
294  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> tpetra_map_type;
295  typedef Tpetra::Import<local_ordinal_type,global_ordinal_type,node_type> tpetra_import_type;
296  typedef Tpetra::RowMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_row_matrix_type;
297  typedef Tpetra::CrsMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_crs_matrix_type;
298  typedef Tpetra::CrsGraph<local_ordinal_type,global_ordinal_type,node_type> tpetra_crs_graph_type;
299  typedef Tpetra::BlockCrsMatrix<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_block_crs_matrix_type;
300  typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
301  typedef Tpetra::BlockMultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> tpetra_block_multivector_type;
302  typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;
303 
307  template<typename T, int l> using Vector = KB::Vector<T,l>;
308  template<typename T> using SIMD = KB::SIMD<T>;
309  template<typename T, typename M> using DefaultVectorLength = KB::DefaultVectorLength<T,M>;
310  template<typename T, typename M> using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T,M>;
311 
312  static constexpr int vector_length = DefaultVectorLength<btdm_scalar_type,memory_space>::value;
313  static constexpr int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type,memory_space>::value;
314  typedef Vector<SIMD<btdm_scalar_type>,vector_length> vector_type;
315  typedef Vector<SIMD<btdm_scalar_type>,internal_vector_length> internal_vector_type;
316 
320  typedef Kokkos::View<size_type*,device_type> size_type_1d_view;
321  typedef Kokkos::View<size_type**,device_type> size_type_2d_view;
322  typedef Kokkos::View<int64_t***,Kokkos::LayoutRight,device_type> i64_3d_view;
323  typedef Kokkos::View<local_ordinal_type*,device_type> local_ordinal_type_1d_view;
324  typedef Kokkos::View<local_ordinal_type**,device_type> local_ordinal_type_2d_view;
325  // tpetra block crs values
326  typedef Kokkos::View<impl_scalar_type*,device_type> impl_scalar_type_1d_view;
327  typedef Kokkos::View<impl_scalar_type*,node_device_type> impl_scalar_type_1d_view_tpetra;
328 
329  // tpetra multivector values (layout left): may need to change the typename more explicitly
330  typedef Kokkos::View<impl_scalar_type**,Kokkos::LayoutLeft,device_type> impl_scalar_type_2d_view;
331  typedef Kokkos::View<impl_scalar_type**,Kokkos::LayoutLeft,node_device_type> impl_scalar_type_2d_view_tpetra;
332 
333  // packed data always use layout right
334  typedef Kokkos::View<vector_type*,device_type> vector_type_1d_view;
335  typedef Kokkos::View<vector_type***,Kokkos::LayoutRight,device_type> vector_type_3d_view;
336  typedef Kokkos::View<vector_type****,Kokkos::LayoutRight,device_type> vector_type_4d_view;
337  typedef Kokkos::View<internal_vector_type***,Kokkos::LayoutRight,device_type> internal_vector_type_3d_view;
338  typedef Kokkos::View<internal_vector_type****,Kokkos::LayoutRight,device_type> internal_vector_type_4d_view;
339  typedef Kokkos::View<internal_vector_type*****,Kokkos::LayoutRight,device_type> internal_vector_type_5d_view;
340  typedef Kokkos::View<btdm_scalar_type**,Kokkos::LayoutRight,device_type> btdm_scalar_type_2d_view;
341  typedef Kokkos::View<btdm_scalar_type***,Kokkos::LayoutRight,device_type> btdm_scalar_type_3d_view;
342  typedef Kokkos::View<btdm_scalar_type****,Kokkos::LayoutRight,device_type> btdm_scalar_type_4d_view;
343  typedef Kokkos::View<btdm_scalar_type*****,Kokkos::LayoutRight,device_type> btdm_scalar_type_5d_view;
344  };
345 
346 
350  template<typename MatrixType>
351  struct NormManager {
352  public:
354  using host_execution_space = typename impl_type::host_execution_space;
355  using magnitude_type = typename impl_type::magnitude_type;
356 
357  private:
358  bool collective_;
359  int sweep_step_, sweep_step_upper_bound_;
360 #ifdef HAVE_IFPACK2_MPI
361  MPI_Request mpi_request_;
362  MPI_Comm comm_;
363 #endif
364  magnitude_type work_[3];
365 
366  public:
367  NormManager() = default;
368  NormManager(const NormManager &b) = default;
369  NormManager(const Teuchos::RCP<const Teuchos::Comm<int> >& comm) {
370  sweep_step_ = 1;
371  sweep_step_upper_bound_ = 1;
372  collective_ = comm->getSize() > 1;
373  if (collective_) {
374 #ifdef HAVE_IFPACK2_MPI
375  const auto mpi_comm = Teuchos::rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
376  TEUCHOS_ASSERT( ! mpi_comm.is_null());
377  comm_ = *mpi_comm->getRawMpiComm();
378 #endif
379  }
380  const magnitude_type zero(0), minus_one(-1);
381  work_[0] = zero;
382  work_[1] = zero;
383  work_[2] = minus_one;
384  }
385 
386  // Check the norm every sweep_step sweeps.
387  void setCheckFrequency(const int sweep_step) {
388  TEUCHOS_TEST_FOR_EXCEPT_MSG(sweep_step < 1, "sweep step must be >= 1");
389  sweep_step_upper_bound_ = sweep_step;
390  sweep_step_ = 1;
391  }
392 
393  // Get the buffer into which to store rank-local squared norms.
394  magnitude_type* getBuffer() { return &work_[0]; }
395 
396  // Call MPI_Iallreduce to find the global squared norms.
397  void ireduce(const int sweep, const bool force = false) {
398  if ( ! force && sweep % sweep_step_) return;
399 
400  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce", Ireduce);
401 
402  work_[1] = work_[0];
403 #ifdef HAVE_IFPACK2_MPI
404  auto send_data = &work_[1];
405  auto recv_data = &work_[0];
406  if (collective_) {
407 # if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
408  MPI_Iallreduce(send_data, recv_data, 1,
409  Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
410  MPI_SUM, comm_, &mpi_request_);
411 # else
412  MPI_Allreduce (send_data, recv_data, 1,
413  Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
414  MPI_SUM, comm_);
415 # endif
416  }
417 #endif
418  }
419 
420  // Check if the norm-based termination criterion is met. tol2 is the
421  // tolerance squared. Sweep is the sweep index. If not every iteration is
422  // being checked, this function immediately returns false. If a check must
423  // be done at this iteration, it waits for the reduction triggered by
424  // ireduce to complete, then checks the global norm against the tolerance.
425  bool checkDone (const int sweep, const magnitude_type tol2, const bool force = false) {
426  // early return
427  if (sweep <= 0) return false;
428 
429  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone", CheckDone);
430 
431  TEUCHOS_ASSERT(sweep >= 1);
432  if ( ! force && (sweep - 1) % sweep_step_) return false;
433  if (collective_) {
434 #ifdef HAVE_IFPACK2_MPI
435 # if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
436  MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
437 # else
438  // Do nothing.
439 # endif
440 #endif
441  }
442  bool r_val = false;
443  if (sweep == 1) {
444  work_[2] = work_[0];
445  } else {
446  r_val = (work_[0] < tol2*work_[2]);
447  }
448 
449  // adjust sweep step
450  const auto adjusted_sweep_step = 2*sweep_step_;
451  if (adjusted_sweep_step < sweep_step_upper_bound_) {
452  sweep_step_ = adjusted_sweep_step;
453  } else {
454  sweep_step_ = sweep_step_upper_bound_;
455  }
456  return r_val;
457  }
458 
459  // After termination has occurred, finalize the norms for use in
460  // get_norms{0,final}.
461  void finalize () {
462  work_[0] = std::sqrt(work_[0]); // after converged
463  if (work_[2] >= 0)
464  work_[2] = std::sqrt(work_[2]); // first norm
465  // if work_[2] is minus one, then norm is not requested.
466  }
467 
468  // Report norms to the caller.
469  const magnitude_type getNorms0 () const { return work_[2]; }
470  const magnitude_type getNormsFinal () const { return work_[0]; }
471  };
472 
473  template<typename MatrixType>
474  void reduceVector(const ConstUnmanaged<typename BlockHelperDetails::ImplType<MatrixType>::impl_scalar_type_1d_view> zz,
475  /* */ typename BlockHelperDetails::ImplType<MatrixType>::magnitude_type *vals) {
476  IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN;
477  IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector", ReduceVector);
478 
479  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
480  using local_ordinal_type = typename impl_type::local_ordinal_type;
481  using impl_scalar_type = typename impl_type::impl_scalar_type;
482 #if 0
483  const auto norm2 = KokkosBlas::nrm1(zz);
484 #else
485  impl_scalar_type norm2(0);
486  Kokkos::parallel_reduce
487  ("ReduceMultiVector::Device",
488  Kokkos::RangePolicy<typename impl_type::execution_space>(0,zz.extent(0)),
489  KOKKOS_LAMBDA(const local_ordinal_type &i, impl_scalar_type &update) {
490  update += zz(i);
491  }, norm2);
492 #endif
493  vals[0] = Kokkos::ArithTraits<impl_scalar_type>::abs(norm2);
494 
495  IFPACK2_BLOCKHELPER_PROFILER_REGION_END;
496  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename ImplType<MatrixType>::execution_space)
497  }
498 
499  } // namespace BlockHelperDetails
500 
501 } // namespace Ifpack2
502 
503 #endif
node_type::device_type node_device_type
Definition: Ifpack2_BlockHelper.hpp:276
Definition: Ifpack2_BlockHelper.hpp:81
Kokkos::DefaultHostExecutionSpace host_execution_space
Definition: Ifpack2_BlockHelper.hpp:271
size_t size_type
Definition: Ifpack2_BlockHelper.hpp:253
Definition: Ifpack2_BlockHelper.hpp:118
Definition: Ifpack2_BlockHelper.hpp:90
KB::Vector< T, l > Vector
Definition: Ifpack2_BlockHelper.hpp:307
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:98
Definition: Ifpack2_BlockHelper.hpp:351
Kokkos::Details::ArithTraits< scalar_type >::val_type impl_scalar_type
Definition: Ifpack2_BlockHelper.hpp:262
Definition: Ifpack2_BlockHelper.hpp:188
Kokkos::View< size_type *, device_type > size_type_1d_view
Definition: Ifpack2_BlockHelper.hpp:320
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:215
Definition: Ifpack2_BlockHelper.hpp:249
Definition: Ifpack2_BlockHelper.hpp:106
Definition: Ifpack2_BlockHelper.hpp:68