10 #ifndef IFPACK2_BLOCKHELPER_IMPL_HPP
11 #define IFPACK2_BLOCKHELPER_IMPL_HPP
13 #include "Ifpack2_BlockHelper_Timers.hpp"
17 namespace BlockHelperDetails {
19 namespace KB = KokkosBatched;
24 using do_not_initialize_tag = Kokkos::ViewAllocateWithoutInitializing;
26 template <
typename MemoryTraitsType, Kokkos::MemoryTraitsFlags flag>
27 using MemoryTraits = Kokkos::MemoryTraits<MemoryTraitsType::is_unmanaged |
28 MemoryTraitsType::is_random_access |
31 template <
typename ViewType>
32 using Unmanaged = Kokkos::View<
typename ViewType::data_type,
33 typename ViewType::array_layout,
34 typename ViewType::device_type,
35 MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
36 template <
typename ViewType>
37 using Atomic = Kokkos::View<
typename ViewType::data_type,
38 typename ViewType::array_layout,
39 typename ViewType::device_type,
40 MemoryTraits<typename ViewType::memory_traits, Kokkos::Atomic> >;
41 template <
typename ViewType>
42 using Const = Kokkos::View<
typename ViewType::const_data_type,
43 typename ViewType::array_layout,
44 typename ViewType::device_type,
45 typename ViewType::memory_traits>;
46 template <
typename ViewType>
47 using ConstUnmanaged = Const<Unmanaged<ViewType> >;
49 template <
typename ViewType>
50 using AtomicUnmanaged = Atomic<Unmanaged<ViewType> >;
52 template <
typename ViewType>
53 using Unmanaged = Kokkos::View<
typename ViewType::data_type,
54 typename ViewType::array_layout,
55 typename ViewType::device_type,
56 MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
58 template <
typename ViewType>
59 using Scratch = Kokkos::View<
typename ViewType::data_type,
60 typename ViewType::array_layout,
61 typename ViewType::execution_space::scratch_memory_space,
62 MemoryTraits<typename ViewType::memory_traits, Kokkos::Unmanaged> >;
67 template <
typename LayoutType>
72 KOKKOS_INLINE_FUNCTION
static T getFlatIndex(
const T i,
const T j,
const T blksize) {
return i + j * blksize; }
75 struct TpetraLittleBlock<Kokkos::LayoutRight> {
77 KOKKOS_INLINE_FUNCTION
static T getFlatIndex(
const T i,
const T j,
const T blksize) {
return i * blksize + j; }
85 #if defined(IFPACK2_BLOCKHELPER_USE_SMALL_SCALAR_FOR_BLOCKTRIDIAG)
96 enum :
bool { value =
false };
98 #if defined(KOKKOS_ENABLE_CUDA)
101 enum :
bool { value =
true };
108 template <
typename T>
110 enum :
bool { value =
false };
112 #if defined(KOKKOS_ENABLE_HIP)
114 struct is_hip<Kokkos::HIP> {
115 enum :
bool { value =
true };
122 template <
typename T>
124 enum :
bool { value =
false };
126 #if defined(KOKKOS_ENABLE_SYCL)
128 struct is_sycl<Kokkos::Experimental::SYCL> {
129 enum :
bool { value =
true };
133 template <
typename T>
135 enum :
bool { value = is_cuda<T>::value || is_hip<T>::value || is_sycl<T>::value };
141 template <
typename T>
143 static void createInstance(T &exec_instance) {
146 #if defined(KOKKOS_ENABLE_CUDA)
147 static void createInstance(
const cudaStream_t &s, T &exec_instance) {
153 #if defined(KOKKOS_ENABLE_CUDA)
156 static void createInstance(Kokkos::Cuda &exec_instance) {
157 exec_instance = Kokkos::Cuda();
159 static void createInstance(
const cudaStream_t &s, Kokkos::Cuda &exec_instance) {
160 exec_instance = Kokkos::Cuda(s);
165 #if defined(KOKKOS_ENABLE_HIP)
167 struct ExecutionSpaceFactory<Kokkos::HIP> {
168 static void createInstance(Kokkos::HIP &exec_instance) {
169 exec_instance = Kokkos::HIP();
174 #if defined(KOKKOS_ENABLE_SYCL)
176 struct ExecutionSpaceFactory<Kokkos::Experimental::SYCL> {
177 static void createInstance(Kokkos::Experimental::SYCL &exec_instance) {
178 exec_instance = Kokkos::Experimental::SYCL();
183 #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_ENABLE_PROFILE)
184 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN \
185 KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStart());
187 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_END \
188 { KOKKOS_IMPL_CUDA_SAFE_CALL(cudaProfilerStop()); }
190 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN
192 #define IFPACK2_BLOCKHELPER_PROFILER_REGION_END
198 template <
typename CommPtrType>
199 std::string get_msg_prefix(
const CommPtrType &comm) {
200 const auto rank = comm->getRank();
201 const auto nranks = comm->getSize();
202 std::stringstream ss;
203 ss <<
"Rank " << rank <<
" of " << nranks <<
": ";
210 template <
typename T,
int N>
213 KOKKOS_INLINE_FUNCTION
215 for (
int i = 0; i < N; ++i)
218 KOKKOS_INLINE_FUNCTION
220 for (
int i = 0; i < N; ++i)
224 template <
typename T,
int N>
225 static KOKKOS_INLINE_FUNCTION
void
228 for (
int i = 0; i < N; ++i)
235 template <
typename T,
int N,
typename ExecSpace>
239 typedef Kokkos::View<value_type, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
242 KOKKOS_INLINE_FUNCTION
246 KOKKOS_INLINE_FUNCTION
248 for (
int i = 0; i < N; ++i)
249 dst.v[i] += src.v[i];
251 KOKKOS_INLINE_FUNCTION
253 for (
int i = 0; i < N; ++i)
256 KOKKOS_INLINE_FUNCTION
260 KOKKOS_INLINE_FUNCTION
261 result_view_type view()
const {
262 return result_view_type(value);
269 template <
typename MatrixType>
275 typedef typename MatrixType::scalar_type scalar_type;
276 typedef typename MatrixType::local_ordinal_type local_ordinal_type;
277 typedef typename MatrixType::global_ordinal_type global_ordinal_type;
278 typedef typename MatrixType::node_type node_type;
283 #if KOKKOS_VERSION >= 40799
284 typedef typename KokkosKernels::ArithTraits<scalar_type>::val_type
impl_scalar_type;
288 #if KOKKOS_VERSION >= 40799
289 typedef typename KokkosKernels::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
291 typedef typename Kokkos::ArithTraits<impl_scalar_type>::mag_type magnitude_type;
294 typedef typename BlockTridiagScalarType<impl_scalar_type>::type btdm_scalar_type;
295 #if KOKKOS_VERSION >= 40799
296 typedef typename KokkosKernels::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
298 typedef typename Kokkos::ArithTraits<btdm_scalar_type>::mag_type btdm_magnitude_type;
310 typedef typename node_device_type::execution_space node_execution_space;
311 typedef typename node_device_type::memory_space node_memory_space;
313 #if defined(KOKKOS_ENABLE_CUDA) && defined(IFPACK2_BLOCKHELPER_USE_CUDA_SPACE)
314 typedef node_execution_space execution_space;
316 typedef typename std::conditional<std::is_same<node_memory_space, Kokkos::CudaUVMSpace>::value,
318 node_memory_space>::type memory_space;
319 typedef Kokkos::Device<execution_space, memory_space> device_type;
322 typedef node_execution_space execution_space;
323 typedef node_memory_space memory_space;
326 typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_multivector_type;
327 typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> tpetra_map_type;
328 typedef Tpetra::Import<local_ordinal_type, global_ordinal_type, node_type> tpetra_import_type;
329 typedef Tpetra::RowMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_row_matrix_type;
330 typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_matrix_type;
331 typedef Tpetra::CrsGraph<local_ordinal_type, global_ordinal_type, node_type> tpetra_crs_graph_type;
332 typedef Tpetra::BlockCrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_crs_matrix_type;
333 typedef typename tpetra_block_crs_matrix_type::little_block_type tpetra_block_access_view_type;
334 typedef Tpetra::BlockMultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> tpetra_block_multivector_type;
335 typedef typename tpetra_block_crs_matrix_type::crs_graph_type::local_graph_device_type local_crs_graph_type;
340 template <
typename T,
int l>
342 template <
typename T>
343 using SIMD = KB::SIMD<T>;
344 template <
typename T,
typename M>
345 using DefaultVectorLength = KB::DefaultVectorLength<T, M>;
346 template <
typename T,
typename M>
347 using DefaultInternalVectorLength = KB::DefaultInternalVectorLength<T, M>;
349 static constexpr
int vector_length = DefaultVectorLength<btdm_scalar_type, memory_space>::value;
350 static constexpr
int internal_vector_length = DefaultInternalVectorLength<btdm_scalar_type, memory_space>::value;
351 static constexpr
int half_vector_length = (vector_length > 1) ? (vector_length / 2) : 1;
359 typedef Kokkos::View<size_type **, device_type> size_type_2d_view;
360 typedef Kokkos::View<int64_t ***, Kokkos::LayoutRight, device_type> i64_3d_view;
361 typedef Kokkos::View<local_ordinal_type *, device_type> local_ordinal_type_1d_view;
362 typedef Kokkos::View<local_ordinal_type **, device_type> local_ordinal_type_2d_view;
364 typedef Kokkos::View<impl_scalar_type *, device_type> impl_scalar_type_1d_view;
365 typedef Kokkos::View<impl_scalar_type *, node_device_type> impl_scalar_type_1d_view_tpetra;
368 typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, device_type> impl_scalar_type_2d_view;
369 typedef Kokkos::View<impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> impl_scalar_type_2d_view_tpetra;
370 typedef Kokkos::View<const impl_scalar_type **, Kokkos::LayoutLeft, node_device_type> const_impl_scalar_type_2d_view_tpetra;
373 typedef Kokkos::View<vector_type *, device_type> vector_type_1d_view;
374 typedef Kokkos::View<vector_type ***, Kokkos::LayoutRight, device_type> vector_type_3d_view;
375 typedef Kokkos::View<vector_type ****, Kokkos::LayoutRight, device_type> vector_type_4d_view;
376 typedef Kokkos::View<internal_vector_type ***, Kokkos::LayoutRight, device_type> internal_vector_type_3d_view;
377 typedef Kokkos::View<internal_vector_type ****, Kokkos::LayoutRight, device_type> internal_vector_type_4d_view;
378 typedef Kokkos::View<internal_vector_type *****, Kokkos::LayoutRight, device_type> internal_vector_type_5d_view;
379 typedef Kokkos::View<btdm_scalar_type **, Kokkos::LayoutRight, device_type> btdm_scalar_type_2d_view;
380 typedef Kokkos::View<btdm_scalar_type ***, Kokkos::LayoutRight, device_type> btdm_scalar_type_3d_view;
381 typedef Kokkos::View<btdm_scalar_type ****, Kokkos::LayoutRight, device_type> btdm_scalar_type_4d_view;
382 typedef Kokkos::View<btdm_scalar_type *****, Kokkos::LayoutRight, device_type> btdm_scalar_type_5d_view;
388 template <
typename MatrixType>
393 using magnitude_type =
typename impl_type::magnitude_type;
397 int sweep_step_, sweep_step_upper_bound_;
398 #ifdef HAVE_IFPACK2_MPI
399 MPI_Request mpi_request_;
402 magnitude_type work_[3];
409 sweep_step_upper_bound_ = 1;
410 collective_ = comm->getSize() > 1;
412 #ifdef HAVE_IFPACK2_MPI
413 const auto mpi_comm = Teuchos::rcp_dynamic_cast<
const Teuchos::MpiComm<int> >(comm);
415 comm_ = *mpi_comm->getRawMpiComm();
418 const magnitude_type zero(0), minus_one(-1);
421 work_[2] = minus_one;
425 void setCheckFrequency(
const int sweep_step) {
427 sweep_step_upper_bound_ = sweep_step;
431 // Get the buffer into which to store rank-local squared norms.
432 magnitude_type *getBuffer() { return &work_[0]; }
434 // Call MPI_Iallreduce to find the global squared norms.
435 void ireduce(const int sweep, const bool force = false) {
436 if (!force && sweep % sweep_step_) return;
438 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::Ireduce
", Ireduce);
441 #ifdef HAVE_IFPACK2_MPI
442 auto send_data = &work_[1];
443 auto recv_data = &work_[0];
445 #if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
446 MPI_Iallreduce(send_data, recv_data, 1,
447 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
448 MPI_SUM, comm_, &mpi_request_);
450 MPI_Allreduce(send_data, recv_data, 1,
451 Teuchos::Details::MpiTypeTraits<magnitude_type>::getType(),
458 // Check if the norm-based termination criterion is met. tol2 is the
459 // tolerance squared. Sweep is the sweep index. If not every iteration is
460 // being checked, this function immediately returns false. If a check must
461 // be done at this iteration, it waits for the reduction triggered by
462 // ireduce to complete, then checks the global norm against the tolerance.
463 bool checkDone(const int sweep, const magnitude_type tol2, const bool force = false) {
465 if (sweep <= 0) return false;
467 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::NormManager::CheckDone
", CheckDone);
469 TEUCHOS_ASSERT(sweep >= 1);
470 if (!force && (sweep - 1) % sweep_step_) return false;
472 #ifdef HAVE_IFPACK2_MPI
473 #if defined(IFPACK2_BLOCKTRIDICONTAINER_USE_MPI_3)
474 MPI_Wait(&mpi_request_, MPI_STATUS_IGNORE);
484 r_val = (work_[0] < tol2 * work_[2]);
488 const auto adjusted_sweep_step = 2 * sweep_step_;
489 if (adjusted_sweep_step < sweep_step_upper_bound_) {
490 sweep_step_ = adjusted_sweep_step;
492 sweep_step_ = sweep_step_upper_bound_;
497 // After termination has occurred, finalize the norms for use in
498 // get_norms{0,final}.
500 work_[0] = std::sqrt(work_[0]); // after converged
502 work_[2] = std::sqrt(work_[2]); // first norm
503 // if work_[2] is minus one, then norm is not requested.
506 // Report norms to the caller.
507 const magnitude_type getNorms0() const { return work_[2]; }
508 const magnitude_type getNormsFinal() const { return work_[0]; }
511 template <typename MatrixType>
512 void reduceVector(const ConstUnmanaged<typename BlockHelperDetails::ImplType<MatrixType>::impl_scalar_type_1d_view> zz,
513 /* */ typename BlockHelperDetails::ImplType<MatrixType>::magnitude_type *vals) {
514 IFPACK2_BLOCKHELPER_PROFILER_REGION_BEGIN;
515 IFPACK2_BLOCKHELPER_TIMER("BlockTriDi::ReduceVector
", ReduceVector);
517 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
518 using local_ordinal_type = typename impl_type::local_ordinal_type;
519 using impl_scalar_type = typename impl_type::impl_scalar_type;
521 const auto norm2 = KokkosBlas::nrm1(zz);
523 impl_scalar_type norm2(0);
524 Kokkos::parallel_reduce(
525 "ReduceMultiVector::Device
",
526 Kokkos::RangePolicy<typename impl_type::execution_space>(0, zz.extent(0)),
527 KOKKOS_LAMBDA(const local_ordinal_type &i, impl_scalar_type &update) {
532 #if KOKKOS_VERSION >= 40799
533 vals[0] = KokkosKernels::ArithTraits<impl_scalar_type>::abs(norm2);
535 vals[0] = Kokkos::ArithTraits<impl_scalar_type>::abs(norm2);
538 IFPACK2_BLOCKHELPER_PROFILER_REGION_END;
539 IFPACK2_BLOCKHELPER_TIMER_FENCE(typename ImplType<MatrixType>::execution_space)
542 } // namespace BlockHelperDetails
544 } // namespace Ifpack2
node_type::device_type node_device_type
Definition: Ifpack2_BlockHelper.hpp:309
Definition: Ifpack2_BlockHelper.hpp:84
Kokkos::DefaultHostExecutionSpace host_execution_space
Definition: Ifpack2_BlockHelper.hpp:304
size_t size_type
Definition: Ifpack2_BlockHelper.hpp:274
Kokkos::ArithTraits< scalar_type >::val_type impl_scalar_type
Definition: Ifpack2_BlockHelper.hpp:286
KB::Vector< T, l > Vector
Definition: Ifpack2_BlockHelper.hpp:341
Definition: Ifpack2_BlockHelper.hpp:142
Definition: Ifpack2_BlockHelper.hpp:95
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Kokkos::View< size_type *, device_type > size_type_1d_view
Definition: Ifpack2_BlockHelper.hpp:358
Definition: Ifpack2_BlockHelper.hpp:109
Definition: Ifpack2_BlockHelper.hpp:389
Definition: Ifpack2_BlockHelper.hpp:211
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:236
Definition: Ifpack2_BlockHelper.hpp:270
Definition: Ifpack2_BlockHelper.hpp:123
Definition: Ifpack2_BlockHelper.hpp:68