10 #ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11 #define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
15 #include <Tpetra_Distributor.hpp>
16 #include <Tpetra_BlockMultiVector.hpp>
17 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
19 #if KOKKOS_VERSION >= 40799
20 #include <KokkosKernels_ArithTraits.hpp>
22 #include <Kokkos_ArithTraits.hpp>
24 #include <KokkosBatched_Util.hpp>
25 #include <KokkosBatched_Vector.hpp>
26 #include <KokkosBatched_AddRadial_Decl.hpp>
27 #include <KokkosBatched_AddRadial_Impl.hpp>
28 #include <KokkosBatched_Gemm_Decl.hpp>
29 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
30 #include <KokkosBatched_Gemv_Decl.hpp>
31 #include <KokkosBatched_Trsm_Decl.hpp>
32 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
33 #include <KokkosBatched_Trsv_Decl.hpp>
34 #include <KokkosBatched_Trsv_Serial_Impl.hpp>
35 #include <KokkosBatched_LU_Decl.hpp>
36 #include <KokkosBatched_LU_Serial_Impl.hpp>
39 #include "Ifpack2_BlockTriDiContainer_impl.hpp"
49 template <
typename MatrixType>
52 const bool overlapCommAndComp,
53 const bool useSeqMethod,
55 const bool explicitConversion) {
56 IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE(
"BlockTriDiContainer::initInternal", initInternal,
typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
60 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createImpl", createImpl);
61 impl_ =
Teuchos::rcp(
new BlockTriDiContainerDetails::ImplObject<MatrixType>());
62 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
65 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
69 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::setA", setA);
70 if (explicitConversion) {
71 impl_->A = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(matrix);
72 if (impl_->A.is_null()) {
75 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
76 impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size,
true);
77 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
83 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
86 impl_->tpetra_importer = Teuchos::null;
87 impl_->async_importer = Teuchos::null;
90 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
92 impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
94 impl_->tpetra_importer = importer;
95 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
97 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
100 impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
101 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
110 impl_->overlap_communication_and_computation = overlapCommAndComp;
113 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createZ", createZ);
114 impl_->Z =
typename impl_type::tpetra_multivector_type();
115 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
118 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createW", createW);
119 impl_->W =
typename impl_type::impl_scalar_type_1d_view();
120 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
123 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
126 template <
typename MatrixType>
127 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::clearInternal() {
128 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::clearInternal", clearInternal);
129 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
130 using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
131 using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
132 using amd_type = BlockHelperDetails::AmD<MatrixType>;
133 using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
135 impl_->A = Teuchos::null;
136 impl_->tpetra_importer = Teuchos::null;
137 impl_->async_importer = Teuchos::null;
139 impl_->Z =
typename impl_type::tpetra_multivector_type();
140 impl_->W =
typename impl_type::impl_scalar_type_1d_view();
142 impl_->part_interface = part_interface_type();
143 impl_->block_tridiags = block_tridiags_type();
144 impl_->a_minus_d = amd_type();
145 impl_->work =
typename impl_type::vector_type_1d_view();
146 impl_->norm_manager = norm_manager_type();
148 impl_ = Teuchos::null;
149 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
152 template <
typename MatrixType>
157 :
Container<MatrixType>(matrix, partitions, pointIndexed)
158 , partitions_(partitions) {
159 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::BlockTriDiContainer",
BlockTriDiContainer);
160 const bool useSeqMethod =
false;
161 const bool overlapCommAndComp =
false;
162 initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
163 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
164 n_subparts_per_part_ = -1;
166 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
169 template <
typename MatrixType>
172 const int n_subparts_per_part,
173 const bool overlapCommAndComp,
174 const bool useSeqMethod,
175 const int block_size,
176 const bool explicitConversion)
177 :
Container<MatrixType>(matrix, partitions, false)
178 , partitions_(partitions) {
179 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::BlockTriDiContainer",
BlockTriDiContainer);
180 initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
181 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
182 n_subparts_per_part_ = n_subparts_per_part;
183 block_size_ = block_size;
184 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
187 template <
typename MatrixType>
193 auto matrixBCRS = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(matrix);
194 bool hasBlockCrs = !matrixBCRS.is_null();
195 bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
196 constexpr
bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
198 #ifdef KOKKOS_ARCH_VOLTA
199 constexpr
bool isVolta =
true;
201 constexpr
bool isVolta =
false;
206 return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
209 template <
typename MatrixType>
213 template <
typename MatrixType>
215 if (List.
isType<
int>(
"partitioner: subparts per part"))
216 n_subparts_per_part_ = List.
get<
int>(
"partitioner: subparts per part");
217 if (List.
isType<
int>(
"partitioner: block size"))
218 block_size_ = List.
get<
int>(
"partitioner: block size");
221 template <
typename MatrixType>
223 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::initialize", initialize);
224 this->IsInitialized_ =
true;
226 auto bA = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(impl_->A);
230 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
231 auto A = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(impl_->A);
232 impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_,
true);
233 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
236 impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
241 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
242 impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
243 impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
245 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
250 this->IsComputed_ =
false;
253 bool useSeqMethod = !impl_->tpetra_importer.is_null();
254 BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>(impl_->A,
256 impl_->part_interface,
257 impl_->block_tridiags,
259 impl_->overlap_communication_and_computation,
260 impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
262 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
265 template <
typename MatrixType>
267 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::compute", compute);
268 this->IsComputed_ =
false;
269 if (!this->isInitialized())
272 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
274 impl_->part_interface, impl_->block_tridiags,
275 #if KOKKOS_VERSION >= 40799
276 KokkosKernels::ArithTraits<magnitude_type>::zero(),
278 Kokkos::ArithTraits<magnitude_type>::zero(),
280 impl_->use_fused_jacobi);
282 this->IsComputed_ =
true;
283 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
286 template <
typename MatrixType>
288 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::clearBlocks", clearBlocks);
290 this->IsInitialized_ =
false;
291 this->IsComputed_ =
false;
293 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
296 template <
typename MatrixType>
297 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::applyInverseJacobi(
const mv_type& X, mv_type& Y, scalar_type dampingFactor,
298 bool zeroStartingSolution,
int numSweeps)
const {
299 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
300 #if KOKKOS_VERSION >= 40799
301 const magnitude_type tol = KokkosKernels::ArithTraits<magnitude_type>::zero();
303 const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
305 const int check_tol_every = 1;
307 if (!impl_->use_fused_jacobi) {
308 BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
310 impl_->tpetra_importer,
311 impl_->async_importer,
312 impl_->overlap_communication_and_computation,
313 X, Y, impl_->Z, impl_->W,
314 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
318 zeroStartingSolution,
323 BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
324 impl_->async_importer,
325 impl_->overlap_communication_and_computation,
327 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
331 zeroStartingSolution,
336 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
339 template <
typename MatrixType>
340 typename BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::ComputeParameters
342 return ComputeParameters();
345 template <
typename MatrixType>
347 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::compute", compute);
348 this->IsComputed_ =
false;
349 if (!this->isInitialized())
352 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
354 impl_->part_interface, impl_->block_tridiags,
355 in.addRadiallyToDiagonal,
356 impl_->use_fused_jacobi);
358 this->IsComputed_ =
true;
359 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
362 template <
typename MatrixType>
370 template <
typename MatrixType>
372 const ApplyParameters& in)
const {
373 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
376 if (!impl_->use_fused_jacobi) {
377 r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
379 impl_->tpetra_importer,
380 impl_->async_importer,
381 impl_->overlap_communication_and_computation,
382 X, Y, impl_->Z, impl_->W,
383 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
387 in.zeroStartingSolution,
390 in.checkToleranceEvery);
392 r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
393 impl_->async_importer,
394 impl_->overlap_communication_and_computation,
396 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
400 in.zeroStartingSolution,
403 in.checkToleranceEvery);
406 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
410 template <
typename MatrixType>
413 return impl_->norm_manager.getNorms0();
416 template <
typename MatrixType>
419 return impl_->norm_manager.getNormsFinal();
422 template <
typename MatrixType>
424 scalar_type , scalar_type )
const {
426 <<
"because you want to use this container's performance-portable Jacobi iteration. In "
427 <<
"that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
430 template <
typename MatrixType>
436 template <
typename MatrixType>
445 template <
typename MatrixType>
448 std::ostringstream oss;
450 if (this->isInitialized()) {
451 if (this->isComputed()) {
452 oss <<
"{status = initialized, computed";
454 oss <<
"{status = initialized, not computed";
457 oss <<
"{status = not initialized, not computed";
464 template <
typename MatrixType>
470 os <<
"================================================================================" << endl
471 <<
"Ifpack2::BlockTriDiContainer" << endl
472 <<
"Number of blocks = " << this->numBlocks_ << endl
473 <<
"isInitialized() = " << this->IsInitialized_ << endl
474 <<
"isComputed() = " << this->IsComputed_ << endl
475 <<
"================================================================================" << endl
479 template <
typename MatrixType>
483 #define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S, LO, GO, N) \
484 template class Ifpack2::BlockTriDiContainer<Tpetra::RowMatrix<S, LO, GO, N> >;
Ifpack2::BlockTriDiContainer class declaration.
T & get(const std::string &name, T def_value)
Store and solve local block tridiagonal linear problems.
Definition: Ifpack2_BlockTriDiContainer_decl.hpp:107
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:389
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
Interface for creating and solving a set of local linear problems.
Definition: Ifpack2_Container_decl.hpp:79
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:270