10 #ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11 #define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
15 #include <Tpetra_Distributor.hpp>
16 #include <Tpetra_BlockMultiVector.hpp>
17 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
19 #include <Kokkos_ArithTraits.hpp>
20 #include <KokkosBatched_Util.hpp>
21 #include <KokkosBatched_Vector.hpp>
22 #include <KokkosBatched_AddRadial_Decl.hpp>
23 #include <KokkosBatched_AddRadial_Impl.hpp>
24 #include <KokkosBatched_Gemm_Decl.hpp>
25 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
26 #include <KokkosBatched_Gemv_Decl.hpp>
27 #include <KokkosBatched_Trsm_Decl.hpp>
28 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
29 #include <KokkosBatched_Trsv_Decl.hpp>
30 #include <KokkosBatched_Trsv_Serial_Impl.hpp>
31 #include <KokkosBatched_LU_Decl.hpp>
32 #include <KokkosBatched_LU_Serial_Impl.hpp>
35 #include "Ifpack2_BlockTriDiContainer_impl.hpp"
46 template <
typename MatrixType>
48 BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>
51 const bool overlapCommAndComp,
52 const bool useSeqMethod,
54 const bool explicitConversion)
56 IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE(
"BlockTriDiContainer::initInternal", initInternal,
typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
60 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createImpl", createImpl);
61 impl_ =
Teuchos::rcp(
new BlockTriDiContainerDetails::ImplObject<MatrixType>());
62 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
65 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
69 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::setA", setA);
70 if (explicitConversion) {
71 impl_->A = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(matrix);
72 if (impl_->A.is_null()) {
74 (block_size == -1,
"A pointwise matrix and block_size = -1 were given as inputs.");
76 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
77 impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size,
true);
78 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
85 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
88 impl_->tpetra_importer = Teuchos::null;
89 impl_->async_importer = Teuchos::null;
93 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
95 impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
97 impl_->tpetra_importer = importer;
98 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
102 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
105 impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
106 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
115 impl_->overlap_communication_and_computation = overlapCommAndComp;
118 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createZ", createZ);
119 impl_->Z =
typename impl_type::tpetra_multivector_type();
120 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
123 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createW", createW);
124 impl_->W =
typename impl_type::impl_scalar_type_1d_view();
125 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
128 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
131 template <
typename MatrixType>
133 BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>
136 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::clearInternal", clearInternal);
137 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
138 using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
139 using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
140 using amd_type = BlockHelperDetails::AmD<MatrixType>;
141 using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
143 impl_->A = Teuchos::null;
144 impl_->tpetra_importer = Teuchos::null;
145 impl_->async_importer = Teuchos::null;
147 impl_->Z =
typename impl_type::tpetra_multivector_type();
148 impl_->W =
typename impl_type::impl_scalar_type_1d_view();
150 impl_->part_interface = part_interface_type();
151 impl_->block_tridiags = block_tridiags_type();
152 impl_->a_minus_d = amd_type();
153 impl_->work =
typename impl_type::vector_type_1d_view();
154 impl_->norm_manager = norm_manager_type();
156 impl_ = Teuchos::null;
157 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
160 template <
typename MatrixType>
166 :
Container<MatrixType>(matrix, partitions, pointIndexed), partitions_(partitions)
168 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::BlockTriDiContainer",
BlockTriDiContainer);
169 const bool useSeqMethod =
false;
170 const bool overlapCommAndComp =
false;
171 initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
172 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
173 n_subparts_per_part_ = -1;
175 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
178 template <
typename MatrixType>
182 const int n_subparts_per_part,
183 const bool overlapCommAndComp,
184 const bool useSeqMethod,
185 const int block_size,
186 const bool explicitConversion)
187 :
Container<MatrixType>(matrix, partitions, false), partitions_(partitions)
189 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::BlockTriDiContainer",
BlockTriDiContainer);
190 initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
191 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
192 n_subparts_per_part_ = n_subparts_per_part;
193 block_size_ = block_size;
194 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
197 template <
typename MatrixType>
205 auto matrixBCRS = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(matrix);
206 bool hasBlockCrs = !matrixBCRS.is_null();
207 bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
208 constexpr
bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
210 #ifdef KOKKOS_ARCH_VOLTA
211 constexpr
bool isVolta =
true;
213 constexpr
bool isVolta =
false;
218 return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
221 template <
typename MatrixType>
227 template <
typename MatrixType>
232 if (List.
isType<
int>(
"partitioner: subparts per part"))
233 n_subparts_per_part_ = List.
get<
int>(
"partitioner: subparts per part");
234 if (List.
isType<
int>(
"partitioner: block size"))
235 block_size_ = List.
get<
int>(
"partitioner: block size");
238 template <
typename MatrixType>
243 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::initialize", initialize);
244 this->IsInitialized_ =
true;
246 auto bA = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(impl_->A);
249 (block_size_ == -1,
"A pointwise matrix and block_size = -1 were given as inputs.");
251 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
252 auto A = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(impl_->A);
253 impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_,
true);
254 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
258 impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
263 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
264 impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
265 impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
267 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
272 this->IsComputed_ =
false;
275 bool useSeqMethod = !impl_->tpetra_importer.is_null();
276 BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>
279 impl_->part_interface,
280 impl_->block_tridiags,
282 impl_->overlap_communication_and_computation,
283 impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
285 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
288 template <
typename MatrixType>
293 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::compute", compute);
294 this->IsComputed_ =
false;
295 if (!this->isInitialized())
298 BlockTriDiContainerDetails::performNumericPhase<MatrixType>
301 impl_->part_interface, impl_->block_tridiags,
302 Kokkos::ArithTraits<magnitude_type>::zero(),
303 impl_->use_fused_jacobi);
305 this->IsComputed_ =
true;
306 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
309 template <
typename MatrixType>
314 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::clearBlocks", clearBlocks);
316 this->IsInitialized_ =
false;
317 this->IsComputed_ =
false;
319 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
322 template <
typename MatrixType>
324 BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>
325 ::applyInverseJacobi (
const mv_type& X, mv_type& Y, scalar_type dampingFactor,
326 bool zeroStartingSolution,
int numSweeps)
const
328 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
329 const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
330 const int check_tol_every = 1;
332 if(!impl_->use_fused_jacobi) {
333 BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>
336 impl_->tpetra_importer,
337 impl_->async_importer,
338 impl_->overlap_communication_and_computation,
339 X, Y, impl_->Z, impl_->W,
340 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
344 zeroStartingSolution,
350 BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>
351 (impl_->tpetra_importer,
352 impl_->async_importer,
353 impl_->overlap_communication_and_computation,
355 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
359 zeroStartingSolution,
364 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
367 template <
typename MatrixType>
368 typename BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::ComputeParameters
372 return ComputeParameters();
375 template <
typename MatrixType>
380 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::compute", compute);
381 this->IsComputed_ =
false;
382 if (!this->isInitialized())
385 BlockTriDiContainerDetails::performNumericPhase<MatrixType>
388 impl_->part_interface, impl_->block_tridiags,
389 in.addRadiallyToDiagonal,
390 impl_->use_fused_jacobi);
392 this->IsComputed_ =
true;
393 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
396 template <
typename MatrixType>
406 template <
typename MatrixType>
410 const ApplyParameters& in)
const
412 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
415 if(!impl_->use_fused_jacobi) {
416 r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>
419 impl_->tpetra_importer,
420 impl_->async_importer,
421 impl_->overlap_communication_and_computation,
422 X, Y, impl_->Z, impl_->W,
423 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
427 in.zeroStartingSolution,
430 in.checkToleranceEvery);
433 r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>
434 (impl_->tpetra_importer,
435 impl_->async_importer,
436 impl_->overlap_communication_and_computation,
438 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
442 in.zeroStartingSolution,
445 in.checkToleranceEvery);
448 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
452 template <
typename MatrixType>
456 return impl_->norm_manager.getNorms0();
459 template <
typename MatrixType>
463 return impl_->norm_manager.getNormsFinal();
466 template <
typename MatrixType>
470 scalar_type , scalar_type )
const
473 <<
"because you want to use this container's performance-portable Jacobi iteration. In "
474 <<
"that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
477 template <
typename MatrixType>
486 template <
typename MatrixType>
497 template <
typename MatrixType>
502 std::ostringstream oss;
504 if (this->isInitialized()) {
505 if (this->isComputed()) {
506 oss <<
"{status = initialized, computed";
509 oss <<
"{status = initialized, not computed";
513 oss <<
"{status = not initialized, not computed";
520 template <
typename MatrixType>
528 os <<
"================================================================================" << endl
529 <<
"Ifpack2::BlockTriDiContainer" << endl
530 <<
"Number of blocks = " << this->numBlocks_ << endl
531 <<
"isInitialized() = " << this->IsInitialized_ << endl
532 <<
"isComputed() = " << this->IsComputed_ << endl
533 <<
"================================================================================" << endl
537 template <
typename MatrixType>
540 ::getName() {
return "Ifpack2::BlockTriDiContainer::ImplSimdTag"; }
542 #define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S,LO,GO,N) \
543 template class Ifpack2::BlockTriDiContainer< Tpetra::RowMatrix<S, LO, GO, N> >;
Ifpack2::BlockTriDiContainer class declaration.
T & get(const std::string &name, T def_value)
Store and solve local block tridiagonal linear problems.
Definition: Ifpack2_BlockTriDiContainer_decl.hpp:101
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:353
~BlockTriDiContainer() override
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_BlockTriDiContainer_def.hpp:223
BlockTriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< local_ordinal_type > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_BlockTriDiContainer_def.hpp:162
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
Interface for creating and solving a set of local linear problems.
Definition: Ifpack2_Container_decl.hpp:79
bool isType(const std::string &name) const
ComputeParameters createDefaultComputeParameters() const
Create a ComputeParameters struct with default values.
Definition: Ifpack2_BlockTriDiContainer_def.hpp:370
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:249