10 #ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11 #define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
15 #include <Tpetra_Distributor.hpp>
16 #include <Tpetra_BlockMultiVector.hpp>
17 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
19 #include <Kokkos_ArithTraits.hpp>
20 #include <KokkosBatched_Util.hpp>
21 #include <KokkosBatched_Vector.hpp>
22 #include <KokkosBatched_AddRadial_Decl.hpp>
23 #include <KokkosBatched_AddRadial_Impl.hpp>
24 #include <KokkosBatched_Gemm_Decl.hpp>
25 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
26 #include <KokkosBatched_Gemv_Decl.hpp>
27 #include <KokkosBatched_Trsm_Decl.hpp>
28 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
29 #include <KokkosBatched_Trsv_Decl.hpp>
30 #include <KokkosBatched_Trsv_Serial_Impl.hpp>
31 #include <KokkosBatched_LU_Decl.hpp>
32 #include <KokkosBatched_LU_Serial_Impl.hpp>
35 #include "Ifpack2_BlockTriDiContainer_impl.hpp"
45 template <
typename MatrixType>
48 const bool overlapCommAndComp,
49 const bool useSeqMethod,
51 const bool explicitConversion) {
52 IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE(
"BlockTriDiContainer::initInternal", initInternal,
typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
56 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createImpl", createImpl);
57 impl_ =
Teuchos::rcp(
new BlockTriDiContainerDetails::ImplObject<MatrixType>());
58 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
61 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
65 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::setA", setA);
66 if (explicitConversion) {
67 impl_->A = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(matrix);
68 if (impl_->A.is_null()) {
71 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
72 impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size,
true);
73 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
79 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
82 impl_->tpetra_importer = Teuchos::null;
83 impl_->async_importer = Teuchos::null;
86 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
88 impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
90 impl_->tpetra_importer = importer;
91 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
93 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
96 impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
97 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
106 impl_->overlap_communication_and_computation = overlapCommAndComp;
109 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createZ", createZ);
110 impl_->Z =
typename impl_type::tpetra_multivector_type();
111 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
114 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createW", createW);
115 impl_->W =
typename impl_type::impl_scalar_type_1d_view();
116 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
119 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
122 template <
typename MatrixType>
123 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::clearInternal() {
124 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::clearInternal", clearInternal);
125 using impl_type = BlockHelperDetails::ImplType<MatrixType>;
126 using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
127 using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
128 using amd_type = BlockHelperDetails::AmD<MatrixType>;
129 using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
131 impl_->A = Teuchos::null;
132 impl_->tpetra_importer = Teuchos::null;
133 impl_->async_importer = Teuchos::null;
135 impl_->Z =
typename impl_type::tpetra_multivector_type();
136 impl_->W =
typename impl_type::impl_scalar_type_1d_view();
138 impl_->part_interface = part_interface_type();
139 impl_->block_tridiags = block_tridiags_type();
140 impl_->a_minus_d = amd_type();
141 impl_->work =
typename impl_type::vector_type_1d_view();
142 impl_->norm_manager = norm_manager_type();
144 impl_ = Teuchos::null;
145 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
148 template <
typename MatrixType>
153 :
Container<MatrixType>(matrix, partitions, pointIndexed)
154 , partitions_(partitions) {
155 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::BlockTriDiContainer",
BlockTriDiContainer);
156 const bool useSeqMethod =
false;
157 const bool overlapCommAndComp =
false;
158 initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
159 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
160 n_subparts_per_part_ = -1;
162 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
165 template <
typename MatrixType>
168 const int n_subparts_per_part,
169 const bool overlapCommAndComp,
170 const bool useSeqMethod,
171 const int block_size,
172 const bool explicitConversion)
173 :
Container<MatrixType>(matrix, partitions, false)
174 , partitions_(partitions) {
175 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::BlockTriDiContainer",
BlockTriDiContainer);
176 initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
177 impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
178 n_subparts_per_part_ = n_subparts_per_part;
179 block_size_ = block_size;
180 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
183 template <
typename MatrixType>
189 auto matrixBCRS = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(matrix);
190 bool hasBlockCrs = !matrixBCRS.is_null();
191 bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
192 constexpr
bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
194 #ifdef KOKKOS_ARCH_VOLTA
195 constexpr
bool isVolta =
true;
197 constexpr
bool isVolta =
false;
202 return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
205 template <
typename MatrixType>
209 template <
typename MatrixType>
211 if (List.
isType<
int>(
"partitioner: subparts per part"))
212 n_subparts_per_part_ = List.
get<
int>(
"partitioner: subparts per part");
213 if (List.
isType<
int>(
"partitioner: block size"))
214 block_size_ = List.
get<
int>(
"partitioner: block size");
217 template <
typename MatrixType>
219 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::initialize", initialize);
220 this->IsInitialized_ =
true;
222 auto bA = Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type>(impl_->A);
226 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
227 auto A = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(impl_->A);
228 impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_,
true);
229 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
232 impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
237 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
238 impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
239 impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
241 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
246 this->IsComputed_ =
false;
249 bool useSeqMethod = !impl_->tpetra_importer.is_null();
250 BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>(impl_->A,
252 impl_->part_interface,
253 impl_->block_tridiags,
255 impl_->overlap_communication_and_computation,
256 impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
258 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
261 template <
typename MatrixType>
263 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::compute", compute);
264 this->IsComputed_ =
false;
265 if (!this->isInitialized())
268 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
270 impl_->part_interface, impl_->block_tridiags,
271 Kokkos::ArithTraits<magnitude_type>::zero(),
272 impl_->use_fused_jacobi);
274 this->IsComputed_ =
true;
275 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
278 template <
typename MatrixType>
280 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::clearBlocks", clearBlocks);
282 this->IsInitialized_ =
false;
283 this->IsComputed_ =
false;
285 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
288 template <
typename MatrixType>
289 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::applyInverseJacobi(
const mv_type& X, mv_type& Y, scalar_type dampingFactor,
290 bool zeroStartingSolution,
int numSweeps)
const {
291 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
292 const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
293 const int check_tol_every = 1;
295 if (!impl_->use_fused_jacobi) {
296 BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
298 impl_->tpetra_importer,
299 impl_->async_importer,
300 impl_->overlap_communication_and_computation,
301 X, Y, impl_->Z, impl_->W,
302 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
306 zeroStartingSolution,
311 BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
312 impl_->async_importer,
313 impl_->overlap_communication_and_computation,
315 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
319 zeroStartingSolution,
324 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
327 template <
typename MatrixType>
328 typename BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::ComputeParameters
330 return ComputeParameters();
333 template <
typename MatrixType>
335 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::compute", compute);
336 this->IsComputed_ =
false;
337 if (!this->isInitialized())
340 BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
342 impl_->part_interface, impl_->block_tridiags,
343 in.addRadiallyToDiagonal,
344 impl_->use_fused_jacobi);
346 this->IsComputed_ =
true;
347 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
350 template <
typename MatrixType>
358 template <
typename MatrixType>
360 const ApplyParameters& in)
const {
361 IFPACK2_BLOCKHELPER_TIMER(
"BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
364 if (!impl_->use_fused_jacobi) {
365 r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
367 impl_->tpetra_importer,
368 impl_->async_importer,
369 impl_->overlap_communication_and_computation,
370 X, Y, impl_->Z, impl_->W,
371 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
375 in.zeroStartingSolution,
378 in.checkToleranceEvery);
380 r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
381 impl_->async_importer,
382 impl_->overlap_communication_and_computation,
384 impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
388 in.zeroStartingSolution,
391 in.checkToleranceEvery);
394 IFPACK2_BLOCKHELPER_TIMER_FENCE(
typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
398 template <
typename MatrixType>
401 return impl_->norm_manager.getNorms0();
404 template <
typename MatrixType>
407 return impl_->norm_manager.getNormsFinal();
410 template <
typename MatrixType>
412 scalar_type , scalar_type )
const {
414 <<
"because you want to use this container's performance-portable Jacobi iteration. In "
415 <<
"that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
418 template <
typename MatrixType>
424 template <
typename MatrixType>
433 template <
typename MatrixType>
436 std::ostringstream oss;
438 if (this->isInitialized()) {
439 if (this->isComputed()) {
440 oss <<
"{status = initialized, computed";
442 oss <<
"{status = initialized, not computed";
445 oss <<
"{status = not initialized, not computed";
452 template <
typename MatrixType>
458 os <<
"================================================================================" << endl
459 <<
"Ifpack2::BlockTriDiContainer" << endl
460 <<
"Number of blocks = " << this->numBlocks_ << endl
461 <<
"isInitialized() = " << this->IsInitialized_ << endl
462 <<
"isComputed() = " << this->IsComputed_ << endl
463 <<
"================================================================================" << endl
467 template <
typename MatrixType>
471 #define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S, LO, GO, N) \
472 template class Ifpack2::BlockTriDiContainer<Tpetra::RowMatrix<S, LO, GO, N> >;
Ifpack2::BlockTriDiContainer class declaration.
T & get(const std::string &name, T def_value)
Store and solve local block tridiagonal linear problems.
Definition: Ifpack2_BlockTriDiContainer_decl.hpp:107
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:377
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
Interface for creating and solving a set of local linear problems.
Definition: Ifpack2_Container_decl.hpp:79
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:270