Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_BlockTriDiContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11 #define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
12 
14 
15 #include <Tpetra_Distributor.hpp>
16 #include <Tpetra_BlockMultiVector.hpp>
17 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
18 
19 #if KOKKOS_VERSION >= 40799
20 #include <KokkosKernels_ArithTraits.hpp>
21 #else
22 #include <Kokkos_ArithTraits.hpp>
23 #endif
24 #include <KokkosBatched_Util.hpp>
25 #include <KokkosBatched_Vector.hpp>
26 #include <KokkosBatched_AddRadial_Decl.hpp>
27 #include <KokkosBatched_AddRadial_Impl.hpp>
28 #include <KokkosBatched_Gemm_Decl.hpp>
29 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
30 #include <KokkosBatched_Gemv_Decl.hpp>
31 #include <KokkosBatched_Trsm_Decl.hpp>
32 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
33 #include <KokkosBatched_Trsv_Decl.hpp>
34 #include <KokkosBatched_Trsv_Serial_Impl.hpp>
35 #include <KokkosBatched_LU_Decl.hpp>
36 #include <KokkosBatched_LU_Serial_Impl.hpp>
37 
39 #include "Ifpack2_BlockTriDiContainer_impl.hpp"
40 
41 #include <memory>
42 
43 namespace Ifpack2 {
44 
48 
49 template <typename MatrixType>
50 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::initInternal(const Teuchos::RCP<const row_matrix_type>& matrix,
51  const Teuchos::RCP<const import_type>& importer,
52  const bool overlapCommAndComp,
53  const bool useSeqMethod,
54  const int block_size,
55  const bool explicitConversion) {
56  IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", initInternal, typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
57 
58  // create pointer of impl
59  {
60  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl", createImpl);
61  impl_ = Teuchos::rcp(new BlockTriDiContainerDetails::ImplObject<MatrixType>());
62  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
63  }
64 
65  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
66  // using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type;
67 
68  {
69  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA", setA);
70  if (explicitConversion) {
71  impl_->A = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
72  if (impl_->A.is_null()) {
73  TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
74  {
75  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
76  impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, true);
77  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
78  }
79  }
80  } else {
81  impl_->A = matrix;
82  }
83  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
84  }
85 
86  impl_->tpetra_importer = Teuchos::null;
87  impl_->async_importer = Teuchos::null;
88 
89  if (useSeqMethod) {
90  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
91  if (importer.is_null()) // there is no given importer, then create one
92  impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
93  else
94  impl_->tpetra_importer = importer; // if there is a given importer, use it
95  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
96  } else {
97  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
98  // Leave tpetra_importer null even if user provided an importer.
99  // It is not used in the performant codepath (!useSeqMethod)
100  impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
101  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
102  }
103 
104  // as a result, there are
105  // 1) tpetra_importer is null , async_importer is null (no need for importer)
106  // 2) tpetra_importer is NOT null , async_importer is null (sequential method is used)
107  // 3) tpetra_importer is null , async_importer is NOT null (async method is used)
108 
109  // temporary disabling
110  impl_->overlap_communication_and_computation = overlapCommAndComp;
111 
112  {
113  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ", createZ);
114  impl_->Z = typename impl_type::tpetra_multivector_type();
115  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
116  }
117  {
118  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW", createW);
119  impl_->W = typename impl_type::impl_scalar_type_1d_view();
120  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
121  }
122 
123  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
124 }
125 
126 template <typename MatrixType>
127 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::clearInternal() {
128  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal", clearInternal);
129  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
130  using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
131  using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
132  using amd_type = BlockHelperDetails::AmD<MatrixType>;
133  using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
134 
135  impl_->A = Teuchos::null;
136  impl_->tpetra_importer = Teuchos::null;
137  impl_->async_importer = Teuchos::null;
138 
139  impl_->Z = typename impl_type::tpetra_multivector_type();
140  impl_->W = typename impl_type::impl_scalar_type_1d_view();
141 
142  impl_->part_interface = part_interface_type();
143  impl_->block_tridiags = block_tridiags_type();
144  impl_->a_minus_d = amd_type();
145  impl_->work = typename impl_type::vector_type_1d_view();
146  impl_->norm_manager = norm_manager_type();
147 
148  impl_ = Teuchos::null;
149  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
150 }
151 
152 template <typename MatrixType>
155  const Teuchos::RCP<const import_type>& importer,
156  bool pointIndexed)
157  : Container<MatrixType>(matrix, partitions, pointIndexed)
158  , partitions_(partitions) {
159  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
160  const bool useSeqMethod = false;
161  const bool overlapCommAndComp = false;
162  initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
163  impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
164  n_subparts_per_part_ = -1;
165  block_size_ = -1;
166  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
167 }
168 
169 template <typename MatrixType>
172  const int n_subparts_per_part,
173  const bool overlapCommAndComp,
174  const bool useSeqMethod,
175  const int block_size,
176  const bool explicitConversion)
177  : Container<MatrixType>(matrix, partitions, false)
178  , partitions_(partitions) {
179  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
180  initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
181  impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
182  n_subparts_per_part_ = n_subparts_per_part;
183  block_size_ = block_size;
184  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
185 }
186 
187 template <typename MatrixType>
191  bool useSeqMethod) {
193  auto matrixBCRS = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
194  bool hasBlockCrs = !matrixBCRS.is_null();
195  bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
196  constexpr bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
197  // The fused block Jacobi solve is actually slower on V100
198 #ifdef KOKKOS_ARCH_VOLTA
199  constexpr bool isVolta = true;
200 #else
201  constexpr bool isVolta = false;
202 #endif
203  // Jacobi case can be represented in two ways:
204  // - partitions is empty
205  // - partitions has length equal to local number of rows (meaning all line lengths must be 1)
206  return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
207 }
208 
209 template <typename MatrixType>
211 }
212 
213 template <typename MatrixType>
215  if (List.isType<int>("partitioner: subparts per part"))
216  n_subparts_per_part_ = List.get<int>("partitioner: subparts per part");
217  if (List.isType<int>("partitioner: block size"))
218  block_size_ = List.get<int>("partitioner: block size");
219 }
220 
221 template <typename MatrixType>
223  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize", initialize);
224  this->IsInitialized_ = true;
225  {
226  auto bA = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(impl_->A);
227  if (bA.is_null()) {
228  TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size_ == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
229  {
230  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
231  auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(impl_->A);
232  impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_, true);
233  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
234  }
235  } else {
236  impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
237  }
238  }
239 
240  {
241  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
242  impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
243  impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
244  impl_->norm_manager = BlockHelperDetails::NormManager<MatrixType>(impl_->A->getComm());
245  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
246  }
247 
248  // We assume that if you called this method, you intend to recompute
249  // everything.
250  this->IsComputed_ = false;
251  TEUCHOS_ASSERT(!impl_->A.is_null()); // when initInternal is called, A_ must be set
252  {
253  bool useSeqMethod = !impl_->tpetra_importer.is_null();
254  BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>(impl_->A,
255  impl_->blockGraph,
256  impl_->part_interface,
257  impl_->block_tridiags,
258  impl_->a_minus_d,
259  impl_->overlap_communication_and_computation,
260  impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
261  }
262  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
263 }
264 
265 template <typename MatrixType>
267  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
268  this->IsComputed_ = false;
269  if (!this->isInitialized())
270  this->initialize();
271  {
272  BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
273  impl_->blockGraph,
274  impl_->part_interface, impl_->block_tridiags,
275 #if KOKKOS_VERSION >= 40799
276  KokkosKernels::ArithTraits<magnitude_type>::zero(),
277 #else
278  Kokkos::ArithTraits<magnitude_type>::zero(),
279 #endif
280  impl_->use_fused_jacobi);
281  }
282  this->IsComputed_ = true;
283  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
284 }
285 
286 template <typename MatrixType>
288  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks", clearBlocks);
289  clearInternal();
290  this->IsInitialized_ = false;
291  this->IsComputed_ = false;
293  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
294 }
295 
296 template <typename MatrixType>
297 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::applyInverseJacobi(const mv_type& X, mv_type& Y, scalar_type dampingFactor,
298  bool zeroStartingSolution, int numSweeps) const {
299  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
300 #if KOKKOS_VERSION >= 40799
301  const magnitude_type tol = KokkosKernels::ArithTraits<magnitude_type>::zero();
302 #else
303  const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
304 #endif
305  const int check_tol_every = 1;
306 
307  if (!impl_->use_fused_jacobi) {
308  BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
309  impl_->blockGraph,
310  impl_->tpetra_importer,
311  impl_->async_importer,
312  impl_->overlap_communication_and_computation,
313  X, Y, impl_->Z, impl_->W,
314  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
315  impl_->work,
316  impl_->norm_manager,
317  dampingFactor,
318  zeroStartingSolution,
319  numSweeps,
320  tol,
321  check_tol_every);
322  } else {
323  BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
324  impl_->async_importer,
325  impl_->overlap_communication_and_computation,
326  X, Y, impl_->W,
327  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
328  impl_->work_flat,
329  impl_->norm_manager,
330  dampingFactor,
331  zeroStartingSolution,
332  numSweeps,
333  tol,
334  check_tol_every);
335  }
336  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
337 }
338 
339 template <typename MatrixType>
340 typename BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::ComputeParameters
342  return ComputeParameters();
343 }
344 
345 template <typename MatrixType>
347  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
348  this->IsComputed_ = false;
349  if (!this->isInitialized())
350  this->initialize();
351  {
352  BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
353  impl_->blockGraph,
354  impl_->part_interface, impl_->block_tridiags,
355  in.addRadiallyToDiagonal,
356  impl_->use_fused_jacobi);
357  }
358  this->IsComputed_ = true;
359  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
360 }
361 
362 template <typename MatrixType>
365  ApplyParameters in;
366  in.dampingFactor = Teuchos::ScalarTraits<scalar_type>::one();
367  return in;
368 }
369 
370 template <typename MatrixType>
372  const ApplyParameters& in) const {
373  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
374  int r_val = 0;
375  {
376  if (!impl_->use_fused_jacobi) {
377  r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
378  impl_->blockGraph,
379  impl_->tpetra_importer,
380  impl_->async_importer,
381  impl_->overlap_communication_and_computation,
382  X, Y, impl_->Z, impl_->W,
383  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
384  impl_->work,
385  impl_->norm_manager,
386  in.dampingFactor,
387  in.zeroStartingSolution,
388  in.maxNumSweeps,
389  in.tolerance,
390  in.checkToleranceEvery);
391  } else {
392  r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
393  impl_->async_importer,
394  impl_->overlap_communication_and_computation,
395  X, Y, impl_->W,
396  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
397  impl_->work_flat,
398  impl_->norm_manager,
399  in.dampingFactor,
400  in.zeroStartingSolution,
401  in.maxNumSweeps,
402  in.tolerance,
403  in.checkToleranceEvery);
404  }
405  }
406  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
407  return r_val;
408 }
409 
410 template <typename MatrixType>
413  return impl_->norm_manager.getNorms0();
414 }
415 
416 template <typename MatrixType>
419  return impl_->norm_manager.getNormsFinal();
420 }
421 
422 template <typename MatrixType>
423 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::apply(ConstHostView /* X */, HostView /* Y */, int /* blockIndex */, Teuchos::ETransp /* mode */,
424  scalar_type /* alpha */, scalar_type /* beta */) const {
425  TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::apply is not implemented. You may have reached this message "
426  << "because you want to use this container's performance-portable Jacobi iteration. In "
427  << "that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
428 }
429 
430 template <typename MatrixType>
431 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::weightedApply(ConstHostView /* X */, HostView /* Y */, ConstHostView /* D */, int /* blockIndex */,
432  Teuchos::ETransp /* mode */, scalar_type /* alpha */, scalar_type /* beta */) const {
433  TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::weightedApply is not implemented.");
434 }
435 
436 template <typename MatrixType>
437 std::ostream&
439  Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
440  fos.setOutputToRootOnly(0);
441  describe(fos);
442  return os;
443 }
444 
445 template <typename MatrixType>
446 std::string
448  std::ostringstream oss;
450  if (this->isInitialized()) {
451  if (this->isComputed()) {
452  oss << "{status = initialized, computed";
453  } else {
454  oss << "{status = initialized, not computed";
455  }
456  } else {
457  oss << "{status = not initialized, not computed";
458  }
459 
460  oss << "}";
461  return oss.str();
462 }
463 
464 template <typename MatrixType>
467  const Teuchos::EVerbosityLevel verbLevel) const {
468  using std::endl;
469  if (verbLevel == Teuchos::VERB_NONE) return;
470  os << "================================================================================" << endl
471  << "Ifpack2::BlockTriDiContainer" << endl
472  << "Number of blocks = " << this->numBlocks_ << endl
473  << "isInitialized() = " << this->IsInitialized_ << endl
474  << "isComputed() = " << this->IsComputed_ << endl
475  << "================================================================================" << endl
476  << endl;
477 }
478 
479 template <typename MatrixType>
480 std::string
481 BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::getName() { return "Ifpack2::BlockTriDiContainer::ImplSimdTag"; }
482 
483 #define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S, LO, GO, N) \
484  template class Ifpack2::BlockTriDiContainer<Tpetra::RowMatrix<S, LO, GO, N> >;
485 } // namespace Ifpack2
486 #endif
Ifpack2::BlockTriDiContainer class declaration.
T & get(const std::string &name, T def_value)
Store and solve local block tridiagonal linear problems.
Definition: Ifpack2_BlockTriDiContainer_decl.hpp:107
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:389
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
Interface for creating and solving a set of local linear problems.
Definition: Ifpack2_Container_decl.hpp:79
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:270
bool is_null() const