Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_BlockTriDiContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11 #define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
12 
14 
15 #include <Tpetra_Distributor.hpp>
16 #include <Tpetra_BlockMultiVector.hpp>
17 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
18 
19 #include <Kokkos_ArithTraits.hpp>
20 #include <KokkosBatched_Util.hpp>
21 #include <KokkosBatched_Vector.hpp>
22 #include <KokkosBatched_AddRadial_Decl.hpp>
23 #include <KokkosBatched_AddRadial_Impl.hpp>
24 #include <KokkosBatched_Gemm_Decl.hpp>
25 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
26 #include <KokkosBatched_Gemv_Decl.hpp>
27 #include <KokkosBatched_Trsm_Decl.hpp>
28 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
29 #include <KokkosBatched_Trsv_Decl.hpp>
30 #include <KokkosBatched_Trsv_Serial_Impl.hpp>
31 #include <KokkosBatched_LU_Decl.hpp>
32 #include <KokkosBatched_LU_Serial_Impl.hpp>
33 
35 #include "Ifpack2_BlockTriDiContainer_impl.hpp"
36 
37 #include <memory>
38 
39 namespace Ifpack2 {
40 
44 
45 template <typename MatrixType>
46 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::initInternal(const Teuchos::RCP<const row_matrix_type>& matrix,
47  const Teuchos::RCP<const import_type>& importer,
48  const bool overlapCommAndComp,
49  const bool useSeqMethod,
50  const int block_size,
51  const bool explicitConversion) {
52  IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", initInternal, typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
53 
54  // create pointer of impl
55  {
56  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl", createImpl);
57  impl_ = Teuchos::rcp(new BlockTriDiContainerDetails::ImplObject<MatrixType>());
58  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
59  }
60 
61  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
62  // using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type;
63 
64  {
65  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA", setA);
66  if (explicitConversion) {
67  impl_->A = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
68  if (impl_->A.is_null()) {
69  TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
70  {
71  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
72  impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, true);
73  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
74  }
75  }
76  } else {
77  impl_->A = matrix;
78  }
79  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
80  }
81 
82  impl_->tpetra_importer = Teuchos::null;
83  impl_->async_importer = Teuchos::null;
84 
85  if (useSeqMethod) {
86  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
87  if (importer.is_null()) // there is no given importer, then create one
88  impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
89  else
90  impl_->tpetra_importer = importer; // if there is a given importer, use it
91  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
92  } else {
93  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
94  // Leave tpetra_importer null even if user provided an importer.
95  // It is not used in the performant codepath (!useSeqMethod)
96  impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
97  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
98  }
99 
100  // as a result, there are
101  // 1) tpetra_importer is null , async_importer is null (no need for importer)
102  // 2) tpetra_importer is NOT null , async_importer is null (sequential method is used)
103  // 3) tpetra_importer is null , async_importer is NOT null (async method is used)
104 
105  // temporary disabling
106  impl_->overlap_communication_and_computation = overlapCommAndComp;
107 
108  {
109  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ", createZ);
110  impl_->Z = typename impl_type::tpetra_multivector_type();
111  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
112  }
113  {
114  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW", createW);
115  impl_->W = typename impl_type::impl_scalar_type_1d_view();
116  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
117  }
118 
119  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
120 }
121 
122 template <typename MatrixType>
123 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::clearInternal() {
124  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal", clearInternal);
125  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
126  using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
127  using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
128  using amd_type = BlockHelperDetails::AmD<MatrixType>;
129  using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
130 
131  impl_->A = Teuchos::null;
132  impl_->tpetra_importer = Teuchos::null;
133  impl_->async_importer = Teuchos::null;
134 
135  impl_->Z = typename impl_type::tpetra_multivector_type();
136  impl_->W = typename impl_type::impl_scalar_type_1d_view();
137 
138  impl_->part_interface = part_interface_type();
139  impl_->block_tridiags = block_tridiags_type();
140  impl_->a_minus_d = amd_type();
141  impl_->work = typename impl_type::vector_type_1d_view();
142  impl_->norm_manager = norm_manager_type();
143 
144  impl_ = Teuchos::null;
145  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
146 }
147 
148 template <typename MatrixType>
151  const Teuchos::RCP<const import_type>& importer,
152  bool pointIndexed)
153  : Container<MatrixType>(matrix, partitions, pointIndexed)
154  , partitions_(partitions) {
155  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
156  const bool useSeqMethod = false;
157  const bool overlapCommAndComp = false;
158  initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
159  impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
160  n_subparts_per_part_ = -1;
161  block_size_ = -1;
162  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
163 }
164 
165 template <typename MatrixType>
168  const int n_subparts_per_part,
169  const bool overlapCommAndComp,
170  const bool useSeqMethod,
171  const int block_size,
172  const bool explicitConversion)
173  : Container<MatrixType>(matrix, partitions, false)
174  , partitions_(partitions) {
175  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
176  initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
177  impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
178  n_subparts_per_part_ = n_subparts_per_part;
179  block_size_ = block_size;
180  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
181 }
182 
183 template <typename MatrixType>
187  bool useSeqMethod) {
189  auto matrixBCRS = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
190  bool hasBlockCrs = !matrixBCRS.is_null();
191  bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
192  constexpr bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
193  // The fused block Jacobi solve is actually slower on V100
194 #ifdef KOKKOS_ARCH_VOLTA
195  constexpr bool isVolta = true;
196 #else
197  constexpr bool isVolta = false;
198 #endif
199  // Jacobi case can be represented in two ways:
200  // - partitions is empty
201  // - partitions has length equal to local number of rows (meaning all line lengths must be 1)
202  return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
203 }
204 
205 template <typename MatrixType>
207 }
208 
209 template <typename MatrixType>
211  if (List.isType<int>("partitioner: subparts per part"))
212  n_subparts_per_part_ = List.get<int>("partitioner: subparts per part");
213  if (List.isType<int>("partitioner: block size"))
214  block_size_ = List.get<int>("partitioner: block size");
215 }
216 
217 template <typename MatrixType>
219  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize", initialize);
220  this->IsInitialized_ = true;
221  {
222  auto bA = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(impl_->A);
223  if (bA.is_null()) {
224  TEUCHOS_TEST_FOR_EXCEPT_MSG(block_size_ == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
225  {
226  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
227  auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(impl_->A);
228  impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_, true);
229  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
230  }
231  } else {
232  impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
233  }
234  }
235 
236  {
237  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
238  impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
239  impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
240  impl_->norm_manager = BlockHelperDetails::NormManager<MatrixType>(impl_->A->getComm());
241  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
242  }
243 
244  // We assume that if you called this method, you intend to recompute
245  // everything.
246  this->IsComputed_ = false;
247  TEUCHOS_ASSERT(!impl_->A.is_null()); // when initInternal is called, A_ must be set
248  {
249  bool useSeqMethod = !impl_->tpetra_importer.is_null();
250  BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>(impl_->A,
251  impl_->blockGraph,
252  impl_->part_interface,
253  impl_->block_tridiags,
254  impl_->a_minus_d,
255  impl_->overlap_communication_and_computation,
256  impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
257  }
258  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
259 }
260 
261 template <typename MatrixType>
263  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
264  this->IsComputed_ = false;
265  if (!this->isInitialized())
266  this->initialize();
267  {
268  BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
269  impl_->blockGraph,
270  impl_->part_interface, impl_->block_tridiags,
271  Kokkos::ArithTraits<magnitude_type>::zero(),
272  impl_->use_fused_jacobi);
273  }
274  this->IsComputed_ = true;
275  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
276 }
277 
278 template <typename MatrixType>
280  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks", clearBlocks);
281  clearInternal();
282  this->IsInitialized_ = false;
283  this->IsComputed_ = false;
285  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
286 }
287 
288 template <typename MatrixType>
289 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::applyInverseJacobi(const mv_type& X, mv_type& Y, scalar_type dampingFactor,
290  bool zeroStartingSolution, int numSweeps) const {
291  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
292  const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
293  const int check_tol_every = 1;
294 
295  if (!impl_->use_fused_jacobi) {
296  BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
297  impl_->blockGraph,
298  impl_->tpetra_importer,
299  impl_->async_importer,
300  impl_->overlap_communication_and_computation,
301  X, Y, impl_->Z, impl_->W,
302  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
303  impl_->work,
304  impl_->norm_manager,
305  dampingFactor,
306  zeroStartingSolution,
307  numSweeps,
308  tol,
309  check_tol_every);
310  } else {
311  BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
312  impl_->async_importer,
313  impl_->overlap_communication_and_computation,
314  X, Y, impl_->W,
315  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
316  impl_->work_flat,
317  impl_->norm_manager,
318  dampingFactor,
319  zeroStartingSolution,
320  numSweeps,
321  tol,
322  check_tol_every);
323  }
324  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
325 }
326 
327 template <typename MatrixType>
328 typename BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::ComputeParameters
330  return ComputeParameters();
331 }
332 
333 template <typename MatrixType>
335  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
336  this->IsComputed_ = false;
337  if (!this->isInitialized())
338  this->initialize();
339  {
340  BlockTriDiContainerDetails::performNumericPhase<MatrixType>(impl_->A,
341  impl_->blockGraph,
342  impl_->part_interface, impl_->block_tridiags,
343  in.addRadiallyToDiagonal,
344  impl_->use_fused_jacobi);
345  }
346  this->IsComputed_ = true;
347  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
348 }
349 
350 template <typename MatrixType>
353  ApplyParameters in;
354  in.dampingFactor = Teuchos::ScalarTraits<scalar_type>::one();
355  return in;
356 }
357 
358 template <typename MatrixType>
360  const ApplyParameters& in) const {
361  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
362  int r_val = 0;
363  {
364  if (!impl_->use_fused_jacobi) {
365  r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>(impl_->A,
366  impl_->blockGraph,
367  impl_->tpetra_importer,
368  impl_->async_importer,
369  impl_->overlap_communication_and_computation,
370  X, Y, impl_->Z, impl_->W,
371  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
372  impl_->work,
373  impl_->norm_manager,
374  in.dampingFactor,
375  in.zeroStartingSolution,
376  in.maxNumSweeps,
377  in.tolerance,
378  in.checkToleranceEvery);
379  } else {
380  r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>(impl_->tpetra_importer,
381  impl_->async_importer,
382  impl_->overlap_communication_and_computation,
383  X, Y, impl_->W,
384  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
385  impl_->work_flat,
386  impl_->norm_manager,
387  in.dampingFactor,
388  in.zeroStartingSolution,
389  in.maxNumSweeps,
390  in.tolerance,
391  in.checkToleranceEvery);
392  }
393  }
394  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
395  return r_val;
396 }
397 
398 template <typename MatrixType>
401  return impl_->norm_manager.getNorms0();
402 }
403 
404 template <typename MatrixType>
407  return impl_->norm_manager.getNormsFinal();
408 }
409 
410 template <typename MatrixType>
411 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::apply(ConstHostView /* X */, HostView /* Y */, int /* blockIndex */, Teuchos::ETransp /* mode */,
412  scalar_type /* alpha */, scalar_type /* beta */) const {
413  TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::apply is not implemented. You may have reached this message "
414  << "because you want to use this container's performance-portable Jacobi iteration. In "
415  << "that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
416 }
417 
418 template <typename MatrixType>
419 void BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::weightedApply(ConstHostView /* X */, HostView /* Y */, ConstHostView /* D */, int /* blockIndex */,
420  Teuchos::ETransp /* mode */, scalar_type /* alpha */, scalar_type /* beta */) const {
421  TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::weightedApply is not implemented.");
422 }
423 
424 template <typename MatrixType>
425 std::ostream&
427  Teuchos::FancyOStream fos(Teuchos::rcp(&os, false));
428  fos.setOutputToRootOnly(0);
429  describe(fos);
430  return os;
431 }
432 
433 template <typename MatrixType>
434 std::string
436  std::ostringstream oss;
438  if (this->isInitialized()) {
439  if (this->isComputed()) {
440  oss << "{status = initialized, computed";
441  } else {
442  oss << "{status = initialized, not computed";
443  }
444  } else {
445  oss << "{status = not initialized, not computed";
446  }
447 
448  oss << "}";
449  return oss.str();
450 }
451 
452 template <typename MatrixType>
455  const Teuchos::EVerbosityLevel verbLevel) const {
456  using std::endl;
457  if (verbLevel == Teuchos::VERB_NONE) return;
458  os << "================================================================================" << endl
459  << "Ifpack2::BlockTriDiContainer" << endl
460  << "Number of blocks = " << this->numBlocks_ << endl
461  << "isInitialized() = " << this->IsInitialized_ << endl
462  << "isComputed() = " << this->IsComputed_ << endl
463  << "================================================================================" << endl
464  << endl;
465 }
466 
467 template <typename MatrixType>
468 std::string
469 BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::getName() { return "Ifpack2::BlockTriDiContainer::ImplSimdTag"; }
470 
471 #define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S, LO, GO, N) \
472  template class Ifpack2::BlockTriDiContainer<Tpetra::RowMatrix<S, LO, GO, N> >;
473 } // namespace Ifpack2
474 #endif
Ifpack2::BlockTriDiContainer class declaration.
T & get(const std::string &name, T def_value)
Store and solve local block tridiagonal linear problems.
Definition: Ifpack2_BlockTriDiContainer_decl.hpp:107
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:377
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
Interface for creating and solving a set of local linear problems.
Definition: Ifpack2_Container_decl.hpp:79
bool isType(const std::string &name) const
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:270
bool is_null() const