Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_BlockTriDiContainer_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
11 #define IFPACK2_BLOCKTRIDICONTAINER_DEF_HPP
12 
14 
15 #include <Tpetra_Distributor.hpp>
16 #include <Tpetra_BlockMultiVector.hpp>
17 #include <Tpetra_BlockCrsMatrix_Helpers.hpp>
18 
19 #include <Kokkos_ArithTraits.hpp>
20 #include <KokkosBatched_Util.hpp>
21 #include <KokkosBatched_Vector.hpp>
22 #include <KokkosBatched_AddRadial_Decl.hpp>
23 #include <KokkosBatched_AddRadial_Impl.hpp>
24 #include <KokkosBatched_Gemm_Decl.hpp>
25 #include <KokkosBatched_Gemm_Serial_Impl.hpp>
26 #include <KokkosBatched_Gemv_Decl.hpp>
27 #include <KokkosBatched_Trsm_Decl.hpp>
28 #include <KokkosBatched_Trsm_Serial_Impl.hpp>
29 #include <KokkosBatched_Trsv_Decl.hpp>
30 #include <KokkosBatched_Trsv_Serial_Impl.hpp>
31 #include <KokkosBatched_LU_Decl.hpp>
32 #include <KokkosBatched_LU_Serial_Impl.hpp>
33 
35 #include "Ifpack2_BlockTriDiContainer_impl.hpp"
36 
37 #include <memory>
38 
39 
40 namespace Ifpack2 {
41 
45 
46  template <typename MatrixType>
47  void
48  BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>
49  ::initInternal (const Teuchos::RCP<const row_matrix_type>& matrix,
50  const Teuchos::RCP<const import_type>& importer,
51  const bool overlapCommAndComp,
52  const bool useSeqMethod,
53  const int block_size,
54  const bool explicitConversion)
55  {
56  IFPACK2_BLOCKHELPER_TIMER_WITH_FENCE("BlockTriDiContainer::initInternal", initInternal, typename BlockHelperDetails::ImplType<MatrixType>::execution_space);
57 
58  // create pointer of impl
59  {
60  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createImpl", createImpl);
61  impl_ = Teuchos::rcp(new BlockTriDiContainerDetails::ImplObject<MatrixType>());
62  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
63  }
64 
65  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
66  // using block_crs_matrix_type = typename impl_type::tpetra_block_crs_matrix_type;
67 
68  {
69  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA", setA);
70  if (explicitConversion) {
71  impl_->A = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
72  if (impl_->A.is_null()) {
74  (block_size == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
75  {
76  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::setA::convertToBlockCrsMatrix", convertToBlockCrsMatrix);
77  impl_->A = Tpetra::convertToBlockCrsMatrix(*Teuchos::rcp_dynamic_cast<const crs_matrix_type>(matrix), block_size, true);
78  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
79  }
80  }
81  }
82  else {
83  impl_->A = matrix;
84  }
85  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
86  }
87 
88  impl_->tpetra_importer = Teuchos::null;
89  impl_->async_importer = Teuchos::null;
90 
91  if (useSeqMethod)
92  {
93  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter useSeqMethod", useSeqMethod);
94  if (importer.is_null()) // there is no given importer, then create one
95  impl_->tpetra_importer = BlockTriDiContainerDetails::createBlockCrsTpetraImporter<MatrixType>(impl_->A);
96  else
97  impl_->tpetra_importer = importer; // if there is a given importer, use it
98  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
99  }
100  else
101  {
102  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createBlockCrsTpetraImporter", createBlockCrsTpetraImporter);
103  //Leave tpetra_importer null even if user provided an importer.
104  //It is not used in the performant codepath (!useSeqMethod)
105  impl_->async_importer = BlockTriDiContainerDetails::createBlockCrsAsyncImporter<MatrixType>(impl_->A);
106  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
107  }
108 
109  // as a result, there are
110  // 1) tpetra_importer is null , async_importer is null (no need for importer)
111  // 2) tpetra_importer is NOT null , async_importer is null (sequential method is used)
112  // 3) tpetra_importer is null , async_importer is NOT null (async method is used)
113 
114  // temporary disabling
115  impl_->overlap_communication_and_computation = overlapCommAndComp;
116 
117  {
118  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createZ", createZ);
119  impl_->Z = typename impl_type::tpetra_multivector_type();
120  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
121  }
122  {
123  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createW", createW);
124  impl_->W = typename impl_type::impl_scalar_type_1d_view();
125  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
126  }
127 
128  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
129  }
130 
131  template <typename MatrixType>
132  void
133  BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>
134  ::clearInternal ()
135  {
136  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearInternal", clearInternal);
137  using impl_type = BlockHelperDetails::ImplType<MatrixType>;
138  using part_interface_type = BlockHelperDetails::PartInterface<MatrixType>;
139  using block_tridiags_type = BlockTriDiContainerDetails::BlockTridiags<MatrixType>;
140  using amd_type = BlockHelperDetails::AmD<MatrixType>;
141  using norm_manager_type = BlockHelperDetails::NormManager<MatrixType>;
142 
143  impl_->A = Teuchos::null;
144  impl_->tpetra_importer = Teuchos::null;
145  impl_->async_importer = Teuchos::null;
146 
147  impl_->Z = typename impl_type::tpetra_multivector_type();
148  impl_->W = typename impl_type::impl_scalar_type_1d_view();
149 
150  impl_->part_interface = part_interface_type();
151  impl_->block_tridiags = block_tridiags_type();
152  impl_->a_minus_d = amd_type();
153  impl_->work = typename impl_type::vector_type_1d_view();
154  impl_->norm_manager = norm_manager_type();
155 
156  impl_ = Teuchos::null;
157  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
158  }
159 
160  template <typename MatrixType>
164  const Teuchos::RCP<const import_type>& importer,
165  bool pointIndexed)
166  : Container<MatrixType>(matrix, partitions, pointIndexed), partitions_(partitions)
167  {
168  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
169  const bool useSeqMethod = false;
170  const bool overlapCommAndComp = false;
171  initInternal(matrix, importer, overlapCommAndComp, useSeqMethod);
172  impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
173  n_subparts_per_part_ = -1;
174  block_size_ = -1;
175  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
176  }
177 
178  template <typename MatrixType>
182  const int n_subparts_per_part,
183  const bool overlapCommAndComp,
184  const bool useSeqMethod,
185  const int block_size,
186  const bool explicitConversion)
187  : Container<MatrixType>(matrix, partitions, false), partitions_(partitions)
188  {
189  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::BlockTriDiContainer", BlockTriDiContainer);
190  initInternal(matrix, Teuchos::null, overlapCommAndComp, useSeqMethod, block_size, explicitConversion);
191  impl_->use_fused_jacobi = shouldUseFusedBlockJacobi(matrix, partitions, useSeqMethod);
192  n_subparts_per_part_ = n_subparts_per_part;
193  block_size_ = block_size;
194  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
195  }
196 
197  template <typename MatrixType>
202  bool useSeqMethod)
203  {
205  auto matrixBCRS = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(matrix);
206  bool hasBlockCrs = !matrixBCRS.is_null();
207  bool onePartitionPerRow = hasBlockCrs && size_t(partitions.size()) == matrixBCRS->getLocalNumRows();
208  constexpr bool isGPU = BlockHelperDetails::is_device<typename Helpers::execution_space>::value;
209  // The fused block Jacobi solve is actually slower on V100
210 #ifdef KOKKOS_ARCH_VOLTA
211  constexpr bool isVolta = true;
212 #else
213  constexpr bool isVolta = false;
214 #endif
215  // Jacobi case can be represented in two ways:
216  // - partitions is empty
217  // - partitions has length equal to local number of rows (meaning all line lengths must be 1)
218  return isGPU && !isVolta && !useSeqMethod && hasBlockCrs && (!partitions.size() || onePartitionPerRow);
219  }
220 
221  template <typename MatrixType>
224  {
225  }
226 
227  template <typename MatrixType>
228  void
231  {
232  if (List.isType<int>("partitioner: subparts per part"))
233  n_subparts_per_part_ = List.get<int>("partitioner: subparts per part");
234  if (List.isType<int>("partitioner: block size"))
235  block_size_ = List.get<int>("partitioner: block size");
236  }
237 
238  template <typename MatrixType>
239  void
242  {
243  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize", initialize);
244  this->IsInitialized_ = true;
245  {
246  auto bA = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(impl_->A);
247  if (bA.is_null()) {
249  (block_size_ == -1, "A pointwise matrix and block_size = -1 were given as inputs.");
250  {
251  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::initialize::getBlockCrsGraph", getBlockCrsGraph);
252  auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(impl_->A);
253  impl_->blockGraph = Tpetra::getBlockCrsGraph(*A, block_size_, true);
254  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
255  }
256  }
257  else {
258  impl_->blockGraph = Teuchos::rcpFromRef(bA->getCrsGraph());
259  }
260  }
261 
262  {
263  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::createPartInterfaceBlockTridiagsNormManager", createPartInterfaceBlockTridiagsNormManager);
264  impl_->part_interface = BlockTriDiContainerDetails::createPartInterface<MatrixType>(impl_->A, impl_->blockGraph, partitions_, n_subparts_per_part_);
265  impl_->block_tridiags = BlockTriDiContainerDetails::createBlockTridiags<MatrixType>(impl_->part_interface);
266  impl_->norm_manager = BlockHelperDetails::NormManager<MatrixType>(impl_->A->getComm());
267  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
268  }
269 
270  // We assume that if you called this method, you intend to recompute
271  // everything.
272  this->IsComputed_ = false;
273  TEUCHOS_ASSERT(!impl_->A.is_null()); // when initInternal is called, A_ must be set
274  {
275  bool useSeqMethod = !impl_->tpetra_importer.is_null();
276  BlockTriDiContainerDetails::performSymbolicPhase<MatrixType>
277  (impl_->A,
278  impl_->blockGraph,
279  impl_->part_interface,
280  impl_->block_tridiags,
281  impl_->a_minus_d,
282  impl_->overlap_communication_and_computation,
283  impl_->async_importer, useSeqMethod, impl_->use_fused_jacobi);
284  }
285  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
286  }
287 
288  template <typename MatrixType>
289  void
292  {
293  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
294  this->IsComputed_ = false;
295  if (!this->isInitialized())
296  this->initialize();
297  {
298  BlockTriDiContainerDetails::performNumericPhase<MatrixType>
299  (impl_->A,
300  impl_->blockGraph,
301  impl_->part_interface, impl_->block_tridiags,
302  Kokkos::ArithTraits<magnitude_type>::zero(),
303  impl_->use_fused_jacobi);
304  }
305  this->IsComputed_ = true;
306  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
307  }
308 
309  template <typename MatrixType>
310  void
313  {
314  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::clearBlocks", clearBlocks);
315  clearInternal();
316  this->IsInitialized_ = false;
317  this->IsComputed_ = false;
319  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
320  }
321 
322  template <typename MatrixType>
323  void
324  BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>
325  ::applyInverseJacobi (const mv_type& X, mv_type& Y, scalar_type dampingFactor,
326  bool zeroStartingSolution, int numSweeps) const
327  {
328  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
329  const magnitude_type tol = Kokkos::ArithTraits<magnitude_type>::zero();
330  const int check_tol_every = 1;
331 
332  if(!impl_->use_fused_jacobi) {
333  BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>
334  (impl_->A,
335  impl_->blockGraph,
336  impl_->tpetra_importer,
337  impl_->async_importer,
338  impl_->overlap_communication_and_computation,
339  X, Y, impl_->Z, impl_->W,
340  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
341  impl_->work,
342  impl_->norm_manager,
343  dampingFactor,
344  zeroStartingSolution,
345  numSweeps,
346  tol,
347  check_tol_every);
348  }
349  else {
350  BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>
351  (impl_->tpetra_importer,
352  impl_->async_importer,
353  impl_->overlap_communication_and_computation,
354  X, Y, impl_->W,
355  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
356  impl_->work_flat,
357  impl_->norm_manager,
358  dampingFactor,
359  zeroStartingSolution,
360  numSweeps,
361  tol,
362  check_tol_every);
363  }
364  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
365  }
366 
367  template <typename MatrixType>
368  typename BlockTriDiContainer<MatrixType, BlockTriDiContainerDetails::ImplSimdTag>::ComputeParameters
371  {
372  return ComputeParameters();
373  }
374 
375  template <typename MatrixType>
376  void
378  ::compute (const ComputeParameters& in)
379  {
380  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::compute", compute);
381  this->IsComputed_ = false;
382  if (!this->isInitialized())
383  this->initialize();
384  {
385  BlockTriDiContainerDetails::performNumericPhase<MatrixType>
386  (impl_->A,
387  impl_->blockGraph,
388  impl_->part_interface, impl_->block_tridiags,
389  in.addRadiallyToDiagonal,
390  impl_->use_fused_jacobi);
391  }
392  this->IsComputed_ = true;
393  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
394  }
395 
396  template <typename MatrixType>
400  {
401  ApplyParameters in;
402  in.dampingFactor = Teuchos::ScalarTraits<scalar_type>::one();
403  return in;
404  }
405 
406  template <typename MatrixType>
407  int
409  ::applyInverseJacobi (const mv_type& X, mv_type& Y,
410  const ApplyParameters& in) const
411  {
412  IFPACK2_BLOCKHELPER_TIMER("BlockTriDiContainer::applyInverseJacobi", applyInverseJacobi);
413  int r_val = 0;
414  {
415  if(!impl_->use_fused_jacobi) {
416  r_val = BlockTriDiContainerDetails::applyInverseJacobi<MatrixType>
417  (impl_->A,
418  impl_->blockGraph,
419  impl_->tpetra_importer,
420  impl_->async_importer,
421  impl_->overlap_communication_and_computation,
422  X, Y, impl_->Z, impl_->W,
423  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
424  impl_->work,
425  impl_->norm_manager,
426  in.dampingFactor,
427  in.zeroStartingSolution,
428  in.maxNumSweeps,
429  in.tolerance,
430  in.checkToleranceEvery);
431  }
432  else {
433  r_val = BlockTriDiContainerDetails::applyFusedBlockJacobi<MatrixType>
434  (impl_->tpetra_importer,
435  impl_->async_importer,
436  impl_->overlap_communication_and_computation,
437  X, Y, impl_->W,
438  impl_->part_interface, impl_->block_tridiags, impl_->a_minus_d,
439  impl_->work_flat,
440  impl_->norm_manager,
441  in.dampingFactor,
442  in.zeroStartingSolution,
443  in.maxNumSweeps,
444  in.tolerance,
445  in.checkToleranceEvery);
446  }
447  }
448  IFPACK2_BLOCKHELPER_TIMER_FENCE(typename BlockHelperDetails::ImplType<MatrixType>::execution_space)
449  return r_val;
450  }
451 
452  template <typename MatrixType>
455  ::getNorms0 () const {
456  return impl_->norm_manager.getNorms0();
457  }
458 
459  template <typename MatrixType>
463  return impl_->norm_manager.getNormsFinal();
464  }
465 
466  template <typename MatrixType>
467  void
469  ::apply (ConstHostView /* X */, HostView /* Y */, int /* blockIndex */, Teuchos::ETransp /* mode */,
470  scalar_type /* alpha */, scalar_type /* beta */) const
471  {
472  TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::apply is not implemented. You may have reached this message "
473  << "because you want to use this container's performance-portable Jacobi iteration. In "
474  << "that case, set \"relaxation: type\" to \"MT Split Jacobi\" rather than \"Jacobi\".");
475  }
476 
477  template <typename MatrixType>
478  void
480  ::weightedApply (ConstHostView /* X */, HostView /* Y */, ConstHostView /* D */, int /* blockIndex */,
481  Teuchos::ETransp /* mode */, scalar_type /* alpha */, scalar_type /* beta */) const
482  {
483  TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "BlockTriDiContainer::weightedApply is not implemented.");
484  }
485 
486  template <typename MatrixType>
487  std::ostream&
489  ::print (std::ostream& os) const
490  {
491  Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
492  fos.setOutputToRootOnly(0);
493  describe(fos);
494  return os;
495  }
496 
497  template <typename MatrixType>
498  std::string
501  {
502  std::ostringstream oss;
504  if (this->isInitialized()) {
505  if (this->isComputed()) {
506  oss << "{status = initialized, computed";
507  }
508  else {
509  oss << "{status = initialized, not computed";
510  }
511  }
512  else {
513  oss << "{status = not initialized, not computed";
514  }
515 
516  oss << "}";
517  return oss.str();
518  }
519 
520  template <typename MatrixType>
521  void
524  const Teuchos::EVerbosityLevel verbLevel) const
525  {
526  using std::endl;
527  if(verbLevel==Teuchos::VERB_NONE) return;
528  os << "================================================================================" << endl
529  << "Ifpack2::BlockTriDiContainer" << endl
530  << "Number of blocks = " << this->numBlocks_ << endl
531  << "isInitialized() = " << this->IsInitialized_ << endl
532  << "isComputed() = " << this->IsComputed_ << endl
533  << "================================================================================" << endl
534  << endl;
535  }
536 
537  template <typename MatrixType>
538  std::string
540  ::getName() { return "Ifpack2::BlockTriDiContainer::ImplSimdTag"; }
541 
542 #define IFPACK2_BLOCKTRIDICONTAINER_INSTANT(S,LO,GO,N) \
543  template class Ifpack2::BlockTriDiContainer< Tpetra::RowMatrix<S, LO, GO, N> >;
544 }
545 #endif
Ifpack2::BlockTriDiContainer class declaration.
T & get(const std::string &name, T def_value)
Store and solve local block tridiagonal linear problems.
Definition: Ifpack2_BlockTriDiContainer_decl.hpp:101
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define TEUCHOS_TEST_FOR_EXCEPT_MSG(throw_exception_test, msg)
Definition: Ifpack2_BlockHelper.hpp:353
~BlockTriDiContainer() override
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_BlockTriDiContainer_def.hpp:223
BlockTriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< local_ordinal_type > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_BlockTriDiContainer_def.hpp:162
virtual std::string description() const
basic_FancyOStream & setOutputToRootOnly(const int rootRank)
Interface for creating and solving a set of local linear problems.
Definition: Ifpack2_Container_decl.hpp:79
bool isType(const std::string &name) const
ComputeParameters createDefaultComputeParameters() const
Create a ComputeParameters struct with default values.
Definition: Ifpack2_BlockTriDiContainer_def.hpp:370
#define TEUCHOS_ASSERT(assertion_test)
Definition: Ifpack2_BlockHelper.hpp:249
bool is_null() const