Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_AdditiveSchwarzFilter_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
4 //
5 // Copyright 2009 NTESS and the Ifpack2 contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
11 #define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
12 
13 #include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
14 #include "KokkosKernels_Sorting.hpp"
15 #include "KokkosSparse_SortCrs.hpp"
16 #include "Kokkos_Bitset.hpp"
17 
18 namespace Ifpack2
19 {
20 namespace Details
21 {
22 
23 template<class MatrixType>
27  const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
28  bool filterSingletons)
29 {
30  setup(A_unfiltered, perm, reverseperm, filterSingletons);
31 }
32 
33 template<class MatrixType>
35 setup(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
37  const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
38  bool filterSingletons)
39 {
40  using Teuchos::RCP;
41  using Teuchos::rcp;
42  using Teuchos::rcp_dynamic_cast;
43  //Check that A is an instance of allowed types, either:
44  // - Tpetra::CrsMatrix
45  // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_)
47  !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
48  !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
49  std::invalid_argument,
50  "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
51  A_unfiltered_ = A_unfiltered;
52  local_matrix_type A_local, A_halo;
53  bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
54  if(haveHalo)
55  {
56  auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
57  A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
58  A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
59  }
60  else
61  {
62  auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
63  A_local = A_crs->getLocalMatrixDevice();
64  //leave A_halo empty in this case
65  }
66  //Check that perm and reversePerm are the same length and match the number of local rows in A
68  perm.size() != reverseperm.size(),
69  std::invalid_argument,
70  "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
72  (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(),
73  std::invalid_argument,
74  "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
75  //First, compute the permutation tables (that exclude singletons, if requested)
76  FilterSingletons_ = filterSingletons;
77  local_ordinal_type numLocalRows;
78  local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
79  if(FilterSingletons_)
80  {
81  //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size)
82  singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows);
83  singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows);
84  auto singletonsDevice = singletons_.view_device();
85  singletons_.modify_device();
86  auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
87  singletonDiagonals_.modify_device();
88  local_ordinal_type numSingletons;
89  Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
90  Kokkos::parallel_scan(policy_type(0, totalLocalRows),
91  KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass)
92  {
93  bool isSingleton = true;
94  impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
95  if(i < A_local.numRows())
96  {
97  //row i is in original local matrix
98  size_type rowBegin = A_local.graph.row_map(i);
99  size_type rowEnd = A_local.graph.row_map(i + 1);
100  for(size_type j = rowBegin; j < rowEnd; j++)
101  {
102  local_ordinal_type entry = A_local.graph.entries(j);
103  if(entry < totalLocalRows && entry != i)
104  {
105  isSingleton = false;
106  break;
107  }
108  if(finalPass && entry == i)
109  {
110  //note: using a sum to compute the diagonal value, in case entries are not compressed.
111  singletonValue += A_local.values(j);
112  }
113  }
114  }
115  else
116  {
117  //row i is in halo
118  local_ordinal_type row = i - A_local.numRows();
119  size_type rowBegin = A_halo.graph.row_map(row);
120  size_type rowEnd = A_halo.graph.row_map(row + 1);
121  for(size_type j = rowBegin; j < rowEnd; j++)
122  {
123  local_ordinal_type entry = A_halo.graph.entries(j);
124  if(entry < totalLocalRows && entry != i)
125  {
126  isSingleton = false;
127  break;
128  }
129  if(finalPass && entry == i)
130  {
131  singletonValue += A_halo.values(j);
132  }
133  }
134  }
135  if(isSingleton)
136  {
137  if(finalPass)
138  {
139  isSingletonBitset.set(i);
140  singletonsDevice(lnumSingletons) = i;
141  singletonDiagonalsDevice(lnumSingletons) = singletonValue;
142  }
143  lnumSingletons++;
144  }
145  }, numSingletons);
146  numSingletons_ = numSingletons;
147  //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix.
148  numLocalRows = totalLocalRows - numSingletons_;
149  //Using the list of singletons, create the reduced permutations.
150  perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows);
151  perm_.modify_device();
152  reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows);
153  reverseperm_.modify_device();
154  auto permView = perm_.view_device();
155  auto reversepermView = reverseperm_.view_device();
156  //Get the original inverse permutation on device
157  Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows);
158  Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
159  Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
160  //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation.
161  Kokkos::parallel_scan(policy_type(0, totalLocalRows),
162  KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass)
163  {
164  local_ordinal_type origRow = origpermView(i);
165  if(!isSingletonBitset.test(origRow))
166  {
167  if(finalPass)
168  {
169  //mark the mapping in both directions between origRow and lindex (a row in filtered A)
170  reversepermView(lindex) = origRow;
171  permView(origRow) = lindex;
172  }
173  lindex++;
174  }
175  else
176  {
177  //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A.
178  if(finalPass)
179  permView(origRow) = local_ordinal_type(-1);
180  }
181  });
182  perm_.sync_host();
183  reverseperm_.sync_host();
184  }
185  else
186  {
187  //We will keep all the local rows, so use the permutation as is
188  perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size());
189  perm_.modify_host();
190  auto permHost = perm_.view_host();
191  for(size_t i = 0; i < (size_t) perm.size(); i++)
192  permHost(i) = perm[i];
193  perm_.sync_device();
194  reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size());
195  reverseperm_.modify_host();
196  auto reversepermHost = reverseperm_.view_host();
197  for(size_t i = 0; i < (size_t) reverseperm.size(); i++)
198  reversepermHost(i) = reverseperm[i];
199  reverseperm_.sync_device();
200  numSingletons_ = 0;
201  numLocalRows = totalLocalRows;
202  }
203  auto permView = perm_.view_device();
204  auto reversepermView = reverseperm_.view_device();
205  //Fill the local matrix of A_ (filtered and permuted)
206  //First, construct the rowmap by counting the entries in each row
207  row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1);
208  Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
209  KOKKOS_LAMBDA(local_ordinal_type i)
210  {
211  if(i == numLocalRows)
212  {
213  localRowptrs(i) = 0;
214  return;
215  }
216  //Count the entries that will be in filtered row i.
217  //This means entries which correspond to local, non-singleton rows/columns.
218  local_ordinal_type numEntries = 0;
219  local_ordinal_type origRow = reversepermView(i);
220  if(origRow < A_local.numRows())
221  {
222  //row i is in original local matrix
223  size_type rowBegin = A_local.graph.row_map(origRow);
224  size_type rowEnd = A_local.graph.row_map(origRow + 1);
225  for(size_type j = rowBegin; j < rowEnd; j++)
226  {
227  local_ordinal_type entry = A_local.graph.entries(j);
228  if(entry < totalLocalRows && permView(entry) != -1)
229  numEntries++;
230  }
231  }
232  else
233  {
234  //row i is in halo
235  local_ordinal_type row = origRow - A_local.numRows();
236  size_type rowBegin = A_halo.graph.row_map(row);
237  size_type rowEnd = A_halo.graph.row_map(row + 1);
238  for(size_type j = rowBegin; j < rowEnd; j++)
239  {
240  local_ordinal_type entry = A_halo.graph.entries(j);
241  if(entry < totalLocalRows && permView(entry) != -1)
242  numEntries++;
243  }
244  }
245  localRowptrs(i) = numEntries;
246  });
247  //Prefix sum to finish computing the rowptrs
248  size_type numLocalEntries;
249  Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
250  KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass)
251  {
252  size_type numEnt = localRowptrs(i);
253  if(finalPass)
254  localRowptrs(i) = lnumEntries;
255  lnumEntries += numEnt;
256  }, numLocalEntries);
257  //Allocate and fill local entries and values
258  entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries);
259  values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries);
260  //Create the local matrix with the uninitialized entries/values, then fill it.
261  local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
262  fillLocalMatrix(localMatrix);
263  //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map)
264 #ifdef HAVE_IFPACK2_DEBUG
266  ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: "
267  "A's Map pairs are not fitted to each other on Process "
268  << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
269  "communicator. "
270  "This means that LocalFilter does not currently know how to work with A. "
271  "This will change soon. Please see discussion of Bug 5992.");
272 #endif // HAVE_IFPACK2_DEBUG
273 
274  // Build the local communicator (containing this process only).
275  RCP<const Teuchos::Comm<int> > localComm;
276 #ifdef HAVE_MPI
277  localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
278 #else
279  localComm = rcp (new Teuchos::SerialComm<int> ());
280 #endif // HAVE_MPI
281  //All 4 maps are the same for a local square operator.
282  localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
283  //Create the inner filtered matrix.
284  auto crsParams = rcp(new Teuchos::ParameterList);
285  crsParams->template set<bool>("sorted", true);
286  //NOTE: this is the fastest way to construct A_ - it's created as fillComplete,
287  //and no communication needs to be done since localMap_ uses a local comm.
288  //It does need to copy the whole local matrix to host when DualViews are constructed
289  //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix.
290  //It also needs to compute local constants (maxNumRowEntries) but this should be a
291  //cheap operation relative to what this constructor already did.
292  A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
293 }
294 
295 template<class MatrixType>
297 
298 template<class MatrixType>
300 {
301  //Get the local matrix back from A_
302  auto localMatrix = A_->getLocalMatrixDevice();
303  //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_.
304  fillLocalMatrix(localMatrix);
305  A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
306 }
307 
308 template<class MatrixType>
311 {
312  return A_;
313 }
314 
315 template<class MatrixType>
317 {
318  auto localRowptrs = localMatrix.graph.row_map;
319  auto localEntries = localMatrix.graph.entries;
320  auto localValues = localMatrix.values;
321  auto permView = perm_.view_device();
322  auto reversepermView = reverseperm_.view_device();
323  local_matrix_type A_local, A_halo;
324  bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
325  if(haveHalo)
326  {
327  auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
328  A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
329  A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
330  }
331  else
332  {
333  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
334  A_local = A_crs->getLocalMatrixDevice();
335  //leave A_halo empty in this case
336  }
337  local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
338  //Fill entries and values
339  Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
340  KOKKOS_LAMBDA(local_ordinal_type i)
341  {
342  //Count the entries that will be in filtered row i.
343  //This means entries which correspond to local, non-singleton rows/columns.
344  size_type outRowStart = localRowptrs(i);
345  local_ordinal_type insertPos = 0;
346  local_ordinal_type origRow = reversepermView(i);
347  if(origRow < A_local.numRows())
348  {
349  //row i is in original local matrix
350  size_type rowBegin = A_local.graph.row_map(origRow);
351  size_type rowEnd = A_local.graph.row_map(origRow + 1);
352  for(size_type j = rowBegin; j < rowEnd; j++)
353  {
354  local_ordinal_type entry = A_local.graph.entries(j);
355  if(entry < totalLocalRows)
356  {
357  auto newCol = permView(entry);
358  if(newCol != -1)
359  {
360  localEntries(outRowStart + insertPos) = newCol;
361  localValues(outRowStart + insertPos) = A_local.values(j);
362  insertPos++;
363  }
364  }
365  }
366  }
367  else
368  {
369  //row i is in halo
370  local_ordinal_type row = origRow - A_local.numRows();
371  size_type rowBegin = A_halo.graph.row_map(row);
372  size_type rowEnd = A_halo.graph.row_map(row + 1);
373  for(size_type j = rowBegin; j < rowEnd; j++)
374  {
375  local_ordinal_type entry = A_halo.graph.entries(j);
376  if(entry < totalLocalRows)
377  {
378  auto newCol = permView(entry);
379  if(newCol != -1)
380  {
381  localEntries(outRowStart + insertPos) = newCol;
382  localValues(outRowStart + insertPos) = A_halo.values(j);
383  insertPos++;
384  }
385  }
386  }
387  }
388  });
389  //Sort the matrix, since the reordering can shuffle the entries into any order.
390  KokkosSparse::sort_crs_matrix(localMatrix);
391 }
392 
393 template<class MatrixType>
395 {
396  return localMap_->getComm();
397 }
398 
399 template<class MatrixType>
402 {
403  return localMap_;
404 }
405 
406 template<class MatrixType>
409 {
410  return localMap_;
411 }
412 
413 template<class MatrixType>
416 {
417  return localMap_;
418 }
419 
420 template<class MatrixType>
423 {
424  return localMap_;
425 }
426 
427 template<class MatrixType>
428 Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
429  typename MatrixType::global_ordinal_type,
430  typename MatrixType::node_type> >
432 {
433  //NOTE BMK 6-22: this is to maintain compatibilty with LocalFilter.
434  //Situations like overlapping AdditiveSchwarz + BlockRelaxation
435  //require the importer of the original distributed graph, even though the
436  //BlockRelaxation is preconditioning a local matrix (A_).
437  return A_unfiltered_->getGraph();
438 }
439 
440 template<class MatrixType>
441 typename MatrixType::local_ordinal_type AdditiveSchwarzFilter<MatrixType>::getBlockSize() const
442 {
443  return A_->getBlockSize();
444 }
445 
446 template<class MatrixType>
448 {
449  return A_->getGlobalNumRows();
450 }
451 
452 template<class MatrixType>
454 {
455  return A_->getGlobalNumCols();
456 }
457 
458 template<class MatrixType>
460 {
461  return A_->getLocalNumRows();
462 }
463 
464 template<class MatrixType>
466 {
467  return A_->getLocalNumCols();
468 }
469 
470 template<class MatrixType>
471 typename MatrixType::global_ordinal_type AdditiveSchwarzFilter<MatrixType>::getIndexBase() const
472 {
473  return A_->getIndexBase();
474 }
475 
476 template<class MatrixType>
478 {
479  return getLocalNumEntries();
480 }
481 
482 template<class MatrixType>
484 {
485  return A_->getLocalNumEntries();
486 }
487 
488 template<class MatrixType>
490 getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
491 {
492  return A_->getNumEntriesInGlobalRow(globalRow);
493 }
494 
495 template<class MatrixType>
497 getNumEntriesInLocalRow (local_ordinal_type localRow) const
498 {
499  return A_->getNumEntriesInLocalRow(localRow);
500 }
501 
502 template<class MatrixType>
504 {
505  //Use A_unfiltered_ to get a valid upper bound for this.
506  //This lets us support this function without computing global constants on A_.
507  return A_unfiltered_->getGlobalMaxNumRowEntries();
508 }
509 
510 template<class MatrixType>
512 {
513  //Use A_unfiltered_ to get a valid upper bound for this
514  return A_unfiltered_->getLocalMaxNumRowEntries();
515 }
516 
517 
518 template<class MatrixType>
520 {
521  return true;
522 }
523 
524 
525 template<class MatrixType>
527 {
528  return true;
529 }
530 
531 
532 template<class MatrixType>
534 {
535  return false;
536 }
537 
538 
539 template<class MatrixType>
541 {
542  return true;
543 }
544 
545 
546 template<class MatrixType>
548  getGlobalRowCopy (global_ordinal_type globalRow,
549  nonconst_global_inds_host_view_type &globalInd,
550  nonconst_values_host_view_type &val,
551  size_t& numEntries) const
552 {
553  throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
554 }
555 
556 template<class MatrixType>
558 getLocalRowCopy (local_ordinal_type LocalRow,
559  nonconst_local_inds_host_view_type &Indices,
560  nonconst_values_host_view_type &Values,
561  size_t& NumEntries) const
562 
563 {
564  A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
565 }
566 
567 template<class MatrixType>
568 void AdditiveSchwarzFilter<MatrixType>::getGlobalRowView(global_ordinal_type /* GlobalRow */,
569  global_inds_host_view_type &/*indices*/,
570  values_host_view_type &/*values*/) const
571 {
572  throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
573 }
574 
575 template<class MatrixType>
576 void AdditiveSchwarzFilter<MatrixType>::getLocalRowView(local_ordinal_type LocalRow,
577  local_inds_host_view_type & indices,
578  values_host_view_type & values) const
579 {
580  A_->getLocalRowView(LocalRow, indices, values);
581 }
582 
583 template<class MatrixType>
585 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag) const
586 {
587  // This is somewhat dubious as to how the maps match.
588  A_->getLocalDiagCopy(diag);
589 }
590 
591 template<class MatrixType>
592 void AdditiveSchwarzFilter<MatrixType>::leftScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
593 {
594  throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
595 }
596 
597 template<class MatrixType>
598 void AdditiveSchwarzFilter<MatrixType>::rightScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
599 {
600  throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
601 }
602 
603 template<class MatrixType>
605 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
606  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
607  Teuchos::ETransp mode,
608  scalar_type alpha,
609  scalar_type beta) const
610 {
611  A_->apply(X, Y, mode, alpha, beta);
612 }
613 
614 template<class MatrixType>
617  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
618  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
619  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB) const
620 {
621  //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews,
622  //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
623  TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
624  std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
625  local_ordinal_type numVecs = OverlappingB.getNumVectors();
626  auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
627  auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
628  auto singletons = singletons_.view_device();
629  auto singletonDiags = singletonDiagonals_.view_device();
630  auto revperm = reverseperm_.view_device();
631  //First, solve the singletons.
632  {
633  auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
634  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
635  KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
636  {
637  local_ordinal_type row = singletons(singletonIndex);
638  y(row, col) = b(row, col) / singletonDiags(singletonIndex);
639  });
640  }
641  //Next, permute OverlappingB to ReducedReorderedB.
642  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
643  KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
644  {
645  reducedB(row, col) = b(revperm(row), col);
646  });
647  //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction)
648  //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity.
649  //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB.
650  if(numSingletons_)
651  {
652  mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false);
653  A_unfiltered_->apply(OverlappingY, singletonUpdates);
654  auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
655  // auto revperm = reverseperm_.view_device();
656  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
657  KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
658  {
659  local_ordinal_type origRow = revperm(row);
660  reducedB(row, col) -= updatesView(origRow, col);
661  });
662  }
663 }
664 
665 template<class MatrixType>
667  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
668  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY) const
669 {
670  //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews,
671  //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
672  TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
673  std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
674  auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
675  auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
676  auto revperm = reverseperm_.view_device();
677  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
678  KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
679  {
680  y(revperm(row), col) = reducedY(row, col);
681  });
682 }
683 
684 template<class MatrixType>
686 {
687  return true;
688 }
689 
690 
691 template<class MatrixType>
693 {
694  return true;
695 }
696 
697 template<class MatrixType>
699 {
700  // Reordering doesn't change the Frobenius norm.
701  return A_->getFrobeniusNorm ();
702 }
703 
704 template<class MatrixType>
705 bool
707 mapPairsAreFitted (const row_matrix_type& A)
708 {
709  const map_type& rangeMap = * (A.getRangeMap ());
710  const map_type& rowMap = * (A.getRowMap ());
711  const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
712 
713  const map_type& domainMap = * (A.getDomainMap ());
714  const map_type& columnMap = * (A.getColMap ());
715  const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
716 
717  //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
718  //This means that it can return different values on different ranks. This can cause MPI to hang,
719  //even though it's supposed to terminate globally when any single rank does.
720  //
721  //This function doesn't need to be fast since it's debug-only code.
722  int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
723  int globalSuccess;
724 
725  Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
726 
727  return globalSuccess == 1;
728 }
729 
730 
731 template<class MatrixType>
732 bool
734 mapPairIsFitted (const map_type& map1, const map_type& map2)
735 {
736  return map1.isLocallyFitted (map2);
737 }
738 
739 
740 }} // namespace Ifpack2::Details
741 
742 #define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
743  template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
744 
745 #endif
746 
bool is_null(const boost::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...
T * get() const