Ifpack2 Templated Preconditioning Package  Version 1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups Pages
Ifpack2_Details_AdditiveSchwarzFilter_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
44 #define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
45 
46 #include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
47 #include "KokkosKernels_Sorting.hpp"
48 #include "KokkosSparse_SortCrs.hpp"
49 #include "Kokkos_Bitset.hpp"
50 
51 namespace Ifpack2
52 {
53 namespace Details
54 {
55 
56 template<class MatrixType>
60  const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
61  bool filterSingletons)
62 {
63  setup(A_unfiltered, perm, reverseperm, filterSingletons);
64 }
65 
66 template<class MatrixType>
68 setup(const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
70  const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
71  bool filterSingletons)
72 {
73  using Teuchos::RCP;
74  using Teuchos::rcp;
75  using Teuchos::rcp_dynamic_cast;
76  //Check that A is an instance of allowed types, either:
77  // - Tpetra::CrsMatrix
78  // - Ifpack2::OverlappingRowMatrix (which always consists of two CrsMatrices, A_ and ExtMatrix_)
80  !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
81  !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
82  std::invalid_argument,
83  "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
84  A_unfiltered_ = A_unfiltered;
85  local_matrix_type A_local, A_halo;
86  bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
87  if(haveHalo)
88  {
89  auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
90  A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
91  A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
92  }
93  else
94  {
95  auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
96  A_local = A_crs->getLocalMatrixDevice();
97  //leave A_halo empty in this case
98  }
99  //Check that perm and reversePerm are the same length and match the number of local rows in A
101  perm.size() != reverseperm.size(),
102  std::invalid_argument,
103  "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
105  (size_t) perm.size() != (size_t) A_unfiltered_->getLocalNumRows(),
106  std::invalid_argument,
107  "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
108  //First, compute the permutation tables (that exclude singletons, if requested)
109  FilterSingletons_ = filterSingletons;
110  local_ordinal_type numLocalRows;
111  local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
112  if(FilterSingletons_)
113  {
114  //Fill singletons and singletonDiagonals (allocate them to the upper bound at first, then shrink it to size)
115  singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletons_"), totalLocalRows);
116  singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "singletonDiagonals_"), totalLocalRows);
117  auto singletonsDevice = singletons_.view_device();
118  singletons_.modify_device();
119  auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
120  singletonDiagonals_.modify_device();
121  local_ordinal_type numSingletons;
122  Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
123  Kokkos::parallel_scan(policy_type(0, totalLocalRows),
124  KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons, bool finalPass)
125  {
126  bool isSingleton = true;
127  impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
128  if(i < A_local.numRows())
129  {
130  //row i is in original local matrix
131  size_type rowBegin = A_local.graph.row_map(i);
132  size_type rowEnd = A_local.graph.row_map(i + 1);
133  for(size_type j = rowBegin; j < rowEnd; j++)
134  {
135  local_ordinal_type entry = A_local.graph.entries(j);
136  if(entry < totalLocalRows && entry != i)
137  {
138  isSingleton = false;
139  break;
140  }
141  if(finalPass && entry == i)
142  {
143  //note: using a sum to compute the diagonal value, in case entries are not compressed.
144  singletonValue += A_local.values(j);
145  }
146  }
147  }
148  else
149  {
150  //row i is in halo
151  local_ordinal_type row = i - A_local.numRows();
152  size_type rowBegin = A_halo.graph.row_map(row);
153  size_type rowEnd = A_halo.graph.row_map(row + 1);
154  for(size_type j = rowBegin; j < rowEnd; j++)
155  {
156  local_ordinal_type entry = A_halo.graph.entries(j);
157  if(entry < totalLocalRows && entry != i)
158  {
159  isSingleton = false;
160  break;
161  }
162  if(finalPass && entry == i)
163  {
164  singletonValue += A_halo.values(j);
165  }
166  }
167  }
168  if(isSingleton)
169  {
170  if(finalPass)
171  {
172  isSingletonBitset.set(i);
173  singletonsDevice(lnumSingletons) = i;
174  singletonDiagonalsDevice(lnumSingletons) = singletonValue;
175  }
176  lnumSingletons++;
177  }
178  }, numSingletons);
179  numSingletons_ = numSingletons;
180  //Each local row in A_unfiltered is either a singleton or a row in the filtered matrix.
181  numLocalRows = totalLocalRows - numSingletons_;
182  //Using the list of singletons, create the reduced permutations.
183  perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), totalLocalRows);
184  perm_.modify_device();
185  reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), numLocalRows);
186  reverseperm_.modify_device();
187  auto permView = perm_.view_device();
188  auto reversepermView = reverseperm_.view_device();
189  //Get the original inverse permutation on device
190  Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing, "input reverse perm"), totalLocalRows);
191  Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
192  Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
193  //reverseperm is a list of local rows in A_unfiltered, so filter out the elements of reverseperm where isSingleton is true. Then regenerate the forward permutation.
194  Kokkos::parallel_scan(policy_type(0, totalLocalRows),
195  KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex, bool finalPass)
196  {
197  local_ordinal_type origRow = origpermView(i);
198  if(!isSingletonBitset.test(origRow))
199  {
200  if(finalPass)
201  {
202  //mark the mapping in both directions between origRow and lindex (a row in filtered A)
203  reversepermView(lindex) = origRow;
204  permView(origRow) = lindex;
205  }
206  lindex++;
207  }
208  else
209  {
210  //origRow is a singleton, so mark a -1 in the new forward permutation to show that it has no corresponding row in filtered A.
211  if(finalPass)
212  permView(origRow) = local_ordinal_type(-1);
213  }
214  });
215  perm_.sync_host();
216  reverseperm_.sync_host();
217  }
218  else
219  {
220  //We will keep all the local rows, so use the permutation as is
221  perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_"), perm.size());
222  perm_.modify_host();
223  auto permHost = perm_.view_host();
224  for(size_t i = 0; i < (size_t) perm.size(); i++)
225  permHost(i) = perm[i];
226  perm_.sync_device();
227  reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_"), reverseperm.size());
228  reverseperm_.modify_host();
229  auto reversepermHost = reverseperm_.view_host();
230  for(size_t i = 0; i < (size_t) reverseperm.size(); i++)
231  reversepermHost(i) = reverseperm[i];
232  reverseperm_.sync_device();
233  numSingletons_ = 0;
234  numLocalRows = totalLocalRows;
235  }
236  auto permView = perm_.view_device();
237  auto reversepermView = reverseperm_.view_device();
238  //Fill the local matrix of A_ (filtered and permuted)
239  //First, construct the rowmap by counting the entries in each row
240  row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered rowptrs"), numLocalRows + 1);
241  Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
242  KOKKOS_LAMBDA(local_ordinal_type i)
243  {
244  if(i == numLocalRows)
245  {
246  localRowptrs(i) = 0;
247  return;
248  }
249  //Count the entries that will be in filtered row i.
250  //This means entries which correspond to local, non-singleton rows/columns.
251  local_ordinal_type numEntries = 0;
252  local_ordinal_type origRow = reversepermView(i);
253  if(origRow < A_local.numRows())
254  {
255  //row i is in original local matrix
256  size_type rowBegin = A_local.graph.row_map(origRow);
257  size_type rowEnd = A_local.graph.row_map(origRow + 1);
258  for(size_type j = rowBegin; j < rowEnd; j++)
259  {
260  local_ordinal_type entry = A_local.graph.entries(j);
261  if(entry < totalLocalRows && permView(entry) != -1)
262  numEntries++;
263  }
264  }
265  else
266  {
267  //row i is in halo
268  local_ordinal_type row = origRow - A_local.numRows();
269  size_type rowBegin = A_halo.graph.row_map(row);
270  size_type rowEnd = A_halo.graph.row_map(row + 1);
271  for(size_type j = rowBegin; j < rowEnd; j++)
272  {
273  local_ordinal_type entry = A_halo.graph.entries(j);
274  if(entry < totalLocalRows && permView(entry) != -1)
275  numEntries++;
276  }
277  }
278  localRowptrs(i) = numEntries;
279  });
280  //Prefix sum to finish computing the rowptrs
281  size_type numLocalEntries;
282  Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
283  KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries, bool finalPass)
284  {
285  size_type numEnt = localRowptrs(i);
286  if(finalPass)
287  localRowptrs(i) = lnumEntries;
288  lnumEntries += numEnt;
289  }, numLocalEntries);
290  //Allocate and fill local entries and values
291  entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered entries"), numLocalEntries);
292  values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Filtered values"), numLocalEntries);
293  //Create the local matrix with the uninitialized entries/values, then fill it.
294  local_matrix_type localMatrix("AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
295  fillLocalMatrix(localMatrix);
296  //Create a serial comm and the map for the final filtered CrsMatrix (each process uses its own local map)
297 #ifdef HAVE_IFPACK2_DEBUG
299  ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument, "Ifpack2::LocalFilter: "
300  "A's Map pairs are not fitted to each other on Process "
301  << A_->getRowMap ()->getComm ()->getRank () << " of the input matrix's "
302  "communicator. "
303  "This means that LocalFilter does not currently know how to work with A. "
304  "This will change soon. Please see discussion of Bug 5992.");
305 #endif // HAVE_IFPACK2_DEBUG
306 
307  // Build the local communicator (containing this process only).
308  RCP<const Teuchos::Comm<int> > localComm;
309 #ifdef HAVE_MPI
310  localComm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
311 #else
312  localComm = rcp (new Teuchos::SerialComm<int> ());
313 #endif // HAVE_MPI
314  //All 4 maps are the same for a local square operator.
315  localMap_ = rcp (new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
316  //Create the inner filtered matrix.
317  auto crsParams = rcp(new Teuchos::ParameterList);
318  crsParams->template set<bool>("sorted", true);
319  //NOTE: this is the fastest way to construct A_ - it's created as fillComplete,
320  //and no communication needs to be done since localMap_ uses a local comm.
321  //It does need to copy the whole local matrix to host when DualViews are constructed
322  //(only an issue with non-UVM GPU backends) but this is just unavoidable when creating a Tpetra::CrsMatrix.
323  //It also needs to compute local constants (maxNumRowEntries) but this should be a
324  //cheap operation relative to what this constructor already did.
325  A_ = rcp(new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
326 }
327 
328 template<class MatrixType>
330 
331 template<class MatrixType>
333 {
334  //Get the local matrix back from A_
335  auto localMatrix = A_->getLocalMatrixDevice();
336  //Copy new values from A_unfiltered to the local matrix, and then reconstruct A_.
337  fillLocalMatrix(localMatrix);
338  A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
339 }
340 
341 template<class MatrixType>
344 {
345  return A_;
346 }
347 
348 template<class MatrixType>
350 {
351  auto localRowptrs = localMatrix.graph.row_map;
352  auto localEntries = localMatrix.graph.entries;
353  auto localValues = localMatrix.values;
354  auto permView = perm_.view_device();
355  auto reversepermView = reverseperm_.view_device();
356  local_matrix_type A_local, A_halo;
357  bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
358  if(haveHalo)
359  {
360  auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
361  A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
362  A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
363  }
364  else
365  {
366  auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
367  A_local = A_crs->getLocalMatrixDevice();
368  //leave A_halo empty in this case
369  }
370  local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
371  //Fill entries and values
372  Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
373  KOKKOS_LAMBDA(local_ordinal_type i)
374  {
375  //Count the entries that will be in filtered row i.
376  //This means entries which correspond to local, non-singleton rows/columns.
377  size_type outRowStart = localRowptrs(i);
378  local_ordinal_type insertPos = 0;
379  local_ordinal_type origRow = reversepermView(i);
380  if(origRow < A_local.numRows())
381  {
382  //row i is in original local matrix
383  size_type rowBegin = A_local.graph.row_map(origRow);
384  size_type rowEnd = A_local.graph.row_map(origRow + 1);
385  for(size_type j = rowBegin; j < rowEnd; j++)
386  {
387  local_ordinal_type entry = A_local.graph.entries(j);
388  if(entry < totalLocalRows)
389  {
390  auto newCol = permView(entry);
391  if(newCol != -1)
392  {
393  localEntries(outRowStart + insertPos) = newCol;
394  localValues(outRowStart + insertPos) = A_local.values(j);
395  insertPos++;
396  }
397  }
398  }
399  }
400  else
401  {
402  //row i is in halo
403  local_ordinal_type row = origRow - A_local.numRows();
404  size_type rowBegin = A_halo.graph.row_map(row);
405  size_type rowEnd = A_halo.graph.row_map(row + 1);
406  for(size_type j = rowBegin; j < rowEnd; j++)
407  {
408  local_ordinal_type entry = A_halo.graph.entries(j);
409  if(entry < totalLocalRows)
410  {
411  auto newCol = permView(entry);
412  if(newCol != -1)
413  {
414  localEntries(outRowStart + insertPos) = newCol;
415  localValues(outRowStart + insertPos) = A_halo.values(j);
416  insertPos++;
417  }
418  }
419  }
420  }
421  });
422  //Sort the matrix, since the reordering can shuffle the entries into any order.
423  KokkosSparse::sort_crs_matrix(localMatrix);
424 }
425 
426 template<class MatrixType>
428 {
429  return localMap_->getComm();
430 }
431 
432 template<class MatrixType>
435 {
436  return localMap_;
437 }
438 
439 template<class MatrixType>
442 {
443  return localMap_;
444 }
445 
446 template<class MatrixType>
449 {
450  return localMap_;
451 }
452 
453 template<class MatrixType>
456 {
457  return localMap_;
458 }
459 
460 template<class MatrixType>
461 Teuchos::RCP<const Tpetra::RowGraph<typename MatrixType::local_ordinal_type,
462  typename MatrixType::global_ordinal_type,
463  typename MatrixType::node_type> >
465 {
466  //NOTE BMK 6-22: this is to maintain compatibilty with LocalFilter.
467  //Situations like overlapping AdditiveSchwarz + BlockRelaxation
468  //require the importer of the original distributed graph, even though the
469  //BlockRelaxation is preconditioning a local matrix (A_).
470  return A_unfiltered_->getGraph();
471 }
472 
473 template<class MatrixType>
474 typename MatrixType::local_ordinal_type AdditiveSchwarzFilter<MatrixType>::getBlockSize() const
475 {
476  return A_->getBlockSize();
477 }
478 
479 template<class MatrixType>
481 {
482  return A_->getGlobalNumRows();
483 }
484 
485 template<class MatrixType>
487 {
488  return A_->getGlobalNumCols();
489 }
490 
491 template<class MatrixType>
493 {
494  return A_->getLocalNumRows();
495 }
496 
497 template<class MatrixType>
499 {
500  return A_->getLocalNumCols();
501 }
502 
503 template<class MatrixType>
504 typename MatrixType::global_ordinal_type AdditiveSchwarzFilter<MatrixType>::getIndexBase() const
505 {
506  return A_->getIndexBase();
507 }
508 
509 template<class MatrixType>
511 {
512  return getLocalNumEntries();
513 }
514 
515 template<class MatrixType>
517 {
518  return A_->getLocalNumEntries();
519 }
520 
521 template<class MatrixType>
523 getNumEntriesInGlobalRow (global_ordinal_type globalRow) const
524 {
525  return A_->getNumEntriesInGlobalRow(globalRow);
526 }
527 
528 template<class MatrixType>
530 getNumEntriesInLocalRow (local_ordinal_type localRow) const
531 {
532  return A_->getNumEntriesInLocalRow(localRow);
533 }
534 
535 template<class MatrixType>
537 {
538  //Use A_unfiltered_ to get a valid upper bound for this.
539  //This lets us support this function without computing global constants on A_.
540  return A_unfiltered_->getGlobalMaxNumRowEntries();
541 }
542 
543 template<class MatrixType>
545 {
546  //Use A_unfiltered_ to get a valid upper bound for this
547  return A_unfiltered_->getLocalMaxNumRowEntries();
548 }
549 
550 
551 template<class MatrixType>
553 {
554  return true;
555 }
556 
557 
558 template<class MatrixType>
560 {
561  return true;
562 }
563 
564 
565 template<class MatrixType>
567 {
568  return false;
569 }
570 
571 
572 template<class MatrixType>
574 {
575  return true;
576 }
577 
578 
579 template<class MatrixType>
581  getGlobalRowCopy (global_ordinal_type globalRow,
582  nonconst_global_inds_host_view_type &globalInd,
583  nonconst_values_host_view_type &val,
584  size_t& numEntries) const
585 {
586  throw std::runtime_error("Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
587 }
588 
589 template<class MatrixType>
591 getLocalRowCopy (local_ordinal_type LocalRow,
592  nonconst_local_inds_host_view_type &Indices,
593  nonconst_values_host_view_type &Values,
594  size_t& NumEntries) const
595 
596 {
597  A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
598 }
599 
600 template<class MatrixType>
601 void AdditiveSchwarzFilter<MatrixType>::getGlobalRowView(global_ordinal_type /* GlobalRow */,
602  global_inds_host_view_type &/*indices*/,
603  values_host_view_type &/*values*/) const
604 {
605  throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
606 }
607 
608 template<class MatrixType>
609 void AdditiveSchwarzFilter<MatrixType>::getLocalRowView(local_ordinal_type LocalRow,
610  local_inds_host_view_type & indices,
611  values_host_view_type & values) const
612 {
613  A_->getLocalRowView(LocalRow, indices, values);
614 }
615 
616 template<class MatrixType>
618 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag) const
619 {
620  // This is somewhat dubious as to how the maps match.
621  A_->getLocalDiagCopy(diag);
622 }
623 
624 template<class MatrixType>
625 void AdditiveSchwarzFilter<MatrixType>::leftScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
626 {
627  throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
628 }
629 
630 template<class MatrixType>
631 void AdditiveSchwarzFilter<MatrixType>::rightScale(const Tpetra::Vector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& /* x */)
632 {
633  throw std::runtime_error("Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
634 }
635 
636 template<class MatrixType>
638 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
639  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
640  Teuchos::ETransp mode,
641  scalar_type alpha,
642  scalar_type beta) const
643 {
644  A_->apply(X, Y, mode, alpha, beta);
645 }
646 
647 template<class MatrixType>
650  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
651  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
652  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB) const
653 {
654  //NOTE: the three vectors here are always constructed within AdditiveSchwarz and are not subviews,
655  //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
656  TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
657  std::logic_error, "Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
658  local_ordinal_type numVecs = OverlappingB.getNumVectors();
659  auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
660  auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
661  auto singletons = singletons_.view_device();
662  auto singletonDiags = singletonDiagonals_.view_device();
663  auto revperm = reverseperm_.view_device();
664  //First, solve the singletons.
665  {
666  auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
667  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
668  KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
669  {
670  local_ordinal_type row = singletons(singletonIndex);
671  y(row, col) = b(row, col) / singletonDiags(singletonIndex);
672  });
673  }
674  //Next, permute OverlappingB to ReducedReorderedB.
675  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
676  KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
677  {
678  reducedB(row, col) = b(revperm(row), col);
679  });
680  //Finally, if there are singletons, eliminate the matrix entries which are in singleton columns ("eliminate" here means update reducedB like in row reduction)
681  //This could be done more efficiently by storing a separate matrix of just the singleton columns and permuted non-singleton rows, but this adds a lot of complexity.
682  //Instead, just apply() the unfiltered matrix to OverlappingY (which is 0, except for singletons), and then permute the result of that and subtract it from reducedB.
683  if(numSingletons_)
684  {
685  mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs, false);
686  A_unfiltered_->apply(OverlappingY, singletonUpdates);
687  auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
688  // auto revperm = reverseperm_.view_device();
689  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
690  KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
691  {
692  local_ordinal_type origRow = revperm(row);
693  reducedB(row, col) -= updatesView(origRow, col);
694  });
695  }
696 }
697 
698 template<class MatrixType>
700  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
701  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY) const
702 {
703  //NOTE: both vectors here are always constructed within AdditiveSchwarz and are not subviews,
704  //so they are all constant stride (this avoids a lot of boilerplate with whichVectors)
705  TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
706  std::logic_error, "Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
707  auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
708  auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
709  auto revperm = reverseperm_.view_device();
710  Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
711  KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
712  {
713  y(revperm(row), col) = reducedY(row, col);
714  });
715 }
716 
717 template<class MatrixType>
719 {
720  return true;
721 }
722 
723 
724 template<class MatrixType>
726 {
727  return true;
728 }
729 
730 template<class MatrixType>
732 {
733  // Reordering doesn't change the Frobenius norm.
734  return A_->getFrobeniusNorm ();
735 }
736 
737 template<class MatrixType>
738 bool
740 mapPairsAreFitted (const row_matrix_type& A)
741 {
742  const map_type& rangeMap = * (A.getRangeMap ());
743  const map_type& rowMap = * (A.getRowMap ());
744  const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
745 
746  const map_type& domainMap = * (A.getDomainMap ());
747  const map_type& columnMap = * (A.getColMap ());
748  const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
749 
750  //Note BMK 6-22: Map::isLocallyFitted is a local-only operation, not a collective.
751  //This means that it can return different values on different ranks. This can cause MPI to hang,
752  //even though it's supposed to terminate globally when any single rank does.
753  //
754  //This function doesn't need to be fast since it's debug-only code.
755  int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
756  int globalSuccess;
757 
758  Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
759 
760  return globalSuccess == 1;
761 }
762 
763 
764 template<class MatrixType>
765 bool
767 mapPairIsFitted (const map_type& map1, const map_type& map2)
768 {
769  return map1.isLocallyFitted (map2);
770 }
771 
772 
773 }} // namespace Ifpack2::Details
774 
775 #define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
776  template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
777 
778 #endif
779 
bool is_null(const boost::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...
T * get() const