10 #ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
11 #define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
13 #include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
14 #include "KokkosKernels_Sorting.hpp"
15 #include "KokkosSparse_SortCrs.hpp"
16 #include "Kokkos_Bitset.hpp"
23 template<
class MatrixType>
28 bool filterSingletons)
30 setup(A_unfiltered, perm, reverseperm, filterSingletons);
33 template<
class MatrixType>
38 bool filterSingletons)
42 using Teuchos::rcp_dynamic_cast;
47 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
48 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
49 std::invalid_argument,
50 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
51 A_unfiltered_ = A_unfiltered;
52 local_matrix_type A_local, A_halo;
53 bool haveHalo = !rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_).
is_null();
56 auto A_overlapping = rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_);
57 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
58 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
62 auto A_crs = rcp_dynamic_cast<
const crs_matrix_type>(A_unfiltered_);
63 A_local = A_crs->getLocalMatrixDevice();
69 std::invalid_argument,
70 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
72 (
size_t) perm.
size() != (size_t) A_unfiltered_->getLocalNumRows(),
73 std::invalid_argument,
74 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
76 FilterSingletons_ = filterSingletons;
77 local_ordinal_type numLocalRows;
78 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
82 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletons_"), totalLocalRows);
83 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletonDiagonals_"), totalLocalRows);
84 auto singletonsDevice = singletons_.view_device();
85 singletons_.modify_device();
86 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
87 singletonDiagonals_.modify_device();
88 local_ordinal_type numSingletons;
89 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
90 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
91 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons,
bool finalPass)
93 bool isSingleton =
true;
94 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
95 if(i < A_local.numRows())
98 size_type rowBegin = A_local.graph.row_map(i);
99 size_type rowEnd = A_local.graph.row_map(i + 1);
100 for(size_type j = rowBegin; j < rowEnd; j++)
102 local_ordinal_type entry = A_local.graph.entries(j);
103 if(entry < totalLocalRows && entry != i)
108 if(finalPass && entry == i)
111 singletonValue += A_local.values(j);
118 local_ordinal_type row = i - A_local.numRows();
119 size_type rowBegin = A_halo.graph.row_map(row);
120 size_type rowEnd = A_halo.graph.row_map(row + 1);
121 for(size_type j = rowBegin; j < rowEnd; j++)
123 local_ordinal_type entry = A_halo.graph.entries(j);
124 if(entry < totalLocalRows && entry != i)
129 if(finalPass && entry == i)
131 singletonValue += A_halo.values(j);
139 isSingletonBitset.set(i);
140 singletonsDevice(lnumSingletons) = i;
141 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
146 numSingletons_ = numSingletons;
148 numLocalRows = totalLocalRows - numSingletons_;
150 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), totalLocalRows);
151 perm_.modify_device();
152 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), numLocalRows);
153 reverseperm_.modify_device();
154 auto permView = perm_.view_device();
155 auto reversepermView = reverseperm_.view_device();
157 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"input reverse perm"), totalLocalRows);
158 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.
get(), totalLocalRows);
159 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
161 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
162 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex,
bool finalPass)
164 local_ordinal_type origRow = origpermView(i);
165 if(!isSingletonBitset.test(origRow))
170 reversepermView(lindex) = origRow;
171 permView(origRow) = lindex;
179 permView(origRow) = local_ordinal_type(-1);
183 reverseperm_.sync_host();
188 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), perm.
size());
190 auto permHost = perm_.view_host();
191 for(
size_t i = 0; i < (size_t) perm.
size(); i++)
192 permHost(i) = perm[i];
194 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverseperm_"), reverseperm.
size());
195 reverseperm_.modify_host();
196 auto reversepermHost = reverseperm_.view_host();
197 for(
size_t i = 0; i < (size_t) reverseperm.
size(); i++)
198 reversepermHost(i) = reverseperm[i];
199 reverseperm_.sync_device();
201 numLocalRows = totalLocalRows;
203 auto permView = perm_.view_device();
204 auto reversepermView = reverseperm_.view_device();
207 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered rowptrs"), numLocalRows + 1);
208 Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
209 KOKKOS_LAMBDA(local_ordinal_type i)
211 if(i == numLocalRows)
218 local_ordinal_type numEntries = 0;
219 local_ordinal_type origRow = reversepermView(i);
220 if(origRow < A_local.numRows())
223 size_type rowBegin = A_local.graph.row_map(origRow);
224 size_type rowEnd = A_local.graph.row_map(origRow + 1);
225 for(size_type j = rowBegin; j < rowEnd; j++)
227 local_ordinal_type entry = A_local.graph.entries(j);
228 if(entry < totalLocalRows && permView(entry) != -1)
235 local_ordinal_type row = origRow - A_local.numRows();
236 size_type rowBegin = A_halo.graph.row_map(row);
237 size_type rowEnd = A_halo.graph.row_map(row + 1);
238 for(size_type j = rowBegin; j < rowEnd; j++)
240 local_ordinal_type entry = A_halo.graph.entries(j);
241 if(entry < totalLocalRows && permView(entry) != -1)
245 localRowptrs(i) = numEntries;
248 size_type numLocalEntries;
249 Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
250 KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries,
bool finalPass)
252 size_type numEnt = localRowptrs(i);
254 localRowptrs(i) = lnumEntries;
255 lnumEntries += numEnt;
258 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered entries"), numLocalEntries);
259 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered values"), numLocalEntries);
261 local_matrix_type localMatrix(
"AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
262 fillLocalMatrix(localMatrix);
264 #ifdef HAVE_IFPACK2_DEBUG
266 ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument,
"Ifpack2::LocalFilter: "
267 "A's Map pairs are not fitted to each other on Process "
268 << A_->getRowMap ()->getComm ()->getRank () <<
" of the input matrix's "
270 "This means that LocalFilter does not currently know how to work with A. "
271 "This will change soon. Please see discussion of Bug 5992.");
272 #endif // HAVE_IFPACK2_DEBUG
275 RCP<const Teuchos::Comm<int> > localComm;
277 localComm =
rcp (
new Teuchos::MpiComm<int> (MPI_COMM_SELF));
282 localMap_ =
rcp (
new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
285 crsParams->template set<bool>(
"sorted",
true);
292 A_ =
rcp(
new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
295 template<
class MatrixType>
298 template<
class MatrixType>
302 auto localMatrix = A_->getLocalMatrixDevice();
304 fillLocalMatrix(localMatrix);
305 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
308 template<
class MatrixType>
315 template<
class MatrixType>
318 auto localRowptrs = localMatrix.graph.row_map;
319 auto localEntries = localMatrix.graph.entries;
320 auto localValues = localMatrix.values;
321 auto permView = perm_.view_device();
322 auto reversepermView = reverseperm_.view_device();
323 local_matrix_type A_local, A_halo;
324 bool haveHalo = !Teuchos::rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_).
is_null();
327 auto A_overlapping = Teuchos::rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_);
328 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
329 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
333 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_unfiltered_);
334 A_local = A_crs->getLocalMatrixDevice();
337 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
339 Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
340 KOKKOS_LAMBDA(local_ordinal_type i)
344 size_type outRowStart = localRowptrs(i);
345 local_ordinal_type insertPos = 0;
346 local_ordinal_type origRow = reversepermView(i);
347 if(origRow < A_local.numRows())
350 size_type rowBegin = A_local.graph.row_map(origRow);
351 size_type rowEnd = A_local.graph.row_map(origRow + 1);
352 for(size_type j = rowBegin; j < rowEnd; j++)
354 local_ordinal_type entry = A_local.graph.entries(j);
355 if(entry < totalLocalRows)
357 auto newCol = permView(entry);
360 localEntries(outRowStart + insertPos) = newCol;
361 localValues(outRowStart + insertPos) = A_local.values(j);
370 local_ordinal_type row = origRow - A_local.numRows();
371 size_type rowBegin = A_halo.graph.row_map(row);
372 size_type rowEnd = A_halo.graph.row_map(row + 1);
373 for(size_type j = rowBegin; j < rowEnd; j++)
375 local_ordinal_type entry = A_halo.graph.entries(j);
376 if(entry < totalLocalRows)
378 auto newCol = permView(entry);
381 localEntries(outRowStart + insertPos) = newCol;
382 localValues(outRowStart + insertPos) = A_halo.values(j);
390 KokkosSparse::sort_crs_matrix(localMatrix);
393 template<
class MatrixType>
396 return localMap_->getComm();
399 template<
class MatrixType>
406 template<
class MatrixType>
413 template<
class MatrixType>
420 template<
class MatrixType>
427 template<
class MatrixType>
428 Teuchos::RCP<
const Tpetra::RowGraph<
typename MatrixType::local_ordinal_type,
429 typename MatrixType::global_ordinal_type,
430 typename MatrixType::node_type> >
437 return A_unfiltered_->getGraph();
440 template<
class MatrixType>
443 return A_->getBlockSize();
446 template<
class MatrixType>
449 return A_->getGlobalNumRows();
452 template<
class MatrixType>
455 return A_->getGlobalNumCols();
458 template<
class MatrixType>
461 return A_->getLocalNumRows();
464 template<
class MatrixType>
467 return A_->getLocalNumCols();
470 template<
class MatrixType>
473 return A_->getIndexBase();
476 template<
class MatrixType>
479 return getLocalNumEntries();
482 template<
class MatrixType>
485 return A_->getLocalNumEntries();
488 template<
class MatrixType>
492 return A_->getNumEntriesInGlobalRow(globalRow);
495 template<
class MatrixType>
499 return A_->getNumEntriesInLocalRow(localRow);
502 template<
class MatrixType>
507 return A_unfiltered_->getGlobalMaxNumRowEntries();
510 template<
class MatrixType>
514 return A_unfiltered_->getLocalMaxNumRowEntries();
518 template<
class MatrixType>
525 template<
class MatrixType>
532 template<
class MatrixType>
539 template<
class MatrixType>
546 template<
class MatrixType>
549 nonconst_global_inds_host_view_type &globalInd,
550 nonconst_values_host_view_type &val,
551 size_t& numEntries)
const
553 throw std::runtime_error(
"Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
556 template<
class MatrixType>
559 nonconst_local_inds_host_view_type &Indices,
560 nonconst_values_host_view_type &Values,
561 size_t& NumEntries)
const
564 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
567 template<
class MatrixType>
569 global_inds_host_view_type &,
570 values_host_view_type &)
const
572 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
575 template<
class MatrixType>
577 local_inds_host_view_type & indices,
578 values_host_view_type & values)
const
580 A_->getLocalRowView(LocalRow, indices, values);
583 template<
class MatrixType>
585 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag)
const
588 A_->getLocalDiagCopy(diag);
591 template<
class MatrixType>
594 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
597 template<
class MatrixType>
600 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
603 template<
class MatrixType>
605 apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
606 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
609 scalar_type beta)
const
611 A_->apply(X, Y, mode, alpha, beta);
614 template<
class MatrixType>
617 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
618 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
619 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB)
const
623 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
624 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
625 local_ordinal_type numVecs = OverlappingB.getNumVectors();
626 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
627 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
628 auto singletons = singletons_.view_device();
629 auto singletonDiags = singletonDiagonals_.view_device();
630 auto revperm = reverseperm_.view_device();
633 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
634 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
635 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
637 local_ordinal_type row = singletons(singletonIndex);
638 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
642 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
643 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
645 reducedB(row, col) = b(revperm(row), col);
652 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs,
false);
653 A_unfiltered_->apply(OverlappingY, singletonUpdates);
654 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
656 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
657 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
659 local_ordinal_type origRow = revperm(row);
660 reducedB(row, col) -= updatesView(origRow, col);
665 template<
class MatrixType>
667 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
668 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY)
const
673 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
674 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
675 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
676 auto revperm = reverseperm_.view_device();
677 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
678 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
680 y(revperm(row), col) = reducedY(row, col);
684 template<
class MatrixType>
691 template<
class MatrixType>
697 template<
class MatrixType>
701 return A_->getFrobeniusNorm ();
704 template<
class MatrixType>
709 const map_type& rangeMap = * (A.getRangeMap ());
710 const map_type& rowMap = * (A.getRowMap ());
711 const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
713 const map_type& domainMap = * (A.getDomainMap ());
714 const map_type& columnMap = * (A.getColMap ());
715 const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
722 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
725 Teuchos::reduceAll<int, int> (*(A.getComm()),
Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
727 return globalSuccess == 1;
731 template<
class MatrixType>
736 return map1.isLocallyFitted (map2);
742 #define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
743 template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
bool is_null(const boost::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...