43 #ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
44 #define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
46 #include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
47 #include "KokkosKernels_Sorting.hpp"
48 #include "KokkosSparse_SortCrs.hpp"
49 #include "Kokkos_Bitset.hpp"
56 template<
class MatrixType>
61 bool filterSingletons)
63 setup(A_unfiltered, perm, reverseperm, filterSingletons);
66 template<
class MatrixType>
71 bool filterSingletons)
75 using Teuchos::rcp_dynamic_cast;
80 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
81 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
82 std::invalid_argument,
83 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
84 A_unfiltered_ = A_unfiltered;
85 local_matrix_type A_local, A_halo;
86 bool haveHalo = !rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_).
is_null();
89 auto A_overlapping = rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_);
90 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
91 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
95 auto A_crs = rcp_dynamic_cast<
const crs_matrix_type>(A_unfiltered_);
96 A_local = A_crs->getLocalMatrixDevice();
102 std::invalid_argument,
103 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
105 (
size_t) perm.
size() != (size_t) A_unfiltered_->getLocalNumRows(),
106 std::invalid_argument,
107 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
109 FilterSingletons_ = filterSingletons;
110 local_ordinal_type numLocalRows;
111 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
112 if(FilterSingletons_)
115 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletons_"), totalLocalRows);
116 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletonDiagonals_"), totalLocalRows);
117 auto singletonsDevice = singletons_.view_device();
118 singletons_.modify_device();
119 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
120 singletonDiagonals_.modify_device();
121 local_ordinal_type numSingletons;
122 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
123 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
124 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons,
bool finalPass)
126 bool isSingleton =
true;
127 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
128 if(i < A_local.numRows())
131 size_type rowBegin = A_local.graph.row_map(i);
132 size_type rowEnd = A_local.graph.row_map(i + 1);
133 for(size_type j = rowBegin; j < rowEnd; j++)
135 local_ordinal_type entry = A_local.graph.entries(j);
136 if(entry < totalLocalRows && entry != i)
141 if(finalPass && entry == i)
144 singletonValue += A_local.values(j);
151 local_ordinal_type row = i - A_local.numRows();
152 size_type rowBegin = A_halo.graph.row_map(row);
153 size_type rowEnd = A_halo.graph.row_map(row + 1);
154 for(size_type j = rowBegin; j < rowEnd; j++)
156 local_ordinal_type entry = A_halo.graph.entries(j);
157 if(entry < totalLocalRows && entry != i)
162 if(finalPass && entry == i)
164 singletonValue += A_halo.values(j);
172 isSingletonBitset.set(i);
173 singletonsDevice(lnumSingletons) = i;
174 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
179 numSingletons_ = numSingletons;
181 numLocalRows = totalLocalRows - numSingletons_;
183 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), totalLocalRows);
184 perm_.modify_device();
185 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), numLocalRows);
186 reverseperm_.modify_device();
187 auto permView = perm_.view_device();
188 auto reversepermView = reverseperm_.view_device();
190 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"input reverse perm"), totalLocalRows);
191 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.
get(), totalLocalRows);
192 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
194 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
195 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex,
bool finalPass)
197 local_ordinal_type origRow = origpermView(i);
198 if(!isSingletonBitset.test(origRow))
203 reversepermView(lindex) = origRow;
204 permView(origRow) = lindex;
212 permView(origRow) = local_ordinal_type(-1);
216 reverseperm_.sync_host();
221 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), perm.
size());
223 auto permHost = perm_.view_host();
224 for(
size_t i = 0; i < (size_t) perm.
size(); i++)
225 permHost(i) = perm[i];
227 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverseperm_"), reverseperm.
size());
228 reverseperm_.modify_host();
229 auto reversepermHost = reverseperm_.view_host();
230 for(
size_t i = 0; i < (size_t) reverseperm.
size(); i++)
231 reversepermHost(i) = reverseperm[i];
232 reverseperm_.sync_device();
234 numLocalRows = totalLocalRows;
236 auto permView = perm_.view_device();
237 auto reversepermView = reverseperm_.view_device();
240 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered rowptrs"), numLocalRows + 1);
241 Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
242 KOKKOS_LAMBDA(local_ordinal_type i)
244 if(i == numLocalRows)
251 local_ordinal_type numEntries = 0;
252 local_ordinal_type origRow = reversepermView(i);
253 if(origRow < A_local.numRows())
256 size_type rowBegin = A_local.graph.row_map(origRow);
257 size_type rowEnd = A_local.graph.row_map(origRow + 1);
258 for(size_type j = rowBegin; j < rowEnd; j++)
260 local_ordinal_type entry = A_local.graph.entries(j);
261 if(entry < totalLocalRows && permView(entry) != -1)
268 local_ordinal_type row = origRow - A_local.numRows();
269 size_type rowBegin = A_halo.graph.row_map(row);
270 size_type rowEnd = A_halo.graph.row_map(row + 1);
271 for(size_type j = rowBegin; j < rowEnd; j++)
273 local_ordinal_type entry = A_halo.graph.entries(j);
274 if(entry < totalLocalRows && permView(entry) != -1)
278 localRowptrs(i) = numEntries;
281 size_type numLocalEntries;
282 Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
283 KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries,
bool finalPass)
285 size_type numEnt = localRowptrs(i);
287 localRowptrs(i) = lnumEntries;
288 lnumEntries += numEnt;
291 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered entries"), numLocalEntries);
292 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered values"), numLocalEntries);
294 local_matrix_type localMatrix(
"AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
295 fillLocalMatrix(localMatrix);
297 #ifdef HAVE_IFPACK2_DEBUG
299 ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument,
"Ifpack2::LocalFilter: "
300 "A's Map pairs are not fitted to each other on Process "
301 << A_->getRowMap ()->getComm ()->getRank () <<
" of the input matrix's "
303 "This means that LocalFilter does not currently know how to work with A. "
304 "This will change soon. Please see discussion of Bug 5992.");
305 #endif // HAVE_IFPACK2_DEBUG
308 RCP<const Teuchos::Comm<int> > localComm;
310 localComm =
rcp (
new Teuchos::MpiComm<int> (MPI_COMM_SELF));
315 localMap_ =
rcp (
new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
318 crsParams->template set<bool>(
"sorted",
true);
325 A_ =
rcp(
new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
328 template<
class MatrixType>
331 template<
class MatrixType>
335 auto localMatrix = A_->getLocalMatrixDevice();
337 fillLocalMatrix(localMatrix);
338 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
341 template<
class MatrixType>
348 template<
class MatrixType>
351 auto localRowptrs = localMatrix.graph.row_map;
352 auto localEntries = localMatrix.graph.entries;
353 auto localValues = localMatrix.values;
354 auto permView = perm_.view_device();
355 auto reversepermView = reverseperm_.view_device();
356 local_matrix_type A_local, A_halo;
357 bool haveHalo = !Teuchos::rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_).
is_null();
360 auto A_overlapping = Teuchos::rcp_dynamic_cast<
const overlapping_matrix_type>(A_unfiltered_);
361 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
362 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
366 auto A_crs = Teuchos::rcp_dynamic_cast<
const crs_matrix_type>(A_unfiltered_);
367 A_local = A_crs->getLocalMatrixDevice();
370 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
372 Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
373 KOKKOS_LAMBDA(local_ordinal_type i)
377 size_type outRowStart = localRowptrs(i);
378 local_ordinal_type insertPos = 0;
379 local_ordinal_type origRow = reversepermView(i);
380 if(origRow < A_local.numRows())
383 size_type rowBegin = A_local.graph.row_map(origRow);
384 size_type rowEnd = A_local.graph.row_map(origRow + 1);
385 for(size_type j = rowBegin; j < rowEnd; j++)
387 local_ordinal_type entry = A_local.graph.entries(j);
388 if(entry < totalLocalRows)
390 auto newCol = permView(entry);
393 localEntries(outRowStart + insertPos) = newCol;
394 localValues(outRowStart + insertPos) = A_local.values(j);
403 local_ordinal_type row = origRow - A_local.numRows();
404 size_type rowBegin = A_halo.graph.row_map(row);
405 size_type rowEnd = A_halo.graph.row_map(row + 1);
406 for(size_type j = rowBegin; j < rowEnd; j++)
408 local_ordinal_type entry = A_halo.graph.entries(j);
409 if(entry < totalLocalRows)
411 auto newCol = permView(entry);
414 localEntries(outRowStart + insertPos) = newCol;
415 localValues(outRowStart + insertPos) = A_halo.values(j);
423 KokkosSparse::sort_crs_matrix(localMatrix);
426 template<
class MatrixType>
429 return localMap_->getComm();
432 template<
class MatrixType>
439 template<
class MatrixType>
446 template<
class MatrixType>
453 template<
class MatrixType>
460 template<
class MatrixType>
461 Teuchos::RCP<
const Tpetra::RowGraph<
typename MatrixType::local_ordinal_type,
462 typename MatrixType::global_ordinal_type,
463 typename MatrixType::node_type> >
470 return A_unfiltered_->getGraph();
473 template<
class MatrixType>
476 return A_->getBlockSize();
479 template<
class MatrixType>
482 return A_->getGlobalNumRows();
485 template<
class MatrixType>
488 return A_->getGlobalNumCols();
491 template<
class MatrixType>
494 return A_->getLocalNumRows();
497 template<
class MatrixType>
500 return A_->getLocalNumCols();
503 template<
class MatrixType>
506 return A_->getIndexBase();
509 template<
class MatrixType>
512 return getLocalNumEntries();
515 template<
class MatrixType>
518 return A_->getLocalNumEntries();
521 template<
class MatrixType>
525 return A_->getNumEntriesInGlobalRow(globalRow);
528 template<
class MatrixType>
532 return A_->getNumEntriesInLocalRow(localRow);
535 template<
class MatrixType>
540 return A_unfiltered_->getGlobalMaxNumRowEntries();
543 template<
class MatrixType>
547 return A_unfiltered_->getLocalMaxNumRowEntries();
551 template<
class MatrixType>
558 template<
class MatrixType>
565 template<
class MatrixType>
572 template<
class MatrixType>
579 template<
class MatrixType>
582 nonconst_global_inds_host_view_type &globalInd,
583 nonconst_values_host_view_type &val,
584 size_t& numEntries)
const
586 throw std::runtime_error(
"Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
589 template<
class MatrixType>
592 nonconst_local_inds_host_view_type &Indices,
593 nonconst_values_host_view_type &Values,
594 size_t& NumEntries)
const
597 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
600 template<
class MatrixType>
602 global_inds_host_view_type &,
603 values_host_view_type &)
const
605 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
608 template<
class MatrixType>
610 local_inds_host_view_type & indices,
611 values_host_view_type & values)
const
613 A_->getLocalRowView(LocalRow, indices, values);
616 template<
class MatrixType>
618 getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag)
const
621 A_->getLocalDiagCopy(diag);
624 template<
class MatrixType>
627 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
630 template<
class MatrixType>
633 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
636 template<
class MatrixType>
638 apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
639 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
642 scalar_type beta)
const
644 A_->apply(X, Y, mode, alpha, beta);
647 template<
class MatrixType>
650 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
651 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
652 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB)
const
656 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
657 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
658 local_ordinal_type numVecs = OverlappingB.getNumVectors();
659 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
660 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
661 auto singletons = singletons_.view_device();
662 auto singletonDiags = singletonDiagonals_.view_device();
663 auto revperm = reverseperm_.view_device();
666 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
667 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
668 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
670 local_ordinal_type row = singletons(singletonIndex);
671 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
675 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
676 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
678 reducedB(row, col) = b(revperm(row), col);
685 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs,
false);
686 A_unfiltered_->apply(OverlappingY, singletonUpdates);
687 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
689 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
690 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
692 local_ordinal_type origRow = revperm(row);
693 reducedB(row, col) -= updatesView(origRow, col);
698 template<
class MatrixType>
700 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
701 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY)
const
706 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
707 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
708 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
709 auto revperm = reverseperm_.view_device();
710 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
711 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
713 y(revperm(row), col) = reducedY(row, col);
717 template<
class MatrixType>
724 template<
class MatrixType>
730 template<
class MatrixType>
734 return A_->getFrobeniusNorm ();
737 template<
class MatrixType>
742 const map_type& rangeMap = * (A.getRangeMap ());
743 const map_type& rowMap = * (A.getRowMap ());
744 const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
746 const map_type& domainMap = * (A.getDomainMap ());
747 const map_type& columnMap = * (A.getColMap ());
748 const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
755 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
758 Teuchos::reduceAll<int, int> (*(A.getComm()),
Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
760 return globalSuccess == 1;
764 template<
class MatrixType>
769 return map1.isLocallyFitted (map2);
775 #define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
776 template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
bool is_null(const boost::shared_ptr< T > &p)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...