46 #ifndef MUELU_UTILITIES_KOKKOS_DEF_HPP
47 #define MUELU_UTILITIES_KOKKOS_DEF_HPP
50 #include <Teuchos_DefaultComm.hpp>
55 #ifdef HAVE_MUELU_EPETRA
57 # include "Epetra_MpiComm.h"
61 #include <Kokkos_Core.hpp>
62 #include <KokkosSparse_CrsMatrix.hpp>
63 #include <KokkosSparse_getDiagCopy.hpp>
65 #if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_EPETRAEXT)
77 #ifdef HAVE_MUELU_TPETRA
79 #include <Tpetra_RowMatrixTransposer.hpp>
80 #include <TpetraExt_MatrixMatrix.hpp>
81 #include <Xpetra_TpetraMultiVector.hpp>
82 #include <Xpetra_TpetraCrsMatrix.hpp>
83 #include <Xpetra_TpetraBlockCrsMatrix.hpp>
86 #ifdef HAVE_MUELU_EPETRA
94 #include <Xpetra_Map.hpp>
95 #include <Xpetra_MapFactory.hpp>
100 #include <Xpetra_MultiVectorFactory.hpp>
107 #include <KokkosKernels_Handle.hpp>
108 #include <KokkosGraph_RCM.hpp>
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
118 const auto rowMap = A.getRowMap();
126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
136 using local_matrix_type =
typename Matrix::local_matrix_type;
138 using value_type =
typename local_matrix_type::value_type;
139 using ordinal_type =
typename local_matrix_type::ordinal_type;
140 using execution_space =
typename local_matrix_type::execution_space;
147 using KAT = Kokkos::ArithTraits<value_type>;
151 RCP<Vector> diag = VectorFactory::Build(rowMap,
false);
154 local_matrix_type localMatrix = A.getLocalMatrixDevice();
155 auto diagVals = diag->getDeviceLocalView(Xpetra::Access::ReadWrite);
157 ordinal_type numRows = localMatrix.graph.numRows();
164 Kokkos::parallel_for(
"Utilities_kokkos::GetMatrixDiagonalInverse",
165 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
166 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
167 bool foundDiagEntry =
false;
168 auto myRow = localMatrix.rowConst(rowIdx);
169 for(ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
170 if(myRow.colidx(entryIdx) == rowIdx) {
171 foundDiagEntry =
true;
172 if(KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
173 diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
175 diagVals(rowIdx, 0) = KAT::zero();
181 if(!foundDiagEntry) {diagVals(rowIdx, 0) = KAT::zero();}
184 Kokkos::parallel_for(
"Utilities_kokkos::GetMatrixDiagonalInverse",
185 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
186 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
187 auto myRow = localMatrix.rowConst(rowIdx);
188 for(ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
189 diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
191 if(KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
192 diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
194 diagVals(rowIdx, 0) = KAT::zero();
201 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
206 return MueLu::GetMatrixDiagonalInverse<Scalar, LocalOrdinal, GlobalOrdinal, Node>(A,tol,doLumped);
209 template <
class Node>
214 return MueLu::GetMatrixDiagonalInverse<double, int, int, Node>(A,tol,doLumped);
217 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
223 RCP<Vector> localDiag = VectorFactory::Build(rowMap);
232 auto localDiagVals = localDiag->getDeviceLocalView(Xpetra::Access::ReadWrite);
233 const auto diagVals = GetMatrixDiagonal(A)->getDeviceLocalView(Xpetra::Access::ReadOnly);
234 Kokkos::deep_copy(localDiagVals, diagVals);
237 RCP<Vector> diagonal = VectorFactory::Build(colMap);
240 if (importer == Teuchos::null) {
241 importer = ImportFactory::Build(rowMap, colMap);
248 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
251 bool doInverse,
bool doFillComplete,
bool doOptimizeStorage)
256 for (
int i = 0; i < scalingVector.
size(); ++i)
257 sv[i] = one / scalingVector[i];
259 for (
int i = 0; i < scalingVector.
size(); ++i)
260 sv[i] = scalingVector[i];
263 switch (Op.getRowMap()->lib()) {
265 MyOldScaleMatrix_Tpetra(Op, sv, doFillComplete, doOptimizeStorage);
271 throw std::runtime_error(
"FIXME");
272 #ifndef __NVCC__ //prevent nvcc warning
278 #ifndef __NVCC__ //prevent nvcc warning
284 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
289 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
292 bool doOptimizeStorage)
294 #ifdef HAVE_MUELU_TPETRA
303 if (maxRowSize == Teuchos::as<size_t>(-1))
313 for (
size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
317 for (
size_t j = 0; j < nnz; ++j)
318 scaledVals[j] = scalingVector[i]*vals[j];
329 for (
size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
330 GO gid = rowMap->getGlobalElement(i);
336 for (
size_t j = 0; j < nnz; ++j)
337 scaledVals[j] = scalingVector[i]*vals[j];
345 if (doFillComplete) {
346 if (domainMap == Teuchos::null || rangeMap == Teuchos::null)
347 throw Exceptions::RuntimeError(
"In Utils_kokkos::Scaling: cannot fillComplete because the domain and/or range map hasn't been defined");
350 params->
set(
"Optimize Storage", doOptimizeStorage);
351 params->
set(
"No Nonlocal Changes",
true);
363 template <
class SC,
class LO,
class GO,
class NO>
364 Kokkos::View<bool*, typename NO::device_type>
367 const bool count_twos_as_dirichlet) {
368 using impl_scalar_type =
typename Kokkos::ArithTraits<SC>::val_type;
369 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
370 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
374 if(helpers::isTpetraBlockCrs(A)) {
375 #ifdef HAVE_MUELU_TPETRA
377 auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
378 auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
383 Kokkos::View<bool*, typename NO::device_type> boundaryNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numBlockRows);
385 if (count_twos_as_dirichlet)
388 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0,numBlockRows),
389 KOKKOS_LAMBDA(
const LO row) {
390 auto rowView = b_graph.rowConst(row);
391 auto length = rowView.length;
392 LO valstart = b_rowptr[row] * stride;
394 boundaryNodes(row) =
true;
395 decltype(length) colID =0;
396 for (; colID < length; colID++) {
397 if (rowView.colidx(colID) != row) {
398 LO current = valstart + colID*stride;
399 for(
LO k=0; k<stride; k++) {
400 if (ATS::magnitude(values[current+ k]) > tol) {
401 boundaryNodes(row) =
false;
406 if(boundaryNodes(row) ==
false)
411 return boundaryNodes;
417 auto localMatrix = A.getLocalMatrixDevice();
418 LO numRows = A.getLocalNumRows();
419 Kokkos::View<bool*, typename NO::device_type> boundaryNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numRows);
421 if (count_twos_as_dirichlet)
422 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0,numRows),
423 KOKKOS_LAMBDA(
const LO row) {
424 auto rowView = localMatrix.row(row);
425 auto length = rowView.length;
427 boundaryNodes(row) =
true;
429 decltype(length) colID =0;
430 for ( ; colID < length; colID++)
431 if ((rowView.colidx(colID) != row) &&
432 (ATS::magnitude(rowView.value(colID)) > tol)) {
433 if (!boundaryNodes(row))
435 boundaryNodes(row) =
false;
438 boundaryNodes(row) =
true;
442 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRows", range_type(0,numRows),
443 KOKKOS_LAMBDA(
const LO row) {
444 auto rowView = localMatrix.row(row);
445 auto length = rowView.length;
447 boundaryNodes(row) =
true;
448 for (decltype(length) colID = 0; colID < length; colID++)
449 if ((rowView.colidx(colID) != row) &&
450 (ATS::magnitude(rowView.value(colID)) > tol)) {
451 boundaryNodes(row) =
false;
455 return boundaryNodes;
460 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
461 Kokkos::View<bool*, typename Node::device_type>
464 return MueLu::DetectDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, tol, count_twos_as_dirichlet);
467 template <
class Node>
468 Kokkos::View<bool*, typename Node::device_type>
471 return MueLu::DetectDirichletRows<double,int,int,Node>(A, tol,count_twos_as_dirichlet);
475 template <
class SC,
class LO,
class GO,
class NO>
476 Kokkos::View<bool*, typename NO::device_type>
478 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
479 using ATS = Kokkos::ArithTraits<SC>;
480 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
481 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
483 SC zero = ATS::zero();
485 auto localMatrix = A.getLocalMatrixDevice();
486 LO numRows = A.getLocalNumRows();
491 myColsToZero->putScalar(zero);
492 auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
494 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletCols1", range_type(0,numRows),
495 KOKKOS_LAMBDA(
const LO row) {
496 if (dirichletRows(row)) {
497 auto rowView = localMatrix.row(row);
498 auto length = rowView.length;
500 for (decltype(length) colID = 0; colID < length; colID++)
501 myColsToZeroView(rowView.colidx(colID),0) = impl_ATS::one();
506 globalColsToZero->putScalar(zero);
509 globalColsToZero->doExport(*myColsToZero,*exporter,
Xpetra::ADD);
511 myColsToZero->doImport(*globalColsToZero,*exporter,
Xpetra::INSERT);
513 auto myCols = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly);
514 size_t numColEntries = colMap->getLocalNumElements();
515 Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing(
"dirichletCols"), numColEntries);
516 const typename ATS::magnitudeType eps = 2.0*ATS::eps();
518 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletCols2", range_type(0,numColEntries),
519 KOKKOS_LAMBDA (
const size_t i) {
520 dirichletCols(i) = impl_ATS::magnitude(myCols(i,0))>eps;
522 return dirichletCols;
526 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
527 Kokkos::View<bool*, typename Node::device_type>
530 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
531 return MueLu::DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
534 template <
class Node>
535 Kokkos::View<bool*, typename Node::device_type>
538 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
539 return MueLu::DetectDirichletCols<double,int,int,Node>(A, dirichletRows);
544 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
546 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
547 using ATS = Kokkos::ArithTraits<Scalar>;
548 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
549 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
551 const typename ATS::magnitudeType eps = 2.0*impl_ATS::eps();
553 Kokkos::parallel_for(
"MueLu:Maxwell1::FindNonZeros", range_type(0,vals.extent(0)),
554 KOKKOS_LAMBDA (
const size_t i) {
555 nonzeros(i) = (impl_ATS::magnitude(vals(i,0)) > eps);
559 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
563 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
564 MueLu::FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(vals,nonzeros);
567 template <
class Node>
571 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
572 MueLu::FindNonZeros<double,int,int,Node>(vals,nonzeros);
576 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
578 const Kokkos::View<bool*, typename Node::device_type> & dirichletRows,
579 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
580 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
581 using ATS = Kokkos::ArithTraits<Scalar>;
582 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
583 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
587 TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
588 TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
589 TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
592 auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
593 auto localMatrix = A.getLocalMatrixDevice();
594 Kokkos::parallel_for(
"MueLu:Maxwell1::DetectDirichletCols", range_type(0,rowMap->getLocalNumElements()),
596 if (dirichletRows(row)) {
597 auto rowView = localMatrix.row(row);
598 auto length = rowView.length;
600 for (decltype(length) colID = 0; colID < length; colID++)
601 myColsToZeroView(rowView.colidx(colID),0) = impl_ATS::one();
610 globalColsToZero->doExport(*myColsToZero,*importer,
Xpetra::ADD);
612 myColsToZero->doImport(*globalColsToZero,*importer,
Xpetra::INSERT);
615 globalColsToZero = myColsToZero;
616 FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(globalColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly),dirichletDomain);
617 FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly),dirichletCols);
621 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
625 const Kokkos::View<bool*, typename Node::device_type> & dirichletRows,
626 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
627 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
628 MueLu::DetectDirichletColsAndDomains<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A,dirichletRows,dirichletCols,dirichletDomain);
631 template <
class Node>
635 const Kokkos::View<bool*, typename Node::device_type> & dirichletRows,
636 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
637 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
638 MueLu::DetectDirichletColsAndDomains<double,int,int,Node>(A,dirichletRows,dirichletCols,dirichletDomain);
643 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
646 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
648 using ATS = Kokkos::ArithTraits<Scalar>;
649 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
651 typename ATS::val_type impl_replaceWith = replaceWith;
653 auto localMatrix = A->getLocalMatrixDevice();
656 Kokkos::parallel_for(
"MueLu:Utils::ZeroDirichletRows", range_type(0,numRows),
658 if (dirichletRows(row)) {
659 auto rowView = localMatrix.row(row);
660 auto length = rowView.length;
661 for (decltype(length) colID = 0; colID < length; colID++)
662 rowView.value(colID) = impl_replaceWith;
667 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
671 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows,
673 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows, replaceWith);
676 template <
class Node>
680 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
681 double replaceWith) {
682 return MueLu::ZeroDirichletRows<double,int,int,Node>(A, dirichletRows, replaceWith);
687 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
690 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
692 using ATS = Kokkos::ArithTraits<Scalar>;
693 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
695 typename ATS::val_type impl_replaceWith = replaceWith;
697 auto myCols = X->getDeviceLocalView(Xpetra::Access::ReadWrite);
698 size_t numVecs = X->getNumVectors();
699 Kokkos::parallel_for(
"MueLu:Utils::ZeroDirichletRows_MV", range_type(0,dirichletRows.size()),
700 KOKKOS_LAMBDA(
const size_t i) {
701 if (dirichletRows(i)) {
702 for(
size_t j=0; j<numVecs; j++)
703 myCols(i,j) = impl_replaceWith;
708 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
712 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows,
714 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X, dirichletRows, replaceWith);
717 template <
class Node>
721 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
722 double replaceWith) {
723 return MueLu::ZeroDirichletRows<double,int,int,Node>(X, dirichletRows, replaceWith);
728 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
731 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
733 using ATS = Kokkos::ArithTraits<Scalar>;
734 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
736 typename ATS::val_type impl_replaceWith = replaceWith;
738 auto localMatrix = A->getLocalMatrixDevice();
741 Kokkos::parallel_for(
"MueLu:Utils::ZeroDirichletCols", range_type(0,numRows),
743 auto rowView = localMatrix.row(row);
744 auto length = rowView.length;
745 for (decltype(length) colID = 0; colID < length; colID++)
746 if (dirichletCols(rowView.colidx(colID))) {
747 rowView.value(colID) = impl_replaceWith;
752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
756 const Kokkos::View<const bool*, typename NO::device_type>& dirichletCols,
758 MueLu::ZeroDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletCols, replaceWith);
761 template <
class Node>
765 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
766 double replaceWith) {
767 return MueLu::ZeroDirichletCols<double,int,int,Node>(A, dirichletCols, replaceWith);
771 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
774 Kokkos::View<bool*, typename Node::device_type> & dirichletRows)
779 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
780 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
782 for (
LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
788 Scalar rowsum = STS::zero();
789 Scalar diagval = STS::zero();
790 for (
LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
793 diagval = vals[colID];
794 rowsum += vals[colID];
796 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
797 dirichletRowsHost(row) =
true;
800 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
803 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
808 Kokkos::View<bool*, typename Node::device_type> & dirichletRows)
810 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node>(A,rowSumTol,dirichletRows);
814 template <
class Node>
819 Kokkos::View<bool*, typename Node::device_type> & dirichletRows)
821 MueLu::ApplyRowSumCriterion<double, int, int, Node>(A,rowSumTol,dirichletRows);
825 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
830 #if defined(HAVE_XPETRA_TPETRA) && (defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) || defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))
831 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
834 if ((
typeid(
Scalar).name() ==
typeid(std::complex<double>).name()) ||
835 (
typeid(
Scalar).name() ==
typeid(std::complex<float>).name())) {
836 size_t numVecs = X->getNumVectors();
838 auto XVec = X->getDeviceLocalView(Xpetra::Access::ReadOnly);
839 auto XVecScalar = Xscalar->getDeviceLocalView(Xpetra::Access::ReadWrite);
841 Kokkos::parallel_for(
"MueLu:Utils::RealValuedToScalarMultiVector", range_type(0,X->getLocalLength()),
842 KOKKOS_LAMBDA(
const size_t i) {
843 for (
size_t j=0; j<numVecs; j++)
844 XVecScalar(i,j) = XVec(i,j);
852 template <
class Node>
860 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
863 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
864 using lno_nnz_view_t =
typename local_graph_type::entries_type::non_const_type;
865 using device =
typename local_graph_type::device_type;
866 using execution_space =
typename local_matrix_type::execution_space;
867 using ordinal_type =
typename local_matrix_type::ordinal_type;
869 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
871 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm
872 <device,
typename local_graph_type::row_map_type,
typename local_graph_type::entries_type, lno_nnz_view_t>
873 (localGraph.row_map, localGraph.entries);
879 auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite),Kokkos::ALL (), 0);
880 Kokkos::parallel_for(
"Utilities_kokkos::ReverseCuthillMcKee",
881 Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
882 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
883 view1D(rcmOrder(rowIdx)) = rowIdx;
888 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
891 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
892 using lno_nnz_view_t =
typename local_graph_type::entries_type::non_const_type;
893 using device =
typename local_graph_type::device_type;
894 using execution_space =
typename local_matrix_type::execution_space;
895 using ordinal_type =
typename local_matrix_type::ordinal_type;
897 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
900 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm
901 <device,
typename local_graph_type::row_map_type,
typename local_graph_type::entries_type, lno_nnz_view_t>
902 (localGraph.row_map, localGraph.entries);
908 auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite),Kokkos::ALL (), 0);
910 Kokkos::parallel_for(
"Utilities_kokkos::ReverseCuthillMcKee",
911 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
912 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
913 view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
918 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
921 return MueLu::ReverseCuthillMcKee<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Op);
924 template <
class Node>
927 return MueLu::ReverseCuthillMcKee<double,int,int,Node>(Op);
930 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
933 return MueLu::CuthillMcKee<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Op);
936 template <
class Node>
939 return MueLu::CuthillMcKee<double,int,int,Node>(Op);
944 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
947 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
949 using ATS = Kokkos::ArithTraits<Scalar>;
950 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
951 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
958 TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
960 auto localMatrix = A->getLocalMatrixDevice();
961 auto localRmap = Rmap->getLocalMap();
962 auto localCmap = Cmap->getLocalMap();
964 Kokkos::parallel_for(
"MueLu::Utils::ApplyOAZ",range_type(0,dirichletRows.extent(0)),
966 if (dirichletRows(row)){
967 auto rowView = localMatrix.row(row);
968 auto length = rowView.length;
969 auto row_gid = localRmap.getGlobalElement(row);
970 auto row_lid = localCmap.getLocalElement(row_gid);
972 for (decltype(length) colID = 0; colID < length; colID++)
973 if (rowView.colidx(colID) == row_lid)
974 rowView.value(colID) = impl_ATS::one();
976 rowView.value(colID) = impl_ATS::zero();
981 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
985 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
986 MueLu::ApplyOAZToMatrixRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
989 template <
class Node>
993 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
994 MueLu::ApplyOAZToMatrixRows<double,int,int,Node>(A, dirichletRows);
999 #define MUELU_UTILITIES_KOKKOS_SHORT
1000 #endif // MUELU_UTILITIES_KOKKOS_DEF_HPP
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
MueLu::DefaultLocalOrdinal LocalOrdinal
static Kokkos::View< bool *, typename NO::device_type > DetectDirichletCols(const Matrix &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
static void MyOldScaleMatrix(Matrix &Op, const Teuchos::ArrayRCP< const SC > &scalingVector, bool doInverse=true, bool doFillComplete=true, bool doOptimizeStorage=true)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
Teuchos::RCP< const map_type > getRangeMap() const override
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
size_t getLocalNumRows() const override
static RCP< MultiVector > RealValuedToScalarMultiVector(RCP< RealValuedMultiVector > X)
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
static RCP< Time > getNewTimer(const std::string &name)
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=TST::eps()*100, const bool doLumped=false)
Extract Matrix Diagonal.
void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Kokkos::View< bool *, typename Node::device_type > &dirichletRows, Kokkos::View< bool *, typename Node::device_type > dirichletCols, Kokkos::View< bool *, typename Node::device_type > dirichletDomain)
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > ¶ms=null)=0
virtual void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const =0
size_t getLocalMaxNumRowEntries() const override
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
virtual void getLocalRowView(LocalOrdinal LocalRow, ArrayView< const LocalOrdinal > &indices, ArrayView< const Scalar > &values) const =0
Kokkos::View< bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
bool isFillComplete() const override
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void getLocalDiagCopy(Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
local_ordinal_type replaceGlobalValues(const global_ordinal_type globalRow, const Kokkos::View< const global_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
static void ApplyRowSumCriterion(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, typename NO::device_type > &dirichletRows)
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
Teuchos::RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixDiagonalInverse(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, typename Teuchos::ScalarTraits< Scalar >::magnitudeType tol, const bool doLumped)
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Kokkos::View< bool *, typename Node::device_type > &dirichletRows, Kokkos::View< bool *, typename Node::device_type > dirichletCols, Kokkos::View< bool *, typename Node::device_type > dirichletDomain)
Detects Dirichlet columns & domains from a list of Dirichlet rows.
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
static void FindNonZeros(const typename Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node >::dual_view_type::t_dev_const_um vals, Kokkos::View< bool *, typename Node::device_type > nonzeros)
Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon.
typename TST::magnitudeType Magnitude
static Kokkos::View< bool *, typename NO::device_type > DetectDirichletRows(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
virtual RCP< const CrsGraph > getCrsGraph() const =0
Teuchos::RCP< const map_type > getDomainMap() const override
virtual bool isLocallyIndexed() const =0
void FindNonZeros(const typename Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node >::dual_view_type::t_dev_const_um vals, Kokkos::View< bool *, typename Node::device_type > nonzeros)
virtual void getGlobalRowView(GlobalOrdinal GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const =0
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, typename Node::device_type > &dirichletRows)
impl_scalar_type_dualview::t_dev::const_type getValuesDevice(const LO &lclRow) const
virtual Teuchos::RCP< const Map > getRangeMap() const =0
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Matrix &Op)
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Exception throws to report errors in the internal logical of the program.
virtual void getLocalRowView(LocalOrdinal LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const =0
#define TEUCHOS_ASSERT(assertion_test)
local_ordinal_type replaceLocalValues(const local_ordinal_type localRow, const Kokkos::View< const local_ordinal_type *, Kokkos::AnonymousSpace > &inputInds, const Kokkos::View< const impl_scalar_type *, Kokkos::AnonymousSpace > &inputVals)
static void MyOldScaleMatrix_Epetra(Matrix &Op, const Teuchos::ArrayRCP< SC > &scalingVector, bool doFillComplete, bool doOptimizeStorage)
static void ApplyOAZToMatrixRows(RCP< Matrix > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
Kokkos::View< bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
static void MyOldScaleMatrix_Tpetra(Matrix &Op, const Teuchos::ArrayRCP< SC > &scalingVector, bool doFillComplete, bool doOptimizeStorage)
static void ZeroDirichletCols(RCP< Matrix > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletCols, SC replaceWith=Teuchos::ScalarTraits< SC >::zero())
virtual Teuchos::RCP< const Map > getDomainMap() const =0
static void ZeroDirichletRows(RCP< Matrix > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows, SC replaceWith=Teuchos::ScalarTraits< SC >::zero())
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
Teuchos::RCP< const map_type > getRowMap() const override