46 #ifndef MUELU_UTILITIES_KOKKOS_DEF_HPP
47 #define MUELU_UTILITIES_KOKKOS_DEF_HPP
49 #include <Teuchos_DefaultComm.hpp>
54 #ifdef HAVE_MUELU_EPETRA
56 # include "Epetra_MpiComm.h"
60 #include <Kokkos_ArithTraits.hpp>
61 #include <Kokkos_Core.hpp>
62 #include <KokkosSparse_CrsMatrix.hpp>
64 #if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_EPETRAEXT)
76 #ifdef HAVE_MUELU_TPETRA
77 #include <MatrixMarket_Tpetra.hpp>
78 #include <Tpetra_RowMatrixTransposer.hpp>
79 #include <TpetraExt_MatrixMatrix.hpp>
80 #include <Xpetra_TpetraMultiVector.hpp>
81 #include <Xpetra_TpetraCrsMatrix.hpp>
85 #ifdef HAVE_MUELU_EPETRA
// GetMatrixDiagonal: produces a Teuchos::ArrayRCP<Scalar> from the matrix A.
// NOTE(review): this listing drops several body lines (the declarations of
// cols, vals, j and of the returned array are not shown), so the comments
// below describe only the statements that are visible.
108 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
109 Teuchos::ArrayRCP<Scalar> Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetMatrixDiagonal(
const Matrix& A) {
// Row count is read from A's row map.
112 size_t numRows = A.getRowMap()->getNodeNumElements();
// Visit each row index i in [0, numRows).
117 for (
size_t i = 0; i < numRows; ++i) {
// Load row i's column indices and values into cols / vals.
118 A.getLocalRowView(i, cols, vals);
// Search the row for the entry whose column index equals the row index i.
121 for (; j < cols.
size(); ++j) {
122 if (Teuchos::as<size_t>(cols[j]) == i) {
// j == cols.size() means the search ran through the whole row.
// Presumably the elided lines leave the loop early on a match, which would
// make this the "row has no diagonal entry" case -- TODO confirm.
127 if (j == cols.
size()) {
// NOTE(review): the signature line of this function is missing from this
// listing, as are the declarations of cols, vals and j. The visible body
// builds a Vector on A's row map and scans each row for its diagonal entry.
136 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
139 RCP<const Map> rowMap = A.getRowMap();
// Result vector laid out like the row map; diagVals is writable access to its column 0.
140 RCP<Vector> diag = VectorFactory::Build(rowMap);
141 ArrayRCP<SC> diagVals = diag->getDataNonConst(0);
143 size_t numRows = rowMap->getNodeNumElements();
// Visit each row index i in [0, numRows).
147 for (
size_t i = 0; i < numRows; ++i) {
// Load row i's column indices and values into cols / vals.
148 A.getLocalRowView(i, cols, vals);
// Search the row for the entry whose column index equals the row index i.
151 for (; j < cols.
size(); ++j) {
152 if (Teuchos::as<size_t>(cols[j]) == i) {
// j == cols.size() means the search ran through the whole row
// (presumably the "no diagonal entry found" case -- TODO confirm against the elided lines).
160 if (j == cols.
size()) {
// GetLumpedMatrixDiagonal: produces a Teuchos::ArrayRCP<Scalar> from the matrix A.
// NOTE(review): the statement executed for each row entry is not part of this
// listing, so how entries are combined into the result cannot be stated from here.
170 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
171 Teuchos::ArrayRCP<Scalar> Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetLumpedMatrixDiagonal(
const Matrix &A) {
// Row count is read from A's row map.
173 size_t numRows = A.getRowMap()->getNodeNumElements();
// Visit each row index i in [0, numRows).
178 for (
size_t i = 0; i < numRows; ++i) {
// Load row i's column indices and values into cols / vals.
179 A.getLocalRowView(i, cols, vals);
// Walk every stored entry of the row, from position 0 to cols.size()-1.
182 for (
LO j = 0; j < cols.
size(); ++j) {
// GetMatrixOverlappedDiagonal: returns an Xpetra::Vector built from A.
// Visible steps: obtain a diagonal on the row map (localDiag), create a vector
// on the column map (diagonal), and obtain an importer for the transfer.
// NOTE(review): several lines (the condition guarding the throw, the
// declarations of offsets and diagVals, and everything after the importer is
// built) are missing from this listing.
190 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
191 RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetMatrixOverlappedDiagonal(
const Matrix& A) {
193 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
194 RCP<Vector> localDiag = VectorFactory::Build(rowMap);
// The diagonal-offset calls below are made through CrsMatrixWrap, so A is downcast first.
197 const CrsMatrixWrap* crsOp =
dynamic_cast<const CrsMatrixWrap*
>(&A);
// Raised with a message saying the cast failed (the guarding condition is not shown).
199 throw Exceptions::RuntimeError(
"cast to CrsMatrixWrap failed");
// Compute the diagonal offsets, then copy the diagonal into localDiag using them.
202 crsOp->getLocalDiagOffsets(offsets);
203 crsOp->getLocalDiagCopy(*localDiag,offsets());
// Element-wise copy of diagVals into localDiag's column 0.
// NOTE(review): diagVals is declared in a line not shown here; this looks like
// an alternative way of filling localDiag -- confirm against the full file.
206 ArrayRCP<SC> localDiagVals = localDiag->getDataNonConst(0);
208 for (
LO i = 0; i < localDiagVals.size(); i++)
209 localDiagVals[i] = diagVals[i];
// Reset both ArrayRCP handles to null once the copy is done.
210 localDiagVals = diagVals = null;
// Destination vector laid out like the column map.
213 RCP<Vector> diagonal = VectorFactory::Build(colMap);
// Take the importer from A's graph if it has one; otherwise build one from rowMap to colMap.
214 RCP< const Import> importer;
215 importer = A.getCrsGraph()->getImporter();
216 if (importer == Teuchos::null) {
217 importer = ImportFactory::Build(rowMap, colMap);
// MyOldScaleMatrix: scales Op with scalingVector, dispatching on the library
// tag reported by Op's row map.
// NOTE(review): one parameter line (doFillComplete is used below but its
// declaration is not shown), the declarations of sv / one, the condition
// selecting between the two loops, and the case labels are missing from this listing.
224 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
225 void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix(Matrix& Op,
const Teuchos::ArrayRCP<const SC>& scalingVector,
bool doInverse,
227 bool doOptimizeStorage)
// First visible fill of sv: element-wise reciprocal of scalingVector
// (presumably the doInverse branch -- the enclosing condition is not shown).
232 for (
int i = 0; i < scalingVector.
size(); ++i)
233 sv[i] = one / scalingVector[i];
// Second visible fill of sv: plain copy of scalingVector.
235 for (
int i = 0; i < scalingVector.
size(); ++i)
236 sv[i] = scalingVector[i];
// Select the backend from the library tag of Op's row map.
239 switch (Op.getRowMap()->lib()) {
// Delegate to the Tpetra-specific routine, passing the prepared sv.
241 MyOldScaleMatrix_Tpetra(Op, sv, doFillComplete, doOptimizeStorage);
// Placeholder failure on another path of the switch (its case label is not shown).
247 throw std::runtime_error(
"FIXME");
248 #ifndef __NVCC__ //prevent nvcc warning
// Failure path whose message states that only Epetra and Tpetra matrices are supported.
253 throw Exceptions::RuntimeError(
"Only Epetra and Tpetra matrices can be scaled.");
254 #ifndef __NVCC__ //prevent nvcc warning
// NOTE(review): the signature line is missing from this listing; the error
// message identifies this routine as MyOldScaleMatrix_Epetra.
// The only visible statement throws, with a message stating that the Epetra
// path requires SC=double and LO=GO=int.
260 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
262 throw Exceptions::RuntimeError(
"MyOldScaleMatrix_Epetra: Epetra needs SC=double and LO=GO=int.");
// MyOldScaleMatrix_Tpetra: row-scales the Tpetra matrix behind Op; every entry
// of local row i is multiplied by scalingVector[i], and the row is written back.
// NOTE(review): one parameter line (doFillComplete is used below), the
// declarations of cols / vals / valview / params, the else between the two
// row loops, and the try/catch / #else structure around the final throws are
// missing from this listing.
265 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
266 void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix_Tpetra(Matrix& Op,
const Teuchos::ArrayRCP<SC>& scalingVector,
268 bool doOptimizeStorage)
270 #ifdef HAVE_MUELU_TPETRA
// Obtain a non-const reference to the Tpetra::CrsMatrix wrapped by Op.
272 Tpetra::CrsMatrix<SC,LO,GO,NO>& tpOp = Op2NonConstTpetraCrs(Op);
// Maps are captured up front; domain/range maps are checked before fillComplete below.
274 const RCP<const Tpetra::Map<LO,GO,NO> > rowMap = tpOp.getRowMap();
275 const RCP<const Tpetra::Map<LO,GO,NO> > domainMap = tpOp.getDomainMap();
276 const RCP<const Tpetra::Map<LO,GO,NO> > rangeMap = tpOp.getRangeMap();
// Largest row length reported by the matrix; size_t(-1) is tested as a special
// value (the statement that handles it is not shown).
278 size_t maxRowSize = tpOp.getNodeMaxNumRowEntries();
279 if (maxRowSize == Teuchos::as<size_t>(-1))
// Scratch buffer for one row of scaled values.
282 std::vector<SC> scaledVals(maxRowSize);
// Action taken on an already fill-complete matrix is not shown
// (presumably it reopens the matrix for modification -- TODO confirm).
283 if (tpOp.isFillComplete())
// Locally indexed matrix: address rows and columns by local index.
286 if (Op.isLocallyIndexed() ==
true) {
290 for (
size_t i = 0; i < rowMap->getNodeNumElements(); ++i) {
291 tpOp.getLocalRowView(i, cols, vals);
292 size_t nnz = tpOp.getNumEntriesInLocalRow(i);
// Row longer than the scratch buffer: resize it
// (the update of maxRowSize itself is presumably on the elided line -- confirm).
293 if (nnz > maxRowSize) {
295 scaledVals.resize(maxRowSize);
// Scale each entry of row i by scalingVector[i].
297 for (
size_t j = 0; j < nnz; ++j)
298 scaledVals[j] = vals[j]*scalingVector[i];
// Write the row back using the same local column indices.
302 tpOp.replaceLocalValues(i, cols, valview);
// Same procedure using global row ids (the branch keyword separating the two
// loops is not shown).
310 for (
size_t i = 0; i < rowMap->getNodeNumElements(); ++i) {
311 GO gid = rowMap->getGlobalElement(i);
312 tpOp.getGlobalRowView(gid, cols, vals);
313 size_t nnz = tpOp.getNumEntriesInGlobalRow(gid);
314 if (nnz > maxRowSize) {
316 scaledVals.resize(maxRowSize);
// The scaling factor is still indexed by the local row index i, not by gid.
319 for (
size_t j = 0; j < nnz; ++j)
320 scaledVals[j] = vals[j]*scalingVector[i];
324 tpOp.replaceGlobalValues(gid, cols, valview);
// Optionally finish the matrix; this requires both domain and range maps to be set.
329 if (doFillComplete) {
330 if (domainMap == Teuchos::null || rangeMap == Teuchos::null)
331 throw Exceptions::RuntimeError(
"In Utils_kokkos::Scaling: cannot fillComplete because the domain and/or range map hasn't been defined");
// Parameters handed to fillComplete: the caller's storage-optimization choice,
// plus "No Nonlocal Changes" set to true.
334 params->set(
"Optimize Storage", doOptimizeStorage);
335 params->set(
"No Nonlocal Changes",
true);
336 Op.fillComplete(Op.getDomainMap(), Op.getRangeMap(), params);
// Failure whose message says the operand is not a Tpetra::CrsMatrix
// (the enclosing handler is not shown).
339 throw Exceptions::RuntimeError(
"Only Tpetra::CrsMatrix types can be scaled (Err.1)");
// Failure whose message says Tpetra is not enabled (presumably the
// alternative branch of the HAVE_MUELU_TPETRA guard above -- the directive is not shown).
342 throw Exceptions::RuntimeError(
"Matrix scaling is not possible because Tpetra has not been enabled.");
// DetectDirichletRows (name taken from the kernel label below; the opening
// lines of the signature are missing from this listing).
// Sets one boolean per row index in [0, numRows) in boundaryNodes and returns it.
// A row keeps its flag unless it has an off-diagonal entry whose magnitude exceeds tol.
// NOTE(review): the declarations of numRows / boundaryNodes / range_type, the
// row-length test in the first kernel, and the launch line of the second
// kernel are not shown.
347 template <
class SC,
class LO,
class GO,
class NO>
351 const bool count_twos_as_dirichlet) {
352 using ATS = Kokkos::ArithTraits<SC>;
355 auto localMatrix = A.getLocalMatrix();
// Kernel variant used when count_twos_as_dirichlet is set.
359 if (count_twos_as_dirichlet)
360 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0,numRows),
361 KOKKOS_LAMBDA(
const LO row) {
362 auto rowView = localMatrix.row(row);
363 auto length = rowView.length;
// Start by assuming the row is a boundary row.
365 boundaryNodes(row) =
true;
// colID is declared outside the loop header, so it stays in scope after the loop
// (the code that uses it afterwards is not shown).
367 decltype(length) colID;
368 for (colID = 0; colID < length; colID++)
// Only off-diagonal entries with magnitude above tol are considered.
369 if ((rowView.colidx(colID) != row) &&
370 (ATS::magnitude(rowView.value(colID)) > tol)) {
// The flag is already cleared when an earlier qualifying entry was seen
// (the statement guarded by this test is missing from the listing).
371 if (!boundaryNodes(row))
373 boundaryNodes(row) =
false;
// Flag set back to true under a condition that is not visible here.
376 boundaryNodes(row) =
true;
// Second kernel variant: a single qualifying off-diagonal entry clears the flag.
381 KOKKOS_LAMBDA(
const LO row) {
382 auto rowView = localMatrix.row(row);
383 auto length = rowView.length;
385 boundaryNodes(row) =
true;
386 for (decltype(length) colID = 0; colID < length; colID++)
387 if ((rowView.colidx(colID) != row) &&
388 (ATS::magnitude(rowView.value(colID)) > tol)) {
389 boundaryNodes(row) =
false;
394 return boundaryNodes;
// Wrapper (its signature lines are missing from this listing): forwards A, tol
// and count_twos_as_dirichlet to the free function MueLu::DetectDirichletRows,
// instantiated with this template's four parameters, and returns its result.
397 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
401 return MueLu::DetectDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, tol, count_twos_as_dirichlet);
// Wrapper templated on Node only (signature lines missing from this listing):
// forwards to MueLu::DetectDirichletRows with Scalar=double and both ordinals=int.
404 template <
class Node>
408 return MueLu::DetectDirichletRows<double,int,int,Node>(A, tol,count_twos_as_dirichlet);
// Free function whose signature lines are missing from this listing; the
// forwarding wrappers elsewhere in the file call it as MueLu::DetectDirichletCols.
// Visible flow: mark every column index that appears in a Dirichlet row,
// combine the marks through an export/import round trip, then flag each column
// whose combined mark is non-zero and return those flags (dirichletCols).
// NOTE(review): the declarations of one, colMap, exporter, myColsToZero,
// globalColsToZero and dirichletCols, and both kernel launch lines, are not shown.
412 template <
class SC,
class LO,
class GO,
class NO>
416 using ATS = Kokkos::ArithTraits<SC>;
419 SC zero = ATS::zero();
422 auto localMatrix = A.getLocalMatrix();
// Start from an all-zero marker vector and expose its local data as a device view.
428 myColsToZero->putScalar(zero);
429 auto myColsToZeroView = myColsToZero->template getLocalView<typename NO::device_type>();
// Per-row kernel body: for a Dirichlet row, write `one` into the marker of every
// column index stored in that row.
432 KOKKOS_LAMBDA(
const LO row) {
433 if (dirichletRows(row)) {
434 auto rowView = localMatrix.row(row);
435 auto length = rowView.length;
437 for (decltype(length) colID = 0; colID < length; colID++)
438 myColsToZeroView(rowView.colidx(colID),0) = one;
// Clear the second marker vector before combining into it.
443 globalColsToZero->putScalar(zero);
// Export the local marks with combine mode ADD ...
446 globalColsToZero->doExport(*myColsToZero,*exporter,
Xpetra::ADD);
// ... then bring the combined marks back with combine mode INSERT, reusing the same exporter.
448 myColsToZero->doImport(*globalColsToZero,*exporter,
Xpetra::INSERT);
450 auto myCols = myColsToZero->template getLocalView<typename NO::device_type>();
451 size_t numColEntries = colMap->getNodeNumElements();
// Threshold for treating a combined mark as non-zero.
453 const typename ATS::magnitudeType eps = 2.0*ATS::eps();
// Per-entry kernel body: column i is flagged when its mark's magnitude exceeds eps.
456 KOKKOS_LAMBDA(
const size_t i) {
457 dirichletCols(i) = ATS::magnitude(myCols(i,0))>eps;
459 return dirichletCols;
// Wrapper (signature lines missing from this listing): forwards A and
// dirichletRows to the free function MueLu::DetectDirichletCols, instantiated
// with this template's four parameters, and returns its result.
463 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
468 return MueLu::DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
// Wrapper templated on Node only (signature lines missing from this listing):
// forwards to MueLu::DetectDirichletCols with Scalar=double and both ordinals=int.
471 template <
class Node>
476 return MueLu::DetectDirichletCols<double,int,int,Node>(A, dirichletRows);
// Matrix variant; the forwarding wrappers elsewhere in the file call it as
// MueLu::ZeroDirichletRows (the opening signature lines and the kernel launch
// line are missing from this listing).
// For every row flagged in dirichletRows, each stored value of that row is
// overwritten in place with replaceWith.
481 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
485 Scalar replaceWith) {
// A is dereferenced with ->, i.e. it is passed as a pointer-like handle.
488 auto localMatrix = A->getLocalMatrix();
489 LocalOrdinal numRows = A->getNodeNumRows();
// Per-row kernel body.
492 KOKKOS_LAMBDA(
const LocalOrdinal row) {
493 if (dirichletRows(row)) {
494 auto rowView = localMatrix.row(row);
495 auto length = rowView.length;
496 for (decltype(length) colID = 0; colID < length; colID++)
497 rowView.value(colID) = replaceWith;
// Wrapper for the matrix variant (opening signature lines missing from this
// listing): forwards A, dirichletRows and replaceWith to MueLu::ZeroDirichletRows,
// instantiated with this template's four parameters.
502 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
507 Scalar replaceWith) {
508 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows, replaceWith);
// Wrapper templated on Node only, taking a double replaceWith (opening
// signature lines missing from this listing): forwards to
// MueLu::ZeroDirichletRows with Scalar=double and both ordinals=int.
511 template <
class Node>
516 double replaceWith) {
517 return MueLu::ZeroDirichletRows<double,int,int,Node>(A, dirichletRows, replaceWith);
// Multivector variant; the forwarding wrappers elsewhere in the file call it as
// MueLu::ZeroDirichletRows (the opening signature lines and the kernel launch
// line are missing from this listing).
// For every index i flagged in dirichletRows, entry (i, j) of X's local view
// is overwritten with replaceWith for all j in [0, numVecs).
522 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
526 Scalar replaceWith) {
// Device view of X's local data, and the number of vectors (columns) in X.
528 auto myCols = X->template getLocalView<typename Node::device_type>();
529 size_t numVecs = X->getNumVectors();
// Per-index kernel body.
531 KOKKOS_LAMBDA(
const size_t i) {
532 if (dirichletRows(i)) {
533 for(
size_t j=0; j<numVecs; j++)
534 myCols(i,j) = replaceWith;
// Wrapper for the multivector variant (opening signature lines missing from
// this listing): forwards X, dirichletRows and replaceWith to
// MueLu::ZeroDirichletRows, instantiated with this template's four parameters.
539 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
544 Scalar replaceWith) {
545 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X, dirichletRows, replaceWith);
// Wrapper templated on Node only, taking a double replaceWith (opening
// signature lines missing from this listing): forwards X to
// MueLu::ZeroDirichletRows with Scalar=double and both ordinals=int.
548 template <
class Node>
553 double replaceWith) {
554 return MueLu::ZeroDirichletRows<double,int,int,Node>(X, dirichletRows, replaceWith);
// The forwarding wrappers elsewhere in the file call this routine as
// MueLu::ZeroDirichletCols (the opening signature lines and the kernel launch
// line are missing from this listing).
// In every row, each stored entry whose column index is flagged in
// dirichletCols is overwritten in place with replaceWith.
559 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
563 Scalar replaceWith) {
566 auto localMatrix = A->getLocalMatrix();
567 LocalOrdinal numRows = A->getNodeNumRows();
// Per-row kernel body: every row is scanned, since the test is on the column index.
570 KOKKOS_LAMBDA(
const LocalOrdinal row) {
571 auto rowView = localMatrix.row(row);
572 auto length = rowView.length;
573 for (decltype(length) colID = 0; colID < length; colID++)
574 if (dirichletCols(rowView.colidx(colID))) {
575 rowView.value(colID) = replaceWith;
// Wrapper (opening signature lines missing from this listing): forwards A,
// dirichletCols and replaceWith to MueLu::ZeroDirichletCols, instantiated with
// this template's four parameters.
580 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
585 Scalar replaceWith) {
586 MueLu::ZeroDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletCols, replaceWith);
// Wrapper templated on Node only, taking a double replaceWith (opening
// signature lines missing from this listing): forwards to
// MueLu::ZeroDirichletCols with Scalar=double and both ordinals=int.
589 template <
class Node>
594 double replaceWith) {
595 return MueLu::ZeroDirichletCols<double,int,int,Node>(A, dirichletCols, replaceWith);
// RealValuedToScalarMultiVector: takes a RealValuedMultiVector X and returns a
// MultiVector with element type Scalar (Xscalar).
// In the visible complex-Scalar path, every entry (i, j) of X's local view is
// assigned to the corresponding entry of Xscalar's local view.
// NOTE(review): the lines that allocate Xscalar, the non-complex path and the
// return statement are missing from this listing.
599 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
600 RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
601 Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
602 RealValuedToScalarMultiVector(RCP<RealValuedMultiVector > X) {
603 RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Xscalar;
// The conversion path below is compiled only when Xpetra has Tpetra and a
// complex Tpetra instantiation is enabled.
604 #if defined(HAVE_XPETRA_TPETRA) && (defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) || defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))
// Runtime test for Scalar being std::complex<double> or std::complex<float>.
// NOTE(review): type_info::name() returns const char*, so each == below compares
// pointer values rather than type identity; comparing the type_info objects
// (typeid(A) == typeid(B)) or using std::is_same would state the intent directly.
608 if ((
typeid(Scalar).name() ==
typeid(std::complex<double>).name()) ||
609 (
typeid(Scalar).name() ==
typeid(std::complex<float>).name())) {
610 size_t numVecs = X->getNumVectors();
// Device views of the source (real-valued) and destination (Scalar) data.
612 auto XVec = X->template getLocalView<typename Node::device_type>();
613 auto XVecScalar = Xscalar->template getLocalView<typename Node::device_type>();
// One kernel iteration per local row of X; each iteration copies all numVecs columns.
615 Kokkos::parallel_for(
"MueLu:Utils::RealValuedToScalarMultiVector", range_type(0,X->getLocalLength()),
616 KOKKOS_LAMBDA(
const size_t i) {
617 for (
size_t j=0; j<numVecs; j++)
618 XVecScalar(i,j) = XVec(i,j);
// Start of a member definition for the Utilities_kokkos<double,int,int,Node>
// specialization, returning an Xpetra::MultiVector<double,int,int,Node>.
// NOTE(review): the member name, parameter list and body are missing from this listing.
626 template <
class Node>
627 RCP<Xpetra::MultiVector<double,int,int,Node> >
628 Utilities_kokkos<double,int,int,Node>::
635 #define MUELU_UTILITIES_KOKKOS_SHORT
636 #endif // MUELU_UTILITIES_KOKKOS_DEF_HPP
virtual size_t getNodeNumRows() const =0
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
static magnitudeType magnitude(T a)
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, size_t NumVectors, bool zeroOut=true)
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
virtual Teuchos::RCP< const Map > getDomainMap() const =0