46 #ifndef MUELU_UTILITIES_KOKKOS_DEF_HPP
47 #define MUELU_UTILITIES_KOKKOS_DEF_HPP
49 #include <Teuchos_DefaultComm.hpp>
54 #ifdef HAVE_MUELU_EPETRA
56 # include "Epetra_MpiComm.h"
60 #include <Kokkos_ArithTraits.hpp>
61 #include <Kokkos_Core.hpp>
62 #include <KokkosSparse_CrsMatrix.hpp>
64 #if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_EPETRAEXT)
76 #ifdef HAVE_MUELU_TPETRA
77 #include <MatrixMarket_Tpetra.hpp>
78 #include <Tpetra_RowMatrixTransposer.hpp>
79 #include <TpetraExt_MatrixMatrix.hpp>
80 #include <Xpetra_TpetraMultiVector.hpp>
81 #include <Xpetra_TpetraCrsMatrix.hpp>
82 #include <Xpetra_TpetraBlockCrsMatrix.hpp>
85 #ifdef HAVE_MUELU_EPETRA
93 #include <Xpetra_Map.hpp>
94 #include <Xpetra_MapFactory.hpp>
99 #include <Xpetra_MultiVectorFactory.hpp>
// GetMatrixDiagonal: walks the local rows of A and searches each row for the entry
// whose local column index equals the local row index. The return type is
// Teuchos::ArrayRCP<Scalar>.
// NOTE(review): this listing is an excerpt. The declarations of cols, vals, j and of
// the result array, the bodies of the two `if` statements, and the return statement
// are not visible here.
108 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
109 Teuchos::ArrayRCP<Scalar> Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetMatrixDiagonal(
const Matrix& A) {
// Local element count reported by the row map; used as the row loop bound.
112 size_t numRows = A.getRowMap()->getNodeNumElements();
117 for (
size_t i = 0; i < numRows; ++i) {
// Fetch the column indices (cols) and values (vals) of local row i.
118 A.getLocalRowView(i, cols, vals);
// Linear scan over the row's entries; j is initialised on a line not shown here.
121 for (; j < cols.
size(); ++j) {
// NOTE(review): a local column id is compared with a local row id. This presumes
// row and column maps number the owned entries identically - confirm.
122 if (Teuchos::as<size_t>(cols[j]) == i) {
// j ran off the end of the row, i.e. the scan above found no matching column.
// The handling of that case is not visible in this excerpt.
127 if (j == cols.
size()) {
// Diagonal search that stores its result in an Xpetra Vector built on the row map
// of A, rather than in a bare array.
// NOTE(review): the signature line of this definition is not visible in this excerpt,
// so the function name, its parameters and its return type cannot be confirmed here.
// The declarations of cols, vals and j and the bodies of both `if` statements are
// also missing.
136 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
139 RCP<const Map> rowMap = A.getRowMap();
// Result vector laid out like the row map, plus a writable view of its column 0.
140 RCP<Vector> diag = VectorFactory::Build(rowMap);
141 ArrayRCP<SC> diagVals = diag->getDataNonConst(0);
143 size_t numRows = rowMap->getNodeNumElements();
147 for (
size_t i = 0; i < numRows; ++i) {
// Fetch the column indices (cols) and values (vals) of local row i.
148 A.getLocalRowView(i, cols, vals);
// Linear scan for the entry whose local column index equals the local row index.
151 for (; j < cols.
size(); ++j) {
152 if (Teuchos::as<size_t>(cols[j]) == i) {
// No matching column was found in this row; the handling is not visible here.
160 if (j == cols.
size()) {
// GetLumpedMatrixDiagonal: iterates over every local row of A and then over every
// entry of that row. The return type is Teuchos::ArrayRCP<Scalar>.
// NOTE(review): the inner loop body (presumably the per-row accumulation that gives
// the "lumped" diagonal - confirm), the declarations of cols/vals and the return
// statement are not visible in this excerpt.
170 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
171 Teuchos::ArrayRCP<Scalar> Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetLumpedMatrixDiagonal(
const Matrix &A) {
173 size_t numRows = A.getRowMap()->getNodeNumElements();
178 for (
size_t i = 0; i < numRows; ++i) {
// Fetch the column indices (cols) and values (vals) of local row i.
179 A.getLocalRowView(i, cols, vals);
// Unlike GetMatrixDiagonal, this visits all entries of the row, not just the diagonal.
182 for (
LO j = 0; j < cols.
size(); ++j) {
// GetMatrixOverlappedDiagonal: extracts the diagonal of A into a row-map vector and
// prepares a column-map vector plus a row-map -> column-map importer for it.
// NOTE(review): this listing is an excerpt. The condition guarding the throw, the
// declarations of offsets and diagVals, the control flow linking the two extraction
// paths, and the final import/return are not visible here.
190 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
191 RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetMatrixOverlappedDiagonal(
const Matrix& A) {
193 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
// Diagonal restricted to the rows in the row map.
194 RCP<Vector> localDiag = VectorFactory::Build(rowMap);
// Path 1: downcast to CrsMatrixWrap and use its diagonal-offset based copy.
197 const CrsMatrixWrap* crsOp =
dynamic_cast<const CrsMatrixWrap*
>(&A);
199 throw Exceptions::RuntimeError(
"cast to CrsMatrixWrap failed");
202 crsOp->getLocalDiagOffsets(offsets);
203 crsOp->getLocalDiagCopy(*localDiag,offsets());
// Path 2: copy element by element from diagVals (declared on a line not shown)
// into the writable view of localDiag.
206 ArrayRCP<SC> localDiagVals = localDiag->getDataNonConst(0);
208 for (
LO i = 0; i < localDiagVals.size(); i++)
209 localDiagVals[i] = diagVals[i];
// Drop both array handles once the copy is done.
210 localDiagVals = diagVals = null;
// Column-map vector that is presumably filled from localDiag via the importer - confirm.
213 RCP<Vector> diagonal = VectorFactory::Build(colMap);
214 RCP< const Import> importer;
// Reuse the graph's importer when it has one; otherwise build one from rowMap to colMap.
215 importer = A.getCrsGraph()->getImporter();
216 if (importer == Teuchos::null) {
217 importer = ImportFactory::Build(rowMap, colMap);
// MyOldScaleMatrix: prepares a working copy (sv) of scalingVector, holding either
// the entries themselves or their reciprocals, and dispatches on the linear-algebra
// library reported by Op's row map.
// NOTE(review): this listing is an excerpt. The doFillComplete parameter, the
// declarations of sv and one, the `if (doInverse)` branching and the case labels of
// the switch are not visible here.
// NOTE(review): no zero check is visible before `one / scalingVector[i]` - confirm
// that callers never pass zero entries when inverting.
// NOTE(review): the loops use `int i` against scalingVector.size() - confirm this
// cannot narrow for very long vectors.
224 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
225 void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix(Matrix& Op,
const Teuchos::ArrayRCP<const SC>& scalingVector,
bool doInverse,
227 bool doOptimizeStorage)
// Reciprocal variant: sv[i] = 1 / scalingVector[i].
232 for (
int i = 0; i < scalingVector.
size(); ++i)
233 sv[i] = one / scalingVector[i];
// Plain variant: sv is a straight copy of scalingVector.
235 for (
int i = 0; i < scalingVector.
size(); ++i)
236 sv[i] = scalingVector[i];
239 switch (Op.getRowMap()->lib()) {
// Tpetra-backed matrices are handled by the dedicated helper.
241 MyOldScaleMatrix_Tpetra(Op, sv, doFillComplete, doOptimizeStorage);
// NOTE(review): presumably the Epetra case, which is not implemented here - confirm.
247 throw std::runtime_error(
"FIXME");
248 #ifndef __NVCC__ //prevent nvcc warning
// Any other backend is rejected.
253 throw Exceptions::RuntimeError(
"Only Epetra and Tpetra matrices can be scaled.");
254 #ifndef __NVCC__ //prevent nvcc warning
// Generic-template Epetra scaling entry point. The only visible statement throws,
// stating that Epetra requires SC=double and LO=GO=int.
// NOTE(review): the signature line is not visible in this excerpt; the name
// MyOldScaleMatrix_Epetra is taken from the exception message.
260 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
262 throw Exceptions::RuntimeError(
"MyOldScaleMatrix_Epetra: Epetra needs SC=double and LO=GO=int.");
// MyOldScaleMatrix_Tpetra: row-scales a Tpetra-backed matrix in place. Every stored
// value of local row i is multiplied by scalingVector[i] and written back, using
// local indices when Op is locally indexed and global indices otherwise.
// Optionally calls fillComplete afterwards.
// NOTE(review): this listing is an excerpt. The doFillComplete parameter, the
// declarations of cols, vals, valview and params, the bodies of several `if`
// statements, and the try/catch and #else/#endif structure around the final two
// throws are not visible here.
265 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
266 void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix_Tpetra(Matrix& Op,
const Teuchos::ArrayRCP<SC>& scalingVector,
268 bool doOptimizeStorage)
270 #ifdef HAVE_MUELU_TPETRA
// Unwrap the underlying Tpetra matrix from the Xpetra wrapper.
272 Tpetra::CrsMatrix<SC,LO,GO,NO>& tpOp = Op2NonConstTpetraCrs(Op);
274 const RCP<const Tpetra::Map<LO,GO,NO> > rowMap = tpOp.getRowMap();
275 const RCP<const Tpetra::Map<LO,GO,NO> > domainMap = tpOp.getDomainMap();
276 const RCP<const Tpetra::Map<LO,GO,NO> > rangeMap = tpOp.getRangeMap();
// Initial size of the scratch buffer. size_t(-1) is treated as a special value;
// what happens in that case is on a line not shown here.
278 size_t maxRowSize = tpOp.getNodeMaxNumRowEntries();
279 if (maxRowSize == Teuchos::as<size_t>(-1))
// Scratch buffer that receives the scaled values of one row at a time.
282 std::vector<SC> scaledVals(maxRowSize);
// NOTE(review): presumably re-opens the matrix for modification - confirm.
283 if (tpOp.isFillComplete())
286 if (Op.isLocallyIndexed() ==
true) {
// Locally indexed path.
// NOTE(review): getNodeNumElements() is re-evaluated in the loop condition on
// every iteration; it looks loop-invariant and hoistable - confirm.
290 for (
size_t i = 0; i < rowMap->getNodeNumElements(); ++i) {
291 tpOp.getLocalRowView(i, cols, vals);
292 size_t nnz = tpOp.getNumEntriesInLocalRow(i);
// Scratch buffer is resized when a row is longer than expected; the update of
// maxRowSize itself is on a line not shown here.
293 if (nnz > maxRowSize) {
295 scaledVals.resize(maxRowSize);
// Row scaling: every entry of row i uses the same factor scalingVector[i].
297 for (
size_t j = 0; j < nnz; ++j)
298 scaledVals[j] = vals[j]*scalingVector[i];
302 tpOp.replaceLocalValues(i, cols, valview);
// Globally indexed path: same algorithm, addressed through the row's global id.
310 for (
size_t i = 0; i < rowMap->getNodeNumElements(); ++i) {
311 GO gid = rowMap->getGlobalElement(i);
312 tpOp.getGlobalRowView(gid, cols, vals);
313 size_t nnz = tpOp.getNumEntriesInGlobalRow(gid);
314 if (nnz > maxRowSize) {
316 scaledVals.resize(maxRowSize);
// The factor is still indexed by the local row number i, not by gid.
319 for (
size_t j = 0; j < nnz; ++j)
320 scaledVals[j] = vals[j]*scalingVector[i];
324 tpOp.replaceGlobalValues(gid, cols, valview);
// Optional fillComplete; requires both domain and range maps to be defined.
329 if (doFillComplete) {
330 if (domainMap == Teuchos::null || rangeMap == Teuchos::null)
331 throw Exceptions::RuntimeError(
"In Utils_kokkos::Scaling: cannot fillComplete because the domain and/or range map hasn't been defined");
334 params->set(
"Optimize Storage", doOptimizeStorage);
335 params->set(
"No Nonlocal Changes",
true);
336 Op.fillComplete(Op.getDomainMap(), Op.getRangeMap(), params);
// NOTE(review): presumably raised from a catch handler when Op is not a
// Tpetra::CrsMatrix - confirm against the full source.
339 throw Exceptions::RuntimeError(
"Only Tpetra::CrsMatrix types can be scaled (Err.1)");
// NOTE(review): presumably the branch compiled when HAVE_MUELU_TPETRA is undefined - confirm.
342 throw Exceptions::RuntimeError(
"Matrix scaling is not possible because Tpetra has not been enabled.");
// DetectDirichletRows (free function): fills boundaryNodes(row) for each local row
// of A from a Kokkos kernel and returns the view. A row starts out flagged true and
// is un-flagged when it holds an off-diagonal entry with magnitude above tol.
// With count_twos_as_dirichlet set, a different kernel is used that tolerates one
// such entry.
// NOTE(review): this listing is an excerpt. The signature (apart from its last
// parameter), the declarations of numRows, range_type and boundaryNodes, the
// statements on original lines 372 and 375, and the second parallel_for call are
// not visible here.
// NOTE(review): "off-diagonal" is decided by `colidx(colID) != row`, comparing a
// local column id with a local row id - confirm both maps agree on owned ids.
347 template <
class SC,
class LO,
class GO,
class NO>
351 const bool count_twos_as_dirichlet) {
352 using ATS = Kokkos::ArithTraits<SC>;
355 auto localMatrix = A.getLocalMatrix();
359 if (count_twos_as_dirichlet)
360 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0,numRows),
361 KOKKOS_LAMBDA(
const LO row) {
362 auto rowView = localMatrix.row(row);
363 auto length = rowView.length;
// Start by flagging the row; the scan below may clear the flag.
365 boundaryNodes(row) =
true;
// colID is declared outside the loop, so its final value is available after it.
367 decltype(length) colID;
368 for (colID = 0; colID < length; colID++)
369 if ((rowView.colidx(colID) != row) &&
370 (ATS::magnitude(rowView.value(colID)) > tol)) {
// Flag already cleared means this is the second significant off-diagonal entry;
// the statement executed in that case is not shown (presumably a break - confirm).
371 if (!boundaryNodes(row))
// First significant off-diagonal entry: clear the flag and keep scanning.
373 boundaryNodes(row) =
false;
// Re-flag the row; the condition guarding this assignment is not visible here.
376 boundaryNodes(row) =
true;
// Default kernel: any significant off-diagonal entry clears the flag.
381 KOKKOS_LAMBDA(
const LO row) {
382 auto rowView = localMatrix.row(row);
383 auto length = rowView.length;
385 boundaryNodes(row) =
true;
386 for (decltype(length) colID = 0; colID < length; colID++)
387 if ((rowView.colidx(colID) != row) &&
388 (ATS::magnitude(rowView.value(colID)) > tol)) {
389 boundaryNodes(row) =
false;
394 return boundaryNodes;
// Forwarder: returns the result of the free function MueLu::DetectDirichletRows,
// instantiated with this template's own parameters, passing A, tol and
// count_twos_as_dirichlet through unchanged.
// NOTE(review): the signature lines are not visible in this excerpt.
397 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
401 return MueLu::DetectDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, tol, count_twos_as_dirichlet);
// Forwarder templated on Node only: returns the result of the free function
// MueLu::DetectDirichletRows instantiated for <double,int,int,Node>.
// NOTE(review): the signature lines are not visible in this excerpt.
404 template <
class Node>
408 return MueLu::DetectDirichletRows<double,int,int,Node>(A, tol,count_twos_as_dirichlet);
// DetectDirichletCols (free function): marks every column touched by a Dirichlet
// row, combines the marks through an export/import round trip, and returns a
// per-column flag view (dirichletCols).
// NOTE(review): this listing is an excerpt. The signature, the declarations of one,
// colMap, exporter, myColsToZero, globalColsToZero and dirichletCols, and both
// parallel_for call lines are not visible here.
412 template <
class SC,
class LO,
class GO,
class NO>
416 using ATS = Kokkos::ArithTraits<SC>;
// Arithmetic traits of ATS::val_type; used below to take the magnitude of view entries.
417 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
420 SC zero = ATS::zero();
423 auto localMatrix = A.getLocalMatrix();
// Start with no column marked.
429 myColsToZero->putScalar(zero);
430 auto myColsToZeroView = myColsToZero->template getLocalView<typename NO::device_type>();
// Kernel 1: for each Dirichlet row, write `one` into the marker entry of every
// column index stored in that row. All writers store the same value.
433 KOKKOS_LAMBDA(
const LO row) {
434 if (dirichletRows(row)) {
435 auto rowView = localMatrix.row(row);
436 auto length = rowView.length;
438 for (decltype(length) colID = 0; colID < length; colID++)
439 myColsToZeroView(rowView.colidx(colID),0) = one;
444 globalColsToZero->putScalar(zero);
// Combine the marks: export with ADD into globalColsToZero, then bring the
// combined result back into myColsToZero with INSERT through the same exporter.
447 globalColsToZero->doExport(*myColsToZero,*exporter,
Xpetra::ADD);
449 myColsToZero->doImport(*globalColsToZero,*exporter,
Xpetra::INSERT);
451 auto myCols = myColsToZero->template getLocalView<typename NO::device_type>();
452 size_t numColEntries = colMap->getNodeNumElements();
// Threshold of twice ATS::eps(); entries above it count as "marked".
454 const typename ATS::magnitudeType eps = 2.0*ATS::eps();
// Kernel 2: turn the accumulated marker values into boolean flags.
457 KOKKOS_LAMBDA (
const size_t i) {
458 dirichletCols(i) = impl_ATS::magnitude(myCols(i,0))>eps;
460 return dirichletCols;
// Forwarder: returns the result of the free function MueLu::DetectDirichletCols,
// instantiated with this template's own parameters, passing A and dirichletRows
// through unchanged.
// NOTE(review): the signature lines are not visible in this excerpt.
464 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
469 return MueLu::DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
// Forwarder templated on Node only: returns the result of the free function
// MueLu::DetectDirichletCols instantiated for <double,int,int,Node>.
// NOTE(review): the signature lines are not visible in this excerpt.
472 template <
class Node>
477 return MueLu::DetectDirichletCols<double,int,int,Node>(A, dirichletRows);
// ZeroDirichletRows (matrix version): for every row flagged in dirichletRows,
// overwrites all stored values of that row of A's local matrix with replaceWith.
// The diagonal entry is not treated specially.
// NOTE(review): the signature and the enclosing parallel_for/lambda lines are not
// visible in this excerpt.
482 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// A is dereferenced with ->, so it is a pointer-like handle to the matrix.
489 auto localMatrix = A->getLocalMatrix();
494 if (dirichletRows(row)) {
495 auto rowView = localMatrix.row(row);
496 auto length = rowView.length;
// Overwrite every stored entry of the flagged row.
497 for (decltype(length) colID = 0; colID < length; colID++)
498 rowView.value(colID) = replaceWith;
// Forwarder: calls the free function MueLu::ZeroDirichletRows, instantiated with
// this template's own parameters, passing A, dirichletRows and replaceWith through
// unchanged.
// NOTE(review): the signature lines are not visible in this excerpt.
503 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
509 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows, replaceWith);
// Forwarder templated on Node only, taking replaceWith as a double: delegates to
// the free function MueLu::ZeroDirichletRows instantiated for <double,int,int,Node>.
// NOTE(review): only the last parameter of the signature is visible in this excerpt.
512 template <
class Node>
517 double replaceWith) {
518 return MueLu::ZeroDirichletRows<double,int,int,Node>(A, dirichletRows, replaceWith);
// ZeroDirichletRows (multivector version): for every row i flagged in
// dirichletRows, sets entry (i, j) of X's local view to replaceWith for all
// numVecs columns.
// NOTE(review): the signature and the parallel_for call line are not visible in
// this excerpt.
523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Local 2-D view of X; the name myCols notwithstanding, it is indexed (row, vector).
529 auto myCols = X->template getLocalView<typename Node::device_type>();
530 size_t numVecs = X->getNumVectors();
532 KOKKOS_LAMBDA(
const size_t i) {
533 if (dirichletRows(i)) {
// Overwrite the flagged row in every vector of the multivector.
534 for(
size_t j=0; j<numVecs; j++)
535 myCols(i,j) = replaceWith;
// Forwarder: calls the free function MueLu::ZeroDirichletRows, instantiated with
// this template's own parameters, passing X, dirichletRows and replaceWith through
// unchanged.
// NOTE(review): the signature lines are not visible in this excerpt.
540 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
546 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X, dirichletRows, replaceWith);
// Forwarder templated on Node only, taking replaceWith as a double: delegates to
// the free function MueLu::ZeroDirichletRows instantiated for <double,int,int,Node>,
// with X as its first argument.
// NOTE(review): only the last parameter of the signature is visible in this excerpt.
549 template <
class Node>
554 double replaceWith) {
555 return MueLu::ZeroDirichletRows<double,int,int,Node>(X, dirichletRows, replaceWith);
// ZeroDirichletCols: for each row of A's local matrix, overwrites with replaceWith
// every stored entry whose column index is flagged in dirichletCols.
// NOTE(review): the signature and the enclosing parallel_for/lambda lines are not
// visible in this excerpt.
560 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
567 auto localMatrix = A->getLocalMatrix();
572 auto rowView = localMatrix.row(row);
573 auto length = rowView.length;
// dirichletCols is indexed by the local column index stored in the row.
574 for (decltype(length) colID = 0; colID < length; colID++)
575 if (dirichletCols(rowView.colidx(colID))) {
576 rowView.value(colID) = replaceWith;
// Forwarder: calls the free function MueLu::ZeroDirichletCols, instantiated with
// this template's own parameters, passing A, dirichletCols and replaceWith through
// unchanged.
// NOTE(review): the signature lines are not visible in this excerpt.
581 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
587 MueLu::ZeroDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletCols, replaceWith);
// Forwarder templated on Node only, taking replaceWith as a double: delegates to
// the free function MueLu::ZeroDirichletCols instantiated for <double,int,int,Node>.
// NOTE(review): only the last parameter of the signature is visible in this excerpt.
590 template <
class Node>
595 double replaceWith) {
596 return MueLu::ZeroDirichletCols<double,int,int,Node>(A, dirichletCols, replaceWith);
// RealValuedToScalarMultiVector: produces a Scalar-typed multivector from the
// real-valued multivector X. When complex Tpetra instantiations are enabled and
// Scalar is std::complex<double> or std::complex<float>, the entries of X are
// copied one by one into Xscalar inside a Kokkos kernel.
// NOTE(review): this listing is an excerpt. The declaration of range_type, the
// allocation of Xscalar (original line 612), the non-complex branch and the return
// statement are not visible here.
600 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
601 RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
602 Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
603 RealValuedToScalarMultiVector(RCP<RealValuedMultiVector > X) {
604 RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Xscalar;
605 #if defined(HAVE_XPETRA_TPETRA) && (defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) || defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))
// NOTE(review): std::type_info::name() returns a const char*, so these `==`
// compare pointer values rather than type identity or string contents. Whether two
// name() calls for the same type yield the same pointer is implementation-specific.
// Comparing the type_info objects directly (typeid(Scalar) == typeid(...)) or using
// std::is_same would express the intent reliably - confirm and fix in the full source.
609 if ((
typeid(
Scalar).name() ==
typeid(std::complex<double>).name()) ||
610 (
typeid(
Scalar).name() ==
typeid(std::complex<float>).name())) {
611 size_t numVecs = X->getNumVectors();
// Local views of the source (real) and destination (Scalar) multivectors.
613 auto XVec = X->template getLocalView<typename Node::device_type>();
614 auto XVecScalar = Xscalar->template getLocalView<typename Node::device_type>();
// One kernel iteration per local row; each copies all numVecs entries of that row,
// converting real values to Scalar on assignment.
616 Kokkos::parallel_for(
"MueLu:Utils::RealValuedToScalarMultiVector", range_type(0,X->getLocalLength()),
617 KOKKOS_LAMBDA(
const size_t i) {
618 for (
size_t j=0; j<numVecs; j++)
619 XVecScalar(i,j) = XVec(i,j);
// Member definition of Utilities_kokkos<double,int,int,Node>, templated on Node
// only, returning RCP<Xpetra::MultiVector<double,int,int,Node> >.
// NOTE(review): the member name, parameter list and body are not visible in this
// excerpt; presumably the double/int/int counterpart of
// RealValuedToScalarMultiVector - confirm against the full source.
627 template <
class Node>
628 RCP<Xpetra::MultiVector<double,int,int,Node> >
629 Utilities_kokkos<double,int,int,Node>::
636 #define MUELU_UTILITIES_KOKKOS_SHORT
637 #endif // MUELU_UTILITIES_KOKKOS_DEF_HPP
virtual size_t getNodeNumRows() const =0
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
MueLu::DefaultLocalOrdinal LocalOrdinal
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
static magnitudeType magnitude(T a)
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
virtual Teuchos::RCP< const Map > getDomainMap() const =0