MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_Utilities_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_UTILITIES_KOKKOS_DEF_HPP
47 #define MUELU_UTILITIES_KOKKOS_DEF_HPP
48 
49 #include <algorithm>
50 #include <Teuchos_DefaultComm.hpp>
52 
53 #include "MueLu_ConfigDefs.hpp"
54 
55 #ifdef HAVE_MUELU_EPETRA
56 # ifdef HAVE_MPI
57 # include "Epetra_MpiComm.h"
58 # endif
59 #endif
60 
61 #include <Kokkos_Core.hpp>
62 #include <KokkosSparse_CrsMatrix.hpp>
63 #include <KokkosSparse_getDiagCopy.hpp>
64 
65 #if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_EPETRAEXT)
66 #include <EpetraExt_MatrixMatrix.h>
67 #include <EpetraExt_RowMatrixOut.h>
69 #include <EpetraExt_CrsMatrixIn.h>
71 #include <EpetraExt_BlockMapIn.h>
72 #include <Xpetra_EpetraUtils.hpp>
73 #include <Xpetra_EpetraMultiVector.hpp>
74 #include <EpetraExt_BlockMapOut.h>
75 #endif
76 
77 #ifdef HAVE_MUELU_TPETRA
78 #include <MatrixMarket_Tpetra.hpp>
79 #include <Tpetra_RowMatrixTransposer.hpp>
80 #include <TpetraExt_MatrixMatrix.hpp>
81 #include <Xpetra_TpetraMultiVector.hpp>
82 #include <Xpetra_TpetraCrsMatrix.hpp>
83 #include <Xpetra_TpetraBlockCrsMatrix.hpp>
84 #endif
85 
86 #ifdef HAVE_MUELU_EPETRA
87 #include <Xpetra_EpetraMap.hpp>
88 #endif
89 
90 #include <Xpetra_BlockedCrsMatrix.hpp>
91 #include <Xpetra_DefaultPlatform.hpp>
92 #include <Xpetra_Import.hpp>
93 #include <Xpetra_ImportFactory.hpp>
94 #include <Xpetra_Map.hpp>
95 #include <Xpetra_MapFactory.hpp>
96 #include <Xpetra_Matrix.hpp>
97 #include <Xpetra_MatrixMatrix.hpp>
98 #include <Xpetra_MatrixFactory.hpp>
99 #include <Xpetra_MultiVector.hpp>
100 #include <Xpetra_MultiVectorFactory.hpp>
101 #include <Xpetra_Operator.hpp>
102 #include <Xpetra_Vector.hpp>
103 #include <Xpetra_VectorFactory.hpp>
104 
106 
107 #include <KokkosKernels_Handle.hpp>
108 #include <KokkosSparse_partitioning_impl.hpp>
109 
110 
111 namespace MueLu {
112 
113 
114  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
115  Teuchos::ArrayRCP<Scalar> Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
116  GetMatrixDiagonal(const Matrix& A) {
117  // FIXME_KOKKOS
118 
119  size_t numRows = A.getRowMap()->getNodeNumElements();
120  Teuchos::ArrayRCP<SC> diag(numRows);
121 
124  for (size_t i = 0; i < numRows; ++i) {
125  A.getLocalRowView(i, cols, vals);
126 
127  LO j = 0;
128  for (; j < cols.size(); ++j) {
129  if (Teuchos::as<size_t>(cols[j]) == i) {
130  diag[i] = vals[j];
131  break;
132  }
133  }
134  if (j == cols.size()) {
135  // Diagonal entry is absent
137  }
138  }
139 
140  return diag;
141  } //GetMatrixDiagonal
142 
143  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
145  Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
146  GetMatrixDiagonalInverse(const Matrix& A, Magnitude tol) {
147  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities_kokkos::GetMatrixDiagonalInverse");
148  // Some useful type definitions
149  using local_matrix_type = typename Matrix::local_matrix_type;
150  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
151  using value_type = typename local_matrix_type::value_type;
152  using ordinal_type = typename local_matrix_type::ordinal_type;
153  using execution_space = typename local_matrix_type::execution_space;
154  using memory_space = typename local_matrix_type::memory_space;
155  // Be careful with this one, if using Kokkos::ArithTraits<Scalar>
156  // you are likely to run into errors when handling std::complex<>
157  // a good way to work around that is to use the following:
158  // using KAT = Kokkos::ArithTraits<Kokkos::ArithTraits<Scalar>::val_type> >
159  // here we have: value_type = Kokkos::ArithTraits<Scalar>::val_type
160  using KAT = Kokkos::ArithTraits<value_type>;
161 
162  // Get/Create distributed objects
163  RCP<const Map> rowMap = A.getRowMap();
164  RCP<Vector> diag = VectorFactory::Build(rowMap,false);
165 
166  // Now generate local objects
167  local_matrix_type localMatrix = A.getLocalMatrix();
168  auto diagVals = diag->template getLocalView<memory_space>();
169 
170  ordinal_type numRows = localMatrix.graph.numRows();
171 
172  // Note: 2019-11-21, LBV
173  // This could be implemented with a TeamPolicy over the rows
174  // and a TeamVectorRange over the entries in a row if performance
175  // becomes more important here.
176  Kokkos::parallel_for("Utilities_kokkos::GetMatrixDiagonalInverse",
178  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
179  bool foundDiagEntry = false;
180  auto myRow = localMatrix.rowConst(rowIdx);
181  for(ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
182  if(myRow.colidx(entryIdx) == rowIdx) {
183  foundDiagEntry = true;
184  if(KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
185  diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
186  } else {
187  diagVals(rowIdx, 0) = KAT::zero();
188  }
189  break;
190  }
191  }
192 
193  if(!foundDiagEntry) {diagVals(rowIdx, 0) = KAT::zero();}
194  });
195 
196  return diag;
197  } //GetMatrixDiagonalInverse
198 
199  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
200  RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
201  Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
202  GetMatrixOverlappedDiagonal(const Matrix& A) {
203  // FIXME_KOKKOS
204  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
205  RCP<Vector> localDiag = VectorFactory::Build(rowMap);
206 
207  try {
208  const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
209  if (crsOp == NULL) {
210  throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
211  }
213  crsOp->getLocalDiagOffsets(offsets);
214  crsOp->getLocalDiagCopy(*localDiag,offsets());
215  }
216  catch (...) {
217  ArrayRCP<SC> localDiagVals = localDiag->getDataNonConst(0);
218  Teuchos::ArrayRCP<SC> diagVals = GetMatrixDiagonal(A);
219  for (LO i = 0; i < localDiagVals.size(); i++)
220  localDiagVals[i] = diagVals[i];
221  localDiagVals = diagVals = null;
222  }
223 
224  RCP<Vector> diagonal = VectorFactory::Build(colMap);
225  RCP< const Import> importer;
226  importer = A.getCrsGraph()->getImporter();
227  if (importer == Teuchos::null) {
228  importer = ImportFactory::Build(rowMap, colMap);
229  }
230  diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
231 
232  return diagonal;
233  } //GetMatrixOverlappedDiagonal
234 
235  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
236  void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
237  MyOldScaleMatrix(Matrix& Op, const Teuchos::ArrayRCP<const SC>& scalingVector,
238  bool doInverse, bool doFillComplete, bool doOptimizeStorage)
239  {
241  Teuchos::ArrayRCP<SC> sv(scalingVector.size());
242  if (doInverse) {
243  for (int i = 0; i < scalingVector.size(); ++i)
244  sv[i] = one / scalingVector[i];
245  } else {
246  for (int i = 0; i < scalingVector.size(); ++i)
247  sv[i] = scalingVector[i];
248  }
249 
250  switch (Op.getRowMap()->lib()) {
251  case Xpetra::UseTpetra:
252  MyOldScaleMatrix_Tpetra(Op, sv, doFillComplete, doOptimizeStorage);
253  break;
254 
255  case Xpetra::UseEpetra:
256  // FIXME_KOKKOS
257  // Utils2_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix_Epetra(Op, sv, doFillComplete, doOptimizeStorage);
258  throw std::runtime_error("FIXME");
259 #ifndef __NVCC__ //prevent nvcc warning
260  break;
261 #endif
262 
263  default:
264  throw Exceptions::RuntimeError("Only Epetra and Tpetra matrices can be scaled.");
265 #ifndef __NVCC__ //prevent nvcc warning
266  break;
267 #endif
268  }
269  }
270 
271  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
272  void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix_Epetra(Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& /* Op */, const Teuchos::ArrayRCP<Scalar>& /* scalingVector */, bool /* doFillComplete */, bool /* doOptimizeStorage */) {
273  throw Exceptions::RuntimeError("MyOldScaleMatrix_Epetra: Epetra needs SC=double and LO=GO=int.");
274  }
275 
276  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
277  void Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MyOldScaleMatrix_Tpetra(Matrix& Op, const Teuchos::ArrayRCP<SC>& scalingVector,
278  bool doFillComplete,
279  bool doOptimizeStorage)
280  {
281 #ifdef HAVE_MUELU_TPETRA
282  try {
283  Tpetra::CrsMatrix<SC,LO,GO,NO>& tpOp = Op2NonConstTpetraCrs(Op);
284 
285  const RCP<const Tpetra::Map<LO,GO,NO> > rowMap = tpOp.getRowMap();
286  const RCP<const Tpetra::Map<LO,GO,NO> > domainMap = tpOp.getDomainMap();
287  const RCP<const Tpetra::Map<LO,GO,NO> > rangeMap = tpOp.getRangeMap();
288 
289  size_t maxRowSize = tpOp.getNodeMaxNumRowEntries();
290  if (maxRowSize == Teuchos::as<size_t>(-1)) // hasn't been determined yet
291  maxRowSize = 20;
292 
293  std::vector<SC> scaledVals(maxRowSize);
294  if (tpOp.isFillComplete())
295  tpOp.resumeFill();
296 
297  if (Op.isLocallyIndexed() == true) {
300 
301  for (size_t i = 0; i < rowMap->getNodeNumElements(); ++i) {
302  tpOp.getLocalRowView(i, cols, vals);
303  size_t nnz = tpOp.getNumEntriesInLocalRow(i);
304  if (nnz > maxRowSize) {
305  maxRowSize = nnz;
306  scaledVals.resize(maxRowSize);
307  }
308  for (size_t j = 0; j < nnz; ++j)
309  scaledVals[j] = vals[j]*scalingVector[i];
310 
311  if (nnz > 0) {
312  Teuchos::ArrayView<const SC> valview(&scaledVals[0], nnz);
313  tpOp.replaceLocalValues(i, cols, valview);
314  }
315  } //for (size_t i=0; ...
316 
317  } else {
320 
321  for (size_t i = 0; i < rowMap->getNodeNumElements(); ++i) {
322  GO gid = rowMap->getGlobalElement(i);
323  tpOp.getGlobalRowView(gid, cols, vals);
324  size_t nnz = tpOp.getNumEntriesInGlobalRow(gid);
325  if (nnz > maxRowSize) {
326  maxRowSize = nnz;
327  scaledVals.resize(maxRowSize);
328  }
329  // FIXME FIXME FIXME FIXME FIXME FIXME
330  for (size_t j = 0; j < nnz; ++j)
331  scaledVals[j] = vals[j]*scalingVector[i]; //FIXME i or gid?
332 
333  if (nnz > 0) {
334  Teuchos::ArrayView<const SC> valview(&scaledVals[0], nnz);
335  tpOp.replaceGlobalValues(gid, cols, valview);
336  }
337  } //for (size_t i=0; ...
338  }
339 
340  if (doFillComplete) {
341  if (domainMap == Teuchos::null || rangeMap == Teuchos::null)
342  throw Exceptions::RuntimeError("In Utils_kokkos::Scaling: cannot fillComplete because the domain and/or range map hasn't been defined");
343 
344  RCP<Teuchos::ParameterList> params = rcp(new Teuchos::ParameterList());
345  params->set("Optimize Storage", doOptimizeStorage);
346  params->set("No Nonlocal Changes", true);
347  Op.fillComplete(Op.getDomainMap(), Op.getRangeMap(), params);
348  }
349  } catch(...) {
350  throw Exceptions::RuntimeError("Only Tpetra::CrsMatrix types can be scaled (Err.1)");
351  }
352 #else
353  throw Exceptions::RuntimeError("Matrix scaling is not possible because Tpetra has not been enabled.");
354 #endif
355  } //MyOldScaleMatrix_Tpetra()
356 
357 
358  template <class SC, class LO, class GO, class NO>
360  DetectDirichletRows(const Xpetra::Matrix<SC,LO,GO,NO>& A,
361  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
362  const bool count_twos_as_dirichlet) {
363  using ATS = Kokkos::ArithTraits<SC>;
365 
366  auto localMatrix = A.getLocalMatrix();
367  LO numRows = A.getNodeNumRows();
368 
369  Kokkos::View<bool*, typename NO::device_type> boundaryNodes(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
370  if (count_twos_as_dirichlet)
371  Kokkos::parallel_for("MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0,numRows),
372  KOKKOS_LAMBDA(const LO row) {
373  auto rowView = localMatrix.row(row);
374  auto length = rowView.length;
375 
376  boundaryNodes(row) = true;
377  if (length > 2) {
378  decltype(length) colID;
379  for (colID = 0; colID < length; colID++)
380  if ((rowView.colidx(colID) != row) &&
381  (ATS::magnitude(rowView.value(colID)) > tol)) {
382  if (!boundaryNodes(row))
383  break;
384  boundaryNodes(row) = false;
385  }
386  if (colID == length)
387  boundaryNodes(row) = true;
388  }
389  });
390  else
391  Kokkos::parallel_for("MueLu:Utils::DetectDirichletRows", range_type(0,numRows),
392  KOKKOS_LAMBDA(const LO row) {
393  auto rowView = localMatrix.row(row);
394  auto length = rowView.length;
395 
396  boundaryNodes(row) = true;
397  for (decltype(length) colID = 0; colID < length; colID++)
398  if ((rowView.colidx(colID) != row) &&
399  (ATS::magnitude(rowView.value(colID)) > tol)) {
400  boundaryNodes(row) = false;
401  break;
402  }
403  });
404 
405  return boundaryNodes;
406  }
407 
408  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
411  DetectDirichletRows(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType& tol, const bool count_twos_as_dirichlet) {
412  return MueLu::DetectDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, tol, count_twos_as_dirichlet);
413  }
414 
415  template <class Node>
418  DetectDirichletRows(const Xpetra::Matrix<double,int,int,Node>& A, const typename Teuchos::ScalarTraits<double>::magnitudeType& tol, const bool count_twos_as_dirichlet) {
419  return MueLu::DetectDirichletRows<double,int,int,Node>(A, tol,count_twos_as_dirichlet);
420  }
421 
422 
423  template <class SC, class LO, class GO, class NO>
425  DetectDirichletCols(const Xpetra::Matrix<SC,LO,GO,NO>& A,
427  using ATS = Kokkos::ArithTraits<SC>;
428  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
430 
431  SC zero = ATS::zero();
432  SC one = ATS::one();
433 
434  auto localMatrix = A.getLocalMatrix();
435  LO numRows = A.getNodeNumRows();
436 
437  Teuchos::RCP<const Xpetra::Map<LO,GO,NO> > domMap = A.getDomainMap();
438  Teuchos::RCP<const Xpetra::Map<LO,GO,NO> > colMap = A.getColMap();
439  Teuchos::RCP<Xpetra::MultiVector<SC,LO,GO,NO> > myColsToZero = Xpetra::MultiVectorFactory<SC,LO,GO,NO>::Build(colMap,1);
440  myColsToZero->putScalar(zero);
441  auto myColsToZeroView = myColsToZero->template getLocalView<typename NO::device_type>();
442  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
443  Kokkos::parallel_for("MueLu:Utils::DetectDirichletCols1", range_type(0,numRows),
444  KOKKOS_LAMBDA(const LO row) {
445  if (dirichletRows(row)) {
446  auto rowView = localMatrix.row(row);
447  auto length = rowView.length;
448 
449  for (decltype(length) colID = 0; colID < length; colID++)
450  myColsToZeroView(rowView.colidx(colID),0) = one;
451  }
452  });
453 
454  Teuchos::RCP<Xpetra::MultiVector<SC,LO,GO,NO> > globalColsToZero = Xpetra::MultiVectorFactory<SC,LO,GO,NO>::Build(domMap,1);
455  globalColsToZero->putScalar(zero);
456  Teuchos::RCP<Xpetra::Export<LO,GO,NO> > exporter = Xpetra::ExportFactory<LO,GO,NO>::Build(colMap,domMap);
457  // export to domain map
458  globalColsToZero->doExport(*myColsToZero,*exporter,Xpetra::ADD);
459  // import to column map
460  myColsToZero->doImport(*globalColsToZero,*exporter,Xpetra::INSERT);
461 
462  auto myCols = myColsToZero->template getLocalView<typename NO::device_type>();
463  size_t numColEntries = colMap->getNodeNumElements();
464  Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
465  const typename ATS::magnitudeType eps = 2.0*ATS::eps();
466 
467  Kokkos::parallel_for("MueLu:Utils::DetectDirichletCols2", range_type(0,numColEntries),
468  KOKKOS_LAMBDA (const size_t i) {
469  dirichletCols(i) = impl_ATS::magnitude(myCols(i,0))>eps;
470  });
471  return dirichletCols;
472  }
473 
474 
475  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
478  DetectDirichletCols(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
480  return MueLu::DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
481  }
482 
483  template <class Node>
486  DetectDirichletCols(const Xpetra::Matrix<double,int,int,Node>& A,
488  return MueLu::DetectDirichletCols<double,int,int,Node>(A, dirichletRows);
489  }
490 
491 
492  // Zeros out rows
493  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
494  void
495  ZeroDirichletRows(RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& A,
497  Scalar replaceWith) {
499 
500  auto localMatrix = A->getLocalMatrix();
501  LocalOrdinal numRows = A->getNodeNumRows();
502 
503  Kokkos::parallel_for("MueLu:Utils::ZeroDirichletRows", range_type(0,numRows),
504  KOKKOS_LAMBDA(const LocalOrdinal row) {
505  if (dirichletRows(row)) {
506  auto rowView = localMatrix.row(row);
507  auto length = rowView.length;
508  for (decltype(length) colID = 0; colID < length; colID++)
509  rowView.value(colID) = replaceWith;
510  }
511  });
512  }
513 
514  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
515  void
517  ZeroDirichletRows(RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
519  Scalar replaceWith) {
520  MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows, replaceWith);
521  }
522 
523  template <class Node>
524  void
526  ZeroDirichletRows(RCP<Xpetra::Matrix<double, int, int, Node> >& A,
528  double replaceWith) {
529  return MueLu::ZeroDirichletRows<double,int,int,Node>(A, dirichletRows, replaceWith);
530  }
531 
532 
533  // Zeros out rows
534  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
535  void
536  ZeroDirichletRows(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& X,
538  Scalar replaceWith) {
540  auto myCols = X->template getLocalView<typename Node::device_type>();
541  size_t numVecs = X->getNumVectors();
542  Kokkos::parallel_for("MueLu:Utils::ZeroDirichletRows_MV", range_type(0,dirichletRows.size()),
543  KOKKOS_LAMBDA(const size_t i) {
544  if (dirichletRows(i)) {
545  for(size_t j=0; j<numVecs; j++)
546  myCols(i,j) = replaceWith;
547  }
548  });
549  }
550 
551  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
552  void
554  ZeroDirichletRows(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& X,
556  Scalar replaceWith) {
557  MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X, dirichletRows, replaceWith);
558  }
559 
560  template <class Node>
561  void
563  ZeroDirichletRows(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, int, int, Node> >& X,
565  double replaceWith) {
566  return MueLu::ZeroDirichletRows<double,int,int,Node>(X, dirichletRows, replaceWith);
567  }
568 
569 
570  // Zeros out columns
571  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
572  void
573  ZeroDirichletCols(RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
575  Scalar replaceWith) {
577 
578  auto localMatrix = A->getLocalMatrix();
579  LocalOrdinal numRows = A->getNodeNumRows();
580 
581  Kokkos::parallel_for("MueLu:Utils::ZeroDirichletCols", range_type(0,numRows),
582  KOKKOS_LAMBDA(const LocalOrdinal row) {
583  auto rowView = localMatrix.row(row);
584  auto length = rowView.length;
585  for (decltype(length) colID = 0; colID < length; colID++)
586  if (dirichletCols(rowView.colidx(colID))) {
587  rowView.value(colID) = replaceWith;
588  }
589  });
590  }
591 
592  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
593  void
595  ZeroDirichletCols(RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
597  Scalar replaceWith) {
598  MueLu::ZeroDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletCols, replaceWith);
599  }
600 
601  template <class Node>
602  void
604  ZeroDirichletCols(RCP<Xpetra::Matrix<double,int,int,Node> >& A,
606  double replaceWith) {
607  return MueLu::ZeroDirichletCols<double,int,int,Node>(A, dirichletCols, replaceWith);
608  }
609 
610 
611  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
612  RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
613  Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
614  RealValuedToScalarMultiVector(RCP<RealValuedMultiVector > X) {
615  RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Xscalar;
616 #if defined(HAVE_XPETRA_TPETRA) && (defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) || defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))
618 
619  // Need to cast the real-valued multivector to Scalar=complex
620  if ((typeid(Scalar).name() == typeid(std::complex<double>).name()) ||
621  (typeid(Scalar).name() == typeid(std::complex<float>).name())) {
622  size_t numVecs = X->getNumVectors();
623  Xscalar = Xpetra::MultiVectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(X->getMap(),numVecs);
624  auto XVec = X->template getLocalView<typename Node::device_type>();
625  auto XVecScalar = Xscalar->template getLocalView<typename Node::device_type>();
626 
627  Kokkos::parallel_for("MueLu:Utils::RealValuedToScalarMultiVector", range_type(0,X->getLocalLength()),
628  KOKKOS_LAMBDA(const size_t i) {
629  for (size_t j=0; j<numVecs; j++)
630  XVecScalar(i,j) = XVec(i,j);
631  });
632  } else
633 #endif
634  Xscalar = rcp_dynamic_cast<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >(X);
635  return Xscalar;
636  }
637 
638  template <class Node>
639  RCP<Xpetra::MultiVector<double,int,int,Node> >
640  Utilities_kokkos<double,int,int,Node>::
641  RealValuedToScalarMultiVector(RCP<Xpetra::MultiVector<Magnitude,int,int,Node> > X) {
642  return X;
643  }
644 
645 
646  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
647  Teuchos::RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> > ReverseCuthillMcKee(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Op) {
648  using local_matrix_type = typename Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type;
649  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
650  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
651  using device = typename local_graph_type::device_type;
652  using execution_space = typename local_matrix_type::execution_space;
653  using ordinal_type = typename local_matrix_type::ordinal_type;
654 
655  local_matrix_type localMatrix = Op.getLocalMatrix();
656  using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<typename local_graph_type::size_type, LocalOrdinal,Scalar,
657  typename device::execution_space, typename device::memory_space,typename device::memory_space>;
658 
659  using rcm_t = KokkosSparse::Impl::RCM<KernelHandle, typename local_graph_type::row_map_type::const_type, typename local_graph_type::entries_type::non_const_type>;
660 
661  rcm_t rcm(localMatrix.numRows(), localMatrix.graph.row_map, localMatrix.graph.entries);
662  lno_nnz_view_t rcmOrder = rcm.rcm();
663 
665  Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(Op.getRowMap());
666 
667  // Copy out and reorder data
668  auto view1D = Kokkos::subview(retval->template getLocalView<device>(),Kokkos::ALL (), 0);
669  Kokkos::parallel_for("Utilities_kokkos::ReverseCuthillMcKee",
670  Kokkos::RangePolicy<ordinal_type, execution_space>(0, localMatrix.numRows()),
671  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
672  view1D(rcmOrder(rowIdx)) = rowIdx;
673  });
674  return retval;
675  }
676 
677  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
678  Teuchos::RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> > CuthillMcKee(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Op) {
679  using local_matrix_type = typename Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type;
680  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
681  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
682  using device = typename local_graph_type::device_type;
683  using execution_space = typename local_matrix_type::execution_space;
684  using ordinal_type = typename local_matrix_type::ordinal_type;
685 
686  local_matrix_type localMatrix = Op.getLocalMatrix();
687  using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle<typename local_graph_type::size_type, LocalOrdinal,Scalar,
688  typename device::execution_space, typename device::memory_space,typename device::memory_space>;
689 
690  using rcm_t = KokkosSparse::Impl::RCM<KernelHandle, typename local_graph_type::row_map_type::const_type, typename local_graph_type::entries_type::non_const_type>;
691 
692  rcm_t rcm(localMatrix.numRows(), localMatrix.graph.row_map, localMatrix.graph.entries);
693  lno_nnz_view_t rcmOrder = rcm.cuthill_mckee();
694 
696  Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(Op.getRowMap());
697 
698  // Copy out data
699  auto view1D = Kokkos::subview(retval->template getLocalView<device>(),Kokkos::ALL (), 0);
700  Kokkos::parallel_for("Utilities_kokkos::ReverseCuthillMcKee",
701  Kokkos::RangePolicy<ordinal_type, execution_space>(0, localMatrix.numRows()),
702  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
703  view1D(rcmOrder(rowIdx)) = rowIdx;
704  });
705  return retval;
706  }
707 
708  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
711  return MueLu::ReverseCuthillMcKee<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Op);
712  }
713 
714  template <class Node>
717  return MueLu::ReverseCuthillMcKee<double,int,int,Node>(Op);
718  }
719 
720  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
723  return MueLu::CuthillMcKee<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Op);
724  }
725 
726  template <class Node>
729  return MueLu::CuthillMcKee<double,int,int,Node>(Op);
730  }
731 
732  // Applies Ones-and-Zeros to matrix rows
733  // Takes a Boolean array.
734  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
735  void
736  ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
738  TEUCHOS_ASSERT(A->isFillComplete());
739  using ATS = Kokkos::ArithTraits<Scalar>;
740  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
742 
743  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A->getDomainMap();
744  RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > ranMap = A->getRangeMap();
747 
748  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getNodeNumElements());
749 
750  const Scalar one = impl_ATS::one();
751  const Scalar zero = impl_ATS::zero();
752 
753  auto localMatrix = A->getLocalMatrix();
754  auto localRmap = Rmap->getLocalMap();
755  auto localCmap = Cmap->getLocalMap();
756 
757  Kokkos::parallel_for("MueLu::Utils::ApplyOAZ",range_type(0,dirichletRows.extent(0)),
758  KOKKOS_LAMBDA(const LocalOrdinal row) {
759  if (dirichletRows(row)){
760  auto rowView = localMatrix.row(row);
761  auto length = rowView.length;
762  auto row_gid = localRmap.getGlobalElement(row);
763  auto row_lid = localCmap.getLocalElement(row_gid);
764 
765  for (decltype(length) colID = 0; colID < length; colID++)
766  if (rowView.colidx(colID) == row_lid)
767  rowView.value(colID) = one;
768  else
769  rowView.value(colID) = zero;
770  }
771  });
772  }
773 
774  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
775  void
777  ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
779  MueLu::ApplyOAZToMatrixRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
780  }
781 
782  template <class Node>
783  void
785  ApplyOAZToMatrixRows(Teuchos::RCP<Xpetra::Matrix<double,int,int,Node> >& A,
787  MueLu::ApplyOAZToMatrixRows<double,int,int,Node>(A, dirichletRows);
788  }
789 
790 } //namespace MueLu
791 
792 #define MUELU_UTILITIES_KOKKOS_SHORT
793 #endif // MUELU_UTILITIES_KOKKOS_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_integral< iType >::value, size_t >::type extent(const iType &r) const noexcept
size_type size() const
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
size_type size() const
MueLu::DefaultNode Node
static RCP< Time > getNewTimer(const std::string &name)
Kokkos::View< bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
#define TEUCHOS_ASSERT(assertion_test)
Kokkos::View< bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)