MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UtilitiesBase_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_UTILITIESBASE_DEF_HPP
11 #define MUELU_UTILITIESBASE_DEF_HPP
12 
13 #include "MueLu_ConfigDefs.hpp"
14 
16 
17 #include <Kokkos_Core.hpp>
18 #include <KokkosSparse_CrsMatrix.hpp>
19 #include <KokkosSparse_getDiagCopy.hpp>
20 
21 #include <Xpetra_BlockedVector.hpp>
22 #include <Xpetra_BlockedMap.hpp>
23 #include <Xpetra_BlockedMultiVector.hpp>
24 #include <Xpetra_ExportFactory.hpp>
25 
26 #include <Xpetra_Import.hpp>
27 #include <Xpetra_ImportFactory.hpp>
28 #include <Xpetra_MatrixMatrix.hpp>
29 #include <Xpetra_CrsGraph.hpp>
31 #include <Xpetra_CrsMatrixWrap.hpp>
32 #include <Xpetra_StridedMap.hpp>
33 
34 #include "MueLu_Exceptions.hpp"
36 
37 #include <KokkosKernels_Handle.hpp>
38 #include <KokkosGraph_RCM.hpp>
39 
40 namespace MueLu {
41 
42 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
46  if (Op.is_null())
47  return Teuchos::null;
48  return rcp(new CrsMatrixWrap(Op));
49 }
50 
51 template <class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal,
54  class Node>
57  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold,
58  const bool keepDiagonal) {
60  using row_ptr_type = typename crs_matrix::local_graph_type::row_map_type::non_const_type;
61  using col_idx_type = typename crs_matrix::local_graph_type::entries_type::non_const_type;
62  using vals_type = typename crs_matrix::local_matrix_type::values_type;
63  using execution_space = typename crs_matrix::local_matrix_type::execution_space;
64 
65  using ATS = Kokkos::ArithTraits<Scalar>;
66  using impl_SC = typename ATS::val_type;
67  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
68 
69  auto lclA = A->getLocalMatrixDevice();
70 
71  auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
72  col_idx_type idx;
73  vals_type vals;
74  LocalOrdinal nnz;
75 
76  if (keepDiagonal) {
77  auto lclRowMap = A->getRowMap()->getLocalMap();
78  auto lclColMap = A->getColMap()->getLocalMap();
79  Kokkos::parallel_scan(
80  "removeSmallEntries::rowptr",
81  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
82  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
83  auto row = lclA.row(rlid);
84  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
85  for (LocalOrdinal k = 0; k < row.length; ++k) {
86  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
87  partial_nnz += 1;
88  }
89  }
90  if (is_final)
91  rowptr(rlid + 1) = partial_nnz;
92  },
93  nnz);
94 
95  idx = col_idx_type("idx", nnz);
96  vals = vals_type("vals", nnz);
97 
98  Kokkos::parallel_for(
99  "removeSmallEntries::indicesValues",
100  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
101  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
102  auto row = lclA.row(rlid);
103  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
104  auto I = rowptr(rlid);
105  for (LocalOrdinal k = 0; k < row.length; ++k) {
106  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
107  idx(I) = row.colidx(k);
108  vals(I) = row.value(k);
109  I += 1;
110  }
111  }
112  });
113 
114  Kokkos::fence();
115  } else {
116  Kokkos::parallel_scan(
117  "removeSmallEntries::rowptr",
118  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
119  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
120  auto row = lclA.row(rlid);
121  for (LocalOrdinal k = 0; k < row.length; ++k) {
122  if (impl_ATS::magnitude(row.value(k)) > threshold) {
123  partial_nnz += 1;
124  }
125  }
126  if (is_final)
127  rowptr(rlid + 1) = partial_nnz;
128  },
129  nnz);
130 
131  idx = col_idx_type("idx", nnz);
132  vals = vals_type("vals", nnz);
133 
134  Kokkos::parallel_for(
135  "removeSmallEntries::indicesValues",
136  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
137  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
138  auto row = lclA.row(rlid);
139  auto I = rowptr(rlid);
140  for (LocalOrdinal k = 0; k < row.length; ++k) {
141  if (impl_ATS::magnitude(row.value(k)) > threshold) {
142  idx(I) = row.colidx(k);
143  vals(I) = row.value(k);
144  I += 1;
145  }
146  }
147  });
148 
149  Kokkos::fence();
150  }
151 
152  auto lclNewA = typename crs_matrix::local_matrix_type("thresholdedMatrix", lclA.numRows(), lclA.numCols(), nnz, vals, rowptr, idx);
153  auto newA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclNewA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
154 
155  return newA;
156 }
157 
158 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
159 RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
161  GetThresholdedMatrix(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ain, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) {
162  auto crsWrap = rcp_dynamic_cast<CrsMatrixWrap>(Ain);
163  if (!crsWrap.is_null()) {
164  auto crsMat = crsWrap->getCrsMatrix();
165  auto filteredMat = removeSmallEntries(crsMat, threshold, keepDiagonal);
166  return rcp_static_cast<CrsMatrixWrap>(filteredMat);
167  }
168 
169  RCP<const Map> rowmap = Ain->getRowMap();
170  RCP<const Map> colmap = Ain->getColMap();
171  RCP<CrsMatrixWrap> Aout = rcp(new CrsMatrixWrap(rowmap, expectedNNZperRow <= 0 ? Ain->getGlobalMaxNumRowEntries() : expectedNNZperRow));
172  // loop over local rows
173  for (size_t row = 0; row < Ain->getLocalNumRows(); row++) {
174  size_t nnz = Ain->getNumEntriesInLocalRow(row);
175 
178  Ain->getLocalRowView(row, indices, vals);
179 
180  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::ThresholdAFilterFactory::Build: number of nonzeros not equal to number of indices? Error.");
181 
184  size_t nNonzeros = 0;
185  if (keepDiagonal) {
186  GlobalOrdinal glbRow = rowmap->getGlobalElement(row);
187  LocalOrdinal lclColIdx = colmap->getLocalElement(glbRow);
188  for (size_t i = 0; i < (size_t)indices.size(); i++) {
189  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > Teuchos::ScalarTraits<Scalar>::magnitude(threshold) || indices[i] == lclColIdx) {
190  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
191  valout[nNonzeros] = vals[i];
192  nNonzeros++;
193  }
194  }
195  } else
196  for (size_t i = 0; i < (size_t)indices.size(); i++) {
198  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
199  valout[nNonzeros] = vals[i];
200  nNonzeros++;
201  }
202  }
203 
204  indout.resize(nNonzeros);
205  valout.resize(nNonzeros);
206 
207  Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
208  }
209  Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());
210 
211  return Aout;
212 }
213 
214 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
218  using STS = Teuchos::ScalarTraits<Scalar>;
219  RCP<CrsGraph> sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), expectedNNZperRow <= 0 ? A->getGlobalMaxNumRowEntries() : expectedNNZperRow);
220 
221  RCP<Vector> diag = GetMatrixOverlappedDiagonal(*A);
222  ArrayRCP<const Scalar> D = diag->getData(0);
223 
224  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
227  A->getLocalRowView(row, indices, vals);
228 
229  GlobalOrdinal globalRow = A->getRowMap()->getGlobalElement(row);
230  LocalOrdinal col = A->getColMap()->getLocalElement(globalRow);
231 
232  const Scalar Dk = STS::magnitude(D[col]) > 0.0 ? STS::magnitude(D[col]) : 1.0;
233  Array<GlobalOrdinal> indicesNew;
234 
235  for (size_t i = 0; i < size_t(indices.size()); i++)
236  // keep diagonal per default
237  if (col == indices[i] || STS::magnitude(STS::squareroot(Dk) * vals[i] * STS::squareroot(Dk)) > STS::magnitude(threshold))
238  indicesNew.append(A->getColMap()->getGlobalElement(indices[i]));
239 
240  sparsityPattern->insertGlobalIndices(globalRow, ArrayView<const GlobalOrdinal>(indicesNew.data(), indicesNew.length()));
241  }
242  sparsityPattern->fillComplete();
243 
244  return sparsityPattern;
245 }
246 
247 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
251  size_t numRows = A.getRowMap()->getLocalNumElements();
252  Teuchos::ArrayRCP<Scalar> diag(numRows);
255  for (size_t i = 0; i < numRows; ++i) {
256  A.getLocalRowView(i, cols, vals);
257  LocalOrdinal j = 0;
258  for (; j < cols.size(); ++j) {
259  if (Teuchos::as<size_t>(cols[j]) == i) {
260  diag[i] = vals[j];
261  break;
262  }
263  }
264  if (j == cols.size()) {
265  // Diagonal entry is absent
267  }
268  }
269  return diag;
270 }
271 
272 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
275  GetMatrixDiagonal(const Matrix& A) {
276  const auto rowMap = A.getRowMap();
278 
279  const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
280  if ((crsOp != NULL) && (rowMap->lib() == Xpetra::UseTpetra)) {
281  using device_type = typename CrsGraph::device_type;
282  Kokkos::View<size_t*, device_type> offsets("offsets", rowMap->getLocalNumElements());
283  crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
284  crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
285  } else {
286  A.getLocalDiagCopy(*diag);
287  }
288 
289  return diag;
290 }
291 
292 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
293 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
294 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
295 // GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A, Magnitude tol, Scalar valReplacement) {
296 // Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse");
297 
298 // RCP<const Map> rowMap = A.getRowMap();
299 // RCP<Vector> diag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap,true);
300 
301 // A.getLocalDiagCopy(*diag);
302 
303 // RCP<Vector> inv = MueLu::UtilitiesBase<Scalar,LocalOrdinal,GlobalOrdinal,Node>::GetInverse(diag, tol, valReplacement);
304 
305 // return inv;
306 // }
307 
308 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
313  Scalar valReplacement,
314  const bool doLumped) {
315  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities::GetMatrixDiagonalInverse");
316 
317  RCP<const BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpFromRef(A));
318  if (!bA.is_null()) {
319  RCP<const Map> rowMap = A.getRowMap();
321  A.getLocalDiagCopy(*diag);
323  return inv;
324  }
325 
326  // Some useful type definitions
327  using local_matrix_type = typename Matrix::local_matrix_type;
328  // using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
329  using value_type = typename local_matrix_type::value_type;
330  using values_type = typename local_matrix_type::values_type;
331  using scalar_type = typename values_type::non_const_value_type;
332  using ordinal_type = typename local_matrix_type::ordinal_type;
333  using execution_space = typename local_matrix_type::execution_space;
334  // using memory_space = typename local_matrix_type::memory_space;
335  // Be careful with this one, if using Kokkos::ArithTraits<Scalar>
336  // you are likely to run into errors when handling std::complex<>
337  // a good way to work around that is to use the following:
338  // using KAT = Kokkos::ArithTraits<Kokkos::ArithTraits<Scalar>::val_type> >
339  // here we have: value_type = Kokkos::ArithTraits<Scalar>::val_type
340  using KAT = Kokkos::ArithTraits<value_type>;
341 
342  // Get/Create distributed objects
343  RCP<const Map> rowMap = A.getRowMap();
344  RCP<Vector> diag = VectorFactory::Build(rowMap, false);
345 
346  // Now generate local objects
347  local_matrix_type localMatrix = A.getLocalMatrixDevice();
348  auto diagVals = diag->getLocalViewDevice(Xpetra::Access::ReadWrite);
349 
350  ordinal_type numRows = localMatrix.graph.numRows();
351 
352  scalar_type valReplacement_dev = valReplacement;
353 
354  // Note: 2019-11-21, LBV
355  // This could be implemented with a TeamPolicy over the rows
356  // and a TeamVectorRange over the entries in a row if performance
357  // becomes more important here.
358  if (!doLumped)
359  Kokkos::parallel_for(
360  "Utilities::GetMatrixDiagonalInverse",
361  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
362  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
363  bool foundDiagEntry = false;
364  auto myRow = localMatrix.rowConst(rowIdx);
365  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
366  if (myRow.colidx(entryIdx) == rowIdx) {
367  foundDiagEntry = true;
368  if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
369  diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
370  } else {
371  diagVals(rowIdx, 0) = valReplacement_dev;
372  }
373  break;
374  }
375  }
376 
377  if (!foundDiagEntry) {
378  diagVals(rowIdx, 0) = KAT::zero();
379  }
380  });
381  else
382  Kokkos::parallel_for(
383  "Utilities::GetMatrixDiagonalInverse",
384  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
385  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
386  auto myRow = localMatrix.rowConst(rowIdx);
387  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
388  diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
389  }
390  if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
391  diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
392  else
393  diagVals(rowIdx, 0) = valReplacement_dev;
394  });
395 
396  return diag;
397 }
398 
399 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
403  Magnitude tol,
404  Scalar valReplacement,
405  const bool replaceSingleEntryRowWithZero,
406  const bool useAverageAbsDiagVal) {
407  typedef Teuchos::ScalarTraits<Scalar> TST;
408 
409  RCP<Vector> diag = Teuchos::null;
410  const Scalar zero = TST::zero();
411  const Scalar one = TST::one();
412  const Scalar two = one + one;
413 
414  Teuchos::RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
415 
417  Teuchos::rcp_dynamic_cast<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(rcpA);
418  if (bA == Teuchos::null) {
419  RCP<const Map> rowMap = rcpA->getRowMap();
421 
422  if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
423  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetLumpedMatrixDiagonal (Kokkos implementation)");
424  // Implement using Kokkos
425  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
426  using local_matrix_type = typename Matrix::local_matrix_type;
427  using execution_space = typename local_vector_type::execution_space;
428  // using rowmap_type = typename local_matrix_type::row_map_type;
429  // using entries_type = typename local_matrix_type::index_type;
430  using values_type = typename local_matrix_type::values_type;
431  using scalar_type = typename values_type::non_const_value_type;
432  using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
433  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
434  using KAT_M = typename Kokkos::ArithTraits<mag_type>;
435  using size_type = typename local_matrix_type::non_const_size_type;
436 
437  local_vector_type diag_dev = diag->getLocalViewDevice(Xpetra::Access::OverwriteAll);
438  local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
439  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
440  scalar_type valReplacement_dev = valReplacement;
441 
442  if (doReciprocal) {
443  Kokkos::View<int*, execution_space> nnzPerRow("nnz per rows", diag_dev.extent(0));
444  Kokkos::View<scalar_type*, execution_space> regSum("regSum", diag_dev.extent(0));
445  Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev("avgAbsDiagVal");
446  Kokkos::View<int, execution_space> numDiagsEqualToOne_dev("numDiagsEqualToOne");
447 
448  {
449  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for (doReciprocal)");
450  Kokkos::parallel_for(
451  "GetLumpedMatrixDiagonal", my_policy,
452  KOKKOS_LAMBDA(const int rowIdx) {
453  diag_dev(rowIdx, 0) = KAT_S::zero();
454  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
455  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
456  ++entryIdx) {
457  regSum(rowIdx) += local_mat_dev.values(entryIdx);
458  if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
459  ++nnzPerRow(rowIdx);
460  }
461  diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
462  if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
463  Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
464  }
465  }
466 
467  if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
468  Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
469  }
470  });
471  }
472  if (useAverageAbsDiagVal) {
473  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: useAverageAbsDiagVal");
474  typename Kokkos::View<mag_type, execution_space>::host_mirror_type avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
475  Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
476  int numDiagsEqualToOne;
477  Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
478 
479  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal() - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
480  }
481 
482  {
483  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("ComputeLumpedDiagonalInverse: parallel_for (doReciprocal)");
484  Kokkos::parallel_for(
485  "ComputeLumpedDiagonalInverse", my_policy,
486  KOKKOS_LAMBDA(const int rowIdx) {
487  if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
488  diag_dev(rowIdx, 0) = KAT_S::zero();
489  } else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
490  diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
491  } else {
492  if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
493  diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
494  } else {
495  diag_dev(rowIdx, 0) = valReplacement_dev;
496  }
497  }
498  });
499  }
500 
501  } else {
502  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for");
503  Kokkos::parallel_for(
504  "GetLumpedMatrixDiagonal", my_policy,
505  KOKKOS_LAMBDA(const int rowIdx) {
506  diag_dev(rowIdx, 0) = KAT_S::zero();
507  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
508  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
509  ++entryIdx) {
510  diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
511  }
512  });
513  }
514  } else {
515  // Implement using Teuchos
516  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase: GetLumpedMatrixDiagonal: (Teuchos implementation)");
517  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
518  Teuchos::Array<Scalar> regSum(diag->getLocalLength());
521 
522  std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
523 
524  // FIXME 2021-10-22 JHU If this is called with doReciprocal=false, what should the correct behavior be? Currently,
525  // FIXME 2021-10-22 JHU the diagonal entry is set to be the sum of the absolute values of the row entries.
526 
527  const Magnitude zeroMagn = TST::magnitude(zero);
528  Magnitude avgAbsDiagVal = TST::magnitude(zero);
529  int numDiagsEqualToOne = 0;
530  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
531  nnzPerRow[i] = 0;
532  rcpA->getLocalRowView(i, cols, vals);
533  diagVals[i] = zero;
534  for (LocalOrdinal j = 0; j < cols.size(); ++j) {
535  regSum[i] += vals[j];
536  const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
537  if (rowEntryMagn > zeroMagn)
538  nnzPerRow[i]++;
539  diagVals[i] += rowEntryMagn;
540  if (static_cast<size_t>(cols[j]) == i)
541  avgAbsDiagVal += rowEntryMagn;
542  }
543  if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
544  numDiagsEqualToOne++;
545  }
546  if (useAverageAbsDiagVal)
547  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
548  if (doReciprocal) {
549  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
550  if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
551  diagVals[i] = zero;
552  else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
553  diagVals[i] = one / TST::magnitude((two * regSum[i]));
554  else {
555  if (TST::magnitude(diagVals[i]) > tol)
556  diagVals[i] = one / diagVals[i];
557  else {
558  diagVals[i] = valReplacement;
559  }
560  }
561  }
562  }
563  }
564  } else {
566  "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
567  diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(bA->getRangeMapExtractor()->getFullMap(), true);
568 
569  for (size_t row = 0; row < bA->Rows(); ++row) {
570  for (size_t col = 0; col < bA->Cols(); ++col) {
571  if (!bA->getMatrix(row, col).is_null()) {
572  // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset!
573  bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(bA->getMatrix(row, col)) == Teuchos::null);
574  RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
575  RCP<const Vector> dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row, col)));
576  ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
577  bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
578  }
579  }
580  }
581  }
582 
583  return diag;
584 }
585 
586 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
590  // Get/Create distributed objects
591  RCP<const Map> rowMap = A.getRowMap();
593 
594  // Implement using Kokkos
595  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
596  using local_matrix_type = typename Matrix::local_matrix_type;
597  using execution_space = typename local_vector_type::execution_space;
598  using values_type = typename local_matrix_type::values_type;
599  using scalar_type = typename values_type::non_const_value_type;
600  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
601 
602  auto diag_dev = diag->getLocalViewDevice(Xpetra::Access::OverwriteAll);
603  auto local_mat_dev = A.getLocalMatrixDevice();
604  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
605 
606  Kokkos::parallel_for(
607  "GetMatrixMaxMinusOffDiagonal", my_policy,
608  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
609  auto mymax = KAT_S::zero();
610  auto row = local_mat_dev.rowConst(rowIdx);
611  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
612  if (rowIdx != row.colidx(entryIdx)) {
613  if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
614  mymax = -KAT_S::real(row.value(entryIdx));
615  }
616  }
617  diag_dev(rowIdx, 0) = mymax;
618  });
619 
620  return diag;
621 }
622 
623 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
627  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
628 
629  // Get/Create distributed objects
630  RCP<const Map> rowMap = A.getRowMap();
632 
633  // Implement using Kokkos
634  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
635  using local_matrix_type = typename Matrix::local_matrix_type;
636  using execution_space = typename local_vector_type::execution_space;
637  using values_type = typename local_matrix_type::values_type;
638  using scalar_type = typename values_type::non_const_value_type;
639  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
640 
641  auto diag_dev = diag->getLocalViewDevice(Xpetra::Access::OverwriteAll);
642  auto local_mat_dev = A.getLocalMatrixDevice();
643  auto local_block_dev = BlockNumber.getLocalViewDevice(Xpetra::Access::ReadOnly);
644  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
645 
646  Kokkos::parallel_for(
647  "GetMatrixMaxMinusOffDiagonal", my_policy,
648  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
649  auto mymax = KAT_S::zero();
650  auto row = local_mat_dev.row(rowIdx);
651  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
652  if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
653  if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
654  mymax = -KAT_S::real(row.value(entryIdx));
655  }
656  }
657  diag_dev(rowIdx, 0) = mymax;
658  });
659 
660  return diag;
661 }
662 
663 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
668 
669  // check whether input vector "v" is a BlockedVector
670  RCP<const BlockedVector> bv = Teuchos::rcp_dynamic_cast<const BlockedVector>(v);
671  if (bv.is_null() == false) {
672  RCP<BlockedVector> bret = Teuchos::rcp_dynamic_cast<BlockedVector>(ret);
673  TEUCHOS_TEST_FOR_EXCEPTION(bret.is_null() == true, MueLu::Exceptions::RuntimeError, "MueLu::UtilitiesBase::GetInverse: return vector should be of type BlockedVector");
674  RCP<const BlockedMap> bmap = bv->getBlockedMap();
675  for (size_t r = 0; r < bmap->getNumMaps(); ++r) {
676  RCP<const MultiVector> submvec = bv->getMultiVector(r, bmap->getThyraMode());
677  RCP<const Vector> subvec = submvec->getVector(0);
679  bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
680  }
681  return ret;
682  }
683 
684  // v is an {Epetra,Tpetra}Vector: work with the underlying raw data
685  ArrayRCP<Scalar> retVals = ret->getDataNonConst(0);
686  ArrayRCP<const Scalar> inputVals = v->getData(0);
687  for (size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
688  if (Teuchos::ScalarTraits<Scalar>::magnitude(inputVals[i]) > tol)
689  retVals[i] = Teuchos::ScalarTraits<Scalar>::one() / inputVals[i];
690  else
691  retVals[i] = valReplacement;
692  }
693  return ret;
694 }
695 
696 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
697 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
698 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
699 // GetMatrixOverlappedDiagonal(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A) {
700 // RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
701 
702 // // Undo block map (if we have one)
703 // RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
704 // if(!browMap.is_null()) rowMap = browMap->getMap();
705 
706 // RCP<Vector> localDiag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap);
707 // try {
708 // const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
709 // if (crsOp == NULL) {
710 // throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
711 // }
712 // Teuchos::ArrayRCP<size_t> offsets;
713 // crsOp->getLocalDiagOffsets(offsets);
714 // crsOp->getLocalDiagCopy(*localDiag,offsets());
715 // }
716 // catch (...) {
717 // ArrayRCP<Scalar> localDiagVals = localDiag->getDataNonConst(0);
718 // Teuchos::ArrayRCP<Scalar> diagVals = GetMatrixDiagonal(A);
719 // for (LocalOrdinal i = 0; i < localDiagVals.size(); i++)
720 // localDiagVals[i] = diagVals[i];
721 // localDiagVals = diagVals = null;
722 // }
723 
724 // RCP<Vector> diagonal = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap);
725 // RCP< const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
726 // importer = A.getCrsGraph()->getImporter();
727 // if (importer == Teuchos::null) {
728 // importer = Xpetra::ImportFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap, colMap);
729 // }
730 // diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
731 // return diagonal;
732 // }
733 
734 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
738  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
739  RCP<Vector> localDiag = GetMatrixDiagonal(A);
740  RCP<const Import> importer = A.getCrsGraph()->getImporter();
741  if (importer.is_null() && !rowMap->isSameAs(*colMap)) {
742  importer = ImportFactory::Build(rowMap, colMap);
743  }
744  if (!importer.is_null()) {
745  RCP<Vector> diagonal = VectorFactory::Build(colMap);
746  diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
747  return diagonal;
748  } else
749  return localDiag;
750 }
751 
752 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
756  using STS = typename Teuchos::ScalarTraits<SC>;
757 
758  // Undo block map (if we have one)
759  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
760  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
761  if (!browMap.is_null()) rowMap = browMap->getMap();
762 
765  ArrayRCP<SC> localVals = local->getDataNonConst(0);
766 
767  for (LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
768  size_t nnz = A.getNumEntriesInLocalRow(row);
769  ArrayView<const LO> indices;
770  ArrayView<const SC> vals;
771  A.getLocalRowView(row, indices, vals);
772 
773  SC si = STS::zero();
774 
775  for (LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
776  if (indices[colID] != row) {
777  si += vals[colID];
778  }
779  }
780  localVals[row] = si;
781  }
782 
784  importer = A.getCrsGraph()->getImporter();
785  if (importer == Teuchos::null) {
786  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
787  }
788  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
789  return ghosted;
790 }
791 
792 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
796  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
797  using STS = typename Teuchos::ScalarTraits<Scalar>;
798  using MTS = typename Teuchos::ScalarTraits<Magnitude>;
799  using MT = Magnitude;
800  using RealValuedVector = Xpetra::Vector<MT, LO, GO, Node>;
801 
802  // Undo block map (if we have one)
803  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
804  if (!browMap.is_null()) rowMap = browMap->getMap();
805 
808  ArrayRCP<MT> localVals = local->getDataNonConst(0);
809 
810  for (LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
811  size_t nnz = A.getNumEntriesInLocalRow(rowIdx);
812  ArrayView<const LO> indices;
813  ArrayView<const SC> vals;
814  A.getLocalRowView(rowIdx, indices, vals);
815 
816  MT si = MTS::zero();
817 
818  for (LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
819  if (indices[colID] != rowIdx) {
820  si += STS::magnitude(vals[colID]);
821  }
822  }
823  localVals[rowIdx] = si;
824  }
825 
827  importer = A.getCrsGraph()->getImporter();
828  if (importer == Teuchos::null) {
829  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
830  }
831  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
832  return ghosted;
833 }
834 
835 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
838  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
839  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
840  const size_t numVecs = X.getNumVectors();
841  RCP<MultiVector> RES = Residual(Op, X, RHS);
842  Teuchos::Array<Magnitude> norms(numVecs);
843  RES->norm2(norms);
844  return norms;
845 }
846 
847 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
850  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
851  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
852  const size_t numVecs = X.getNumVectors();
853  Residual(Op, X, RHS, Resid);
854  Teuchos::Array<Magnitude> norms(numVecs);
855  Resid.norm2(norms);
856  return norms;
857 }
858 
859 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
862  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
863  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
864  const size_t numVecs = X.getNumVectors();
865  // TODO Op.getRangeMap should return a BlockedMap if it is a BlockedCrsOperator
866  RCP<MultiVector> RES = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(RHS.getMap(), numVecs, false); // no need to initialize to zero
867  Op.residual(X, RHS, *RES);
868  return RES;
869 }
870 
871 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
873  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
874  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides");
875  TEUCHOS_TEST_FOR_EXCEPTION(Resid.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of residual vectors != number of right-hand sides");
876  Op.residual(X, RHS, Resid);
877 }
878 
879 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
880 Scalar
882  PowerMethod(const Matrix& A, bool scaleByDiag,
883  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
884  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
885  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
886 
887  // power iteration
888  RCP<Vector> diagInvVec;
889  if (scaleByDiag) {
890  diagInvVec = GetMatrixDiagonalInverse(A);
891  }
892 
893  Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
894  return lambda;
895 }
896 
897 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
898 Scalar
900  PowerMethod(const Matrix& A, const RCP<Vector>& diagInvVec,
901  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
902  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
903  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
904 
905  // Create three vectors, fill z with random numbers
909 
910  z->setSeed(seed); // seed random number generator
911  z->randomize(true); // use Xpetra implementation: -> same results for Epetra and Tpetra
912 
913  Teuchos::Array<Magnitude> norms(1);
914 
915  typedef Teuchos::ScalarTraits<Scalar> STS;
916 
917  const Scalar zero = STS::zero(), one = STS::one();
918 
919  Scalar lambda = zero;
920  Magnitude residual = STS::magnitude(zero);
921 
922  // power iteration
923  for (int iter = 0; iter < niters; ++iter) {
924  z->norm2(norms); // Compute 2-norm of z
925  q->update(one / norms[0], *z, zero); // Set q = z / normz
926  A.apply(*q, *z); // Compute z = A*q
927  if (diagInvVec != Teuchos::null)
928  z->elementWiseMultiply(one, *diagInvVec, *z, zero);
929  lambda = q->dot(*z); // Approximate maximum eigenvalue: lamba = dot(q,z)
930 
931  if (iter % 100 == 0 || iter + 1 == niters) {
932  r->update(1.0, *z, -lambda, *q, zero); // Compute A*q - lambda*q
933  r->norm2(norms);
934  residual = STS::magnitude(norms[0] / lambda);
935  if (verbose) {
936  std::cout << "Iter = " << iter
937  << " Lambda = " << lambda
938  << " Residual of A*q - lambda*q = " << residual
939  << std::endl;
940  }
941  }
942  if (residual < tolerance)
943  break;
944  }
945  return lambda;
946 }
947 
948 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
949 RCP<Teuchos::FancyOStream>
951  MakeFancy(std::ostream& os) {
952  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(os));
953  return fancy;
954 }
955 
956 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
960  const size_t numVectors = v.size();
961 
963  for (size_t j = 0; j < numVectors; j++) {
964  d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
965  }
967 }
968 
969 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
973  const size_t numVectors = v.size();
975 
977  for (size_t j = 0; j < numVectors; j++) {
978  d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
979  }
981 }
982 
983 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
987  LocalOrdinal numRows = A.getLocalNumRows();
988  typedef Teuchos::ScalarTraits<Scalar> STS;
989  ArrayRCP<bool> boundaryNodes(numRows, true);
990  if (count_twos_as_dirichlet) {
991  for (LocalOrdinal row = 0; row < numRows; row++) {
994  A.getLocalRowView(row, indices, vals);
995  size_t nnz = A.getNumEntriesInLocalRow(row);
996  if (nnz > 2) {
997  size_t col;
998  for (col = 0; col < nnz; col++)
999  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1000  if (!boundaryNodes[row])
1001  break;
1002  boundaryNodes[row] = false;
1003  }
1004  if (col == nnz)
1005  boundaryNodes[row] = true;
1006  }
1007  }
1008  } else {
1009  for (LocalOrdinal row = 0; row < numRows; row++) {
1012  A.getLocalRowView(row, indices, vals);
1013  size_t nnz = A.getNumEntriesInLocalRow(row);
1014  if (nnz > 1)
1015  for (size_t col = 0; col < nnz; col++)
1016  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1017  boundaryNodes[row] = false;
1018  break;
1019  }
1020  }
1021  }
1022  return boundaryNodes;
1023 }
1024 
1025 template <class CrsMatrix>
1026 KOKKOS_FORCEINLINE_FUNCTION bool isDirichletRow(typename CrsMatrix::ordinal_type rowId,
1027  KokkosSparse::SparseRowViewConst<CrsMatrix>& row,
1028  const typename Kokkos::ArithTraits<typename CrsMatrix::value_type>::magnitudeType& tol,
1029  const bool count_twos_as_dirichlet) {
1030  using ATS = Kokkos::ArithTraits<typename CrsMatrix::value_type>;
1031 
1032  auto length = row.length;
1033  bool boundaryNode = true;
1034 
1035  if (count_twos_as_dirichlet) {
1036  if (length > 2) {
1037  decltype(length) colID = 0;
1038  for (; colID < length; colID++)
1039  if ((row.colidx(colID) != rowId) &&
1040  (ATS::magnitude(row.value(colID)) > tol)) {
1041  if (!boundaryNode)
1042  break;
1043  boundaryNode = false;
1044  }
1045  if (colID == length)
1046  boundaryNode = true;
1047  }
1048  } else {
1049  for (decltype(length) colID = 0; colID < length; colID++)
1050  if ((row.colidx(colID) != rowId) &&
1051  (ATS::magnitude(row.value(colID)) > tol)) {
1052  boundaryNode = false;
1053  break;
1054  }
1055  }
1056  return boundaryNode;
1057 }
1058 
1059 template <class SC, class LO, class GO, class NO, class memory_space>
1060 Kokkos::View<bool*, memory_space>
1062  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1063  const bool count_twos_as_dirichlet) {
1064  using impl_scalar_type = typename Kokkos::ArithTraits<SC>::val_type;
1065  using ATS = Kokkos::ArithTraits<impl_scalar_type>;
1066  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1067  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1068 
1069  Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
1070 
1071  if (helpers::isTpetraBlockCrs(A)) {
1072  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = toTpetraBlock(A);
1073  auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1074  auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
1075  auto values = Am.getValuesDevice();
1076  LO numBlockRows = Am.getLocalNumRows();
1077  const LO stride = Am.getBlockSize() * Am.getBlockSize();
1078 
1079  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numBlockRows);
1080 
1081  if (count_twos_as_dirichlet)
1082  throw Exceptions::RuntimeError("BlockCrs does not support counting twos as Dirichlet");
1083 
1084  Kokkos::parallel_for(
1085  "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1086  KOKKOS_LAMBDA(const LO row) {
1087  auto rowView = b_graph.rowConst(row);
1088  auto length = rowView.length;
1089  LO valstart = b_rowptr[row] * stride;
1090 
1091  boundaryNodes(row) = true;
1092  decltype(length) colID = 0;
1093  for (; colID < length; colID++) {
1094  if (rowView.colidx(colID) != row) {
1095  LO current = valstart + colID * stride;
1096  for (LO k = 0; k < stride; k++) {
1097  if (ATS::magnitude(values[current + k]) > tol) {
1098  boundaryNodes(row) = false;
1099  break;
1100  }
1101  }
1102  }
1103  if (boundaryNodes(row) == false)
1104  break;
1105  }
1106  });
1107  } else {
1108  auto localMatrix = A.getLocalMatrixDevice();
1109  LO numRows = A.getLocalNumRows();
1110  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
1111 
1112  Kokkos::parallel_for(
1113  "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1114  KOKKOS_LAMBDA(const LO row) {
1115  auto rowView = localMatrix.rowConst(row);
1116  boundaryNodes(row) = isDirichletRow(row, rowView, tol, count_twos_as_dirichlet);
1117  });
1118  }
1119  if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1120  return boundaryNodes;
1121  else {
1122  Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), boundaryNodes.extent(0));
1123  Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1124  return boundaryNodes2;
1125  }
1126  // CAG: No idea why this is needed to avoid "warning: missing return statement at end of non-void function"
1127  Kokkos::View<bool*, memory_space> dummy("dummy", 0);
1128  return dummy;
1129 }
1130 
1131 template <class SC, class LO, class GO, class NO>
1132 Kokkos::View<bool*, typename NO::device_type::memory_space>
1135  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1136  const bool count_twos_as_dirichlet) {
1137  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
1138 }
1139 
1140 template <class SC, class LO, class GO, class NO>
1141 Kokkos::View<bool*, typename Kokkos::HostSpace>
1144  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1145  const bool count_twos_as_dirichlet) {
1146  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
1147 }
1148 
1149 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1153  // assume that there is no zero diagonal in matrix
1154  bHasZeroDiagonal = false;
1155 
1157  A.getLocalDiagCopy(*diagVec);
1158  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
1159 
1160  LocalOrdinal numRows = A.getLocalNumRows();
1161  typedef Teuchos::ScalarTraits<Scalar> STS;
1162  ArrayRCP<bool> boundaryNodes(numRows, false);
1163  for (LocalOrdinal row = 0; row < numRows; row++) {
1166  A.getLocalRowView(row, indices, vals);
1167  size_t nnz = 0; // collect nonzeros in row (excluding the diagonal)
1168  bool bHasDiag = false;
1169  for (decltype(indices.size()) col = 0; col < indices.size(); col++) {
1170  if (indices[col] != row) {
1171  if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1172  nnz++;
1173  }
1174  } else
1175  bHasDiag = true; // found a diagonal entry
1176  }
1177  if (bHasDiag == false)
1178  bHasZeroDiagonal = true; // we found at least one row without a diagonal
1179  else if (nnz == 0)
1180  boundaryNodes[row] = true;
1181  }
1182  return boundaryNodes;
1183 }
1184 
1185 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1190  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1191  const bool count_twos_as_dirichlet) {
1192  using range_type = Kokkos::RangePolicy<LO, typename Node::execution_space>;
1193 
1194  LocalOrdinal numRows = A.getLocalNumRows();
1195  LocalOrdinal numVectors = RHS.getNumVectors();
1196  TEUCHOS_ASSERT_EQUALITY(numVectors, Teuchos::as<LocalOrdinal>(InitialGuess.getNumVectors()));
1197 #ifdef MUELU_DEBUG
1198  TEUCHOS_ASSERT(RHS.getMap()->isCompatible(InitialGuess.getMap()));
1199 #endif
1200 
1201  auto lclRHS = RHS.getLocalViewDevice(Xpetra::Access::ReadOnly);
1202  auto lclInitialGuess = InitialGuess.getLocalViewDevice(Xpetra::Access::ReadWrite);
1203  auto lclA = A.getLocalMatrixDevice();
1204 
1205  Kokkos::parallel_for(
1206  "MueLu:Utils::EnforceInitialCondition", range_type(0, numRows),
1207  KOKKOS_LAMBDA(const LO i) {
1208  auto row = lclA.rowConst(i);
1209  if (isDirichletRow(i, row, tol, count_twos_as_dirichlet)) {
1210  for (LocalOrdinal j = 0; j < numVectors; ++j)
1211  lclInitialGuess(i, j) = lclRHS(i, j);
1212  }
1213  });
1214 }
1215 
1216 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1219  Teuchos::ArrayRCP<bool> nonzeros) {
1220  TEUCHOS_ASSERT(vals.size() == nonzeros.size());
1221  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
1222  const magnitudeType eps = 2.0 * Teuchos::ScalarTraits<magnitudeType>::eps();
1223  for (size_t i = 0; i < static_cast<size_t>(vals.size()); i++) {
1224  nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
1225  }
1226 }
1227 
1228 // Find Nonzeros in a device view
1229 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1232  Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1233  using ATS = Kokkos::ArithTraits<Scalar>;
1234  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1235  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1236  TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
1237  const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
1238 
1239  Kokkos::parallel_for(
1240  "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1241  KOKKOS_LAMBDA(const size_t i) {
1242  nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
1243  });
1244 }
1245 
1246 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1249  const Teuchos::ArrayRCP<bool>& dirichletRows,
1250  Teuchos::ArrayRCP<bool> dirichletCols,
1251  Teuchos::ArrayRCP<bool> dirichletDomain) {
1256  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getLocalNumElements());
1257  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getLocalNumElements());
1258  TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getLocalNumElements());
1260  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1261  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1262  if (dirichletRows[i]) {
1264  ArrayView<const Scalar> values;
1265  A.getLocalRowView(i, indices, values);
1266  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1267  myColsToZero->replaceLocalValue(indices[j], 0, one);
1268  }
1269  }
1270 
1273  if (!importer.is_null()) {
1274  globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1, /*zeroOut=*/true);
1275  // export to domain map
1276  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1277  // import to column map
1278  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1279  } else
1280  globalColsToZero = myColsToZero;
1281 
1282  FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1283  FindNonZeros(myColsToZero->getData(0), dirichletCols);
1284 }
1285 
1286 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1289  const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1290  Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1291  Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1292  using ATS = Kokkos::ArithTraits<Scalar>;
1293  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1294  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1298  TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1299  TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1300  TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1302  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1303  auto myColsToZeroView = myColsToZero->getLocalViewDevice(Xpetra::Access::ReadWrite);
1304  auto localMatrix = A.getLocalMatrixDevice();
1305  Kokkos::parallel_for(
1306  "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1307  KOKKOS_LAMBDA(const LocalOrdinal row) {
1308  if (dirichletRows(row)) {
1309  auto rowView = localMatrix.row(row);
1310  auto length = rowView.length;
1311 
1312  for (decltype(length) colID = 0; colID < length; colID++)
1313  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1314  }
1315  });
1316 
1319  if (!importer.is_null()) {
1320  globalColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, /*zeroOut=*/true);
1321  // export to domain map
1322  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1323  // import to column map
1324  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1325  } else
1326  globalColsToZero = myColsToZero;
1327  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(globalColsToZero->getLocalViewDevice(Xpetra::Access::ReadOnly), dirichletDomain);
1328  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(myColsToZero->getLocalViewDevice(Xpetra::Access::ReadOnly), dirichletCols);
1329 }
1330 
1331 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1334  typedef Teuchos::ScalarTraits<Scalar> STS;
1336  typedef Teuchos::ScalarTraits<MT> MTS;
1338  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1339  size_t nnz = A.getNumEntriesInLocalRow(row);
1342  A.getLocalRowView(row, indices, vals);
1343 
1344  Scalar rowsum = STS::zero();
1345  Scalar diagval = STS::zero();
1346 
1347  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1348  LocalOrdinal col = indices[colID];
1349  if (row == col)
1350  diagval = vals[colID];
1351  rowsum += vals[colID];
1352  }
1353  // printf("A(%d,:) row_sum(point) = %6.4e\n",row,rowsum);
1354  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1355  // printf("Row %d triggers rowsum\n",(int)row);
1356  dirichletRows[row] = true;
1357  }
1358  }
1359 }
1360 
1361 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1364  typedef Teuchos::ScalarTraits<Scalar> STS;
1366  typedef Teuchos::ScalarTraits<MT> MTS;
1367  RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1368 
1369  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1370 
1371  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1372  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1373  size_t nnz = A.getNumEntriesInLocalRow(row);
1374  ArrayView<const LocalOrdinal> indices;
1375  ArrayView<const Scalar> vals;
1376  A.getLocalRowView(row, indices, vals);
1377 
1378  Scalar rowsum = STS::zero();
1379  Scalar diagval = STS::zero();
1380  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1381  LocalOrdinal col = indices[colID];
1382  if (row == col)
1383  diagval = vals[colID];
1384  if (block_id[row] == block_id[col])
1385  rowsum += vals[colID];
1386  }
1387 
1388  // printf("A(%d,:) row_sum(block) = %6.4e\n",row,rowsum);
1389  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1390  // printf("Row %d triggers rowsum\n",(int)row);
1391  dirichletRows[row] = true;
1392  }
1393  }
1394 }
1395 
1396 // Applies rowsum criterion
1397 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1399  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1400  Kokkos::View<bool*, memory_space>& dirichletRows) {
1401  typedef Teuchos::ScalarTraits<Scalar> STS;
1403 
1404  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1405  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1406 
1407  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1408  size_t nnz = A.getNumEntriesInLocalRow(row);
1411  A.getLocalRowView(row, indices, vals);
1412 
1413  Scalar rowsum = STS::zero();
1414  Scalar diagval = STS::zero();
1415  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1416  LocalOrdinal col = indices[colID];
1417  if (row == col)
1418  diagval = vals[colID];
1419  rowsum += vals[colID];
1420  }
1421  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1422  dirichletRowsHost(row) = true;
1423  }
1424 
1425  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1426 }
1427 
1428 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1431  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1432  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1433  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
1434 }
1435 
1436 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1439  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1440  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1441  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
1442 }
1443 
1444 // Applies rowsum criterion
1445 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1448  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1449  Kokkos::View<bool*, memory_space>& dirichletRows) {
1450  typedef Teuchos::ScalarTraits<Scalar> STS;
1452 
1453  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1454 
1455  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1456  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1457 
1458  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1459  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1460  size_t nnz = A.getNumEntriesInLocalRow(row);
1463  A.getLocalRowView(row, indices, vals);
1464 
1465  Scalar rowsum = STS::zero();
1466  Scalar diagval = STS::zero();
1467  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1468  LocalOrdinal col = indices[colID];
1469  if (row == col)
1470  diagval = vals[colID];
1471  if (block_id[row] == block_id[col])
1472  rowsum += vals[colID];
1473  }
1474  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1475  dirichletRowsHost(row) = true;
1476  }
1477 
1478  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1479 }
1480 
1481 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1485  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1486  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1487  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
1488 }
1489 
1490 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1494  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1495  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1496  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
1497 }
1498 
1499 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1503  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1509  myColsToZero->putScalar(zero);
1510  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1511  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1512  if (dirichletRows[i]) {
1515  A.getLocalRowView(i, indices, values);
1516  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1517  myColsToZero->replaceLocalValue(indices[j], 0, one);
1518  }
1519  }
1520 
1522  globalColsToZero->putScalar(zero);
1524  // export to domain map
1525  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1526  // import to column map
1527  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1528  Teuchos::ArrayRCP<const Scalar> myCols = myColsToZero->getData(0);
1529  Teuchos::ArrayRCP<bool> dirichletCols(colMap->getLocalNumElements(), true);
1531  for (size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1532  dirichletCols[i] = Teuchos::ScalarTraits<Scalar>::magnitude(myCols[i]) > 2.0 * eps;
1533  }
1534  return dirichletCols;
1535 }
1536 
1537 template <class SC, class LO, class GO, class NO>
1538 Kokkos::View<bool*, typename NO::device_type>
1541  const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1542  using ATS = Kokkos::ArithTraits<SC>;
1543  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1544  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1545 
1546  SC zero = ATS::zero();
1547 
1548  auto localMatrix = A.getLocalMatrixDevice();
1549  LO numRows = A.getLocalNumRows();
1550 
1552  Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> colMap = A.getColMap();
1554  myColsToZero->putScalar(zero);
1555  auto myColsToZeroView = myColsToZero->getLocalViewDevice(Xpetra::Access::ReadWrite);
1556  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1557  Kokkos::parallel_for(
1558  "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1559  KOKKOS_LAMBDA(const LO row) {
1560  if (dirichletRows(row)) {
1561  auto rowView = localMatrix.row(row);
1562  auto length = rowView.length;
1563 
1564  for (decltype(length) colID = 0; colID < length; colID++)
1565  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1566  }
1567  });
1568 
1570  globalColsToZero->putScalar(zero);
1572  // export to domain map
1573  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1574  // import to column map
1575  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1576 
1577  auto myCols = myColsToZero->getLocalViewDevice(Xpetra::Access::ReadOnly);
1578  size_t numColEntries = colMap->getLocalNumElements();
1579  Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
1580  const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
1581 
1582  Kokkos::parallel_for(
1583  "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1584  KOKKOS_LAMBDA(const size_t i) {
1585  dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1586  });
1587  return dirichletCols;
1588 }
1589 
1590 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1591 Scalar
1594  // We check only row maps. Column may be different. One would hope that they are the same, as we typically
1595  // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
1596  // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
1597  // simple as couple of elements swapped)
1598  TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
1599  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
1600 
1601  const Map& AColMap = *A.getColMap();
1602  const Map& BColMap = *B.getColMap();
1603 
1606  size_t nnzA = 0, nnzB = 0;
1607 
1608  // We use a simple algorithm
1609  // for each row we fill valBAll array with the values in the corresponding row of B
1610  // as such, it serves as both sorted array and as storage, so we don't need to do a
1611  // tricky problem: "find a value in the row of B corresponding to the specific GID"
1612  // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
1613  // corresponding entries.
1614  // The algorithm should be reasonably cheap, as it does not sort anything, provided
1615  // that getLocalElement and getGlobalElement functions are reasonably effective. It
1616  // *is* possible that the costs are hidden in those functions, but if maps are close
1617  // to linear maps, we should be fine
1618  Teuchos::Array<Scalar> valBAll(BColMap.getLocalNumElements());
1619 
1621  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(), f = zero, gf;
1622  size_t numRows = A.getLocalNumRows();
1623  for (size_t i = 0; i < numRows; i++) {
1624  A.getLocalRowView(i, indA, valA);
1625  B.getLocalRowView(i, indB, valB);
1626  nnzA = indA.size();
1627  nnzB = indB.size();
1628 
1629  // Set up array values
1630  for (size_t j = 0; j < nnzB; j++)
1631  valBAll[indB[j]] = valB[j];
1632 
1633  for (size_t j = 0; j < nnzA; j++) {
1634  // The cost of the whole Frobenius dot product function depends on the
1635  // cost of the getLocalElement and getGlobalElement functions here.
1636  LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1637  if (ind != invalid)
1638  f += valBAll[ind] * valA[j];
1639  }
1640 
1641  // Clean up array values
1642  for (size_t j = 0; j < nnzB; j++)
1643  valBAll[indB[j]] = zero;
1644  }
1645 
1646  MueLu_sumAll(AColMap.getComm(), f, gf);
1647 
1648  return gf;
1649 }
1650 
1651 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1654  // Distribute the seeds evenly in [1,maxint-1]. This guarantees nothing
1655  // about where in random number stream we are, but avoids overflow situations
1656  // in parallel when multiplying by a PID. It would be better to use
1657  // a good parallel random number generator.
1658  double one = 1.0;
1659  int maxint = INT_MAX; //= 2^31-1 = 2147483647 for 32-bit integers
1660  int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.getRank() + 1) / (comm.getSize() + one)));
1661  if (mySeed < 1 || mySeed == maxint) {
1662  std::ostringstream errStr;
1663  errStr << "Error detected with random seed = " << mySeed << ". It should be in the interval [1,2^31-2].";
1664  throw Exceptions::RuntimeError(errStr.str());
1665  }
1666  std::srand(mySeed);
1667  // For Tpetra, we could use Kokkos' random number generator here.
1669  // Epetra
1670  // MultiVector::Random() -> Epetra_Util::RandomDouble() -> Epetra_Utils::RandomInt()
1671  // Its own random number generator, based on Seed_. Seed_ is initialized in Epetra_Util constructor with std::rand()
1672  // So our setting std::srand() affects that too
1673 }
1674 
1675 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1678  std::vector<LocalOrdinal>& dirichletRows, bool count_twos_as_dirichlet) {
1680  dirichletRows.resize(0);
1681  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1684  A->getLocalRowView(i, indices, values);
1685  int nnz = 0;
1686  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1688  nnz++;
1689  }
1690  }
1691  if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1692  dirichletRows.push_back(i);
1693  }
1694  }
1695 }
1696 
1697 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1700  const std::vector<LocalOrdinal>& dirichletRows) {
1701  RCP<const Map> Rmap = A->getRowMap();
1702  RCP<const Map> Cmap = A->getColMap();
1705 
1706  for (size_t i = 0; i < dirichletRows.size(); i++) {
1707  GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1708 
1711  A->getLocalRowView(dirichletRows[i], indices, values);
1712  // NOTE: This won't work with fancy node types.
1713  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1714  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1715  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1716  valuesNC[j] = one;
1717  else
1718  valuesNC[j] = zero;
1719  }
1720  }
1721 }
1722 
1723 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1726  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1728  RCP<const Map> domMap = A->getDomainMap();
1729  RCP<const Map> ranMap = A->getRangeMap();
1730  RCP<const Map> Rmap = A->getRowMap();
1731  RCP<const Map> Cmap = A->getColMap();
1732  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1735  A->resumeFill();
1736  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1737  if (dirichletRows[i]) {
1738  GlobalOrdinal row_gid = Rmap->getGlobalElement(i);
1739 
1742  A->getLocalRowView(i, indices, values);
1743 
1744  Teuchos::ArrayRCP<Scalar> valuesNC(values.size());
1745  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1746  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1747  valuesNC[j] = one;
1748  else
1749  valuesNC[j] = zero;
1750  }
1751  A->replaceLocalValues(i, indices, valuesNC());
1752  }
1753  }
1754  A->fillComplete(domMap, ranMap);
1755 }
1756 
1757 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1760  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1762  using ATS = Kokkos::ArithTraits<Scalar>;
1763  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1764  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1765 
1770 
1771  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1772 
1773  auto localMatrix = A->getLocalMatrixDevice();
1774  auto localRmap = Rmap->getLocalMap();
1775  auto localCmap = Cmap->getLocalMap();
1776 
1777  Kokkos::parallel_for(
1778  "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1779  KOKKOS_LAMBDA(const LocalOrdinal row) {
1780  if (dirichletRows(row)) {
1781  auto rowView = localMatrix.row(row);
1782  auto length = rowView.length;
1783  auto row_gid = localRmap.getGlobalElement(row);
1784  auto row_lid = localCmap.getLocalElement(row_gid);
1785 
1786  for (decltype(length) colID = 0; colID < length; colID++)
1787  if (rowView.colidx(colID) == row_lid)
1788  rowView.value(colID) = impl_ATS::one();
1789  else
1790  rowView.value(colID) = impl_ATS::zero();
1791  }
1792  });
1793 }
1794 
1795 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1798  const std::vector<LocalOrdinal>& dirichletRows,
1799  Scalar replaceWith) {
1800  for (size_t i = 0; i < dirichletRows.size(); i++) {
1803  A->getLocalRowView(dirichletRows[i], indices, values);
1804  // NOTE: This won't work with fancy node types.
1805  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1806  for (size_t j = 0; j < (size_t)indices.size(); j++)
1807  valuesNC[j] = replaceWith;
1808  }
1809 }
1810 
1811 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1814  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1815  Scalar replaceWith) {
1816  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == A->getRowMap()->getLocalNumElements());
1817  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1818  if (dirichletRows[i]) {
1821  A->getLocalRowView(i, indices, values);
1822  // NOTE: This won't work with fancy node types.
1823  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1824  for (size_t j = 0; j < (size_t)indices.size(); j++)
1825  valuesNC[j] = replaceWith;
1826  }
1827  }
1828 }
1829 
1830 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1833  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1834  Scalar replaceWith) {
1835  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == X->getMap()->getLocalNumElements());
1836  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1837  if (dirichletRows[i]) {
1838  for (size_t j = 0; j < X->getNumVectors(); j++)
1839  X->replaceLocalValue(i, j, replaceWith);
1840  }
1841  }
1842 }
1843 
1844 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1847  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1848  Scalar replaceWith) {
1849  using ATS = Kokkos::ArithTraits<Scalar>;
1850  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1851 
1852  typename ATS::val_type impl_replaceWith = replaceWith;
1853 
1854  auto localMatrix = A->getLocalMatrixDevice();
1855  LocalOrdinal numRows = A->getLocalNumRows();
1856 
1857  Kokkos::parallel_for(
1858  "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1859  KOKKOS_LAMBDA(const LocalOrdinal row) {
1860  if (dirichletRows(row)) {
1861  auto rowView = localMatrix.row(row);
1862  auto length = rowView.length;
1863  for (decltype(length) colID = 0; colID < length; colID++)
1864  rowView.value(colID) = impl_replaceWith;
1865  }
1866  });
1867 }
1868 
1869 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1872  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1873  Scalar replaceWith) {
1874  using ATS = Kokkos::ArithTraits<Scalar>;
1875  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1876 
1877  typename ATS::val_type impl_replaceWith = replaceWith;
1878 
1879  auto myCols = X->getLocalViewDevice(Xpetra::Access::ReadWrite);
1880  size_t numVecs = X->getNumVectors();
1881  Kokkos::parallel_for(
1882  "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1883  KOKKOS_LAMBDA(const size_t i) {
1884  if (dirichletRows(i)) {
1885  for (size_t j = 0; j < numVecs; j++)
1886  myCols(i, j) = impl_replaceWith;
1887  }
1888  });
1889 }
1890 
1891 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1894  const Teuchos::ArrayRCP<const bool>& dirichletCols,
1895  Scalar replaceWith) {
1896  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == A->getColMap()->getLocalNumElements());
1897  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1900  A->getLocalRowView(i, indices, values);
1901  // NOTE: This won't work with fancy node types.
1902  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1903  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1904  if (dirichletCols[indices[j]])
1905  valuesNC[j] = replaceWith;
1906  }
1907 }
1908 
1909 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1912  const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1913  Scalar replaceWith) {
1914  using ATS = Kokkos::ArithTraits<Scalar>;
1915  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1916 
1917  typename ATS::val_type impl_replaceWith = replaceWith;
1918 
1919  auto localMatrix = A->getLocalMatrixDevice();
1920  LocalOrdinal numRows = A->getLocalNumRows();
1921 
1922  Kokkos::parallel_for(
1923  "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1924  KOKKOS_LAMBDA(const LocalOrdinal row) {
1925  auto rowView = localMatrix.row(row);
1926  auto length = rowView.length;
1927  for (decltype(length) colID = 0; colID < length; colID++)
1928  if (dirichletCols(rowView.colidx(colID))) {
1929  rowView.value(colID) = impl_replaceWith;
1930  }
1931  });
1932 }
1933 
1934 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1939  // Make sure A's RowMap == DomainMap
1940  if (!A->getRowMap()->isSameAs(*A->getDomainMap())) {
1941  throw std::runtime_error("UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
1942  }
1944  bool has_import = !importer.is_null();
1945 
1946  // Find the Dirichlet rows
1947  std::vector<LocalOrdinal> dirichletRows;
1948  FindDirichletRows(A, dirichletRows);
1949 
1950 #if 0
1951  printf("[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
1952  for(size_t i=0; i<(size_t) dirichletRows.size(); i++)
1953  printf("%d ",dirichletRows[i]);
1954  printf("\n");
1955  fflush(stdout);
1956 #endif
1957  // Allocate all as non-Dirichlet
1959  isDirichletCol = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getColMap(), true);
1960 
1961  {
1962  Teuchos::ArrayRCP<int> dr_rcp = isDirichletRow->getDataNonConst(0);
1963  Teuchos::ArrayView<int> dr = dr_rcp();
1964  Teuchos::ArrayRCP<int> dc_rcp = isDirichletCol->getDataNonConst(0);
1965  Teuchos::ArrayView<int> dc = dc_rcp();
1966  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1967  dr[dirichletRows[i]] = 1;
1968  if (!has_import) dc[dirichletRows[i]] = 1;
1969  }
1970  }
1971 
1972  if (has_import)
1973  isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
1974 }
1975 
1976 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1980  using ISC = typename Kokkos::ArithTraits<Scalar>::val_type;
1981  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1982  using local_matrix_type = typename CrsMatrix::local_matrix_type;
1983  using values_type = typename local_matrix_type::values_type;
1984 
1985  const ISC ONE = Kokkos::ArithTraits<ISC>::one();
1986  const ISC ZERO = Kokkos::ArithTraits<ISC>::zero();
1987 
1988  // Copy the values array of the old matrix to a new array, replacing all the non-zeros with one
1989  auto localMatrix = original->getLocalMatrixDevice();
1990  TEUCHOS_TEST_FOR_EXCEPTION(!original->hasCrsGraph(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: Cannot get CrsGraph");
1991  values_type new_values("values", localMatrix.nnz());
1992 
1993  Kokkos::parallel_for(
1994  "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(const size_t i) {
1995  if (localMatrix.values(i) != ZERO)
1996  new_values(i) = ONE;
1997  else
1998  new_values(i) = ZERO;
1999  });
2000 
2001  // Build the new matrix
2002  RCP<Matrix> NewMatrix = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(original->getCrsGraph(), new_values);
2003  TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: MatrixFactory::Build() did not return matrix");
2004  NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
2005  return NewMatrix;
2006 }
2007 
2008 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2014  Xpetra::UnderlyingLib lib = sourceBlockedMap.lib();
2015 
2016  // De-stride the map if we have to (might regret this later)
2017  RCP<const Map> fullMap = sourceBlockedMap.getMap();
2018  RCP<const Map> stridedMap = Teuchos::rcp_dynamic_cast<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node>>(fullMap);
2019  if (!stridedMap.is_null()) fullMap = stridedMap->getMap();
2020 
2021  // Initial sanity checking for map compatibil
2022  const size_t numSubMaps = sourceBlockedMap.getNumMaps();
2023  if (!Importer.getSourceMap()->isCompatible(*fullMap))
2024  throw std::runtime_error("GenerateBlockedTargetMap(): Map compatibility error");
2025 
2026  // Build an indicator vector
2028 
2029  for (size_t i = 0; i < numSubMaps; i++) {
2030  RCP<const Map> map = sourceBlockedMap.getMap(i);
2031 
2032  for (size_t j = 0; j < map->getLocalNumElements(); j++) {
2033  LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2034  block_ids->replaceLocalValue(jj, (int)i);
2035  }
2036  }
2037 
2038  // Get the block ids for the new map
2039  RCP<const Map> targetMap = Importer.getTargetMap();
2041  new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
2042  Teuchos::ArrayRCP<const int> dataRCP = new_block_ids->getData(0);
2043  Teuchos::ArrayView<const int> data = dataRCP();
2044 
2045  // Get the GIDs for each subblock
2046  Teuchos::Array<Teuchos::Array<GlobalOrdinal>> elementsInSubMap(numSubMaps);
2047  for (size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2048  elementsInSubMap[data[i]].push_back(targetMap->getGlobalElement(i));
2049  }
2050 
2051  // Generate the new submaps
2052  std::vector<RCP<const Map>> subMaps(numSubMaps);
2053  for (size_t i = 0; i < numSubMaps; i++) {
2054  subMaps[i] = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), elementsInSubMap[i](), targetMap->getIndexBase(), targetMap->getComm());
2055  }
2056 
2057  // Build the BlockedMap
2058  return rcp(new BlockedMap(targetMap, subMaps));
2059 }
2060 
2061 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2064  if ((rowMap.lib() == Xpetra::UseTpetra) && (colMap.lib() == Xpetra::UseTpetra)) {
2065  auto tpRowMap = toTpetra(rowMap);
2066  auto tpColMap = toTpetra(colMap);
2067  return tpColMap.isLocallyFitted(tpRowMap);
2068  }
2069 
2070  ArrayView<const GlobalOrdinal> rowElements = rowMap.getLocalElementList();
2071  ArrayView<const GlobalOrdinal> colElements = colMap.getLocalElementList();
2072 
2073  const size_t numElements = rowElements.size();
2074 
2075  if (size_t(colElements.size()) < numElements)
2076  return false;
2077 
2078  bool goodMap = true;
2079  for (size_t i = 0; i < numElements; i++)
2080  if (rowElements[i] != colElements[i]) {
2081  goodMap = false;
2082  break;
2083  }
2084 
2085  return goodMap;
2086 }
2087 
2088 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2093  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2094  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2095  using device = typename local_graph_type::device_type;
2096  using execution_space = typename local_matrix_type::execution_space;
2097  using ordinal_type = typename local_matrix_type::ordinal_type;
2098 
2099  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2100 
2101  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2102 
2105 
2106  // Copy out and reorder data
2107  auto view1D = Kokkos::subview(retval->getLocalViewDevice(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2108  Kokkos::parallel_for(
2109  "Utilities::ReverseCuthillMcKee",
2110  Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2111  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2112  view1D(rcmOrder(rowIdx)) = rowIdx;
2113  });
2114  return retval;
2115 }
2116 
2117 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2122  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2123  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2124  using device = typename local_graph_type::device_type;
2125  using execution_space = typename local_matrix_type::execution_space;
2126  using ordinal_type = typename local_matrix_type::ordinal_type;
2127 
2128  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2129  LocalOrdinal numRows = localGraph.numRows();
2130 
2131  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2132 
2135 
2136  // Copy out data
2137  auto view1D = Kokkos::subview(retval->getLocalViewDevice(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2138  // Since KokkosKernels produced RCM, also reverse the order of the view to get CM
2139  Kokkos::parallel_for(
2140  "Utilities::ReverseCuthillMcKee",
2141  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2142  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2143  view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2144  });
2145  return retval;
2146 }
2147 
2148 } // namespace MueLu
2149 
2150 #define MUELU_UTILITIESBASE_SHORT
2151 #endif // MUELU_UTILITIESBASE_DEF_HPP
2152 
2153 // LocalWords: LocalOrdinal
static void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon.
KOKKOS_FORCEINLINE_FUNCTION bool isDirichletRow(typename CrsMatrix::ordinal_type rowId, KokkosSparse::SparseRowViewConst< CrsMatrix > &row, const typename Kokkos::ArithTraits< typename CrsMatrix::value_type >::magnitudeType &tol, const bool count_twos_as_dirichlet)
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows)
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< bool > &dirichletRows, Teuchos::ArrayRCP< bool > dirichletCols, Teuchos::ArrayRCP< bool > dirichletDomain)
Detects Dirichlet columns &amp; domains from a list of Dirichlet rows.
#define MueLu_sumAll(rcpComm, in, out)
Array< T > & append(const T &x)
virtual int getSize() const =0
MueLu::DefaultLocalOrdinal LocalOrdinal
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
virtual LO getBlockSize() const override
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Matrix &Op)
static void FindDirichletRowsAndPropagateToCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletRow, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletCol)
virtual int getRank() const =0
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
static magnitudeType eps()
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
virtual const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a graph.
size_type size() const
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
LocalOrdinal LO
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > ReplaceNonZerosWithOnes(const RCP< Matrix > &original)
Creates a copy of a matrix where the non-zero entries are replaced by ones.
size_t getLocalNumRows() const override
size_type size() const
Exception throws to report incompatible objects (like maps).
static void SetRandomSeed(const Teuchos::Comm< int > &comm)
Set seed for random number generator.
MueLu::DefaultNode Node
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getTargetMap() const =0
static RCP< Time > getNewTimer(const std::string &name)
void resize(const size_type n, const T &val=T())
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
virtual void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const =0
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
virtual void getLocalRowView(LocalOrdinal LocalRow, ArrayView< const LocalOrdinal > &indices, ArrayView< const Scalar > &values) const =0
static void FindDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, std::vector< LocalOrdinal > &dirichletRows, bool count_twos_as_dirichlet=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static RCP< CrsMatrixWrap > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a matrix.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
#define I(i, j)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
virtual bool isFillComplete() const =0
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
size_t getNumMaps() const
virtual void replaceLocalValues(LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)=0
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getMap() const =0
static RCP< const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > > GeneratedBlockedTargetMap(const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > &sourceBlockedMap, const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
T * getRawPtr() const
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static void seedrandom(unsigned int s)
static Teuchos::ArrayRCP< const bool > DetectDirichletRowsExt(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool &bHasZeroDiagonal, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::zero())
Detect Dirichlet rows (extended version)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
const RCP< const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getMap(size_t i, bool bThyraMode=false) const
virtual UnderlyingLib lib() const
static magnitudeType magnitude(T a)
TransListIter iter
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
virtual RCP< const CrsGraph > getCrsGraph() const =0
static Teuchos::Array< Magnitude > ResidualNorm(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static void EnforceInitialCondition(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS, Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &InitialGuess, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows and copy values from RHS multivector to InitialGuess for Dirichlet rows...
int length() const
size_type size() const
Scalar SC
impl_scalar_type_dualview::t_dev::const_type getValuesDevice(const LO &lclRow) const
virtual Teuchos::RCP< const Map > getRangeMap() const =0
static RCP< Teuchos::FancyOStream > MakeFancy(std::ostream &os)
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
void residual(const Operator< SC, LO, GO, NO > &Aop, const MultiVector< SC, LO, GO, NO > &X_in, const MultiVector< SC, LO, GO, NO > &B_in, MultiVector< SC, LO, GO, NO > &R_in)
Exception throws to report errors in the internal logical of the program.
static Scalar Frobenius(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B)
Frobenius inner product of two matrices.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getSourceMap() const =0
#define TEUCHOS_ASSERT(assertion_test)
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
virtual size_t getNumVectors() const =0
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
virtual UnderlyingLib lib() const =0
static Kokkos::View< bool *, typename NO::device_type::memory_space > DetectDirichletRows_kokkos(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
virtual Teuchos::ArrayRCP< const Scalar > getData(size_t j) const =0
virtual Teuchos::RCP< const Map > getDomainMap() const =0
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)
bool is_null() const