MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UtilitiesBase_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_UTILITIESBASE_DEF_HPP
11 #define MUELU_UTILITIESBASE_DEF_HPP
12 
13 #include "MueLu_ConfigDefs.hpp"
14 
16 
17 #include <Kokkos_Core.hpp>
18 #include <KokkosSparse_CrsMatrix.hpp>
19 #include <KokkosSparse_getDiagCopy.hpp>
20 
21 #include <Xpetra_BlockedVector.hpp>
22 #include <Xpetra_BlockedMap.hpp>
23 #include <Xpetra_BlockedMultiVector.hpp>
24 #include <Xpetra_ExportFactory.hpp>
25 
26 #include <Xpetra_Import.hpp>
27 #include <Xpetra_ImportFactory.hpp>
28 #include <Xpetra_MatrixMatrix.hpp>
29 #include <Xpetra_CrsGraph.hpp>
31 #include <Xpetra_CrsMatrixWrap.hpp>
32 #include <Xpetra_StridedMap.hpp>
33 
34 #include "MueLu_Exceptions.hpp"
36 
37 #include <KokkosKernels_Handle.hpp>
38 #include <KokkosGraph_RCM.hpp>
39 
40 namespace MueLu {
41 
42 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
46  if (Op.is_null())
47  return Teuchos::null;
48  return rcp(new CrsMatrixWrap(Op));
49 }
50 
51 template <class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal,
54  class Node>
57  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold,
58  const bool keepDiagonal) {
60  using row_ptr_type = typename crs_matrix::local_graph_type::row_map_type::non_const_type;
61  using col_idx_type = typename crs_matrix::local_graph_type::entries_type::non_const_type;
62  using vals_type = typename crs_matrix::local_matrix_type::values_type;
63  using execution_space = typename crs_matrix::local_matrix_type::execution_space;
64 
65  using ATS = Kokkos::ArithTraits<Scalar>;
66  using impl_SC = typename ATS::val_type;
67  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
68 
69  auto lclA = A->getLocalMatrixDevice();
70 
71  auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
72  col_idx_type idx;
73  vals_type vals;
74  LocalOrdinal nnz;
75 
76  if (keepDiagonal) {
77  auto lclRowMap = A->getRowMap()->getLocalMap();
78  auto lclColMap = A->getColMap()->getLocalMap();
79  Kokkos::parallel_scan(
80  "removeSmallEntries::rowptr",
81  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
82  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
83  auto row = lclA.row(rlid);
84  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
85  for (LocalOrdinal k = 0; k < row.length; ++k) {
86  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
87  partial_nnz += 1;
88  }
89  }
90  if (is_final)
91  rowptr(rlid + 1) = partial_nnz;
92  },
93  nnz);
94 
95  idx = col_idx_type("idx", nnz);
96  vals = vals_type("vals", nnz);
97 
98  Kokkos::parallel_for(
99  "removeSmallEntries::indicesValues",
100  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
101  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
102  auto row = lclA.row(rlid);
103  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
104  auto I = rowptr(rlid);
105  for (LocalOrdinal k = 0; k < row.length; ++k) {
106  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
107  idx(I) = row.colidx(k);
108  vals(I) = row.value(k);
109  I += 1;
110  }
111  }
112  });
113 
114  Kokkos::fence();
115  } else {
116  Kokkos::parallel_scan(
117  "removeSmallEntries::rowptr",
118  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
119  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
120  auto row = lclA.row(rlid);
121  for (LocalOrdinal k = 0; k < row.length; ++k) {
122  if (impl_ATS::magnitude(row.value(k)) > threshold) {
123  partial_nnz += 1;
124  }
125  }
126  if (is_final)
127  rowptr(rlid + 1) = partial_nnz;
128  },
129  nnz);
130 
131  idx = col_idx_type("idx", nnz);
132  vals = vals_type("vals", nnz);
133 
134  Kokkos::parallel_for(
135  "removeSmallEntries::indicesValues",
136  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
137  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
138  auto row = lclA.row(rlid);
139  auto I = rowptr(rlid);
140  for (LocalOrdinal k = 0; k < row.length; ++k) {
141  if (impl_ATS::magnitude(row.value(k)) > threshold) {
142  idx(I) = row.colidx(k);
143  vals(I) = row.value(k);
144  I += 1;
145  }
146  }
147  });
148 
149  Kokkos::fence();
150  }
151 
152  auto lclNewA = typename crs_matrix::local_matrix_type("thresholdedMatrix", lclA.numRows(), lclA.numCols(), nnz, vals, rowptr, idx);
153  auto newA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclNewA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
154 
155  return newA;
156 }
157 
158 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
159 RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
161  GetThresholdedMatrix(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ain, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) {
162  auto crsWrap = rcp_dynamic_cast<CrsMatrixWrap>(Ain);
163  if (!crsWrap.is_null()) {
164  auto crsMat = crsWrap->getCrsMatrix();
165  auto filteredMat = removeSmallEntries(crsMat, threshold, keepDiagonal);
166  return rcp_static_cast<CrsMatrixWrap>(filteredMat);
167  }
168 
169  RCP<const Map> rowmap = Ain->getRowMap();
170  RCP<const Map> colmap = Ain->getColMap();
171  RCP<CrsMatrixWrap> Aout = rcp(new CrsMatrixWrap(rowmap, expectedNNZperRow <= 0 ? Ain->getGlobalMaxNumRowEntries() : expectedNNZperRow));
172  // loop over local rows
173  for (size_t row = 0; row < Ain->getLocalNumRows(); row++) {
174  size_t nnz = Ain->getNumEntriesInLocalRow(row);
175 
178  Ain->getLocalRowView(row, indices, vals);
179 
180  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::ThresholdAFilterFactory::Build: number of nonzeros not equal to number of indices? Error.");
181 
184  size_t nNonzeros = 0;
185  if (keepDiagonal) {
186  GlobalOrdinal glbRow = rowmap->getGlobalElement(row);
187  LocalOrdinal lclColIdx = colmap->getLocalElement(glbRow);
188  for (size_t i = 0; i < (size_t)indices.size(); i++) {
189  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > Teuchos::ScalarTraits<Scalar>::magnitude(threshold) || indices[i] == lclColIdx) {
190  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
191  valout[nNonzeros] = vals[i];
192  nNonzeros++;
193  }
194  }
195  } else
196  for (size_t i = 0; i < (size_t)indices.size(); i++) {
198  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
199  valout[nNonzeros] = vals[i];
200  nNonzeros++;
201  }
202  }
203 
204  indout.resize(nNonzeros);
205  valout.resize(nNonzeros);
206 
207  Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
208  }
209  Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());
210 
211  return Aout;
212 }
213 
214 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
218  using STS = Teuchos::ScalarTraits<Scalar>;
219  RCP<CrsGraph> sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), expectedNNZperRow <= 0 ? A->getGlobalMaxNumRowEntries() : expectedNNZperRow);
220 
221  RCP<Vector> diag = GetMatrixOverlappedDiagonal(*A);
222  ArrayRCP<const Scalar> D = diag->getData(0);
223 
224  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
227  A->getLocalRowView(row, indices, vals);
228 
229  GlobalOrdinal globalRow = A->getRowMap()->getGlobalElement(row);
230  LocalOrdinal col = A->getColMap()->getLocalElement(globalRow);
231 
232  const Scalar Dk = STS::magnitude(D[col]) > 0.0 ? STS::magnitude(D[col]) : 1.0;
233  Array<GlobalOrdinal> indicesNew;
234 
235  for (size_t i = 0; i < size_t(indices.size()); i++)
236  // keep diagonal per default
237  if (col == indices[i] || STS::magnitude(STS::squareroot(Dk) * vals[i] * STS::squareroot(Dk)) > STS::magnitude(threshold))
238  indicesNew.append(A->getColMap()->getGlobalElement(indices[i]));
239 
240  sparsityPattern->insertGlobalIndices(globalRow, ArrayView<const GlobalOrdinal>(indicesNew.data(), indicesNew.length()));
241  }
242  sparsityPattern->fillComplete();
243 
244  return sparsityPattern;
245 }
246 
247 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
251  size_t numRows = A.getRowMap()->getLocalNumElements();
252  Teuchos::ArrayRCP<Scalar> diag(numRows);
255  for (size_t i = 0; i < numRows; ++i) {
256  A.getLocalRowView(i, cols, vals);
257  LocalOrdinal j = 0;
258  for (; j < cols.size(); ++j) {
259  if (Teuchos::as<size_t>(cols[j]) == i) {
260  diag[i] = vals[j];
261  break;
262  }
263  }
264  if (j == cols.size()) {
265  // Diagonal entry is absent
267  }
268  }
269  return diag;
270 }
271 
272 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
275  GetMatrixDiagonal(const Matrix& A) {
276  const auto rowMap = A.getRowMap();
278 
279  const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
280  if ((crsOp != NULL) && (rowMap->lib() == Xpetra::UseTpetra)) {
281  using device_type = typename CrsGraph::device_type;
282  Kokkos::View<size_t*, device_type> offsets("offsets", rowMap->getLocalNumElements());
283  crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
284  crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
285  } else {
286  A.getLocalDiagCopy(*diag);
287  }
288 
289  return diag;
290 }
291 
292 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
293 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
294 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
295 // GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A, Magnitude tol, Scalar valReplacement) {
296 // Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse");
297 
298 // RCP<const Map> rowMap = A.getRowMap();
299 // RCP<Vector> diag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap,true);
300 
301 // A.getLocalDiagCopy(*diag);
302 
303 // RCP<Vector> inv = MueLu::UtilitiesBase<Scalar,LocalOrdinal,GlobalOrdinal,Node>::GetInverse(diag, tol, valReplacement);
304 
305 // return inv;
306 // }
307 
308 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
313  Scalar valReplacement,
314  const bool doLumped) {
315  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities::GetMatrixDiagonalInverse");
316 
317  RCP<const BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpFromRef(A));
318  if (!bA.is_null()) {
319  RCP<const Map> rowMap = A.getRowMap();
321  A.getLocalDiagCopy(*diag);
323  return inv;
324  }
325 
326  // Some useful type definitions
327  using local_matrix_type = typename Matrix::local_matrix_type;
328  // using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
329  using value_type = typename local_matrix_type::value_type;
330  using values_type = typename local_matrix_type::values_type;
331  using scalar_type = typename values_type::non_const_value_type;
332  using ordinal_type = typename local_matrix_type::ordinal_type;
333  using execution_space = typename local_matrix_type::execution_space;
334  // using memory_space = typename local_matrix_type::memory_space;
335  // Be careful with this one, if using Kokkos::ArithTraits<Scalar>
336  // you are likely to run into errors when handling std::complex<>
337  // a good way to work around that is to use the following:
338  // using KAT = Kokkos::ArithTraits<Kokkos::ArithTraits<Scalar>::val_type> >
339  // here we have: value_type = Kokkos::ArithTraits<Scalar>::val_type
340  using KAT = Kokkos::ArithTraits<value_type>;
341 
342  // Get/Create distributed objects
343  RCP<const Map> rowMap = A.getRowMap();
344  RCP<Vector> diag = VectorFactory::Build(rowMap, false);
345 
346  // Now generate local objects
347  local_matrix_type localMatrix = A.getLocalMatrixDevice();
348  auto diagVals = diag->getDeviceLocalView(Xpetra::Access::ReadWrite);
349 
350  ordinal_type numRows = localMatrix.graph.numRows();
351 
352  scalar_type valReplacement_dev = valReplacement;
353 
354  // Note: 2019-11-21, LBV
355  // This could be implemented with a TeamPolicy over the rows
356  // and a TeamVectorRange over the entries in a row if performance
357  // becomes more important here.
358  if (!doLumped)
359  Kokkos::parallel_for(
360  "Utilities::GetMatrixDiagonalInverse",
361  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
362  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
363  bool foundDiagEntry = false;
364  auto myRow = localMatrix.rowConst(rowIdx);
365  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
366  if (myRow.colidx(entryIdx) == rowIdx) {
367  foundDiagEntry = true;
368  if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
369  diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
370  } else {
371  diagVals(rowIdx, 0) = valReplacement_dev;
372  }
373  break;
374  }
375  }
376 
377  if (!foundDiagEntry) {
378  diagVals(rowIdx, 0) = KAT::zero();
379  }
380  });
381  else
382  Kokkos::parallel_for(
383  "Utilities::GetMatrixDiagonalInverse",
384  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
385  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
386  auto myRow = localMatrix.rowConst(rowIdx);
387  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
388  diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
389  }
390  if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
391  diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
392  else
393  diagVals(rowIdx, 0) = valReplacement_dev;
394  });
395 
396  return diag;
397 }
398 
399 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
403  Magnitude tol,
404  Scalar valReplacement,
405  const bool replaceSingleEntryRowWithZero,
406  const bool useAverageAbsDiagVal) {
407  typedef Teuchos::ScalarTraits<Scalar> TST;
408 
409  RCP<Vector> diag = Teuchos::null;
410  const Scalar zero = TST::zero();
411  const Scalar one = TST::one();
412  const Scalar two = one + one;
413 
414  Teuchos::RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
415 
417  Teuchos::rcp_dynamic_cast<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(rcpA);
418  if (bA == Teuchos::null) {
419  RCP<const Map> rowMap = rcpA->getRowMap();
421 
422  if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
423  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetLumpedMatrixDiagonal (Kokkos implementation)");
424  // Implement using Kokkos
425  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
426  using local_matrix_type = typename Matrix::local_matrix_type;
427  using execution_space = typename local_vector_type::execution_space;
428  // using rowmap_type = typename local_matrix_type::row_map_type;
429  // using entries_type = typename local_matrix_type::index_type;
430  using values_type = typename local_matrix_type::values_type;
431  using scalar_type = typename values_type::non_const_value_type;
432  using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
433  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
434  using KAT_M = typename Kokkos::ArithTraits<mag_type>;
435  using size_type = typename local_matrix_type::non_const_size_type;
436 
437  local_vector_type diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
438  local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
439  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
440  scalar_type valReplacement_dev = valReplacement;
441 
442  if (doReciprocal) {
443  Kokkos::View<int*, execution_space> nnzPerRow("nnz per rows", diag_dev.extent(0));
444  Kokkos::View<scalar_type*, execution_space> regSum("regSum", diag_dev.extent(0));
445  Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev("avgAbsDiagVal");
446  Kokkos::View<int, execution_space> numDiagsEqualToOne_dev("numDiagsEqualToOne");
447 
448  {
449  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for (doReciprocal)");
450  Kokkos::parallel_for(
451  "GetLumpedMatrixDiagonal", my_policy,
452  KOKKOS_LAMBDA(const int rowIdx) {
453  diag_dev(rowIdx, 0) = KAT_S::zero();
454  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
455  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
456  ++entryIdx) {
457  regSum(rowIdx) += local_mat_dev.values(entryIdx);
458  if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
459  ++nnzPerRow(rowIdx);
460  }
461  diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
462  if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
463  Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
464  }
465  }
466 
467  if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
468  Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
469  }
470  });
471  }
472  if (useAverageAbsDiagVal) {
473  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: useAverageAbsDiagVal");
474  typename Kokkos::View<mag_type, execution_space>::HostMirror avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
475  Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
476  int numDiagsEqualToOne;
477  Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
478 
479  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal() - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
480  }
481 
482  {
483  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("ComputeLumpedDiagonalInverse: parallel_for (doReciprocal)");
484  Kokkos::parallel_for(
485  "ComputeLumpedDiagonalInverse", my_policy,
486  KOKKOS_LAMBDA(const int rowIdx) {
487  if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
488  diag_dev(rowIdx, 0) = KAT_S::zero();
489  } else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
490  diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
491  } else {
492  if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
493  diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
494  } else {
495  diag_dev(rowIdx, 0) = valReplacement_dev;
496  }
497  }
498  });
499  }
500 
501  } else {
502  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for");
503  Kokkos::parallel_for(
504  "GetLumpedMatrixDiagonal", my_policy,
505  KOKKOS_LAMBDA(const int rowIdx) {
506  diag_dev(rowIdx, 0) = KAT_S::zero();
507  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
508  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
509  ++entryIdx) {
510  diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
511  }
512  });
513  }
514  } else {
515  // Implement using Teuchos
516  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase: GetLumpedMatrixDiagonal: (Teuchos implementation)");
517  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
518  Teuchos::Array<Scalar> regSum(diag->getLocalLength());
521 
522  std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
523 
524  // FIXME 2021-10-22 JHU If this is called with doReciprocal=false, what should the correct behavior be? Currently,
525  // FIXME 2021-10-22 JHU the diagonal entry is set to be the sum of the absolute values of the row entries.
526 
527  const Magnitude zeroMagn = TST::magnitude(zero);
528  Magnitude avgAbsDiagVal = TST::magnitude(zero);
529  int numDiagsEqualToOne = 0;
530  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
531  nnzPerRow[i] = 0;
532  rcpA->getLocalRowView(i, cols, vals);
533  diagVals[i] = zero;
534  for (LocalOrdinal j = 0; j < cols.size(); ++j) {
535  regSum[i] += vals[j];
536  const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
537  if (rowEntryMagn > zeroMagn)
538  nnzPerRow[i]++;
539  diagVals[i] += rowEntryMagn;
540  if (static_cast<size_t>(cols[j]) == i)
541  avgAbsDiagVal += rowEntryMagn;
542  }
543  if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
544  numDiagsEqualToOne++;
545  }
546  if (useAverageAbsDiagVal)
547  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
548  if (doReciprocal) {
549  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
550  if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
551  diagVals[i] = zero;
552  else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
553  diagVals[i] = one / TST::magnitude((two * regSum[i]));
554  else {
555  if (TST::magnitude(diagVals[i]) > tol)
556  diagVals[i] = one / diagVals[i];
557  else {
558  diagVals[i] = valReplacement;
559  }
560  }
561  }
562  }
563  }
564  } else {
566  "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
567  diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(bA->getRangeMapExtractor()->getFullMap(), true);
568 
569  for (size_t row = 0; row < bA->Rows(); ++row) {
570  for (size_t col = 0; col < bA->Cols(); ++col) {
571  if (!bA->getMatrix(row, col).is_null()) {
572  // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset!
573  bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(bA->getMatrix(row, col)) == Teuchos::null);
574  RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
575  RCP<const Vector> dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row, col)));
576  ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
577  bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
578  }
579  }
580  }
581  }
582 
583  return diag;
584 }
585 
586 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
590  // Get/Create distributed objects
591  RCP<const Map> rowMap = A.getRowMap();
593 
594  // Implement using Kokkos
595  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
596  using local_matrix_type = typename Matrix::local_matrix_type;
597  using execution_space = typename local_vector_type::execution_space;
598  using values_type = typename local_matrix_type::values_type;
599  using scalar_type = typename values_type::non_const_value_type;
600  using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
601  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
602  using KAT_M = typename Kokkos::ArithTraits<mag_type>;
603  using size_type = typename local_matrix_type::non_const_size_type;
604 
605  auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
606  auto local_mat_dev = A.getLocalMatrixDevice();
607  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
608 
609  Kokkos::parallel_for(
610  "GetMatrixMaxMinusOffDiagonal", my_policy,
611  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
612  auto mymax = KAT_M::zero();
613  auto row = local_mat_dev.row(rowIdx);
614  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
615  if (rowIdx != row.colidx(entryIdx)) {
616  mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx)));
617  }
618  }
619  diag_dev(rowIdx, 0) = mymax;
620  });
621 
622  return diag;
623 }
624 
625 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
629  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
630 
631  // Get/Create distributed objects
632  RCP<const Map> rowMap = A.getRowMap();
634 
635  // Implement using Kokkos
636  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
637  using local_matrix_type = typename Matrix::local_matrix_type;
638  using execution_space = typename local_vector_type::execution_space;
639  using values_type = typename local_matrix_type::values_type;
640  using scalar_type = typename values_type::non_const_value_type;
641  using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
642  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
643  using KAT_M = typename Kokkos::ArithTraits<mag_type>;
644  using size_type = typename local_matrix_type::non_const_size_type;
645 
646  auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
647  auto local_mat_dev = A.getLocalMatrixDevice();
648  auto local_block_dev = BlockNumber.getDeviceLocalView(Xpetra::Access::ReadOnly);
649  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
650 
651  Kokkos::parallel_for(
652  "GetMatrixMaxMinusOffDiagonal", my_policy,
653  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
654  auto mymax = KAT_M::zero();
655  auto row = local_mat_dev.row(rowIdx);
656  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
657  if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
658  mymax = std::max(mymax, -KAT_S::magnitude(row.value(entryIdx)));
659  }
660  }
661  diag_dev(rowIdx, 0) = mymax;
662  });
663 
664  return diag;
665 }
666 
667 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
672 
673  // check whether input vector "v" is a BlockedVector
674  RCP<const BlockedVector> bv = Teuchos::rcp_dynamic_cast<const BlockedVector>(v);
675  if (bv.is_null() == false) {
676  RCP<BlockedVector> bret = Teuchos::rcp_dynamic_cast<BlockedVector>(ret);
677  TEUCHOS_TEST_FOR_EXCEPTION(bret.is_null() == true, MueLu::Exceptions::RuntimeError, "MueLu::UtilitiesBase::GetInverse: return vector should be of type BlockedVector");
678  RCP<const BlockedMap> bmap = bv->getBlockedMap();
679  for (size_t r = 0; r < bmap->getNumMaps(); ++r) {
680  RCP<const MultiVector> submvec = bv->getMultiVector(r, bmap->getThyraMode());
681  RCP<const Vector> subvec = submvec->getVector(0);
683  bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
684  }
685  return ret;
686  }
687 
688  // v is an {Epetra,Tpetra}Vector: work with the underlying raw data
689  ArrayRCP<Scalar> retVals = ret->getDataNonConst(0);
690  ArrayRCP<const Scalar> inputVals = v->getData(0);
691  for (size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
692  if (Teuchos::ScalarTraits<Scalar>::magnitude(inputVals[i]) > tol)
693  retVals[i] = Teuchos::ScalarTraits<Scalar>::one() / inputVals[i];
694  else
695  retVals[i] = valReplacement;
696  }
697  return ret;
698 }
699 
700 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
701 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
702 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
703 // GetMatrixOverlappedDiagonal(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A) {
704 // RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
705 
706 // // Undo block map (if we have one)
707 // RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
708 // if(!browMap.is_null()) rowMap = browMap->getMap();
709 
710 // RCP<Vector> localDiag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap);
711 // try {
712 // const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
713 // if (crsOp == NULL) {
714 // throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
715 // }
716 // Teuchos::ArrayRCP<size_t> offsets;
717 // crsOp->getLocalDiagOffsets(offsets);
718 // crsOp->getLocalDiagCopy(*localDiag,offsets());
719 // }
720 // catch (...) {
721 // ArrayRCP<Scalar> localDiagVals = localDiag->getDataNonConst(0);
722 // Teuchos::ArrayRCP<Scalar> diagVals = GetMatrixDiagonal(A);
723 // for (LocalOrdinal i = 0; i < localDiagVals.size(); i++)
724 // localDiagVals[i] = diagVals[i];
725 // localDiagVals = diagVals = null;
726 // }
727 
728 // RCP<Vector> diagonal = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap);
729 // RCP< const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
730 // importer = A.getCrsGraph()->getImporter();
731 // if (importer == Teuchos::null) {
732 // importer = Xpetra::ImportFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap, colMap);
733 // }
734 // diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
735 // return diagonal;
736 // }
737 
738 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
742  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
743  RCP<Vector> localDiag = GetMatrixDiagonal(A);
744  RCP<Vector> diagonal = VectorFactory::Build(colMap);
745  RCP<const Import> importer = A.getCrsGraph()->getImporter();
746  if (importer == Teuchos::null) {
747  importer = ImportFactory::Build(rowMap, colMap);
748  }
749  diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
750 
751  return diagonal;
752 }
753 
754 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
758  using STS = typename Teuchos::ScalarTraits<SC>;
759 
760  // Undo block map (if we have one)
761  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
762  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
763  if (!browMap.is_null()) rowMap = browMap->getMap();
764 
767  ArrayRCP<SC> localVals = local->getDataNonConst(0);
768 
769  for (LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
770  size_t nnz = A.getNumEntriesInLocalRow(row);
771  ArrayView<const LO> indices;
772  ArrayView<const SC> vals;
773  A.getLocalRowView(row, indices, vals);
774 
775  SC si = STS::zero();
776 
777  for (LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
778  if (indices[colID] != row) {
779  si += vals[colID];
780  }
781  }
782  localVals[row] = si;
783  }
784 
786  importer = A.getCrsGraph()->getImporter();
787  if (importer == Teuchos::null) {
788  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
789  }
790  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
791  return ghosted;
792 }
793 
794 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
798  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
799  using STS = typename Teuchos::ScalarTraits<Scalar>;
800  using MTS = typename Teuchos::ScalarTraits<Magnitude>;
801  using MT = Magnitude;
802  using RealValuedVector = Xpetra::Vector<MT, LO, GO, Node>;
803 
804  // Undo block map (if we have one)
805  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
806  if (!browMap.is_null()) rowMap = browMap->getMap();
807 
810  ArrayRCP<MT> localVals = local->getDataNonConst(0);
811 
812  for (LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
813  size_t nnz = A.getNumEntriesInLocalRow(rowIdx);
814  ArrayView<const LO> indices;
815  ArrayView<const SC> vals;
816  A.getLocalRowView(rowIdx, indices, vals);
817 
818  MT si = MTS::zero();
819 
820  for (LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
821  if (indices[colID] != rowIdx) {
822  si += STS::magnitude(vals[colID]);
823  }
824  }
825  localVals[rowIdx] = si;
826  }
827 
829  importer = A.getCrsGraph()->getImporter();
830  if (importer == Teuchos::null) {
831  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
832  }
833  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
834  return ghosted;
835 }
836 
837 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
840  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
841  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
842  const size_t numVecs = X.getNumVectors();
843  RCP<MultiVector> RES = Residual(Op, X, RHS);
844  Teuchos::Array<Magnitude> norms(numVecs);
845  RES->norm2(norms);
846  return norms;
847 }
848 
849 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
852  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
853  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
854  const size_t numVecs = X.getNumVectors();
855  Residual(Op, X, RHS, Resid);
856  Teuchos::Array<Magnitude> norms(numVecs);
857  Resid.norm2(norms);
858  return norms;
859 }
860 
861 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
864  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
865  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
866  const size_t numVecs = X.getNumVectors();
867  // TODO Op.getRangeMap should return a BlockedMap if it is a BlockedCrsOperator
868  RCP<MultiVector> RES = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(RHS.getMap(), numVecs, false); // no need to initialize to zero
869  Op.residual(X, RHS, *RES);
870  return RES;
871 }
872 
873 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
875  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
876  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides");
877  TEUCHOS_TEST_FOR_EXCEPTION(Resid.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of residual vectors != number of right-hand sides");
878  Op.residual(X, RHS, Resid);
879 }
880 
881 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
882 Scalar
884  PowerMethod(const Matrix& A, bool scaleByDiag,
885  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
886  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
887  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
888 
889  // power iteration
890  RCP<Vector> diagInvVec;
891  if (scaleByDiag) {
892  diagInvVec = GetMatrixDiagonalInverse(A);
893  }
894 
895  Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
896  return lambda;
897 }
898 
899 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
900 Scalar
902  PowerMethod(const Matrix& A, const RCP<Vector>& diagInvVec,
903  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
904  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
905  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
906 
907  // Create three vectors, fill z with random numbers
911 
912  z->setSeed(seed); // seed random number generator
913  z->randomize(true); // use Xpetra implementation: -> same results for Epetra and Tpetra
914 
915  Teuchos::Array<Magnitude> norms(1);
916 
917  typedef Teuchos::ScalarTraits<Scalar> STS;
918 
919  const Scalar zero = STS::zero(), one = STS::one();
920 
921  Scalar lambda = zero;
922  Magnitude residual = STS::magnitude(zero);
923 
924  // power iteration
925  for (int iter = 0; iter < niters; ++iter) {
926  z->norm2(norms); // Compute 2-norm of z
927  q->update(one / norms[0], *z, zero); // Set q = z / normz
928  A.apply(*q, *z); // Compute z = A*q
929  if (diagInvVec != Teuchos::null)
930  z->elementWiseMultiply(one, *diagInvVec, *z, zero);
931  lambda = q->dot(*z); // Approximate maximum eigenvalue: lamba = dot(q,z)
932 
933  if (iter % 100 == 0 || iter + 1 == niters) {
934  r->update(1.0, *z, -lambda, *q, zero); // Compute A*q - lambda*q
935  r->norm2(norms);
936  residual = STS::magnitude(norms[0] / lambda);
937  if (verbose) {
938  std::cout << "Iter = " << iter
939  << " Lambda = " << lambda
940  << " Residual of A*q - lambda*q = " << residual
941  << std::endl;
942  }
943  }
944  if (residual < tolerance)
945  break;
946  }
947  return lambda;
948 }
949 
950 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
951 RCP<Teuchos::FancyOStream>
953  MakeFancy(std::ostream& os) {
954  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(os));
955  return fancy;
956 }
957 
958 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
962  const size_t numVectors = v.size();
963 
965  for (size_t j = 0; j < numVectors; j++) {
966  d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
967  }
969 }
970 
971 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
975  const size_t numVectors = v.size();
977 
979  for (size_t j = 0; j < numVectors; j++) {
980  d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
981  }
983 }
984 
985 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
989  LocalOrdinal numRows = A.getLocalNumRows();
990  typedef Teuchos::ScalarTraits<Scalar> STS;
991  ArrayRCP<bool> boundaryNodes(numRows, true);
992  if (count_twos_as_dirichlet) {
993  for (LocalOrdinal row = 0; row < numRows; row++) {
996  A.getLocalRowView(row, indices, vals);
997  size_t nnz = A.getNumEntriesInLocalRow(row);
998  if (nnz > 2) {
999  size_t col;
1000  for (col = 0; col < nnz; col++)
1001  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1002  if (!boundaryNodes[row])
1003  break;
1004  boundaryNodes[row] = false;
1005  }
1006  if (col == nnz)
1007  boundaryNodes[row] = true;
1008  }
1009  }
1010  } else {
1011  for (LocalOrdinal row = 0; row < numRows; row++) {
1014  A.getLocalRowView(row, indices, vals);
1015  size_t nnz = A.getNumEntriesInLocalRow(row);
1016  if (nnz > 1)
1017  for (size_t col = 0; col < nnz; col++)
1018  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1019  boundaryNodes[row] = false;
1020  break;
1021  }
1022  }
1023  }
1024  return boundaryNodes;
1025 }
1026 
1027 template <class SC, class LO, class GO, class NO, class memory_space>
1028 Kokkos::View<bool*, memory_space>
1030  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1031  const bool count_twos_as_dirichlet) {
1032  using impl_scalar_type = typename Kokkos::ArithTraits<SC>::val_type;
1033  using ATS = Kokkos::ArithTraits<impl_scalar_type>;
1034  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1035  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1036 
1037  Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
1038 
1039  if (helpers::isTpetraBlockCrs(A)) {
1040  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = helpers::Op2TpetraBlockCrs(A);
1041  auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1042  auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
1043  auto values = Am.getValuesDevice();
1044  LO numBlockRows = Am.getLocalNumRows();
1045  const LO stride = Am.getBlockSize() * Am.getBlockSize();
1046 
1047  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numBlockRows);
1048 
1049  if (count_twos_as_dirichlet)
1050  throw Exceptions::RuntimeError("BlockCrs does not support counting twos as Dirichlet");
1051 
1052  Kokkos::parallel_for(
1053  "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1054  KOKKOS_LAMBDA(const LO row) {
1055  auto rowView = b_graph.rowConst(row);
1056  auto length = rowView.length;
1057  LO valstart = b_rowptr[row] * stride;
1058 
1059  boundaryNodes(row) = true;
1060  decltype(length) colID = 0;
1061  for (; colID < length; colID++) {
1062  if (rowView.colidx(colID) != row) {
1063  LO current = valstart + colID * stride;
1064  for (LO k = 0; k < stride; k++) {
1065  if (ATS::magnitude(values[current + k]) > tol) {
1066  boundaryNodes(row) = false;
1067  break;
1068  }
1069  }
1070  }
1071  if (boundaryNodes(row) == false)
1072  break;
1073  }
1074  });
1075  } else {
1076  auto localMatrix = A.getLocalMatrixDevice();
1077  LO numRows = A.getLocalNumRows();
1078  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
1079 
1080  if (count_twos_as_dirichlet)
1081  Kokkos::parallel_for(
1082  "MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0, numRows),
1083  KOKKOS_LAMBDA(const LO row) {
1084  auto rowView = localMatrix.row(row);
1085  auto length = rowView.length;
1086 
1087  boundaryNodes(row) = true;
1088  if (length > 2) {
1089  decltype(length) colID = 0;
1090  for (; colID < length; colID++)
1091  if ((rowView.colidx(colID) != row) &&
1092  (ATS::magnitude(rowView.value(colID)) > tol)) {
1093  if (!boundaryNodes(row))
1094  break;
1095  boundaryNodes(row) = false;
1096  }
1097  if (colID == length)
1098  boundaryNodes(row) = true;
1099  }
1100  });
1101  else
1102  Kokkos::parallel_for(
1103  "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1104  KOKKOS_LAMBDA(const LO row) {
1105  auto rowView = localMatrix.row(row);
1106  auto length = rowView.length;
1107 
1108  boundaryNodes(row) = true;
1109  for (decltype(length) colID = 0; colID < length; colID++)
1110  if ((rowView.colidx(colID) != row) &&
1111  (ATS::magnitude(rowView.value(colID)) > tol)) {
1112  boundaryNodes(row) = false;
1113  break;
1114  }
1115  });
1116  }
1117  if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1118  return boundaryNodes;
1119  else {
1120  Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), boundaryNodes.extent(0));
1121  Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1122  return boundaryNodes2;
1123  }
1124  // CAG: No idea why this is needed to avoid "warning: missing return statement at end of non-void function"
1125  Kokkos::View<bool*, memory_space> dummy("dummy", 0);
1126  return dummy;
1127 }
1128 
1129 template <class SC, class LO, class GO, class NO>
1130 Kokkos::View<bool*, typename NO::device_type::memory_space>
1133  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1134  const bool count_twos_as_dirichlet) {
1135  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
1136 }
1137 
1138 template <class SC, class LO, class GO, class NO>
1139 Kokkos::View<bool*, typename Kokkos::HostSpace>
1142  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1143  const bool count_twos_as_dirichlet) {
1144  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
1145 }
1146 
1147 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1151  // assume that there is no zero diagonal in matrix
1152  bHasZeroDiagonal = false;
1153 
1155  A.getLocalDiagCopy(*diagVec);
1156  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
1157 
1158  LocalOrdinal numRows = A.getLocalNumRows();
1159  typedef Teuchos::ScalarTraits<Scalar> STS;
1160  ArrayRCP<bool> boundaryNodes(numRows, false);
1161  for (LocalOrdinal row = 0; row < numRows; row++) {
1164  A.getLocalRowView(row, indices, vals);
1165  size_t nnz = 0; // collect nonzeros in row (excluding the diagonal)
1166  bool bHasDiag = false;
1167  for (decltype(indices.size()) col = 0; col < indices.size(); col++) {
1168  if (indices[col] != row) {
1169  if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1170  nnz++;
1171  }
1172  } else
1173  bHasDiag = true; // found a diagonal entry
1174  }
1175  if (bHasDiag == false)
1176  bHasZeroDiagonal = true; // we found at least one row without a diagonal
1177  else if (nnz == 0)
1178  boundaryNodes[row] = true;
1179  }
1180  return boundaryNodes;
1181 }
1182 
1183 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1186  Teuchos::ArrayRCP<bool> nonzeros) {
1187  TEUCHOS_ASSERT(vals.size() == nonzeros.size());
1188  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
1189  const magnitudeType eps = 2.0 * Teuchos::ScalarTraits<magnitudeType>::eps();
1190  for (size_t i = 0; i < static_cast<size_t>(vals.size()); i++) {
1191  nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
1192  }
1193 }
1194 
1195 // Find Nonzeros in a device view
1196 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1199  Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1200  using ATS = Kokkos::ArithTraits<Scalar>;
1201  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1202  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1203  TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
1204  const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
1205 
1206  Kokkos::parallel_for(
1207  "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1208  KOKKOS_LAMBDA(const size_t i) {
1209  nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
1210  });
1211 }
1212 
1213 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1216  const Teuchos::ArrayRCP<bool>& dirichletRows,
1217  Teuchos::ArrayRCP<bool> dirichletCols,
1218  Teuchos::ArrayRCP<bool> dirichletDomain) {
1223  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getLocalNumElements());
1224  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getLocalNumElements());
1225  TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getLocalNumElements());
1227  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1228  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1229  if (dirichletRows[i]) {
1231  ArrayView<const Scalar> values;
1232  A.getLocalRowView(i, indices, values);
1233  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1234  myColsToZero->replaceLocalValue(indices[j], 0, one);
1235  }
1236  }
1237 
1240  if (!importer.is_null()) {
1241  globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1, /*zeroOut=*/true);
1242  // export to domain map
1243  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1244  // import to column map
1245  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1246  } else
1247  globalColsToZero = myColsToZero;
1248 
1249  FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1250  FindNonZeros(myColsToZero->getData(0), dirichletCols);
1251 }
1252 
1253 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1256  const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1257  Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1258  Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1259  using ATS = Kokkos::ArithTraits<Scalar>;
1260  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1261  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1265  TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1266  TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1267  TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1269  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1270  auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
1271  auto localMatrix = A.getLocalMatrixDevice();
1272  Kokkos::parallel_for(
1273  "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1274  KOKKOS_LAMBDA(const LocalOrdinal row) {
1275  if (dirichletRows(row)) {
1276  auto rowView = localMatrix.row(row);
1277  auto length = rowView.length;
1278 
1279  for (decltype(length) colID = 0; colID < length; colID++)
1280  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1281  }
1282  });
1283 
1286  if (!importer.is_null()) {
1287  globalColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, /*zeroOut=*/true);
1288  // export to domain map
1289  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1290  // import to column map
1291  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1292  } else
1293  globalColsToZero = myColsToZero;
1294  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(globalColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly), dirichletDomain);
1295  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly), dirichletCols);
1296 }
1297 
1298 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1301  typedef Teuchos::ScalarTraits<Scalar> STS;
1303  typedef Teuchos::ScalarTraits<MT> MTS;
1305  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1306  size_t nnz = A.getNumEntriesInLocalRow(row);
1309  A.getLocalRowView(row, indices, vals);
1310 
1311  Scalar rowsum = STS::zero();
1312  Scalar diagval = STS::zero();
1313 
1314  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1315  LocalOrdinal col = indices[colID];
1316  if (row == col)
1317  diagval = vals[colID];
1318  rowsum += vals[colID];
1319  }
1320  // printf("A(%d,:) row_sum(point) = %6.4e\n",row,rowsum);
1321  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1322  // printf("Row %d triggers rowsum\n",(int)row);
1323  dirichletRows[row] = true;
1324  }
1325  }
1326 }
1327 
1328 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1331  typedef Teuchos::ScalarTraits<Scalar> STS;
1333  typedef Teuchos::ScalarTraits<MT> MTS;
1334  RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1335 
1336  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1337 
1338  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1339  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1340  size_t nnz = A.getNumEntriesInLocalRow(row);
1341  ArrayView<const LocalOrdinal> indices;
1342  ArrayView<const Scalar> vals;
1343  A.getLocalRowView(row, indices, vals);
1344 
1345  Scalar rowsum = STS::zero();
1346  Scalar diagval = STS::zero();
1347  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1348  LocalOrdinal col = indices[colID];
1349  if (row == col)
1350  diagval = vals[colID];
1351  if (block_id[row] == block_id[col])
1352  rowsum += vals[colID];
1353  }
1354 
1355  // printf("A(%d,:) row_sum(block) = %6.4e\n",row,rowsum);
1356  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1357  // printf("Row %d triggers rowsum\n",(int)row);
1358  dirichletRows[row] = true;
1359  }
1360  }
1361 }
1362 
1363 // Applies rowsum criterion
1364 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1366  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1367  Kokkos::View<bool*, memory_space>& dirichletRows) {
1368  typedef Teuchos::ScalarTraits<Scalar> STS;
1370 
1371  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1372  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1373 
1374  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1375  size_t nnz = A.getNumEntriesInLocalRow(row);
1378  A.getLocalRowView(row, indices, vals);
1379 
1380  Scalar rowsum = STS::zero();
1381  Scalar diagval = STS::zero();
1382  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1383  LocalOrdinal col = indices[colID];
1384  if (row == col)
1385  diagval = vals[colID];
1386  rowsum += vals[colID];
1387  }
1388  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1389  dirichletRowsHost(row) = true;
1390  }
1391 
1392  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1393 }
1394 
1395 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1398  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1399  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1400  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
1401 }
1402 
1403 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1406  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1407  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1408  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
1409 }
1410 
1411 // Applies rowsum criterion
1412 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1415  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1416  Kokkos::View<bool*, memory_space>& dirichletRows) {
1417  typedef Teuchos::ScalarTraits<Scalar> STS;
1419 
1420  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1421 
1422  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1423  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1424 
1425  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1426  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1427  size_t nnz = A.getNumEntriesInLocalRow(row);
1430  A.getLocalRowView(row, indices, vals);
1431 
1432  Scalar rowsum = STS::zero();
1433  Scalar diagval = STS::zero();
1434  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1435  LocalOrdinal col = indices[colID];
1436  if (row == col)
1437  diagval = vals[colID];
1438  if (block_id[row] == block_id[col])
1439  rowsum += vals[colID];
1440  }
1441  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1442  dirichletRowsHost(row) = true;
1443  }
1444 
1445  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1446 }
1447 
1448 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1452  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1453  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1454  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
1455 }
1456 
1457 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1461  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1462  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1463  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
1464 }
1465 
1466 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1470  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1476  myColsToZero->putScalar(zero);
1477  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1478  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1479  if (dirichletRows[i]) {
1482  A.getLocalRowView(i, indices, values);
1483  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1484  myColsToZero->replaceLocalValue(indices[j], 0, one);
1485  }
1486  }
1487 
1489  globalColsToZero->putScalar(zero);
1491  // export to domain map
1492  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1493  // import to column map
1494  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1495  Teuchos::ArrayRCP<const Scalar> myCols = myColsToZero->getData(0);
1496  Teuchos::ArrayRCP<bool> dirichletCols(colMap->getLocalNumElements(), true);
1498  for (size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1499  dirichletCols[i] = Teuchos::ScalarTraits<Scalar>::magnitude(myCols[i]) > 2.0 * eps;
1500  }
1501  return dirichletCols;
1502 }
1503 
1504 template <class SC, class LO, class GO, class NO>
1505 Kokkos::View<bool*, typename NO::device_type>
1508  const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1509  using ATS = Kokkos::ArithTraits<SC>;
1510  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1511  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1512 
1513  SC zero = ATS::zero();
1514 
1515  auto localMatrix = A.getLocalMatrixDevice();
1516  LO numRows = A.getLocalNumRows();
1517 
1519  Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> colMap = A.getColMap();
1521  myColsToZero->putScalar(zero);
1522  auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
1523  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1524  Kokkos::parallel_for(
1525  "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1526  KOKKOS_LAMBDA(const LO row) {
1527  if (dirichletRows(row)) {
1528  auto rowView = localMatrix.row(row);
1529  auto length = rowView.length;
1530 
1531  for (decltype(length) colID = 0; colID < length; colID++)
1532  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1533  }
1534  });
1535 
1537  globalColsToZero->putScalar(zero);
1539  // export to domain map
1540  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1541  // import to column map
1542  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1543 
1544  auto myCols = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly);
1545  size_t numColEntries = colMap->getLocalNumElements();
1546  Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
1547  const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
1548 
1549  Kokkos::parallel_for(
1550  "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1551  KOKKOS_LAMBDA(const size_t i) {
1552  dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1553  });
1554  return dirichletCols;
1555 }
1556 
1557 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1558 Scalar
1561  // We check only row maps. Column may be different. One would hope that they are the same, as we typically
1562  // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
1563  // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
1564  // simple as couple of elements swapped)
1565  TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
1566  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
1567 
1568  const Map& AColMap = *A.getColMap();
1569  const Map& BColMap = *B.getColMap();
1570 
1573  size_t nnzA = 0, nnzB = 0;
1574 
1575  // We use a simple algorithm
1576  // for each row we fill valBAll array with the values in the corresponding row of B
1577  // as such, it serves as both sorted array and as storage, so we don't need to do a
1578  // tricky problem: "find a value in the row of B corresponding to the specific GID"
1579  // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
1580  // corresponding entries.
1581  // The algorithm should be reasonably cheap, as it does not sort anything, provided
1582  // that getLocalElement and getGlobalElement functions are reasonably effective. It
1583  // *is* possible that the costs are hidden in those functions, but if maps are close
1584  // to linear maps, we should be fine
1585  Teuchos::Array<Scalar> valBAll(BColMap.getLocalNumElements());
1586 
1588  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(), f = zero, gf;
1589  size_t numRows = A.getLocalNumRows();
1590  for (size_t i = 0; i < numRows; i++) {
1591  A.getLocalRowView(i, indA, valA);
1592  B.getLocalRowView(i, indB, valB);
1593  nnzA = indA.size();
1594  nnzB = indB.size();
1595 
1596  // Set up array values
1597  for (size_t j = 0; j < nnzB; j++)
1598  valBAll[indB[j]] = valB[j];
1599 
1600  for (size_t j = 0; j < nnzA; j++) {
1601  // The cost of the whole Frobenius dot product function depends on the
1602  // cost of the getLocalElement and getGlobalElement functions here.
1603  LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1604  if (ind != invalid)
1605  f += valBAll[ind] * valA[j];
1606  }
1607 
1608  // Clean up array values
1609  for (size_t j = 0; j < nnzB; j++)
1610  valBAll[indB[j]] = zero;
1611  }
1612 
1613  MueLu_sumAll(AColMap.getComm(), f, gf);
1614 
1615  return gf;
1616 }
1617 
1618 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1621  // Distribute the seeds evenly in [1,maxint-1]. This guarantees nothing
1622  // about where in random number stream we are, but avoids overflow situations
1623  // in parallel when multiplying by a PID. It would be better to use
1624  // a good parallel random number generator.
1625  double one = 1.0;
1626  int maxint = INT_MAX; //= 2^31-1 = 2147483647 for 32-bit integers
1627  int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.getRank() + 1) / (comm.getSize() + one)));
1628  if (mySeed < 1 || mySeed == maxint) {
1629  std::ostringstream errStr;
1630  errStr << "Error detected with random seed = " << mySeed << ". It should be in the interval [1,2^31-2].";
1631  throw Exceptions::RuntimeError(errStr.str());
1632  }
1633  std::srand(mySeed);
1634  // For Tpetra, we could use Kokkos' random number generator here.
1636  // Epetra
1637  // MultiVector::Random() -> Epetra_Util::RandomDouble() -> Epetra_Utils::RandomInt()
1638  // Its own random number generator, based on Seed_. Seed_ is initialized in Epetra_Util constructor with std::rand()
1639  // So our setting std::srand() affects that too
1640 }
1641 
1642 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1645  std::vector<LocalOrdinal>& dirichletRows, bool count_twos_as_dirichlet) {
1647  dirichletRows.resize(0);
1648  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1651  A->getLocalRowView(i, indices, values);
1652  int nnz = 0;
1653  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1655  nnz++;
1656  }
1657  }
1658  if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1659  dirichletRows.push_back(i);
1660  }
1661  }
1662 }
1663 
1664 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1667  const std::vector<LocalOrdinal>& dirichletRows) {
1668  RCP<const Map> Rmap = A->getRowMap();
1669  RCP<const Map> Cmap = A->getColMap();
1672 
1673  for (size_t i = 0; i < dirichletRows.size(); i++) {
1674  GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1675 
1678  A->getLocalRowView(dirichletRows[i], indices, values);
1679  // NOTE: This won't work with fancy node types.
1680  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1681  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1682  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1683  valuesNC[j] = one;
1684  else
1685  valuesNC[j] = zero;
1686  }
1687  }
1688 }
1689 
1690 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1693  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1695  RCP<const Map> domMap = A->getDomainMap();
1696  RCP<const Map> ranMap = A->getRangeMap();
1697  RCP<const Map> Rmap = A->getRowMap();
1698  RCP<const Map> Cmap = A->getColMap();
1699  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1702  A->resumeFill();
1703  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1704  if (dirichletRows[i]) {
1705  GlobalOrdinal row_gid = Rmap->getGlobalElement(i);
1706 
1709  A->getLocalRowView(i, indices, values);
1710 
1711  Teuchos::ArrayRCP<Scalar> valuesNC(values.size());
1712  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1713  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1714  valuesNC[j] = one;
1715  else
1716  valuesNC[j] = zero;
1717  }
1718  A->replaceLocalValues(i, indices, valuesNC());
1719  }
1720  }
1721  A->fillComplete(domMap, ranMap);
1722 }
1723 
1724 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1727  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1729  using ATS = Kokkos::ArithTraits<Scalar>;
1730  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1731  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1732 
1737 
1738  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1739 
1740  auto localMatrix = A->getLocalMatrixDevice();
1741  auto localRmap = Rmap->getLocalMap();
1742  auto localCmap = Cmap->getLocalMap();
1743 
1744  Kokkos::parallel_for(
1745  "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1746  KOKKOS_LAMBDA(const LocalOrdinal row) {
1747  if (dirichletRows(row)) {
1748  auto rowView = localMatrix.row(row);
1749  auto length = rowView.length;
1750  auto row_gid = localRmap.getGlobalElement(row);
1751  auto row_lid = localCmap.getLocalElement(row_gid);
1752 
1753  for (decltype(length) colID = 0; colID < length; colID++)
1754  if (rowView.colidx(colID) == row_lid)
1755  rowView.value(colID) = impl_ATS::one();
1756  else
1757  rowView.value(colID) = impl_ATS::zero();
1758  }
1759  });
1760 }
1761 
1762 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1765  const std::vector<LocalOrdinal>& dirichletRows,
1766  Scalar replaceWith) {
1767  for (size_t i = 0; i < dirichletRows.size(); i++) {
1770  A->getLocalRowView(dirichletRows[i], indices, values);
1771  // NOTE: This won't work with fancy node types.
1772  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1773  for (size_t j = 0; j < (size_t)indices.size(); j++)
1774  valuesNC[j] = replaceWith;
1775  }
1776 }
1777 
1778 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1781  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1782  Scalar replaceWith) {
1783  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == A->getRowMap()->getLocalNumElements());
1784  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1785  if (dirichletRows[i]) {
1788  A->getLocalRowView(i, indices, values);
1789  // NOTE: This won't work with fancy node types.
1790  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1791  for (size_t j = 0; j < (size_t)indices.size(); j++)
1792  valuesNC[j] = replaceWith;
1793  }
1794  }
1795 }
1796 
1797 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1800  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1801  Scalar replaceWith) {
1802  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == X->getMap()->getLocalNumElements());
1803  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1804  if (dirichletRows[i]) {
1805  for (size_t j = 0; j < X->getNumVectors(); j++)
1806  X->replaceLocalValue(i, j, replaceWith);
1807  }
1808  }
1809 }
1810 
1811 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1814  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1815  Scalar replaceWith) {
1816  using ATS = Kokkos::ArithTraits<Scalar>;
1817  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1818 
1819  typename ATS::val_type impl_replaceWith = replaceWith;
1820 
1821  auto localMatrix = A->getLocalMatrixDevice();
1822  LocalOrdinal numRows = A->getLocalNumRows();
1823 
1824  Kokkos::parallel_for(
1825  "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1826  KOKKOS_LAMBDA(const LocalOrdinal row) {
1827  if (dirichletRows(row)) {
1828  auto rowView = localMatrix.row(row);
1829  auto length = rowView.length;
1830  for (decltype(length) colID = 0; colID < length; colID++)
1831  rowView.value(colID) = impl_replaceWith;
1832  }
1833  });
1834 }
1835 
1836 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1839  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1840  Scalar replaceWith) {
1841  using ATS = Kokkos::ArithTraits<Scalar>;
1842  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1843 
1844  typename ATS::val_type impl_replaceWith = replaceWith;
1845 
1846  auto myCols = X->getDeviceLocalView(Xpetra::Access::ReadWrite);
1847  size_t numVecs = X->getNumVectors();
1848  Kokkos::parallel_for(
1849  "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1850  KOKKOS_LAMBDA(const size_t i) {
1851  if (dirichletRows(i)) {
1852  for (size_t j = 0; j < numVecs; j++)
1853  myCols(i, j) = impl_replaceWith;
1854  }
1855  });
1856 }
1857 
1858 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1861  const Teuchos::ArrayRCP<const bool>& dirichletCols,
1862  Scalar replaceWith) {
1863  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == A->getColMap()->getLocalNumElements());
1864  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1867  A->getLocalRowView(i, indices, values);
1868  // NOTE: This won't work with fancy node types.
1869  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1870  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1871  if (dirichletCols[indices[j]])
1872  valuesNC[j] = replaceWith;
1873  }
1874 }
1875 
1876 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1879  const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1880  Scalar replaceWith) {
1881  using ATS = Kokkos::ArithTraits<Scalar>;
1882  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1883 
1884  typename ATS::val_type impl_replaceWith = replaceWith;
1885 
1886  auto localMatrix = A->getLocalMatrixDevice();
1887  LocalOrdinal numRows = A->getLocalNumRows();
1888 
1889  Kokkos::parallel_for(
1890  "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1891  KOKKOS_LAMBDA(const LocalOrdinal row) {
1892  auto rowView = localMatrix.row(row);
1893  auto length = rowView.length;
1894  for (decltype(length) colID = 0; colID < length; colID++)
1895  if (dirichletCols(rowView.colidx(colID))) {
1896  rowView.value(colID) = impl_replaceWith;
1897  }
1898  });
1899 }
1900 
1901 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1906  // Make sure A's RowMap == DomainMap
1907  if (!A->getRowMap()->isSameAs(*A->getDomainMap())) {
1908  throw std::runtime_error("UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
1909  }
1911  bool has_import = !importer.is_null();
1912 
1913  // Find the Dirichlet rows
1914  std::vector<LocalOrdinal> dirichletRows;
1915  FindDirichletRows(A, dirichletRows);
1916 
1917 #if 0
1918  printf("[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
1919  for(size_t i=0; i<(size_t) dirichletRows.size(); i++)
1920  printf("%d ",dirichletRows[i]);
1921  printf("\n");
1922  fflush(stdout);
1923 #endif
1924  // Allocate all as non-Dirichlet
1925  isDirichletRow = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRowMap(), true);
1926  isDirichletCol = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getColMap(), true);
1927 
1928  {
1929  Teuchos::ArrayRCP<int> dr_rcp = isDirichletRow->getDataNonConst(0);
1930  Teuchos::ArrayView<int> dr = dr_rcp();
1931  Teuchos::ArrayRCP<int> dc_rcp = isDirichletCol->getDataNonConst(0);
1932  Teuchos::ArrayView<int> dc = dc_rcp();
1933  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1934  dr[dirichletRows[i]] = 1;
1935  if (!has_import) dc[dirichletRows[i]] = 1;
1936  }
1937  }
1938 
1939  if (has_import)
1940  isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
1941 }
1942 
1943 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1947  using ISC = typename Kokkos::ArithTraits<Scalar>::val_type;
1948  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1949  using local_matrix_type = typename CrsMatrix::local_matrix_type;
1950  using values_type = typename local_matrix_type::values_type;
1951 
1952  const ISC ONE = Kokkos::ArithTraits<ISC>::one();
1953  const ISC ZERO = Kokkos::ArithTraits<ISC>::zero();
1954 
1955  // Copy the values array of the old matrix to a new array, replacing all the non-zeros with one
1956  auto localMatrix = original->getLocalMatrixDevice();
1957  TEUCHOS_TEST_FOR_EXCEPTION(!original->hasCrsGraph(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: Cannot get CrsGraph");
1958  values_type new_values("values", localMatrix.nnz());
1959 
1960  Kokkos::parallel_for(
1961  "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(const size_t i) {
1962  if (localMatrix.values(i) != ZERO)
1963  new_values(i) = ONE;
1964  else
1965  new_values(i) = ZERO;
1966  });
1967 
1968  // Build the new matrix
1969  RCP<Matrix> NewMatrix = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(original->getCrsGraph(), new_values);
1970  TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: MatrixFactory::Build() did not return matrix");
1971  NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
1972  return NewMatrix;
1973 }
1974 
1975 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1981  Xpetra::UnderlyingLib lib = sourceBlockedMap.lib();
1982 
1983  // De-stride the map if we have to (might regret this later)
1984  RCP<const Map> fullMap = sourceBlockedMap.getMap();
1985  RCP<const Map> stridedMap = Teuchos::rcp_dynamic_cast<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node>>(fullMap);
1986  if (!stridedMap.is_null()) fullMap = stridedMap->getMap();
1987 
1988  // Initial sanity checking for map compatibil
1989  const size_t numSubMaps = sourceBlockedMap.getNumMaps();
1990  if (!Importer.getSourceMap()->isCompatible(*fullMap))
1991  throw std::runtime_error("GenerateBlockedTargetMap(): Map compatibility error");
1992 
1993  // Build an indicator vector
1995 
1996  for (size_t i = 0; i < numSubMaps; i++) {
1997  RCP<const Map> map = sourceBlockedMap.getMap(i);
1998 
1999  for (size_t j = 0; j < map->getLocalNumElements(); j++) {
2000  LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2001  block_ids->replaceLocalValue(jj, (int)i);
2002  }
2003  }
2004 
2005  // Get the block ids for the new map
2006  RCP<const Map> targetMap = Importer.getTargetMap();
2008  new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
2009  Teuchos::ArrayRCP<const int> dataRCP = new_block_ids->getData(0);
2010  Teuchos::ArrayView<const int> data = dataRCP();
2011 
2012  // Get the GIDs for each subblock
2013  Teuchos::Array<Teuchos::Array<GlobalOrdinal>> elementsInSubMap(numSubMaps);
2014  for (size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2015  elementsInSubMap[data[i]].push_back(targetMap->getGlobalElement(i));
2016  }
2017 
2018  // Generate the new submaps
2019  std::vector<RCP<const Map>> subMaps(numSubMaps);
2020  for (size_t i = 0; i < numSubMaps; i++) {
2021  subMaps[i] = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), elementsInSubMap[i](), targetMap->getIndexBase(), targetMap->getComm());
2022  }
2023 
2024  // Build the BlockedMap
2025  return rcp(new BlockedMap(targetMap, subMaps));
2026 }
2027 
2028 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2031  ArrayView<const GlobalOrdinal> rowElements = rowMap.getLocalElementList();
2032  ArrayView<const GlobalOrdinal> colElements = colMap.getLocalElementList();
2033 
2034  const size_t numElements = rowElements.size();
2035 
2036  if (size_t(colElements.size()) < numElements)
2037  return false;
2038 
2039  bool goodMap = true;
2040  for (size_t i = 0; i < numElements; i++)
2041  if (rowElements[i] != colElements[i]) {
2042  goodMap = false;
2043  break;
2044  }
2045 
2046  return goodMap;
2047 }
2048 
2049 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2054  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2055  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2056  using device = typename local_graph_type::device_type;
2057  using execution_space = typename local_matrix_type::execution_space;
2058  using ordinal_type = typename local_matrix_type::ordinal_type;
2059 
2060  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2061 
2062  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2063 
2066 
2067  // Copy out and reorder data
2068  auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2069  Kokkos::parallel_for(
2070  "Utilities::ReverseCuthillMcKee",
2071  Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2072  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2073  view1D(rcmOrder(rowIdx)) = rowIdx;
2074  });
2075  return retval;
2076 }
2077 
2078 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2083  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2084  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2085  using device = typename local_graph_type::device_type;
2086  using execution_space = typename local_matrix_type::execution_space;
2087  using ordinal_type = typename local_matrix_type::ordinal_type;
2088 
2089  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2090  LocalOrdinal numRows = localGraph.numRows();
2091 
2092  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2093 
2096 
2097  // Copy out data
2098  auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2099  // Since KokkosKernels produced RCM, also reverse the order of the view to get CM
2100  Kokkos::parallel_for(
2101  "Utilities::ReverseCuthillMcKee",
2102  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2103  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2104  view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2105  });
2106  return retval;
2107 }
2108 
2109 } // namespace MueLu
2110 
2111 #define MUELU_UTILITIESBASE_SHORT
2112 #endif // MUELU_UTILITIESBASE_DEF_HPP
2113 
2114 // LocalWords: LocalOrdinal
static void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon.
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows)
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< bool > &dirichletRows, Teuchos::ArrayRCP< bool > dirichletCols, Teuchos::ArrayRCP< bool > dirichletDomain)
Detects Dirichlet columns &amp; domains from a list of Dirichlet rows.
#define MueLu_sumAll(rcpComm, in, out)
Array< T > & append(const T &x)
virtual int getSize() const =0
MueLu::DefaultLocalOrdinal LocalOrdinal
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
virtual LO getBlockSize() const override
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Matrix &Op)
static void FindDirichletRowsAndPropagateToCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletRow, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletCol)
virtual int getRank() const =0
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
static magnitudeType eps()
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
virtual const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a graph.
size_type size() const
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
LocalOrdinal LO
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > ReplaceNonZerosWithOnes(const RCP< Matrix > &original)
Creates a copy of a matrix where the non-zero entries are replaced by ones.
size_t getLocalNumRows() const override
size_type size() const
Exception throws to report incompatible objects (like maps).
static void SetRandomSeed(const Teuchos::Comm< int > &comm)
Set seed for random number generator.
MueLu::DefaultNode Node
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getTargetMap() const =0
static RCP< Time > getNewTimer(const std::string &name)
void resize(const size_type n, const T &val=T())
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
virtual void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const =0
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
virtual void getLocalRowView(LocalOrdinal LocalRow, ArrayView< const LocalOrdinal > &indices, ArrayView< const Scalar > &values) const =0
static void FindDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, std::vector< LocalOrdinal > &dirichletRows, bool count_twos_as_dirichlet=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static RCP< CrsMatrixWrap > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a matrix.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
#define I(i, j)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
virtual bool isFillComplete() const =0
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
size_t getNumMaps() const
virtual void replaceLocalValues(LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)=0
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getMap() const =0
static RCP< const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > > GeneratedBlockedTargetMap(const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > &sourceBlockedMap, const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
T * getRawPtr() const
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
static Teuchos::RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static void seedrandom(unsigned int s)
static Teuchos::ArrayRCP< const bool > DetectDirichletRowsExt(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool &bHasZeroDiagonal, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::zero())
Detect Dirichlet rows (extended version)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
const RCP< const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getMap(size_t i, bool bThyraMode=false) const
virtual UnderlyingLib lib() const
static magnitudeType magnitude(T a)
TransListIter iter
void push_back(const value_type &x)
virtual RCP< const CrsGraph > getCrsGraph() const =0
static Teuchos::Array< Magnitude > ResidualNorm(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
int length() const
size_type size() const
Scalar SC
impl_scalar_type_dualview::t_dev::const_type getValuesDevice(const LO &lclRow) const
virtual Teuchos::RCP< const Map > getRangeMap() const =0
static RCP< Teuchos::FancyOStream > MakeFancy(std::ostream &os)
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
void residual(const Operator< SC, LO, GO, NO > &Aop, const MultiVector< SC, LO, GO, NO > &X_in, const MultiVector< SC, LO, GO, NO > &B_in, MultiVector< SC, LO, GO, NO > &R_in)
Exception throws to report errors in the internal logical of the program.
static Scalar Frobenius(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B)
Frobenius inner product of two matrices.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getSourceMap() const =0
#define TEUCHOS_ASSERT(assertion_test)
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static Kokkos::View< bool *, typename NO::device_type::memory_space > DetectDirichletRows_kokkos(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
virtual Teuchos::ArrayRCP< const Scalar > getData(size_t j) const =0
virtual Teuchos::RCP< const Map > getDomainMap() const =0
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)
bool is_null() const