MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UtilitiesBase_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_UTILITIESBASE_DEF_HPP
11 #define MUELU_UTILITIESBASE_DEF_HPP
12 
13 #include "MueLu_ConfigDefs.hpp"
14 
16 
17 #include <Kokkos_Core.hpp>
18 #include <KokkosSparse_CrsMatrix.hpp>
19 #include <KokkosSparse_getDiagCopy.hpp>
20 
21 #include <Xpetra_BlockedVector.hpp>
22 #include <Xpetra_BlockedMap.hpp>
23 #include <Xpetra_BlockedMultiVector.hpp>
24 #include <Xpetra_ExportFactory.hpp>
25 
26 #include <Xpetra_Import.hpp>
27 #include <Xpetra_ImportFactory.hpp>
28 #include <Xpetra_MatrixMatrix.hpp>
29 #include <Xpetra_CrsGraph.hpp>
31 #include <Xpetra_CrsMatrixWrap.hpp>
32 #include <Xpetra_StridedMap.hpp>
33 
34 #include "MueLu_Exceptions.hpp"
36 
37 #include <KokkosKernels_Handle.hpp>
38 #include <KokkosGraph_RCM.hpp>
39 
40 namespace MueLu {
41 
42 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
46  if (Op.is_null())
47  return Teuchos::null;
48  return rcp(new CrsMatrixWrap(Op));
49 }
50 
51 template <class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal,
54  class Node>
57  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold,
58  const bool keepDiagonal) {
60  using row_ptr_type = typename crs_matrix::local_graph_type::row_map_type::non_const_type;
61  using col_idx_type = typename crs_matrix::local_graph_type::entries_type::non_const_type;
62  using vals_type = typename crs_matrix::local_matrix_type::values_type;
63  using execution_space = typename crs_matrix::local_matrix_type::execution_space;
64 
65  using ATS = Kokkos::ArithTraits<Scalar>;
66  using impl_SC = typename ATS::val_type;
67  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
68 
69  auto lclA = A->getLocalMatrixDevice();
70 
71  auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
72  col_idx_type idx;
73  vals_type vals;
74  LocalOrdinal nnz;
75 
76  if (keepDiagonal) {
77  auto lclRowMap = A->getRowMap()->getLocalMap();
78  auto lclColMap = A->getColMap()->getLocalMap();
79  Kokkos::parallel_scan(
80  "removeSmallEntries::rowptr",
81  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
82  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
83  auto row = lclA.row(rlid);
84  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
85  for (LocalOrdinal k = 0; k < row.length; ++k) {
86  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
87  partial_nnz += 1;
88  }
89  }
90  if (is_final)
91  rowptr(rlid + 1) = partial_nnz;
92  },
93  nnz);
94 
95  idx = col_idx_type("idx", nnz);
96  vals = vals_type("vals", nnz);
97 
98  Kokkos::parallel_for(
99  "removeSmallEntries::indicesValues",
100  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
101  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
102  auto row = lclA.row(rlid);
103  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
104  auto I = rowptr(rlid);
105  for (LocalOrdinal k = 0; k < row.length; ++k) {
106  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
107  idx(I) = row.colidx(k);
108  vals(I) = row.value(k);
109  I += 1;
110  }
111  }
112  });
113 
114  Kokkos::fence();
115  } else {
116  Kokkos::parallel_scan(
117  "removeSmallEntries::rowptr",
118  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
119  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
120  auto row = lclA.row(rlid);
121  for (LocalOrdinal k = 0; k < row.length; ++k) {
122  if (impl_ATS::magnitude(row.value(k)) > threshold) {
123  partial_nnz += 1;
124  }
125  }
126  if (is_final)
127  rowptr(rlid + 1) = partial_nnz;
128  },
129  nnz);
130 
131  idx = col_idx_type("idx", nnz);
132  vals = vals_type("vals", nnz);
133 
134  Kokkos::parallel_for(
135  "removeSmallEntries::indicesValues",
136  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
137  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
138  auto row = lclA.row(rlid);
139  auto I = rowptr(rlid);
140  for (LocalOrdinal k = 0; k < row.length; ++k) {
141  if (impl_ATS::magnitude(row.value(k)) > threshold) {
142  idx(I) = row.colidx(k);
143  vals(I) = row.value(k);
144  I += 1;
145  }
146  }
147  });
148 
149  Kokkos::fence();
150  }
151 
152  auto lclNewA = typename crs_matrix::local_matrix_type("thresholdedMatrix", lclA.numRows(), lclA.numCols(), nnz, vals, rowptr, idx);
153  auto newA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclNewA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
154 
155  return newA;
156 }
157 
158 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
159 RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
161  GetThresholdedMatrix(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ain, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) {
162  auto crsWrap = rcp_dynamic_cast<CrsMatrixWrap>(Ain);
163  if (!crsWrap.is_null()) {
164  auto crsMat = crsWrap->getCrsMatrix();
165  auto filteredMat = removeSmallEntries(crsMat, threshold, keepDiagonal);
166  return rcp_static_cast<CrsMatrixWrap>(filteredMat);
167  }
168 
169  RCP<const Map> rowmap = Ain->getRowMap();
170  RCP<const Map> colmap = Ain->getColMap();
171  RCP<CrsMatrixWrap> Aout = rcp(new CrsMatrixWrap(rowmap, expectedNNZperRow <= 0 ? Ain->getGlobalMaxNumRowEntries() : expectedNNZperRow));
172  // loop over local rows
173  for (size_t row = 0; row < Ain->getLocalNumRows(); row++) {
174  size_t nnz = Ain->getNumEntriesInLocalRow(row);
175 
178  Ain->getLocalRowView(row, indices, vals);
179 
180  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::ThresholdAFilterFactory::Build: number of nonzeros not equal to number of indices? Error.");
181 
184  size_t nNonzeros = 0;
185  if (keepDiagonal) {
186  GlobalOrdinal glbRow = rowmap->getGlobalElement(row);
187  LocalOrdinal lclColIdx = colmap->getLocalElement(glbRow);
188  for (size_t i = 0; i < (size_t)indices.size(); i++) {
189  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > Teuchos::ScalarTraits<Scalar>::magnitude(threshold) || indices[i] == lclColIdx) {
190  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
191  valout[nNonzeros] = vals[i];
192  nNonzeros++;
193  }
194  }
195  } else
196  for (size_t i = 0; i < (size_t)indices.size(); i++) {
198  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
199  valout[nNonzeros] = vals[i];
200  nNonzeros++;
201  }
202  }
203 
204  indout.resize(nNonzeros);
205  valout.resize(nNonzeros);
206 
207  Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
208  }
209  Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());
210 
211  return Aout;
212 }
213 
214 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
218  using STS = Teuchos::ScalarTraits<Scalar>;
219  RCP<CrsGraph> sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), expectedNNZperRow <= 0 ? A->getGlobalMaxNumRowEntries() : expectedNNZperRow);
220 
221  RCP<Vector> diag = GetMatrixOverlappedDiagonal(*A);
222  ArrayRCP<const Scalar> D = diag->getData(0);
223 
224  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
227  A->getLocalRowView(row, indices, vals);
228 
229  GlobalOrdinal globalRow = A->getRowMap()->getGlobalElement(row);
230  LocalOrdinal col = A->getColMap()->getLocalElement(globalRow);
231 
232  const Scalar Dk = STS::magnitude(D[col]) > 0.0 ? STS::magnitude(D[col]) : 1.0;
233  Array<GlobalOrdinal> indicesNew;
234 
235  for (size_t i = 0; i < size_t(indices.size()); i++)
236  // keep diagonal per default
237  if (col == indices[i] || STS::magnitude(STS::squareroot(Dk) * vals[i] * STS::squareroot(Dk)) > STS::magnitude(threshold))
238  indicesNew.append(A->getColMap()->getGlobalElement(indices[i]));
239 
240  sparsityPattern->insertGlobalIndices(globalRow, ArrayView<const GlobalOrdinal>(indicesNew.data(), indicesNew.length()));
241  }
242  sparsityPattern->fillComplete();
243 
244  return sparsityPattern;
245 }
246 
247 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
251  size_t numRows = A.getRowMap()->getLocalNumElements();
252  Teuchos::ArrayRCP<Scalar> diag(numRows);
255  for (size_t i = 0; i < numRows; ++i) {
256  A.getLocalRowView(i, cols, vals);
257  LocalOrdinal j = 0;
258  for (; j < cols.size(); ++j) {
259  if (Teuchos::as<size_t>(cols[j]) == i) {
260  diag[i] = vals[j];
261  break;
262  }
263  }
264  if (j == cols.size()) {
265  // Diagonal entry is absent
267  }
268  }
269  return diag;
270 }
271 
272 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
275  GetMatrixDiagonal(const Matrix& A) {
276  const auto rowMap = A.getRowMap();
278 
279  const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
280  if ((crsOp != NULL) && (rowMap->lib() == Xpetra::UseTpetra)) {
281  using device_type = typename CrsGraph::device_type;
282  Kokkos::View<size_t*, device_type> offsets("offsets", rowMap->getLocalNumElements());
283  crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
284  crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
285  } else {
286  A.getLocalDiagCopy(*diag);
287  }
288 
289  return diag;
290 }
291 
292 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
293 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
294 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
295 // GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A, Magnitude tol, Scalar valReplacement) {
296 // Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse");
297 
298 // RCP<const Map> rowMap = A.getRowMap();
299 // RCP<Vector> diag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap,true);
300 
301 // A.getLocalDiagCopy(*diag);
302 
303 // RCP<Vector> inv = MueLu::UtilitiesBase<Scalar,LocalOrdinal,GlobalOrdinal,Node>::GetInverse(diag, tol, valReplacement);
304 
305 // return inv;
306 // }
307 
308 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
313  Scalar valReplacement,
314  const bool doLumped) {
315  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities::GetMatrixDiagonalInverse");
316 
317  RCP<const BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpFromRef(A));
318  if (!bA.is_null()) {
319  RCP<const Map> rowMap = A.getRowMap();
321  A.getLocalDiagCopy(*diag);
323  return inv;
324  }
325 
326  // Some useful type definitions
327  using local_matrix_type = typename Matrix::local_matrix_type;
328  // using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
329  using value_type = typename local_matrix_type::value_type;
330  using values_type = typename local_matrix_type::values_type;
331  using scalar_type = typename values_type::non_const_value_type;
332  using ordinal_type = typename local_matrix_type::ordinal_type;
333  using execution_space = typename local_matrix_type::execution_space;
334  // using memory_space = typename local_matrix_type::memory_space;
335  // Be careful with this one, if using Kokkos::ArithTraits<Scalar>
336  // you are likely to run into errors when handling std::complex<>
337  // a good way to work around that is to use the following:
338  // using KAT = Kokkos::ArithTraits<Kokkos::ArithTraits<Scalar>::val_type> >
339  // here we have: value_type = Kokkos::ArithTraits<Scalar>::val_type
340  using KAT = Kokkos::ArithTraits<value_type>;
341 
342  // Get/Create distributed objects
343  RCP<const Map> rowMap = A.getRowMap();
344  RCP<Vector> diag = VectorFactory::Build(rowMap, false);
345 
346  // Now generate local objects
347  local_matrix_type localMatrix = A.getLocalMatrixDevice();
348  auto diagVals = diag->getDeviceLocalView(Xpetra::Access::ReadWrite);
349 
350  ordinal_type numRows = localMatrix.graph.numRows();
351 
352  scalar_type valReplacement_dev = valReplacement;
353 
354  // Note: 2019-11-21, LBV
355  // This could be implemented with a TeamPolicy over the rows
356  // and a TeamVectorRange over the entries in a row if performance
357  // becomes more important here.
358  if (!doLumped)
359  Kokkos::parallel_for(
360  "Utilities::GetMatrixDiagonalInverse",
361  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
362  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
363  bool foundDiagEntry = false;
364  auto myRow = localMatrix.rowConst(rowIdx);
365  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
366  if (myRow.colidx(entryIdx) == rowIdx) {
367  foundDiagEntry = true;
368  if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
369  diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
370  } else {
371  diagVals(rowIdx, 0) = valReplacement_dev;
372  }
373  break;
374  }
375  }
376 
377  if (!foundDiagEntry) {
378  diagVals(rowIdx, 0) = KAT::zero();
379  }
380  });
381  else
382  Kokkos::parallel_for(
383  "Utilities::GetMatrixDiagonalInverse",
384  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
385  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
386  auto myRow = localMatrix.rowConst(rowIdx);
387  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
388  diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
389  }
390  if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
391  diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
392  else
393  diagVals(rowIdx, 0) = valReplacement_dev;
394  });
395 
396  return diag;
397 }
398 
399 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
403  Magnitude tol,
404  Scalar valReplacement,
405  const bool replaceSingleEntryRowWithZero,
406  const bool useAverageAbsDiagVal) {
407  typedef Teuchos::ScalarTraits<Scalar> TST;
408 
409  RCP<Vector> diag = Teuchos::null;
410  const Scalar zero = TST::zero();
411  const Scalar one = TST::one();
412  const Scalar two = one + one;
413 
414  Teuchos::RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
415 
417  Teuchos::rcp_dynamic_cast<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(rcpA);
418  if (bA == Teuchos::null) {
419  RCP<const Map> rowMap = rcpA->getRowMap();
421 
422  if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
423  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetLumpedMatrixDiagonal (Kokkos implementation)");
424  // Implement using Kokkos
425  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
426  using local_matrix_type = typename Matrix::local_matrix_type;
427  using execution_space = typename local_vector_type::execution_space;
428  // using rowmap_type = typename local_matrix_type::row_map_type;
429  // using entries_type = typename local_matrix_type::index_type;
430  using values_type = typename local_matrix_type::values_type;
431  using scalar_type = typename values_type::non_const_value_type;
432  using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
433  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
434  using KAT_M = typename Kokkos::ArithTraits<mag_type>;
435  using size_type = typename local_matrix_type::non_const_size_type;
436 
437  local_vector_type diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
438  local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
439  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
440  scalar_type valReplacement_dev = valReplacement;
441 
442  if (doReciprocal) {
443  Kokkos::View<int*, execution_space> nnzPerRow("nnz per rows", diag_dev.extent(0));
444  Kokkos::View<scalar_type*, execution_space> regSum("regSum", diag_dev.extent(0));
445  Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev("avgAbsDiagVal");
446  Kokkos::View<int, execution_space> numDiagsEqualToOne_dev("numDiagsEqualToOne");
447 
448  {
449  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for (doReciprocal)");
450  Kokkos::parallel_for(
451  "GetLumpedMatrixDiagonal", my_policy,
452  KOKKOS_LAMBDA(const int rowIdx) {
453  diag_dev(rowIdx, 0) = KAT_S::zero();
454  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
455  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
456  ++entryIdx) {
457  regSum(rowIdx) += local_mat_dev.values(entryIdx);
458  if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
459  ++nnzPerRow(rowIdx);
460  }
461  diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
462  if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
463  Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
464  }
465  }
466 
467  if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
468  Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
469  }
470  });
471  }
472  if (useAverageAbsDiagVal) {
473  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: useAverageAbsDiagVal");
474  typename Kokkos::View<mag_type, execution_space>::HostMirror avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
475  Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
476  int numDiagsEqualToOne;
477  Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
478 
479  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal() - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
480  }
481 
482  {
483  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("ComputeLumpedDiagonalInverse: parallel_for (doReciprocal)");
484  Kokkos::parallel_for(
485  "ComputeLumpedDiagonalInverse", my_policy,
486  KOKKOS_LAMBDA(const int rowIdx) {
487  if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
488  diag_dev(rowIdx, 0) = KAT_S::zero();
489  } else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
490  diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
491  } else {
492  if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
493  diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
494  } else {
495  diag_dev(rowIdx, 0) = valReplacement_dev;
496  }
497  }
498  });
499  }
500 
501  } else {
502  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for");
503  Kokkos::parallel_for(
504  "GetLumpedMatrixDiagonal", my_policy,
505  KOKKOS_LAMBDA(const int rowIdx) {
506  diag_dev(rowIdx, 0) = KAT_S::zero();
507  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
508  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
509  ++entryIdx) {
510  diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
511  }
512  });
513  }
514  } else {
515  // Implement using Teuchos
516  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase: GetLumpedMatrixDiagonal: (Teuchos implementation)");
517  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
518  Teuchos::Array<Scalar> regSum(diag->getLocalLength());
521 
522  std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
523 
524  // FIXME 2021-10-22 JHU If this is called with doReciprocal=false, what should the correct behavior be? Currently,
525  // FIXME 2021-10-22 JHU the diagonal entry is set to be the sum of the absolute values of the row entries.
526 
527  const Magnitude zeroMagn = TST::magnitude(zero);
528  Magnitude avgAbsDiagVal = TST::magnitude(zero);
529  int numDiagsEqualToOne = 0;
530  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
531  nnzPerRow[i] = 0;
532  rcpA->getLocalRowView(i, cols, vals);
533  diagVals[i] = zero;
534  for (LocalOrdinal j = 0; j < cols.size(); ++j) {
535  regSum[i] += vals[j];
536  const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
537  if (rowEntryMagn > zeroMagn)
538  nnzPerRow[i]++;
539  diagVals[i] += rowEntryMagn;
540  if (static_cast<size_t>(cols[j]) == i)
541  avgAbsDiagVal += rowEntryMagn;
542  }
543  if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
544  numDiagsEqualToOne++;
545  }
546  if (useAverageAbsDiagVal)
547  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
548  if (doReciprocal) {
549  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
550  if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
551  diagVals[i] = zero;
552  else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
553  diagVals[i] = one / TST::magnitude((two * regSum[i]));
554  else {
555  if (TST::magnitude(diagVals[i]) > tol)
556  diagVals[i] = one / diagVals[i];
557  else {
558  diagVals[i] = valReplacement;
559  }
560  }
561  }
562  }
563  }
564  } else {
566  "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
567  diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(bA->getRangeMapExtractor()->getFullMap(), true);
568 
569  for (size_t row = 0; row < bA->Rows(); ++row) {
570  for (size_t col = 0; col < bA->Cols(); ++col) {
571  if (!bA->getMatrix(row, col).is_null()) {
572  // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset!
573  bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(bA->getMatrix(row, col)) == Teuchos::null);
574  RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
575  RCP<const Vector> dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row, col)));
576  ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
577  bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
578  }
579  }
580  }
581  }
582 
583  return diag;
584 }
585 
586 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
590  // Get/Create distributed objects
591  RCP<const Map> rowMap = A.getRowMap();
593 
594  // Implement using Kokkos
595  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
596  using local_matrix_type = typename Matrix::local_matrix_type;
597  using execution_space = typename local_vector_type::execution_space;
598  using values_type = typename local_matrix_type::values_type;
599  using scalar_type = typename values_type::non_const_value_type;
600  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
601 
602  auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
603  auto local_mat_dev = A.getLocalMatrixDevice();
604  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
605 
606  Kokkos::parallel_for(
607  "GetMatrixMaxMinusOffDiagonal", my_policy,
608  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
609  auto mymax = KAT_S::zero();
610  auto row = local_mat_dev.rowConst(rowIdx);
611  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
612  if (rowIdx != row.colidx(entryIdx)) {
613  if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
614  mymax = -KAT_S::real(row.value(entryIdx));
615  }
616  }
617  diag_dev(rowIdx, 0) = mymax;
618  });
619 
620  return diag;
621 }
622 
623 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
627  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
628 
629  // Get/Create distributed objects
630  RCP<const Map> rowMap = A.getRowMap();
632 
633  // Implement using Kokkos
634  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
635  using local_matrix_type = typename Matrix::local_matrix_type;
636  using execution_space = typename local_vector_type::execution_space;
637  using values_type = typename local_matrix_type::values_type;
638  using scalar_type = typename values_type::non_const_value_type;
639  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
640 
641  auto diag_dev = diag->getDeviceLocalView(Xpetra::Access::OverwriteAll);
642  auto local_mat_dev = A.getLocalMatrixDevice();
643  auto local_block_dev = BlockNumber.getDeviceLocalView(Xpetra::Access::ReadOnly);
644  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
645 
646  Kokkos::parallel_for(
647  "GetMatrixMaxMinusOffDiagonal", my_policy,
648  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
649  auto mymax = KAT_S::zero();
650  auto row = local_mat_dev.row(rowIdx);
651  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
652  if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
653  if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
654  mymax = -KAT_S::real(row.value(entryIdx));
655  }
656  }
657  diag_dev(rowIdx, 0) = mymax;
658  });
659 
660  return diag;
661 }
662 
663 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
668 
669  // check whether input vector "v" is a BlockedVector
670  RCP<const BlockedVector> bv = Teuchos::rcp_dynamic_cast<const BlockedVector>(v);
671  if (bv.is_null() == false) {
672  RCP<BlockedVector> bret = Teuchos::rcp_dynamic_cast<BlockedVector>(ret);
673  TEUCHOS_TEST_FOR_EXCEPTION(bret.is_null() == true, MueLu::Exceptions::RuntimeError, "MueLu::UtilitiesBase::GetInverse: return vector should be of type BlockedVector");
674  RCP<const BlockedMap> bmap = bv->getBlockedMap();
675  for (size_t r = 0; r < bmap->getNumMaps(); ++r) {
676  RCP<const MultiVector> submvec = bv->getMultiVector(r, bmap->getThyraMode());
677  RCP<const Vector> subvec = submvec->getVector(0);
679  bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
680  }
681  return ret;
682  }
683 
684  // v is an {Epetra,Tpetra}Vector: work with the underlying raw data
685  ArrayRCP<Scalar> retVals = ret->getDataNonConst(0);
686  ArrayRCP<const Scalar> inputVals = v->getData(0);
687  for (size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
688  if (Teuchos::ScalarTraits<Scalar>::magnitude(inputVals[i]) > tol)
689  retVals[i] = Teuchos::ScalarTraits<Scalar>::one() / inputVals[i];
690  else
691  retVals[i] = valReplacement;
692  }
693  return ret;
694 }
695 
696 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
697 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
698 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
699 // GetMatrixOverlappedDiagonal(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A) {
700 // RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
701 
702 // // Undo block map (if we have one)
703 // RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
704 // if(!browMap.is_null()) rowMap = browMap->getMap();
705 
706 // RCP<Vector> localDiag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap);
707 // try {
708 // const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
709 // if (crsOp == NULL) {
710 // throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
711 // }
712 // Teuchos::ArrayRCP<size_t> offsets;
713 // crsOp->getLocalDiagOffsets(offsets);
714 // crsOp->getLocalDiagCopy(*localDiag,offsets());
715 // }
716 // catch (...) {
717 // ArrayRCP<Scalar> localDiagVals = localDiag->getDataNonConst(0);
718 // Teuchos::ArrayRCP<Scalar> diagVals = GetMatrixDiagonal(A);
719 // for (LocalOrdinal i = 0; i < localDiagVals.size(); i++)
720 // localDiagVals[i] = diagVals[i];
721 // localDiagVals = diagVals = null;
722 // }
723 
724 // RCP<Vector> diagonal = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap);
725 // RCP< const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
726 // importer = A.getCrsGraph()->getImporter();
727 // if (importer == Teuchos::null) {
728 // importer = Xpetra::ImportFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap, colMap);
729 // }
730 // diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
731 // return diagonal;
732 // }
733 
734 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
738  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
739  RCP<Vector> localDiag = GetMatrixDiagonal(A);
740  RCP<Vector> diagonal = VectorFactory::Build(colMap);
741  RCP<const Import> importer = A.getCrsGraph()->getImporter();
742  if (importer == Teuchos::null) {
743  importer = ImportFactory::Build(rowMap, colMap);
744  }
745  diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
746 
747  return diagonal;
748 }
749 
750 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
754  using STS = typename Teuchos::ScalarTraits<SC>;
755 
756  // Undo block map (if we have one)
757  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
758  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
759  if (!browMap.is_null()) rowMap = browMap->getMap();
760 
763  ArrayRCP<SC> localVals = local->getDataNonConst(0);
764 
765  for (LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
766  size_t nnz = A.getNumEntriesInLocalRow(row);
767  ArrayView<const LO> indices;
768  ArrayView<const SC> vals;
769  A.getLocalRowView(row, indices, vals);
770 
771  SC si = STS::zero();
772 
773  for (LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
774  if (indices[colID] != row) {
775  si += vals[colID];
776  }
777  }
778  localVals[row] = si;
779  }
780 
782  importer = A.getCrsGraph()->getImporter();
783  if (importer == Teuchos::null) {
784  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
785  }
786  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
787  return ghosted;
788 }
789 
790 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
794  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
795  using STS = typename Teuchos::ScalarTraits<Scalar>;
796  using MTS = typename Teuchos::ScalarTraits<Magnitude>;
797  using MT = Magnitude;
798  using RealValuedVector = Xpetra::Vector<MT, LO, GO, Node>;
799 
800  // Undo block map (if we have one)
801  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
802  if (!browMap.is_null()) rowMap = browMap->getMap();
803 
806  ArrayRCP<MT> localVals = local->getDataNonConst(0);
807 
808  for (LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
809  size_t nnz = A.getNumEntriesInLocalRow(rowIdx);
810  ArrayView<const LO> indices;
811  ArrayView<const SC> vals;
812  A.getLocalRowView(rowIdx, indices, vals);
813 
814  MT si = MTS::zero();
815 
816  for (LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
817  if (indices[colID] != rowIdx) {
818  si += STS::magnitude(vals[colID]);
819  }
820  }
821  localVals[rowIdx] = si;
822  }
823 
825  importer = A.getCrsGraph()->getImporter();
826  if (importer == Teuchos::null) {
827  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
828  }
829  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
830  return ghosted;
831 }
832 
833 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
836  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
837  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
838  const size_t numVecs = X.getNumVectors();
839  RCP<MultiVector> RES = Residual(Op, X, RHS);
840  Teuchos::Array<Magnitude> norms(numVecs);
841  RES->norm2(norms);
842  return norms;
843 }
844 
845 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
848  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
849  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
850  const size_t numVecs = X.getNumVectors();
851  Residual(Op, X, RHS, Resid);
852  Teuchos::Array<Magnitude> norms(numVecs);
853  Resid.norm2(norms);
854  return norms;
855 }
856 
857 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
860  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
861  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
862  const size_t numVecs = X.getNumVectors();
863  // TODO Op.getRangeMap should return a BlockedMap if it is a BlockedCrsOperator
864  RCP<MultiVector> RES = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(RHS.getMap(), numVecs, false); // no need to initialize to zero
865  Op.residual(X, RHS, *RES);
866  return RES;
867 }
868 
869 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
871  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
872  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides");
873  TEUCHOS_TEST_FOR_EXCEPTION(Resid.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of residual vectors != number of right-hand sides");
874  Op.residual(X, RHS, Resid);
875 }
876 
877 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
878 Scalar
880  PowerMethod(const Matrix& A, bool scaleByDiag,
881  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
882  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
883  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
884 
885  // power iteration
886  RCP<Vector> diagInvVec;
887  if (scaleByDiag) {
888  diagInvVec = GetMatrixDiagonalInverse(A);
889  }
890 
891  Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
892  return lambda;
893 }
894 
895 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
896 Scalar
898  PowerMethod(const Matrix& A, const RCP<Vector>& diagInvVec,
899  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
900  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
901  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
902 
903  // Create three vectors, fill z with random numbers
907 
908  z->setSeed(seed); // seed random number generator
909  z->randomize(true); // use Xpetra implementation: -> same results for Epetra and Tpetra
910 
911  Teuchos::Array<Magnitude> norms(1);
912 
913  typedef Teuchos::ScalarTraits<Scalar> STS;
914 
915  const Scalar zero = STS::zero(), one = STS::one();
916 
917  Scalar lambda = zero;
918  Magnitude residual = STS::magnitude(zero);
919 
920  // power iteration
921  for (int iter = 0; iter < niters; ++iter) {
922  z->norm2(norms); // Compute 2-norm of z
923  q->update(one / norms[0], *z, zero); // Set q = z / normz
924  A.apply(*q, *z); // Compute z = A*q
925  if (diagInvVec != Teuchos::null)
926  z->elementWiseMultiply(one, *diagInvVec, *z, zero);
927  lambda = q->dot(*z); // Approximate maximum eigenvalue: lamba = dot(q,z)
928 
929  if (iter % 100 == 0 || iter + 1 == niters) {
930  r->update(1.0, *z, -lambda, *q, zero); // Compute A*q - lambda*q
931  r->norm2(norms);
932  residual = STS::magnitude(norms[0] / lambda);
933  if (verbose) {
934  std::cout << "Iter = " << iter
935  << " Lambda = " << lambda
936  << " Residual of A*q - lambda*q = " << residual
937  << std::endl;
938  }
939  }
940  if (residual < tolerance)
941  break;
942  }
943  return lambda;
944 }
945 
946 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
947 RCP<Teuchos::FancyOStream>
949  MakeFancy(std::ostream& os) {
950  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(os));
951  return fancy;
952 }
953 
954 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
958  const size_t numVectors = v.size();
959 
961  for (size_t j = 0; j < numVectors; j++) {
962  d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
963  }
965 }
966 
967 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
971  const size_t numVectors = v.size();
973 
975  for (size_t j = 0; j < numVectors; j++) {
976  d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
977  }
979 }
980 
981 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
985  LocalOrdinal numRows = A.getLocalNumRows();
986  typedef Teuchos::ScalarTraits<Scalar> STS;
987  ArrayRCP<bool> boundaryNodes(numRows, true);
988  if (count_twos_as_dirichlet) {
989  for (LocalOrdinal row = 0; row < numRows; row++) {
992  A.getLocalRowView(row, indices, vals);
993  size_t nnz = A.getNumEntriesInLocalRow(row);
994  if (nnz > 2) {
995  size_t col;
996  for (col = 0; col < nnz; col++)
997  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
998  if (!boundaryNodes[row])
999  break;
1000  boundaryNodes[row] = false;
1001  }
1002  if (col == nnz)
1003  boundaryNodes[row] = true;
1004  }
1005  }
1006  } else {
1007  for (LocalOrdinal row = 0; row < numRows; row++) {
1010  A.getLocalRowView(row, indices, vals);
1011  size_t nnz = A.getNumEntriesInLocalRow(row);
1012  if (nnz > 1)
1013  for (size_t col = 0; col < nnz; col++)
1014  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1015  boundaryNodes[row] = false;
1016  break;
1017  }
1018  }
1019  }
1020  return boundaryNodes;
1021 }
1022 
1023 template <class SC, class LO, class GO, class NO, class memory_space>
1024 Kokkos::View<bool*, memory_space>
1026  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1027  const bool count_twos_as_dirichlet) {
1028  using impl_scalar_type = typename Kokkos::ArithTraits<SC>::val_type;
1029  using ATS = Kokkos::ArithTraits<impl_scalar_type>;
1030  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1031  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1032 
1033  Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
1034 
1035  if (helpers::isTpetraBlockCrs(A)) {
1036  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = toTpetraBlock(A);
1037  auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1038  auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
1039  auto values = Am.getValuesDevice();
1040  LO numBlockRows = Am.getLocalNumRows();
1041  const LO stride = Am.getBlockSize() * Am.getBlockSize();
1042 
1043  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numBlockRows);
1044 
1045  if (count_twos_as_dirichlet)
1046  throw Exceptions::RuntimeError("BlockCrs does not support counting twos as Dirichlet");
1047 
1048  Kokkos::parallel_for(
1049  "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1050  KOKKOS_LAMBDA(const LO row) {
1051  auto rowView = b_graph.rowConst(row);
1052  auto length = rowView.length;
1053  LO valstart = b_rowptr[row] * stride;
1054 
1055  boundaryNodes(row) = true;
1056  decltype(length) colID = 0;
1057  for (; colID < length; colID++) {
1058  if (rowView.colidx(colID) != row) {
1059  LO current = valstart + colID * stride;
1060  for (LO k = 0; k < stride; k++) {
1061  if (ATS::magnitude(values[current + k]) > tol) {
1062  boundaryNodes(row) = false;
1063  break;
1064  }
1065  }
1066  }
1067  if (boundaryNodes(row) == false)
1068  break;
1069  }
1070  });
1071  } else {
1072  auto localMatrix = A.getLocalMatrixDevice();
1073  LO numRows = A.getLocalNumRows();
1074  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
1075 
1076  if (count_twos_as_dirichlet)
1077  Kokkos::parallel_for(
1078  "MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0, numRows),
1079  KOKKOS_LAMBDA(const LO row) {
1080  auto rowView = localMatrix.row(row);
1081  auto length = rowView.length;
1082 
1083  boundaryNodes(row) = true;
1084  if (length > 2) {
1085  decltype(length) colID = 0;
1086  for (; colID < length; colID++)
1087  if ((rowView.colidx(colID) != row) &&
1088  (ATS::magnitude(rowView.value(colID)) > tol)) {
1089  if (!boundaryNodes(row))
1090  break;
1091  boundaryNodes(row) = false;
1092  }
1093  if (colID == length)
1094  boundaryNodes(row) = true;
1095  }
1096  });
1097  else
1098  Kokkos::parallel_for(
1099  "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1100  KOKKOS_LAMBDA(const LO row) {
1101  auto rowView = localMatrix.row(row);
1102  auto length = rowView.length;
1103 
1104  boundaryNodes(row) = true;
1105  for (decltype(length) colID = 0; colID < length; colID++)
1106  if ((rowView.colidx(colID) != row) &&
1107  (ATS::magnitude(rowView.value(colID)) > tol)) {
1108  boundaryNodes(row) = false;
1109  break;
1110  }
1111  });
1112  }
1113  if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1114  return boundaryNodes;
1115  else {
1116  Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), boundaryNodes.extent(0));
1117  Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1118  return boundaryNodes2;
1119  }
1120  // CAG: No idea why this is needed to avoid "warning: missing return statement at end of non-void function"
1121  Kokkos::View<bool*, memory_space> dummy("dummy", 0);
1122  return dummy;
1123 }
1124 
1125 template <class SC, class LO, class GO, class NO>
1126 Kokkos::View<bool*, typename NO::device_type::memory_space>
1129  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1130  const bool count_twos_as_dirichlet) {
1131  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
1132 }
1133 
1134 template <class SC, class LO, class GO, class NO>
1135 Kokkos::View<bool*, typename Kokkos::HostSpace>
1138  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1139  const bool count_twos_as_dirichlet) {
1140  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
1141 }
1142 
1143 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1147  // assume that there is no zero diagonal in matrix
1148  bHasZeroDiagonal = false;
1149 
1151  A.getLocalDiagCopy(*diagVec);
1152  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
1153 
1154  LocalOrdinal numRows = A.getLocalNumRows();
1155  typedef Teuchos::ScalarTraits<Scalar> STS;
1156  ArrayRCP<bool> boundaryNodes(numRows, false);
1157  for (LocalOrdinal row = 0; row < numRows; row++) {
1160  A.getLocalRowView(row, indices, vals);
1161  size_t nnz = 0; // collect nonzeros in row (excluding the diagonal)
1162  bool bHasDiag = false;
1163  for (decltype(indices.size()) col = 0; col < indices.size(); col++) {
1164  if (indices[col] != row) {
1165  if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1166  nnz++;
1167  }
1168  } else
1169  bHasDiag = true; // found a diagonal entry
1170  }
1171  if (bHasDiag == false)
1172  bHasZeroDiagonal = true; // we found at least one row without a diagonal
1173  else if (nnz == 0)
1174  boundaryNodes[row] = true;
1175  }
1176  return boundaryNodes;
1177 }
1178 
1179 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1184  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1185  const bool count_twos_as_dirichlet) {
1186  using range_type = Kokkos::RangePolicy<LO, typename Node::execution_space>;
1187 
1188  auto dirichletRows = DetectDirichletRows_kokkos(A, tol, count_twos_as_dirichlet);
1189 
1190  LocalOrdinal numRows = A.getLocalNumRows();
1191  LocalOrdinal numVectors = RHS.getNumVectors();
1192  TEUCHOS_ASSERT_EQUALITY(numVectors, Teuchos::as<LocalOrdinal>(InitialGuess.getNumVectors()));
1193 #ifdef MUELU_DEBUG
1194  TEUCHOS_ASSERT(RHS.getMap()->isCompatible(InitialGuess.getMap()));
1195 #endif
1196 
1197  auto lclRHS = RHS.getDeviceLocalView(Xpetra::Access::ReadOnly);
1198  auto lclInitialGuess = InitialGuess.getDeviceLocalView(Xpetra::Access::ReadWrite);
1199 
1200  Kokkos::parallel_for(
1201  "MueLu:Utils::EnforceInitialCondition", range_type(0, numRows),
1202  KOKKOS_LAMBDA(const LO row) {
1203  if (dirichletRows(row)) {
1204  for (LocalOrdinal j = 0; j < numVectors; ++j)
1205  lclInitialGuess(row, j) = lclRHS(row, j);
1206  }
1207  });
1208 }
1209 
1210 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1213  Teuchos::ArrayRCP<bool> nonzeros) {
1214  TEUCHOS_ASSERT(vals.size() == nonzeros.size());
1215  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
1216  const magnitudeType eps = 2.0 * Teuchos::ScalarTraits<magnitudeType>::eps();
1217  for (size_t i = 0; i < static_cast<size_t>(vals.size()); i++) {
1218  nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
1219  }
1220 }
1221 
1222 // Find Nonzeros in a device view
1223 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1226  Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1227  using ATS = Kokkos::ArithTraits<Scalar>;
1228  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1229  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1230  TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
1231  const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
1232 
1233  Kokkos::parallel_for(
1234  "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1235  KOKKOS_LAMBDA(const size_t i) {
1236  nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
1237  });
1238 }
1239 
1240 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1243  const Teuchos::ArrayRCP<bool>& dirichletRows,
1244  Teuchos::ArrayRCP<bool> dirichletCols,
1245  Teuchos::ArrayRCP<bool> dirichletDomain) {
1250  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getLocalNumElements());
1251  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getLocalNumElements());
1252  TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getLocalNumElements());
1254  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1255  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1256  if (dirichletRows[i]) {
1258  ArrayView<const Scalar> values;
1259  A.getLocalRowView(i, indices, values);
1260  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1261  myColsToZero->replaceLocalValue(indices[j], 0, one);
1262  }
1263  }
1264 
1267  if (!importer.is_null()) {
1268  globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1, /*zeroOut=*/true);
1269  // export to domain map
1270  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1271  // import to column map
1272  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1273  } else
1274  globalColsToZero = myColsToZero;
1275 
1276  FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1277  FindNonZeros(myColsToZero->getData(0), dirichletCols);
1278 }
1279 
1280 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1283  const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1284  Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1285  Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1286  using ATS = Kokkos::ArithTraits<Scalar>;
1287  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1288  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1292  TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1293  TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1294  TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1296  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1297  auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
1298  auto localMatrix = A.getLocalMatrixDevice();
1299  Kokkos::parallel_for(
1300  "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1301  KOKKOS_LAMBDA(const LocalOrdinal row) {
1302  if (dirichletRows(row)) {
1303  auto rowView = localMatrix.row(row);
1304  auto length = rowView.length;
1305 
1306  for (decltype(length) colID = 0; colID < length; colID++)
1307  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1308  }
1309  });
1310 
1313  if (!importer.is_null()) {
1314  globalColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, /*zeroOut=*/true);
1315  // export to domain map
1316  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1317  // import to column map
1318  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1319  } else
1320  globalColsToZero = myColsToZero;
1321  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(globalColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly), dirichletDomain);
1322  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly), dirichletCols);
1323 }
1324 
1325 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1328  typedef Teuchos::ScalarTraits<Scalar> STS;
1330  typedef Teuchos::ScalarTraits<MT> MTS;
1332  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1333  size_t nnz = A.getNumEntriesInLocalRow(row);
1336  A.getLocalRowView(row, indices, vals);
1337 
1338  Scalar rowsum = STS::zero();
1339  Scalar diagval = STS::zero();
1340 
1341  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1342  LocalOrdinal col = indices[colID];
1343  if (row == col)
1344  diagval = vals[colID];
1345  rowsum += vals[colID];
1346  }
1347  // printf("A(%d,:) row_sum(point) = %6.4e\n",row,rowsum);
1348  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1349  // printf("Row %d triggers rowsum\n",(int)row);
1350  dirichletRows[row] = true;
1351  }
1352  }
1353 }
1354 
1355 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1358  typedef Teuchos::ScalarTraits<Scalar> STS;
1360  typedef Teuchos::ScalarTraits<MT> MTS;
1361  RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1362 
1363  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1364 
1365  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1366  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1367  size_t nnz = A.getNumEntriesInLocalRow(row);
1368  ArrayView<const LocalOrdinal> indices;
1369  ArrayView<const Scalar> vals;
1370  A.getLocalRowView(row, indices, vals);
1371 
1372  Scalar rowsum = STS::zero();
1373  Scalar diagval = STS::zero();
1374  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1375  LocalOrdinal col = indices[colID];
1376  if (row == col)
1377  diagval = vals[colID];
1378  if (block_id[row] == block_id[col])
1379  rowsum += vals[colID];
1380  }
1381 
1382  // printf("A(%d,:) row_sum(block) = %6.4e\n",row,rowsum);
1383  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1384  // printf("Row %d triggers rowsum\n",(int)row);
1385  dirichletRows[row] = true;
1386  }
1387  }
1388 }
1389 
1390 // Applies rowsum criterion
1391 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1393  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1394  Kokkos::View<bool*, memory_space>& dirichletRows) {
1395  typedef Teuchos::ScalarTraits<Scalar> STS;
1397 
1398  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1399  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1400 
1401  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1402  size_t nnz = A.getNumEntriesInLocalRow(row);
1405  A.getLocalRowView(row, indices, vals);
1406 
1407  Scalar rowsum = STS::zero();
1408  Scalar diagval = STS::zero();
1409  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1410  LocalOrdinal col = indices[colID];
1411  if (row == col)
1412  diagval = vals[colID];
1413  rowsum += vals[colID];
1414  }
1415  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1416  dirichletRowsHost(row) = true;
1417  }
1418 
1419  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1420 }
1421 
1422 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1425  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1426  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1427  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
1428 }
1429 
1430 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1433  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1434  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1435  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
1436 }
1437 
1438 // Applies rowsum criterion
1439 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1442  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1443  Kokkos::View<bool*, memory_space>& dirichletRows) {
1444  typedef Teuchos::ScalarTraits<Scalar> STS;
1446 
1447  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1448 
1449  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1450  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1451 
1452  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1453  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1454  size_t nnz = A.getNumEntriesInLocalRow(row);
1457  A.getLocalRowView(row, indices, vals);
1458 
1459  Scalar rowsum = STS::zero();
1460  Scalar diagval = STS::zero();
1461  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1462  LocalOrdinal col = indices[colID];
1463  if (row == col)
1464  diagval = vals[colID];
1465  if (block_id[row] == block_id[col])
1466  rowsum += vals[colID];
1467  }
1468  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1469  dirichletRowsHost(row) = true;
1470  }
1471 
1472  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1473 }
1474 
1475 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1479  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1480  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1481  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
1482 }
1483 
1484 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1488  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1489  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1490  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
1491 }
1492 
1493 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1497  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1503  myColsToZero->putScalar(zero);
1504  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1505  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1506  if (dirichletRows[i]) {
1509  A.getLocalRowView(i, indices, values);
1510  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1511  myColsToZero->replaceLocalValue(indices[j], 0, one);
1512  }
1513  }
1514 
1516  globalColsToZero->putScalar(zero);
1518  // export to domain map
1519  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1520  // import to column map
1521  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1522  Teuchos::ArrayRCP<const Scalar> myCols = myColsToZero->getData(0);
1523  Teuchos::ArrayRCP<bool> dirichletCols(colMap->getLocalNumElements(), true);
1525  for (size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1526  dirichletCols[i] = Teuchos::ScalarTraits<Scalar>::magnitude(myCols[i]) > 2.0 * eps;
1527  }
1528  return dirichletCols;
1529 }
1530 
1531 template <class SC, class LO, class GO, class NO>
1532 Kokkos::View<bool*, typename NO::device_type>
1535  const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1536  using ATS = Kokkos::ArithTraits<SC>;
1537  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1538  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1539 
1540  SC zero = ATS::zero();
1541 
1542  auto localMatrix = A.getLocalMatrixDevice();
1543  LO numRows = A.getLocalNumRows();
1544 
1546  Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> colMap = A.getColMap();
1548  myColsToZero->putScalar(zero);
1549  auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
1550  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1551  Kokkos::parallel_for(
1552  "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1553  KOKKOS_LAMBDA(const LO row) {
1554  if (dirichletRows(row)) {
1555  auto rowView = localMatrix.row(row);
1556  auto length = rowView.length;
1557 
1558  for (decltype(length) colID = 0; colID < length; colID++)
1559  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1560  }
1561  });
1562 
1564  globalColsToZero->putScalar(zero);
1566  // export to domain map
1567  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1568  // import to column map
1569  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1570 
1571  auto myCols = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly);
1572  size_t numColEntries = colMap->getLocalNumElements();
1573  Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
1574  const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
1575 
1576  Kokkos::parallel_for(
1577  "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1578  KOKKOS_LAMBDA(const size_t i) {
1579  dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1580  });
1581  return dirichletCols;
1582 }
1583 
1584 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1585 Scalar
1588  // We check only row maps. Column may be different. One would hope that they are the same, as we typically
1589  // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
1590  // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
1591  // simple as couple of elements swapped)
1592  TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
1593  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
1594 
1595  const Map& AColMap = *A.getColMap();
1596  const Map& BColMap = *B.getColMap();
1597 
1600  size_t nnzA = 0, nnzB = 0;
1601 
1602  // We use a simple algorithm
1603  // for each row we fill valBAll array with the values in the corresponding row of B
1604  // as such, it serves as both sorted array and as storage, so we don't need to do a
1605  // tricky problem: "find a value in the row of B corresponding to the specific GID"
1606  // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
1607  // corresponding entries.
1608  // The algorithm should be reasonably cheap, as it does not sort anything, provided
1609  // that getLocalElement and getGlobalElement functions are reasonably effective. It
1610  // *is* possible that the costs are hidden in those functions, but if maps are close
1611  // to linear maps, we should be fine
1612  Teuchos::Array<Scalar> valBAll(BColMap.getLocalNumElements());
1613 
1615  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(), f = zero, gf;
1616  size_t numRows = A.getLocalNumRows();
1617  for (size_t i = 0; i < numRows; i++) {
1618  A.getLocalRowView(i, indA, valA);
1619  B.getLocalRowView(i, indB, valB);
1620  nnzA = indA.size();
1621  nnzB = indB.size();
1622 
1623  // Set up array values
1624  for (size_t j = 0; j < nnzB; j++)
1625  valBAll[indB[j]] = valB[j];
1626 
1627  for (size_t j = 0; j < nnzA; j++) {
1628  // The cost of the whole Frobenius dot product function depends on the
1629  // cost of the getLocalElement and getGlobalElement functions here.
1630  LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1631  if (ind != invalid)
1632  f += valBAll[ind] * valA[j];
1633  }
1634 
1635  // Clean up array values
1636  for (size_t j = 0; j < nnzB; j++)
1637  valBAll[indB[j]] = zero;
1638  }
1639 
1640  MueLu_sumAll(AColMap.getComm(), f, gf);
1641 
1642  return gf;
1643 }
1644 
1645 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1648  // Distribute the seeds evenly in [1,maxint-1]. This guarantees nothing
1649  // about where in random number stream we are, but avoids overflow situations
1650  // in parallel when multiplying by a PID. It would be better to use
1651  // a good parallel random number generator.
1652  double one = 1.0;
1653  int maxint = INT_MAX; //= 2^31-1 = 2147483647 for 32-bit integers
1654  int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.getRank() + 1) / (comm.getSize() + one)));
1655  if (mySeed < 1 || mySeed == maxint) {
1656  std::ostringstream errStr;
1657  errStr << "Error detected with random seed = " << mySeed << ". It should be in the interval [1,2^31-2].";
1658  throw Exceptions::RuntimeError(errStr.str());
1659  }
1660  std::srand(mySeed);
1661  // For Tpetra, we could use Kokkos' random number generator here.
1663  // Epetra
1664  // MultiVector::Random() -> Epetra_Util::RandomDouble() -> Epetra_Utils::RandomInt()
1665  // Its own random number generator, based on Seed_. Seed_ is initialized in Epetra_Util constructor with std::rand()
1666  // So our setting std::srand() affects that too
1667 }
1668 
1669 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1672  std::vector<LocalOrdinal>& dirichletRows, bool count_twos_as_dirichlet) {
1674  dirichletRows.resize(0);
1675  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1678  A->getLocalRowView(i, indices, values);
1679  int nnz = 0;
1680  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1682  nnz++;
1683  }
1684  }
1685  if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1686  dirichletRows.push_back(i);
1687  }
1688  }
1689 }
1690 
1691 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1694  const std::vector<LocalOrdinal>& dirichletRows) {
1695  RCP<const Map> Rmap = A->getRowMap();
1696  RCP<const Map> Cmap = A->getColMap();
1699 
1700  for (size_t i = 0; i < dirichletRows.size(); i++) {
1701  GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1702 
1705  A->getLocalRowView(dirichletRows[i], indices, values);
1706  // NOTE: This won't work with fancy node types.
1707  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1708  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1709  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1710  valuesNC[j] = one;
1711  else
1712  valuesNC[j] = zero;
1713  }
1714  }
1715 }
1716 
1717 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1720  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1722  RCP<const Map> domMap = A->getDomainMap();
1723  RCP<const Map> ranMap = A->getRangeMap();
1724  RCP<const Map> Rmap = A->getRowMap();
1725  RCP<const Map> Cmap = A->getColMap();
1726  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1729  A->resumeFill();
1730  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1731  if (dirichletRows[i]) {
1732  GlobalOrdinal row_gid = Rmap->getGlobalElement(i);
1733 
1736  A->getLocalRowView(i, indices, values);
1737 
1738  Teuchos::ArrayRCP<Scalar> valuesNC(values.size());
1739  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1740  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1741  valuesNC[j] = one;
1742  else
1743  valuesNC[j] = zero;
1744  }
1745  A->replaceLocalValues(i, indices, valuesNC());
1746  }
1747  }
1748  A->fillComplete(domMap, ranMap);
1749 }
1750 
1751 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1754  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1756  using ATS = Kokkos::ArithTraits<Scalar>;
1757  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1758  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1759 
1764 
1765  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1766 
1767  auto localMatrix = A->getLocalMatrixDevice();
1768  auto localRmap = Rmap->getLocalMap();
1769  auto localCmap = Cmap->getLocalMap();
1770 
1771  Kokkos::parallel_for(
1772  "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1773  KOKKOS_LAMBDA(const LocalOrdinal row) {
1774  if (dirichletRows(row)) {
1775  auto rowView = localMatrix.row(row);
1776  auto length = rowView.length;
1777  auto row_gid = localRmap.getGlobalElement(row);
1778  auto row_lid = localCmap.getLocalElement(row_gid);
1779 
1780  for (decltype(length) colID = 0; colID < length; colID++)
1781  if (rowView.colidx(colID) == row_lid)
1782  rowView.value(colID) = impl_ATS::one();
1783  else
1784  rowView.value(colID) = impl_ATS::zero();
1785  }
1786  });
1787 }
1788 
1789 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1792  const std::vector<LocalOrdinal>& dirichletRows,
1793  Scalar replaceWith) {
1794  for (size_t i = 0; i < dirichletRows.size(); i++) {
1797  A->getLocalRowView(dirichletRows[i], indices, values);
1798  // NOTE: This won't work with fancy node types.
1799  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1800  for (size_t j = 0; j < (size_t)indices.size(); j++)
1801  valuesNC[j] = replaceWith;
1802  }
1803 }
1804 
1805 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1808  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1809  Scalar replaceWith) {
1810  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == A->getRowMap()->getLocalNumElements());
1811  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1812  if (dirichletRows[i]) {
1815  A->getLocalRowView(i, indices, values);
1816  // NOTE: This won't work with fancy node types.
1817  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1818  for (size_t j = 0; j < (size_t)indices.size(); j++)
1819  valuesNC[j] = replaceWith;
1820  }
1821  }
1822 }
1823 
1824 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1827  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1828  Scalar replaceWith) {
1829  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == X->getMap()->getLocalNumElements());
1830  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1831  if (dirichletRows[i]) {
1832  for (size_t j = 0; j < X->getNumVectors(); j++)
1833  X->replaceLocalValue(i, j, replaceWith);
1834  }
1835  }
1836 }
1837 
1838 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1841  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1842  Scalar replaceWith) {
1843  using ATS = Kokkos::ArithTraits<Scalar>;
1844  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1845 
1846  typename ATS::val_type impl_replaceWith = replaceWith;
1847 
1848  auto localMatrix = A->getLocalMatrixDevice();
1849  LocalOrdinal numRows = A->getLocalNumRows();
1850 
1851  Kokkos::parallel_for(
1852  "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1853  KOKKOS_LAMBDA(const LocalOrdinal row) {
1854  if (dirichletRows(row)) {
1855  auto rowView = localMatrix.row(row);
1856  auto length = rowView.length;
1857  for (decltype(length) colID = 0; colID < length; colID++)
1858  rowView.value(colID) = impl_replaceWith;
1859  }
1860  });
1861 }
1862 
1863 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1866  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1867  Scalar replaceWith) {
1868  using ATS = Kokkos::ArithTraits<Scalar>;
1869  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1870 
1871  typename ATS::val_type impl_replaceWith = replaceWith;
1872 
1873  auto myCols = X->getDeviceLocalView(Xpetra::Access::ReadWrite);
1874  size_t numVecs = X->getNumVectors();
1875  Kokkos::parallel_for(
1876  "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1877  KOKKOS_LAMBDA(const size_t i) {
1878  if (dirichletRows(i)) {
1879  for (size_t j = 0; j < numVecs; j++)
1880  myCols(i, j) = impl_replaceWith;
1881  }
1882  });
1883 }
1884 
1885 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1888  const Teuchos::ArrayRCP<const bool>& dirichletCols,
1889  Scalar replaceWith) {
1890  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == A->getColMap()->getLocalNumElements());
1891  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1894  A->getLocalRowView(i, indices, values);
1895  // NOTE: This won't work with fancy node types.
1896  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1897  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1898  if (dirichletCols[indices[j]])
1899  valuesNC[j] = replaceWith;
1900  }
1901 }
1902 
1903 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1906  const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1907  Scalar replaceWith) {
1908  using ATS = Kokkos::ArithTraits<Scalar>;
1909  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1910 
1911  typename ATS::val_type impl_replaceWith = replaceWith;
1912 
1913  auto localMatrix = A->getLocalMatrixDevice();
1914  LocalOrdinal numRows = A->getLocalNumRows();
1915 
1916  Kokkos::parallel_for(
1917  "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1918  KOKKOS_LAMBDA(const LocalOrdinal row) {
1919  auto rowView = localMatrix.row(row);
1920  auto length = rowView.length;
1921  for (decltype(length) colID = 0; colID < length; colID++)
1922  if (dirichletCols(rowView.colidx(colID))) {
1923  rowView.value(colID) = impl_replaceWith;
1924  }
1925  });
1926 }
1927 
1928 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1933  // Make sure A's RowMap == DomainMap
1934  if (!A->getRowMap()->isSameAs(*A->getDomainMap())) {
1935  throw std::runtime_error("UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
1936  }
1938  bool has_import = !importer.is_null();
1939 
1940  // Find the Dirichlet rows
1941  std::vector<LocalOrdinal> dirichletRows;
1942  FindDirichletRows(A, dirichletRows);
1943 
1944 #if 0
1945  printf("[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
1946  for(size_t i=0; i<(size_t) dirichletRows.size(); i++)
1947  printf("%d ",dirichletRows[i]);
1948  printf("\n");
1949  fflush(stdout);
1950 #endif
1951  // Allocate all as non-Dirichlet
1952  isDirichletRow = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRowMap(), true);
1953  isDirichletCol = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getColMap(), true);
1954 
1955  {
1956  Teuchos::ArrayRCP<int> dr_rcp = isDirichletRow->getDataNonConst(0);
1957  Teuchos::ArrayView<int> dr = dr_rcp();
1958  Teuchos::ArrayRCP<int> dc_rcp = isDirichletCol->getDataNonConst(0);
1959  Teuchos::ArrayView<int> dc = dc_rcp();
1960  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1961  dr[dirichletRows[i]] = 1;
1962  if (!has_import) dc[dirichletRows[i]] = 1;
1963  }
1964  }
1965 
1966  if (has_import)
1967  isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
1968 }
1969 
1970 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1974  using ISC = typename Kokkos::ArithTraits<Scalar>::val_type;
1975  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1976  using local_matrix_type = typename CrsMatrix::local_matrix_type;
1977  using values_type = typename local_matrix_type::values_type;
1978 
1979  const ISC ONE = Kokkos::ArithTraits<ISC>::one();
1980  const ISC ZERO = Kokkos::ArithTraits<ISC>::zero();
1981 
1982  // Copy the values array of the old matrix to a new array, replacing all the non-zeros with one
1983  auto localMatrix = original->getLocalMatrixDevice();
1984  TEUCHOS_TEST_FOR_EXCEPTION(!original->hasCrsGraph(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: Cannot get CrsGraph");
1985  values_type new_values("values", localMatrix.nnz());
1986 
1987  Kokkos::parallel_for(
1988  "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(const size_t i) {
1989  if (localMatrix.values(i) != ZERO)
1990  new_values(i) = ONE;
1991  else
1992  new_values(i) = ZERO;
1993  });
1994 
1995  // Build the new matrix
1996  RCP<Matrix> NewMatrix = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(original->getCrsGraph(), new_values);
1997  TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: MatrixFactory::Build() did not return matrix");
1998  NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
1999  return NewMatrix;
2000 }
2001 
2002 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2008  Xpetra::UnderlyingLib lib = sourceBlockedMap.lib();
2009 
2010  // De-stride the map if we have to (might regret this later)
2011  RCP<const Map> fullMap = sourceBlockedMap.getMap();
2012  RCP<const Map> stridedMap = Teuchos::rcp_dynamic_cast<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node>>(fullMap);
2013  if (!stridedMap.is_null()) fullMap = stridedMap->getMap();
2014 
2015  // Initial sanity checking for map compatibil
2016  const size_t numSubMaps = sourceBlockedMap.getNumMaps();
2017  if (!Importer.getSourceMap()->isCompatible(*fullMap))
2018  throw std::runtime_error("GenerateBlockedTargetMap(): Map compatibility error");
2019 
2020  // Build an indicator vector
2022 
2023  for (size_t i = 0; i < numSubMaps; i++) {
2024  RCP<const Map> map = sourceBlockedMap.getMap(i);
2025 
2026  for (size_t j = 0; j < map->getLocalNumElements(); j++) {
2027  LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2028  block_ids->replaceLocalValue(jj, (int)i);
2029  }
2030  }
2031 
2032  // Get the block ids for the new map
2033  RCP<const Map> targetMap = Importer.getTargetMap();
2035  new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
2036  Teuchos::ArrayRCP<const int> dataRCP = new_block_ids->getData(0);
2037  Teuchos::ArrayView<const int> data = dataRCP();
2038 
2039  // Get the GIDs for each subblock
2040  Teuchos::Array<Teuchos::Array<GlobalOrdinal>> elementsInSubMap(numSubMaps);
2041  for (size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2042  elementsInSubMap[data[i]].push_back(targetMap->getGlobalElement(i));
2043  }
2044 
2045  // Generate the new submaps
2046  std::vector<RCP<const Map>> subMaps(numSubMaps);
2047  for (size_t i = 0; i < numSubMaps; i++) {
2048  subMaps[i] = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), elementsInSubMap[i](), targetMap->getIndexBase(), targetMap->getComm());
2049  }
2050 
2051  // Build the BlockedMap
2052  return rcp(new BlockedMap(targetMap, subMaps));
2053 }
2054 
2055 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2058  ArrayView<const GlobalOrdinal> rowElements = rowMap.getLocalElementList();
2059  ArrayView<const GlobalOrdinal> colElements = colMap.getLocalElementList();
2060 
2061  const size_t numElements = rowElements.size();
2062 
2063  if (size_t(colElements.size()) < numElements)
2064  return false;
2065 
2066  bool goodMap = true;
2067  for (size_t i = 0; i < numElements; i++)
2068  if (rowElements[i] != colElements[i]) {
2069  goodMap = false;
2070  break;
2071  }
2072 
2073  return goodMap;
2074 }
2075 
2076 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2081  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2082  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2083  using device = typename local_graph_type::device_type;
2084  using execution_space = typename local_matrix_type::execution_space;
2085  using ordinal_type = typename local_matrix_type::ordinal_type;
2086 
2087  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2088 
2089  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2090 
2093 
2094  // Copy out and reorder data
2095  auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2096  Kokkos::parallel_for(
2097  "Utilities::ReverseCuthillMcKee",
2098  Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2099  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2100  view1D(rcmOrder(rowIdx)) = rowIdx;
2101  });
2102  return retval;
2103 }
2104 
2105 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2110  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2111  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2112  using device = typename local_graph_type::device_type;
2113  using execution_space = typename local_matrix_type::execution_space;
2114  using ordinal_type = typename local_matrix_type::ordinal_type;
2115 
2116  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2117  LocalOrdinal numRows = localGraph.numRows();
2118 
2119  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2120 
2123 
2124  // Copy out data
2125  auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2126  // Since KokkosKernels produced RCM, also reverse the order of the view to get CM
2127  Kokkos::parallel_for(
2128  "Utilities::ReverseCuthillMcKee",
2129  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2130  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2131  view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2132  });
2133  return retval;
2134 }
2135 
2136 } // namespace MueLu
2137 
2138 #define MUELU_UTILITIESBASE_SHORT
2139 #endif // MUELU_UTILITIESBASE_DEF_HPP
2140 
2141 // LocalWords: LocalOrdinal
static void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon.
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows)
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< bool > &dirichletRows, Teuchos::ArrayRCP< bool > dirichletCols, Teuchos::ArrayRCP< bool > dirichletDomain)
Detects Dirichlet columns &amp; domains from a list of Dirichlet rows.
#define MueLu_sumAll(rcpComm, in, out)
Array< T > & append(const T &x)
virtual int getSize() const =0
MueLu::DefaultLocalOrdinal LocalOrdinal
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
virtual LO getBlockSize() const override
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Matrix &Op)
static void FindDirichletRowsAndPropagateToCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletRow, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletCol)
virtual int getRank() const =0
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
static magnitudeType eps()
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
virtual const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a graph.
size_type size() const
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
LocalOrdinal LO
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > ReplaceNonZerosWithOnes(const RCP< Matrix > &original)
Creates a copy of a matrix where the non-zero entries are replaced by ones.
size_t getLocalNumRows() const override
size_type size() const
Exception throws to report incompatible objects (like maps).
static void SetRandomSeed(const Teuchos::Comm< int > &comm)
Set seed for random number generator.
MueLu::DefaultNode Node
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getTargetMap() const =0
static RCP< Time > getNewTimer(const std::string &name)
void resize(const size_type n, const T &val=T())
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
virtual void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const =0
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
virtual void getLocalRowView(LocalOrdinal LocalRow, ArrayView< const LocalOrdinal > &indices, ArrayView< const Scalar > &values) const =0
static void FindDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, std::vector< LocalOrdinal > &dirichletRows, bool count_twos_as_dirichlet=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static RCP< CrsMatrixWrap > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a matrix.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
#define I(i, j)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
virtual bool isFillComplete() const =0
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
size_t getNumMaps() const
virtual void replaceLocalValues(LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)=0
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getMap() const =0
static RCP< const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > > GeneratedBlockedTargetMap(const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > &sourceBlockedMap, const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
T * getRawPtr() const
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static void seedrandom(unsigned int s)
static Teuchos::ArrayRCP< const bool > DetectDirichletRowsExt(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool &bHasZeroDiagonal, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::zero())
Detect Dirichlet rows (extended version)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
const RCP< const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getMap(size_t i, bool bThyraMode=false) const
virtual UnderlyingLib lib() const
static magnitudeType magnitude(T a)
TransListIter iter
void push_back(const value_type &x)
virtual RCP< const CrsGraph > getCrsGraph() const =0
static Teuchos::Array< Magnitude > ResidualNorm(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static void EnforceInitialCondition(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS, Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &InitialGuess, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows and copy values from RHS multivector to InitialGuess for Dirichlet rows...
int length() const
size_type size() const
Scalar SC
impl_scalar_type_dualview::t_dev::const_type getValuesDevice(const LO &lclRow) const
virtual Teuchos::RCP< const Map > getRangeMap() const =0
static RCP< Teuchos::FancyOStream > MakeFancy(std::ostream &os)
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
void residual(const Operator< SC, LO, GO, NO > &Aop, const MultiVector< SC, LO, GO, NO > &X_in, const MultiVector< SC, LO, GO, NO > &B_in, MultiVector< SC, LO, GO, NO > &R_in)
Exception throws to report errors in the internal logical of the program.
static Scalar Frobenius(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B)
Frobenius inner product of two matrices.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getSourceMap() const =0
#define TEUCHOS_ASSERT(assertion_test)
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
virtual size_t getNumVectors() const =0
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
static Kokkos::View< bool *, typename NO::device_type::memory_space > DetectDirichletRows_kokkos(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
virtual Teuchos::ArrayRCP< const Scalar > getData(size_t j) const =0
virtual Teuchos::RCP< const Map > getDomainMap() const =0
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)
bool is_null() const