MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UtilitiesBase_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_UTILITIESBASE_DEF_HPP
11 #define MUELU_UTILITIESBASE_DEF_HPP
12 
13 #include "MueLu_ConfigDefs.hpp"
14 
16 
17 #include <Kokkos_Core.hpp>
18 #include <KokkosSparse_CrsMatrix.hpp>
19 #include <KokkosSparse_getDiagCopy.hpp>
20 
21 #include <Xpetra_BlockedVector.hpp>
22 #include <Xpetra_BlockedMap.hpp>
23 #include <Xpetra_BlockedMultiVector.hpp>
24 #include <Xpetra_ExportFactory.hpp>
25 
26 #include <Xpetra_Import.hpp>
27 #include <Xpetra_ImportFactory.hpp>
28 #include <Xpetra_MatrixMatrix.hpp>
29 #include <Xpetra_CrsGraph.hpp>
31 #include <Xpetra_CrsMatrixWrap.hpp>
32 #include <Xpetra_StridedMap.hpp>
33 
34 #include "MueLu_Exceptions.hpp"
36 
37 #include <KokkosKernels_Handle.hpp>
38 #include <KokkosGraph_RCM.hpp>
39 
40 namespace MueLu {
41 
42 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
43 RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
46  if (Op.is_null())
47  return Teuchos::null;
48  return rcp(new CrsMatrixWrap(Op));
49 }
50 
51 template <class Scalar,
52  class LocalOrdinal,
53  class GlobalOrdinal,
54  class Node>
57  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold,
58  const bool keepDiagonal) {
60  using row_ptr_type = typename crs_matrix::local_graph_type::row_map_type::non_const_type;
61  using col_idx_type = typename crs_matrix::local_graph_type::entries_type::non_const_type;
62  using vals_type = typename crs_matrix::local_matrix_type::values_type;
63  using execution_space = typename crs_matrix::local_matrix_type::execution_space;
64 
65  using ATS = Kokkos::ArithTraits<Scalar>;
66  using impl_SC = typename ATS::val_type;
67  using impl_ATS = Kokkos::ArithTraits<impl_SC>;
68 
69  auto lclA = A->getLocalMatrixDevice();
70 
71  auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1);
72  col_idx_type idx;
73  vals_type vals;
74  LocalOrdinal nnz;
75 
76  if (keepDiagonal) {
77  auto lclRowMap = A->getRowMap()->getLocalMap();
78  auto lclColMap = A->getColMap()->getLocalMap();
79  Kokkos::parallel_scan(
80  "removeSmallEntries::rowptr",
81  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
82  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
83  auto row = lclA.row(rlid);
84  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
85  for (LocalOrdinal k = 0; k < row.length; ++k) {
86  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
87  partial_nnz += 1;
88  }
89  }
90  if (is_final)
91  rowptr(rlid + 1) = partial_nnz;
92  },
93  nnz);
94 
95  idx = col_idx_type("idx", nnz);
96  vals = vals_type("vals", nnz);
97 
98  Kokkos::parallel_for(
99  "removeSmallEntries::indicesValues",
100  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
101  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
102  auto row = lclA.row(rlid);
103  auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid));
104  auto I = rowptr(rlid);
105  for (LocalOrdinal k = 0; k < row.length; ++k) {
106  if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) {
107  idx(I) = row.colidx(k);
108  vals(I) = row.value(k);
109  I += 1;
110  }
111  }
112  });
113 
114  Kokkos::fence();
115  } else {
116  Kokkos::parallel_scan(
117  "removeSmallEntries::rowptr",
118  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
119  KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) {
120  auto row = lclA.row(rlid);
121  for (LocalOrdinal k = 0; k < row.length; ++k) {
122  if (impl_ATS::magnitude(row.value(k)) > threshold) {
123  partial_nnz += 1;
124  }
125  }
126  if (is_final)
127  rowptr(rlid + 1) = partial_nnz;
128  },
129  nnz);
130 
131  idx = col_idx_type("idx", nnz);
132  vals = vals_type("vals", nnz);
133 
134  Kokkos::parallel_for(
135  "removeSmallEntries::indicesValues",
136  Kokkos::RangePolicy<LocalOrdinal, execution_space>(0, lclA.numRows()),
137  KOKKOS_LAMBDA(const LocalOrdinal rlid) {
138  auto row = lclA.row(rlid);
139  auto I = rowptr(rlid);
140  for (LocalOrdinal k = 0; k < row.length; ++k) {
141  if (impl_ATS::magnitude(row.value(k)) > threshold) {
142  idx(I) = row.colidx(k);
143  vals(I) = row.value(k);
144  I += 1;
145  }
146  }
147  });
148 
149  Kokkos::fence();
150  }
151 
152  auto lclNewA = typename crs_matrix::local_matrix_type("thresholdedMatrix", lclA.numRows(), lclA.numCols(), nnz, vals, rowptr, idx);
153  auto newA = Xpetra::MatrixFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(lclNewA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap());
154 
155  return newA;
156 }
157 
158 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
159 RCP<Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node>>
161  GetThresholdedMatrix(const RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>& Ain, const typename Teuchos::ScalarTraits<Scalar>::magnitudeType threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) {
162  auto crsWrap = rcp_dynamic_cast<CrsMatrixWrap>(Ain);
163  if (!crsWrap.is_null()) {
164  auto crsMat = crsWrap->getCrsMatrix();
165  auto filteredMat = removeSmallEntries(crsMat, threshold, keepDiagonal);
166  return rcp_static_cast<CrsMatrixWrap>(filteredMat);
167  }
168 
169  RCP<const Map> rowmap = Ain->getRowMap();
170  RCP<const Map> colmap = Ain->getColMap();
171  RCP<CrsMatrixWrap> Aout = rcp(new CrsMatrixWrap(rowmap, expectedNNZperRow <= 0 ? Ain->getGlobalMaxNumRowEntries() : expectedNNZperRow));
172  // loop over local rows
173  for (size_t row = 0; row < Ain->getLocalNumRows(); row++) {
174  size_t nnz = Ain->getNumEntriesInLocalRow(row);
175 
178  Ain->getLocalRowView(row, indices, vals);
179 
180  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::ThresholdAFilterFactory::Build: number of nonzeros not equal to number of indices? Error.");
181 
184  size_t nNonzeros = 0;
185  if (keepDiagonal) {
186  GlobalOrdinal glbRow = rowmap->getGlobalElement(row);
187  LocalOrdinal lclColIdx = colmap->getLocalElement(glbRow);
188  for (size_t i = 0; i < (size_t)indices.size(); i++) {
189  if (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > Teuchos::ScalarTraits<Scalar>::magnitude(threshold) || indices[i] == lclColIdx) {
190  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
191  valout[nNonzeros] = vals[i];
192  nNonzeros++;
193  }
194  }
195  } else
196  for (size_t i = 0; i < (size_t)indices.size(); i++) {
198  indout[nNonzeros] = colmap->getGlobalElement(indices[i]); // LID -> GID (column)
199  valout[nNonzeros] = vals[i];
200  nNonzeros++;
201  }
202  }
203 
204  indout.resize(nNonzeros);
205  valout.resize(nNonzeros);
206 
207  Aout->insertGlobalValues(Ain->getRowMap()->getGlobalElement(row), indout.view(0, indout.size()), valout.view(0, valout.size()));
208  }
209  Aout->fillComplete(Ain->getDomainMap(), Ain->getRangeMap());
210 
211  return Aout;
212 }
213 
214 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
218  using STS = Teuchos::ScalarTraits<Scalar>;
219  RCP<CrsGraph> sparsityPattern = CrsGraphFactory::Build(A->getRowMap(), expectedNNZperRow <= 0 ? A->getGlobalMaxNumRowEntries() : expectedNNZperRow);
220 
221  RCP<Vector> diag = GetMatrixOverlappedDiagonal(*A);
222  ArrayRCP<const Scalar> D = diag->getData(0);
223 
224  for (size_t row = 0; row < A->getLocalNumRows(); row++) {
227  A->getLocalRowView(row, indices, vals);
228 
229  GlobalOrdinal globalRow = A->getRowMap()->getGlobalElement(row);
230  LocalOrdinal col = A->getColMap()->getLocalElement(globalRow);
231 
232  const Scalar Dk = STS::magnitude(D[col]) > 0.0 ? STS::magnitude(D[col]) : 1.0;
233  Array<GlobalOrdinal> indicesNew;
234 
235  for (size_t i = 0; i < size_t(indices.size()); i++)
236  // keep diagonal per default
237  if (col == indices[i] || STS::magnitude(STS::squareroot(Dk) * vals[i] * STS::squareroot(Dk)) > STS::magnitude(threshold))
238  indicesNew.append(A->getColMap()->getGlobalElement(indices[i]));
239 
240  sparsityPattern->insertGlobalIndices(globalRow, ArrayView<const GlobalOrdinal>(indicesNew.data(), indicesNew.length()));
241  }
242  sparsityPattern->fillComplete();
243 
244  return sparsityPattern;
245 }
246 
247 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
251  size_t numRows = A.getRowMap()->getLocalNumElements();
252  Teuchos::ArrayRCP<Scalar> diag(numRows);
255  for (size_t i = 0; i < numRows; ++i) {
256  A.getLocalRowView(i, cols, vals);
257  LocalOrdinal j = 0;
258  for (; j < cols.size(); ++j) {
259  if (Teuchos::as<size_t>(cols[j]) == i) {
260  diag[i] = vals[j];
261  break;
262  }
263  }
264  if (j == cols.size()) {
265  // Diagonal entry is absent
267  }
268  }
269  return diag;
270 }
271 
272 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
275  GetMatrixDiagonal(const Matrix& A) {
276  const auto rowMap = A.getRowMap();
278 
279  const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
280  if ((crsOp != NULL) && (rowMap->lib() == Xpetra::UseTpetra)) {
281  using device_type = typename CrsGraph::device_type;
282  Kokkos::View<size_t*, device_type> offsets("offsets", rowMap->getLocalNumElements());
283  crsOp->getCrsGraph()->getLocalDiagOffsets(offsets);
284  crsOp->getCrsMatrix()->getLocalDiagCopy(*diag, offsets);
285  } else {
286  A.getLocalDiagCopy(*diag);
287  }
288 
289  return diag;
290 }
291 
292 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
293 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
294 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
295 // GetMatrixDiagonalInverse(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A, Magnitude tol, Scalar valReplacement) {
296 // Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetMatrixDiagonalInverse");
297 
298 // RCP<const Map> rowMap = A.getRowMap();
299 // RCP<Vector> diag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap,true);
300 
301 // A.getLocalDiagCopy(*diag);
302 
303 // RCP<Vector> inv = MueLu::UtilitiesBase<Scalar,LocalOrdinal,GlobalOrdinal,Node>::GetInverse(diag, tol, valReplacement);
304 
305 // return inv;
306 // }
307 
308 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
313  Scalar valReplacement,
314  const bool doLumped) {
315  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("Utilities::GetMatrixDiagonalInverse");
316 
317  RCP<const BlockedCrsMatrix> bA = Teuchos::rcp_dynamic_cast<const BlockedCrsMatrix>(rcpFromRef(A));
318  if (!bA.is_null()) {
319  RCP<const Map> rowMap = A.getRowMap();
321  A.getLocalDiagCopy(*diag);
323  return inv;
324  }
325 
326  // Some useful type definitions
327  using local_matrix_type = typename Matrix::local_matrix_type;
328  // using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
329  using value_type = typename local_matrix_type::value_type;
330  using values_type = typename local_matrix_type::values_type;
331  using scalar_type = typename values_type::non_const_value_type;
332  using ordinal_type = typename local_matrix_type::ordinal_type;
333  using execution_space = typename local_matrix_type::execution_space;
334  // using memory_space = typename local_matrix_type::memory_space;
335  // Be careful with this one, if using Kokkos::ArithTraits<Scalar>
336  // you are likely to run into errors when handling std::complex<>
337  // a good way to work around that is to use the following:
338  // using KAT = Kokkos::ArithTraits<Kokkos::ArithTraits<Scalar>::val_type> >
339  // here we have: value_type = Kokkos::ArithTraits<Scalar>::val_type
340  using KAT = Kokkos::ArithTraits<value_type>;
341 
342  // Get/Create distributed objects
343  RCP<const Map> rowMap = A.getRowMap();
344  RCP<Vector> diag = VectorFactory::Build(rowMap, false);
345 
346  // Now generate local objects
347  local_matrix_type localMatrix = A.getLocalMatrixDevice();
348  auto diagVals = diag->getLocalViewDevice(Xpetra::Access::ReadWrite);
349 
350  ordinal_type numRows = localMatrix.graph.numRows();
351 
352  scalar_type valReplacement_dev = valReplacement;
353 
354  // Note: 2019-11-21, LBV
355  // This could be implemented with a TeamPolicy over the rows
356  // and a TeamVectorRange over the entries in a row if performance
357  // becomes more important here.
358  if (!doLumped)
359  Kokkos::parallel_for(
360  "Utilities::GetMatrixDiagonalInverse",
361  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
362  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
363  bool foundDiagEntry = false;
364  auto myRow = localMatrix.rowConst(rowIdx);
365  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
366  if (myRow.colidx(entryIdx) == rowIdx) {
367  foundDiagEntry = true;
368  if (KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
369  diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
370  } else {
371  diagVals(rowIdx, 0) = valReplacement_dev;
372  }
373  break;
374  }
375  }
376 
377  if (!foundDiagEntry) {
378  diagVals(rowIdx, 0) = KAT::zero();
379  }
380  });
381  else
382  Kokkos::parallel_for(
383  "Utilities::GetMatrixDiagonalInverse",
384  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
385  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
386  auto myRow = localMatrix.rowConst(rowIdx);
387  for (ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
388  diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
389  }
390  if (KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
391  diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
392  else
393  diagVals(rowIdx, 0) = valReplacement_dev;
394  });
395 
396  return diag;
397 }
398 
399 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
403  Magnitude tol,
404  Scalar valReplacement,
405  const bool replaceSingleEntryRowWithZero,
406  const bool useAverageAbsDiagVal) {
407  typedef Teuchos::ScalarTraits<Scalar> TST;
408 
409  RCP<Vector> diag = Teuchos::null;
410  const Scalar zero = TST::zero();
411  const Scalar one = TST::one();
412  const Scalar two = one + one;
413 
414  Teuchos::RCP<const Matrix> rcpA = Teuchos::rcpFromRef(A);
415 
417  Teuchos::rcp_dynamic_cast<const Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(rcpA);
418  if (bA == Teuchos::null) {
419  RCP<const Map> rowMap = rcpA->getRowMap();
421 
422  if (rowMap->lib() == Xpetra::UnderlyingLib::UseTpetra) {
423  Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase::GetLumpedMatrixDiagonal (Kokkos implementation)");
424  // Implement using Kokkos
425  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
426  using local_matrix_type = typename Matrix::local_matrix_type;
427  using execution_space = typename local_vector_type::execution_space;
428  // using rowmap_type = typename local_matrix_type::row_map_type;
429  // using entries_type = typename local_matrix_type::index_type;
430  using values_type = typename local_matrix_type::values_type;
431  using scalar_type = typename values_type::non_const_value_type;
432  using mag_type = typename Kokkos::ArithTraits<scalar_type>::mag_type;
433  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
434  using KAT_M = typename Kokkos::ArithTraits<mag_type>;
435  using size_type = typename local_matrix_type::non_const_size_type;
436 
437  local_vector_type diag_dev = diag->getLocalViewDevice(Xpetra::Access::OverwriteAll);
438  local_matrix_type local_mat_dev = rcpA->getLocalMatrixDevice();
439  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
440  scalar_type valReplacement_dev = valReplacement;
441 
442  if (doReciprocal) {
443  Kokkos::View<int*, execution_space> nnzPerRow("nnz per rows", diag_dev.extent(0));
444  Kokkos::View<scalar_type*, execution_space> regSum("regSum", diag_dev.extent(0));
445  Kokkos::View<mag_type, execution_space> avgAbsDiagVal_dev("avgAbsDiagVal");
446  Kokkos::View<int, execution_space> numDiagsEqualToOne_dev("numDiagsEqualToOne");
447 
448  {
449  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for (doReciprocal)");
450  Kokkos::parallel_for(
451  "GetLumpedMatrixDiagonal", my_policy,
452  KOKKOS_LAMBDA(const int rowIdx) {
453  diag_dev(rowIdx, 0) = KAT_S::zero();
454  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
455  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
456  ++entryIdx) {
457  regSum(rowIdx) += local_mat_dev.values(entryIdx);
458  if (KAT_M::zero() < KAT_S::abs(local_mat_dev.values(entryIdx))) {
459  ++nnzPerRow(rowIdx);
460  }
461  diag_dev(rowIdx, 0) += KAT_S::abs(local_mat_dev.values(entryIdx));
462  if (rowIdx == local_mat_dev.graph.entries(entryIdx)) {
463  Kokkos::atomic_add(&avgAbsDiagVal_dev(), KAT_S::abs(local_mat_dev.values(entryIdx)));
464  }
465  }
466 
467  if (nnzPerRow(rowIdx) == 1 && KAT_S::magnitude(diag_dev(rowIdx, 0)) == KAT_M::one()) {
468  Kokkos::atomic_add(&numDiagsEqualToOne_dev(), 1);
469  }
470  });
471  }
472  if (useAverageAbsDiagVal) {
473  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: useAverageAbsDiagVal");
474  typename Kokkos::View<mag_type, execution_space>::host_mirror_type avgAbsDiagVal = Kokkos::create_mirror_view(avgAbsDiagVal_dev);
475  Kokkos::deep_copy(avgAbsDiagVal, avgAbsDiagVal_dev);
476  int numDiagsEqualToOne;
477  Kokkos::deep_copy(numDiagsEqualToOne, numDiagsEqualToOne_dev);
478 
479  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal() - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
480  }
481 
482  {
483  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("ComputeLumpedDiagonalInverse: parallel_for (doReciprocal)");
484  Kokkos::parallel_for(
485  "ComputeLumpedDiagonalInverse", my_policy,
486  KOKKOS_LAMBDA(const int rowIdx) {
487  if (replaceSingleEntryRowWithZero && nnzPerRow(rowIdx) <= 1) {
488  diag_dev(rowIdx, 0) = KAT_S::zero();
489  } else if ((diag_dev(rowIdx, 0) != KAT_S::zero()) && (KAT_S::magnitude(diag_dev(rowIdx, 0)) < KAT_S::magnitude(2 * regSum(rowIdx)))) {
490  diag_dev(rowIdx, 0) = KAT_S::one() / KAT_S::magnitude(2 * regSum(rowIdx));
491  } else {
492  if (KAT_S::magnitude(diag_dev(rowIdx, 0)) > tol) {
493  diag_dev(rowIdx, 0) = KAT_S::one() / diag_dev(rowIdx, 0);
494  } else {
495  diag_dev(rowIdx, 0) = valReplacement_dev;
496  }
497  }
498  });
499  }
500 
501  } else {
502  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("GetLumpedMatrixDiagonal: parallel_for");
503  Kokkos::parallel_for(
504  "GetLumpedMatrixDiagonal", my_policy,
505  KOKKOS_LAMBDA(const int rowIdx) {
506  diag_dev(rowIdx, 0) = KAT_S::zero();
507  for (size_type entryIdx = local_mat_dev.graph.row_map(rowIdx);
508  entryIdx < local_mat_dev.graph.row_map(rowIdx + 1);
509  ++entryIdx) {
510  diag_dev(rowIdx, 0) += KAT_S::magnitude(local_mat_dev.values(entryIdx));
511  }
512  });
513  }
514  } else {
515  // Implement using Teuchos
516  Teuchos::TimeMonitor MMM = *Teuchos::TimeMonitor::getNewTimer("UtilitiesBase: GetLumpedMatrixDiagonal: (Teuchos implementation)");
517  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
518  Teuchos::Array<Scalar> regSum(diag->getLocalLength());
521 
522  std::vector<int> nnzPerRow(rowMap->getLocalNumElements());
523 
524  // FIXME 2021-10-22 JHU If this is called with doReciprocal=false, what should the correct behavior be? Currently,
525  // FIXME 2021-10-22 JHU the diagonal entry is set to be the sum of the absolute values of the row entries.
526 
527  const Magnitude zeroMagn = TST::magnitude(zero);
528  Magnitude avgAbsDiagVal = TST::magnitude(zero);
529  int numDiagsEqualToOne = 0;
530  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
531  nnzPerRow[i] = 0;
532  rcpA->getLocalRowView(i, cols, vals);
533  diagVals[i] = zero;
534  for (LocalOrdinal j = 0; j < cols.size(); ++j) {
535  regSum[i] += vals[j];
536  const Magnitude rowEntryMagn = TST::magnitude(vals[j]);
537  if (rowEntryMagn > zeroMagn)
538  nnzPerRow[i]++;
539  diagVals[i] += rowEntryMagn;
540  if (static_cast<size_t>(cols[j]) == i)
541  avgAbsDiagVal += rowEntryMagn;
542  }
543  if (nnzPerRow[i] == 1 && TST::magnitude(diagVals[i]) == 1.)
544  numDiagsEqualToOne++;
545  }
546  if (useAverageAbsDiagVal)
547  tol = TST::magnitude(100 * Teuchos::ScalarTraits<Scalar>::eps()) * (avgAbsDiagVal - numDiagsEqualToOne) / (rowMap->getLocalNumElements() - numDiagsEqualToOne);
548  if (doReciprocal) {
549  for (size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
550  if (replaceSingleEntryRowWithZero && nnzPerRow[i] <= static_cast<int>(1))
551  diagVals[i] = zero;
552  else if ((diagVals[i] != zero) && (TST::magnitude(diagVals[i]) < TST::magnitude(two * regSum[i])))
553  diagVals[i] = one / TST::magnitude((two * regSum[i]));
554  else {
555  if (TST::magnitude(diagVals[i]) > tol)
556  diagVals[i] = one / diagVals[i];
557  else {
558  diagVals[i] = valReplacement;
559  }
560  }
561  }
562  }
563  }
564  } else {
566  "UtilitiesBase::GetLumpedMatrixDiagonal(): extracting reciprocal of diagonal of a blocked matrix is not supported");
567  diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(bA->getRangeMapExtractor()->getFullMap(), true);
568 
569  for (size_t row = 0; row < bA->Rows(); ++row) {
570  for (size_t col = 0; col < bA->Cols(); ++col) {
571  if (!bA->getMatrix(row, col).is_null()) {
572  // if we are in Thyra mode, but the block (row,row) is again a blocked operator, we have to use (pseudo) Xpetra-style GIDs with offset!
573  bool bThyraMode = bA->getRangeMapExtractor()->getThyraMode() && (Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>>(bA->getMatrix(row, col)) == Teuchos::null);
574  RCP<Vector> ddtemp = bA->getRangeMapExtractor()->ExtractVector(diag, row, bThyraMode);
575  RCP<const Vector> dd = GetLumpedMatrixDiagonal(*(bA->getMatrix(row, col)));
576  ddtemp->update(Teuchos::as<Scalar>(1.0), *dd, Teuchos::as<Scalar>(1.0));
577  bA->getRangeMapExtractor()->InsertVector(ddtemp, row, diag, bThyraMode);
578  }
579  }
580  }
581  }
582 
583  return diag;
584 }
585 
586 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
590  // Get/Create distributed objects
591  RCP<const Map> rowMap = A.getRowMap();
593 
594  // Implement using Kokkos
595  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
596  using local_matrix_type = typename Matrix::local_matrix_type;
597  using execution_space = typename local_vector_type::execution_space;
598  using values_type = typename local_matrix_type::values_type;
599  using scalar_type = typename values_type::non_const_value_type;
600  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
601 
602  auto diag_dev = diag->getLocalViewDevice(Xpetra::Access::OverwriteAll);
603  auto local_mat_dev = A.getLocalMatrixDevice();
604  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
605 
606  Kokkos::parallel_for(
607  "GetMatrixMaxMinusOffDiagonal", my_policy,
608  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
609  auto mymax = KAT_S::zero();
610  auto row = local_mat_dev.rowConst(rowIdx);
611  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
612  if (rowIdx != row.colidx(entryIdx)) {
613  if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
614  mymax = -KAT_S::real(row.value(entryIdx));
615  }
616  }
617  diag_dev(rowIdx, 0) = mymax;
618  });
619 
620  return diag;
621 }
622 
623 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
627  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "GetMatrixMaxMinusOffDiagonal: BlockNumber must match's A's column map.");
628 
629  // Get/Create distributed objects
630  RCP<const Map> rowMap = A.getRowMap();
632 
633  // Implement using Kokkos
634  using local_vector_type = typename Vector::dual_view_type::t_dev_um;
635  using local_matrix_type = typename Matrix::local_matrix_type;
636  using execution_space = typename local_vector_type::execution_space;
637  using values_type = typename local_matrix_type::values_type;
638  using scalar_type = typename values_type::non_const_value_type;
639  using KAT_S = typename Kokkos::ArithTraits<scalar_type>;
640 
641  auto diag_dev = diag->getLocalViewDevice(Xpetra::Access::OverwriteAll);
642  auto local_mat_dev = A.getLocalMatrixDevice();
643  auto local_block_dev = BlockNumber.getLocalViewDevice(Xpetra::Access::ReadOnly);
644  Kokkos::RangePolicy<execution_space, int> my_policy(0, static_cast<int>(diag_dev.extent(0)));
645 
646  Kokkos::parallel_for(
647  "GetMatrixMaxMinusOffDiagonal", my_policy,
648  KOKKOS_LAMBDA(const LocalOrdinal rowIdx) {
649  auto mymax = KAT_S::zero();
650  auto row = local_mat_dev.row(rowIdx);
651  for (LocalOrdinal entryIdx = 0; entryIdx < row.length; ++entryIdx) {
652  if ((rowIdx != row.colidx(entryIdx)) && (local_block_dev(rowIdx, 0) == local_block_dev(row.colidx(entryIdx), 0))) {
653  if (KAT_S::real(mymax) < -KAT_S::real(row.value(entryIdx)))
654  mymax = -KAT_S::real(row.value(entryIdx));
655  }
656  }
657  diag_dev(rowIdx, 0) = mymax;
658  });
659 
660  return diag;
661 }
662 
663 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
668 
669  // check whether input vector "v" is a BlockedVector
670  RCP<const BlockedVector> bv = Teuchos::rcp_dynamic_cast<const BlockedVector>(v);
671  if (bv.is_null() == false) {
672  RCP<BlockedVector> bret = Teuchos::rcp_dynamic_cast<BlockedVector>(ret);
673  TEUCHOS_TEST_FOR_EXCEPTION(bret.is_null() == true, MueLu::Exceptions::RuntimeError, "MueLu::UtilitiesBase::GetInverse: return vector should be of type BlockedVector");
674  RCP<const BlockedMap> bmap = bv->getBlockedMap();
675  for (size_t r = 0; r < bmap->getNumMaps(); ++r) {
676  RCP<const MultiVector> submvec = bv->getMultiVector(r, bmap->getThyraMode());
677  RCP<const Vector> subvec = submvec->getVector(0);
679  bret->setMultiVector(r, subvecinf, bmap->getThyraMode());
680  }
681  return ret;
682  }
683 
684  // v is an {Epetra,Tpetra}Vector: work with the underlying raw data
685  ArrayRCP<Scalar> retVals = ret->getDataNonConst(0);
686  ArrayRCP<const Scalar> inputVals = v->getData(0);
687  for (size_t i = 0; i < v->getMap()->getLocalNumElements(); ++i) {
688  if (Teuchos::ScalarTraits<Scalar>::magnitude(inputVals[i]) > tol)
689  retVals[i] = Teuchos::ScalarTraits<Scalar>::one() / inputVals[i];
690  else
691  retVals[i] = valReplacement;
692  }
693  return ret;
694 }
695 
696 // template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
697 // RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
698 // UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
699 // GetMatrixOverlappedDiagonal(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> & A) {
700 // RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
701 
702 // // Undo block map (if we have one)
703 // RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
704 // if(!browMap.is_null()) rowMap = browMap->getMap();
705 
706 // RCP<Vector> localDiag = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap);
707 // try {
708 // const CrsMatrixWrap* crsOp = dynamic_cast<const CrsMatrixWrap*>(&A);
709 // if (crsOp == NULL) {
710 // throw Exceptions::RuntimeError("cast to CrsMatrixWrap failed");
711 // }
712 // Teuchos::ArrayRCP<size_t> offsets;
713 // crsOp->getLocalDiagOffsets(offsets);
714 // crsOp->getLocalDiagCopy(*localDiag,offsets());
715 // }
716 // catch (...) {
717 // ArrayRCP<Scalar> localDiagVals = localDiag->getDataNonConst(0);
718 // Teuchos::ArrayRCP<Scalar> diagVals = GetMatrixDiagonal(A);
719 // for (LocalOrdinal i = 0; i < localDiagVals.size(); i++)
720 // localDiagVals[i] = diagVals[i];
721 // localDiagVals = diagVals = null;
722 // }
723 
724 // RCP<Vector> diagonal = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap);
725 // RCP< const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer;
726 // importer = A.getCrsGraph()->getImporter();
727 // if (importer == Teuchos::null) {
728 // importer = Xpetra::ImportFactory<LocalOrdinal,GlobalOrdinal,Node>::Build(rowMap, colMap);
729 // }
730 // diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
731 // return diagonal;
732 // }
733 
734 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
738  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
739  RCP<Vector> localDiag = GetMatrixDiagonal(A);
740  RCP<const Import> importer = A.getCrsGraph()->getImporter();
741  if (importer.is_null() && !rowMap->isSameAs(*colMap)) {
742  importer = ImportFactory::Build(rowMap, colMap);
743  }
744  if (!importer.is_null()) {
745  RCP<Vector> diagonal = VectorFactory::Build(colMap);
746  diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
747  return diagonal;
748  } else
749  return localDiag;
750 }
751 
752 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
756  using STS = typename Teuchos::ScalarTraits<SC>;
757 
758  // Undo block map (if we have one)
759  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
760  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
761  if (!browMap.is_null()) rowMap = browMap->getMap();
762 
765  ArrayRCP<SC> localVals = local->getDataNonConst(0);
766 
767  for (LO row = 0; row < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++row) {
768  size_t nnz = A.getNumEntriesInLocalRow(row);
769  ArrayView<const LO> indices;
770  ArrayView<const SC> vals;
771  A.getLocalRowView(row, indices, vals);
772 
773  SC si = STS::zero();
774 
775  for (LO colID = 0; colID < static_cast<LO>(nnz); colID++) {
776  if (indices[colID] != row) {
777  si += vals[colID];
778  }
779  }
780  localVals[row] = si;
781  }
782 
784  importer = A.getCrsGraph()->getImporter();
785  if (importer == Teuchos::null) {
786  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
787  }
788  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
789  return ghosted;
790 }
791 
792 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
796  RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
797  using STS = typename Teuchos::ScalarTraits<Scalar>;
798  using MTS = typename Teuchos::ScalarTraits<Magnitude>;
799  using MT = Magnitude;
800  using RealValuedVector = Xpetra::Vector<MT, LO, GO, Node>;
801 
802  // Undo block map (if we have one)
803  RCP<const BlockedMap> browMap = Teuchos::rcp_dynamic_cast<const BlockedMap>(rowMap);
804  if (!browMap.is_null()) rowMap = browMap->getMap();
805 
808  ArrayRCP<MT> localVals = local->getDataNonConst(0);
809 
810  for (LO rowIdx = 0; rowIdx < static_cast<LO>(A.getRowMap()->getLocalNumElements()); ++rowIdx) {
811  size_t nnz = A.getNumEntriesInLocalRow(rowIdx);
812  ArrayView<const LO> indices;
813  ArrayView<const SC> vals;
814  A.getLocalRowView(rowIdx, indices, vals);
815 
816  MT si = MTS::zero();
817 
818  for (LO colID = 0; colID < static_cast<LO>(nnz); ++colID) {
819  if (indices[colID] != rowIdx) {
820  si += STS::magnitude(vals[colID]);
821  }
822  }
823  localVals[rowIdx] = si;
824  }
825 
827  importer = A.getCrsGraph()->getImporter();
828  if (importer == Teuchos::null) {
829  importer = Xpetra::ImportFactory<LO, GO, Node>::Build(rowMap, colMap);
830  }
831  ghosted->doImport(*local, *(importer), Xpetra::INSERT);
832  return ghosted;
833 }
834 
835 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
838  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
839  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
840  const size_t numVecs = X.getNumVectors();
841  RCP<MultiVector> RES = Residual(Op, X, RHS);
842  Teuchos::Array<Magnitude> norms(numVecs);
843  RES->norm2(norms);
844  return norms;
845 }
846 
847 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
850  ResidualNorm(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
851  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
852  const size_t numVecs = X.getNumVectors();
853  Residual(Op, X, RHS, Resid);
854  Teuchos::Array<Magnitude> norms(numVecs);
855  Resid.norm2(norms);
856  return norms;
857 }
858 
859 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
862  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS) {
863  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides")
864  const size_t numVecs = X.getNumVectors();
865  // TODO Op.getRangeMap should return a BlockedMap if it is a BlockedCrsOperator
866  RCP<MultiVector> RES = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(RHS.getMap(), numVecs, false); // no need to initialize to zero
867  Op.residual(X, RHS, *RES);
868  return RES;
869 }
870 
871 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
873  Residual(const Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>& Op, const MultiVector& X, const MultiVector& RHS, MultiVector& Resid) {
874  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of solution vectors != number of right-hand sides");
875  TEUCHOS_TEST_FOR_EXCEPTION(Resid.getNumVectors() != RHS.getNumVectors(), Exceptions::RuntimeError, "Number of residual vectors != number of right-hand sides");
876  Op.residual(X, RHS, Resid);
877 }
878 
879 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
880 Scalar
882  PowerMethod(const Matrix& A, bool scaleByDiag,
883  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
884  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
885  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
886 
887  // power iteration
888  RCP<Vector> diagInvVec;
889  if (scaleByDiag) {
890  diagInvVec = GetMatrixDiagonalInverse(A);
891  }
892 
893  Scalar lambda = PowerMethod(A, diagInvVec, niters, tolerance, verbose, seed);
894  return lambda;
895 }
896 
897 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
898 Scalar
900  PowerMethod(const Matrix& A, const RCP<Vector>& diagInvVec,
901  LocalOrdinal niters, typename Teuchos::ScalarTraits<Scalar>::magnitudeType tolerance, bool verbose, unsigned int seed) {
902  TEUCHOS_TEST_FOR_EXCEPTION(!(A.getRangeMap()->isSameAs(*(A.getDomainMap()))), Exceptions::Incompatible,
903  "Utils::PowerMethod: operator must have domain and range maps that are equivalent.");
904 
905  // Create three vectors, fill z with random numbers
909 
910  z->setSeed(seed); // seed random number generator
911  z->randomize(true); // use Xpetra implementation: -> same results for Epetra and Tpetra
912 
913  Teuchos::Array<Magnitude> norms(1);
914 
915  typedef Teuchos::ScalarTraits<Scalar> STS;
916 
917  const Scalar zero = STS::zero(), one = STS::one();
918 
919  Scalar lambda = zero;
920  Magnitude residual = STS::magnitude(zero);
921 
922  // power iteration
923  for (int iter = 0; iter < niters; ++iter) {
924  z->norm2(norms); // Compute 2-norm of z
925  q->update(one / norms[0], *z, zero); // Set q = z / normz
926  A.apply(*q, *z); // Compute z = A*q
927  if (diagInvVec != Teuchos::null)
928  z->elementWiseMultiply(one, *diagInvVec, *z, zero);
929  lambda = q->dot(*z); // Approximate maximum eigenvalue: lamba = dot(q,z)
930 
931  if (iter % 100 == 0 || iter + 1 == niters) {
932  r->update(1.0, *z, -lambda, *q, zero); // Compute A*q - lambda*q
933  r->norm2(norms);
934  residual = STS::magnitude(norms[0] / lambda);
935  if (verbose) {
936  std::cout << "Iter = " << iter
937  << " Lambda = " << lambda
938  << " Residual of A*q - lambda*q = " << residual
939  << std::endl;
940  }
941  }
942  if (residual < tolerance)
943  break;
944  }
945  return lambda;
946 }
947 
948 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
949 RCP<Teuchos::FancyOStream>
951  MakeFancy(std::ostream& os) {
952  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(os));
953  return fancy;
954 }
955 
956 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
960  const size_t numVectors = v.size();
961 
963  for (size_t j = 0; j < numVectors; j++) {
964  d += (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
965  }
967 }
968 
969 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
973  const size_t numVectors = v.size();
975 
977  for (size_t j = 0; j < numVectors; j++) {
978  d += Teuchos::as<MT>(weight[j]) * (v[j][i0] - v[j][i1]) * (v[j][i0] - v[j][i1]);
979  }
981 }
982 
983 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
987  LocalOrdinal numRows = A.getLocalNumRows();
988  typedef Teuchos::ScalarTraits<Scalar> STS;
989  ArrayRCP<bool> boundaryNodes(numRows, true);
990  if (count_twos_as_dirichlet) {
991  for (LocalOrdinal row = 0; row < numRows; row++) {
994  A.getLocalRowView(row, indices, vals);
995  size_t nnz = A.getNumEntriesInLocalRow(row);
996  if (nnz > 2) {
997  size_t col;
998  for (col = 0; col < nnz; col++)
999  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1000  if (!boundaryNodes[row])
1001  break;
1002  boundaryNodes[row] = false;
1003  }
1004  if (col == nnz)
1005  boundaryNodes[row] = true;
1006  }
1007  }
1008  } else {
1009  for (LocalOrdinal row = 0; row < numRows; row++) {
1012  A.getLocalRowView(row, indices, vals);
1013  size_t nnz = A.getNumEntriesInLocalRow(row);
1014  if (nnz > 1)
1015  for (size_t col = 0; col < nnz; col++)
1016  if ((indices[col] != row) && STS::magnitude(vals[col]) > tol) {
1017  boundaryNodes[row] = false;
1018  break;
1019  }
1020  }
1021  }
1022  return boundaryNodes;
1023 }
1024 
1025 template <class SC, class LO, class GO, class NO, class memory_space>
1026 Kokkos::View<bool*, memory_space>
1028  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1029  const bool count_twos_as_dirichlet) {
1030  using impl_scalar_type = typename Kokkos::ArithTraits<SC>::val_type;
1031  using ATS = Kokkos::ArithTraits<impl_scalar_type>;
1032  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1033  using helpers = Xpetra::Helpers<SC, LO, GO, NO>;
1034 
1035  Kokkos::View<bool*, typename NO::device_type::memory_space> boundaryNodes;
1036 
1037  if (helpers::isTpetraBlockCrs(A)) {
1038  const Tpetra::BlockCrsMatrix<SC, LO, GO, NO>& Am = toTpetraBlock(A);
1039  auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
1040  auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
1041  auto values = Am.getValuesDevice();
1042  LO numBlockRows = Am.getLocalNumRows();
1043  const LO stride = Am.getBlockSize() * Am.getBlockSize();
1044 
1045  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numBlockRows);
1046 
1047  if (count_twos_as_dirichlet)
1048  throw Exceptions::RuntimeError("BlockCrs does not support counting twos as Dirichlet");
1049 
1050  Kokkos::parallel_for(
1051  "MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0, numBlockRows),
1052  KOKKOS_LAMBDA(const LO row) {
1053  auto rowView = b_graph.rowConst(row);
1054  auto length = rowView.length;
1055  LO valstart = b_rowptr[row] * stride;
1056 
1057  boundaryNodes(row) = true;
1058  decltype(length) colID = 0;
1059  for (; colID < length; colID++) {
1060  if (rowView.colidx(colID) != row) {
1061  LO current = valstart + colID * stride;
1062  for (LO k = 0; k < stride; k++) {
1063  if (ATS::magnitude(values[current + k]) > tol) {
1064  boundaryNodes(row) = false;
1065  break;
1066  }
1067  }
1068  }
1069  if (boundaryNodes(row) == false)
1070  break;
1071  }
1072  });
1073  } else {
1074  auto localMatrix = A.getLocalMatrixDevice();
1075  LO numRows = A.getLocalNumRows();
1076  boundaryNodes = Kokkos::View<bool*, typename NO::device_type::memory_space>(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), numRows);
1077 
1078  if (count_twos_as_dirichlet)
1079  Kokkos::parallel_for(
1080  "MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0, numRows),
1081  KOKKOS_LAMBDA(const LO row) {
1082  auto rowView = localMatrix.row(row);
1083  auto length = rowView.length;
1084 
1085  boundaryNodes(row) = true;
1086  if (length > 2) {
1087  decltype(length) colID = 0;
1088  for (; colID < length; colID++)
1089  if ((rowView.colidx(colID) != row) &&
1090  (ATS::magnitude(rowView.value(colID)) > tol)) {
1091  if (!boundaryNodes(row))
1092  break;
1093  boundaryNodes(row) = false;
1094  }
1095  if (colID == length)
1096  boundaryNodes(row) = true;
1097  }
1098  });
1099  else
1100  Kokkos::parallel_for(
1101  "MueLu:Utils::DetectDirichletRows", range_type(0, numRows),
1102  KOKKOS_LAMBDA(const LO row) {
1103  auto rowView = localMatrix.row(row);
1104  auto length = rowView.length;
1105 
1106  boundaryNodes(row) = true;
1107  for (decltype(length) colID = 0; colID < length; colID++)
1108  if ((rowView.colidx(colID) != row) &&
1109  (ATS::magnitude(rowView.value(colID)) > tol)) {
1110  boundaryNodes(row) = false;
1111  break;
1112  }
1113  });
1114  }
1115  if constexpr (std::is_same<memory_space, typename NO::device_type::memory_space>::value)
1116  return boundaryNodes;
1117  else {
1118  Kokkos::View<bool*, memory_space> boundaryNodes2(Kokkos::ViewAllocateWithoutInitializing("boundaryNodes"), boundaryNodes.extent(0));
1119  Kokkos::deep_copy(boundaryNodes2, boundaryNodes);
1120  return boundaryNodes2;
1121  }
1122  // CAG: No idea why this is needed to avoid "warning: missing return statement at end of non-void function"
1123  Kokkos::View<bool*, memory_space> dummy("dummy", 0);
1124  return dummy;
1125 }
1126 
1127 template <class SC, class LO, class GO, class NO>
1128 Kokkos::View<bool*, typename NO::device_type::memory_space>
1131  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1132  const bool count_twos_as_dirichlet) {
1133  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename NO::device_type::memory_space>(A, tol, count_twos_as_dirichlet);
1134 }
1135 
1136 template <class SC, class LO, class GO, class NO>
1137 Kokkos::View<bool*, typename Kokkos::HostSpace>
1140  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1141  const bool count_twos_as_dirichlet) {
1142  return MueLu::DetectDirichletRows_kokkos<SC, LO, GO, NO, typename Kokkos::HostSpace>(A, tol, count_twos_as_dirichlet);
1143 }
1144 
1145 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1149  // assume that there is no zero diagonal in matrix
1150  bHasZeroDiagonal = false;
1151 
1153  A.getLocalDiagCopy(*diagVec);
1154  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
1155 
1156  LocalOrdinal numRows = A.getLocalNumRows();
1157  typedef Teuchos::ScalarTraits<Scalar> STS;
1158  ArrayRCP<bool> boundaryNodes(numRows, false);
1159  for (LocalOrdinal row = 0; row < numRows; row++) {
1162  A.getLocalRowView(row, indices, vals);
1163  size_t nnz = 0; // collect nonzeros in row (excluding the diagonal)
1164  bool bHasDiag = false;
1165  for (decltype(indices.size()) col = 0; col < indices.size(); col++) {
1166  if (indices[col] != row) {
1167  if (STS::magnitude(vals[col] / STS::magnitude(sqrt(STS::magnitude(diagVecData[row]) * STS::magnitude(diagVecData[col])))) > tol) {
1168  nnz++;
1169  }
1170  } else
1171  bHasDiag = true; // found a diagonal entry
1172  }
1173  if (bHasDiag == false)
1174  bHasZeroDiagonal = true; // we found at least one row without a diagonal
1175  else if (nnz == 0)
1176  boundaryNodes[row] = true;
1177  }
1178  return boundaryNodes;
1179 }
1180 
1181 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1186  const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
1187  const bool count_twos_as_dirichlet) {
1188  using range_type = Kokkos::RangePolicy<LO, typename Node::execution_space>;
1189 
1190  auto dirichletRows = DetectDirichletRows_kokkos(A, tol, count_twos_as_dirichlet);
1191 
1192  LocalOrdinal numRows = A.getLocalNumRows();
1193  LocalOrdinal numVectors = RHS.getNumVectors();
1194  TEUCHOS_ASSERT_EQUALITY(numVectors, Teuchos::as<LocalOrdinal>(InitialGuess.getNumVectors()));
1195 #ifdef MUELU_DEBUG
1196  TEUCHOS_ASSERT(RHS.getMap()->isCompatible(InitialGuess.getMap()));
1197 #endif
1198 
1199  auto lclRHS = RHS.getLocalViewDevice(Xpetra::Access::ReadOnly);
1200  auto lclInitialGuess = InitialGuess.getLocalViewDevice(Xpetra::Access::ReadWrite);
1201 
1202  Kokkos::parallel_for(
1203  "MueLu:Utils::EnforceInitialCondition", range_type(0, numRows),
1204  KOKKOS_LAMBDA(const LO row) {
1205  if (dirichletRows(row)) {
1206  for (LocalOrdinal j = 0; j < numVectors; ++j)
1207  lclInitialGuess(row, j) = lclRHS(row, j);
1208  }
1209  });
1210 }
1211 
1212 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1215  Teuchos::ArrayRCP<bool> nonzeros) {
1216  TEUCHOS_ASSERT(vals.size() == nonzeros.size());
1217  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
1218  const magnitudeType eps = 2.0 * Teuchos::ScalarTraits<magnitudeType>::eps();
1219  for (size_t i = 0; i < static_cast<size_t>(vals.size()); i++) {
1220  nonzeros[i] = (Teuchos::ScalarTraits<Scalar>::magnitude(vals[i]) > eps);
1221  }
1222 }
1223 
1224 // Find Nonzeros in a device view
1225 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1228  Kokkos::View<bool*, typename Node::device_type> nonzeros) {
1229  using ATS = Kokkos::ArithTraits<Scalar>;
1230  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1231  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1232  TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
1233  const typename ATS::magnitudeType eps = 2.0 * impl_ATS::eps();
1234 
1235  Kokkos::parallel_for(
1236  "MueLu:Maxwell1::FindNonZeros", range_type(0, vals.extent(0)),
1237  KOKKOS_LAMBDA(const size_t i) {
1238  nonzeros(i) = (impl_ATS::magnitude(vals(i, 0)) > eps);
1239  });
1240 }
1241 
1242 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1245  const Teuchos::ArrayRCP<bool>& dirichletRows,
1246  Teuchos::ArrayRCP<bool> dirichletCols,
1247  Teuchos::ArrayRCP<bool> dirichletDomain) {
1252  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == rowMap->getLocalNumElements());
1253  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == colMap->getLocalNumElements());
1254  TEUCHOS_ASSERT(static_cast<size_t>(dirichletDomain.size()) == domMap->getLocalNumElements());
1256  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1257  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1258  if (dirichletRows[i]) {
1260  ArrayView<const Scalar> values;
1261  A.getLocalRowView(i, indices, values);
1262  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1263  myColsToZero->replaceLocalValue(indices[j], 0, one);
1264  }
1265  }
1266 
1269  if (!importer.is_null()) {
1270  globalColsToZero = Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, 1, /*zeroOut=*/true);
1271  // export to domain map
1272  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1273  // import to column map
1274  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1275  } else
1276  globalColsToZero = myColsToZero;
1277 
1278  FindNonZeros(globalColsToZero->getData(0), dirichletDomain);
1279  FindNonZeros(myColsToZero->getData(0), dirichletCols);
1280 }
1281 
1282 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1285  const Kokkos::View<bool*, typename Node::device_type>& dirichletRows,
1286  Kokkos::View<bool*, typename Node::device_type> dirichletCols,
1287  Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
1288  using ATS = Kokkos::ArithTraits<Scalar>;
1289  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1290  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1294  TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
1295  TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
1296  TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
1298  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1299  auto myColsToZeroView = myColsToZero->getLocalViewDevice(Xpetra::Access::ReadWrite);
1300  auto localMatrix = A.getLocalMatrixDevice();
1301  Kokkos::parallel_for(
1302  "MueLu:Maxwell1::DetectDirichletCols", range_type(0, rowMap->getLocalNumElements()),
1303  KOKKOS_LAMBDA(const LocalOrdinal row) {
1304  if (dirichletRows(row)) {
1305  auto rowView = localMatrix.row(row);
1306  auto length = rowView.length;
1307 
1308  for (decltype(length) colID = 0; colID < length; colID++)
1309  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1310  }
1311  });
1312 
1315  if (!importer.is_null()) {
1316  globalColsToZero = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(domMap, /*zeroOut=*/true);
1317  // export to domain map
1318  globalColsToZero->doExport(*myColsToZero, *importer, Xpetra::ADD);
1319  // import to column map
1320  myColsToZero->doImport(*globalColsToZero, *importer, Xpetra::INSERT);
1321  } else
1322  globalColsToZero = myColsToZero;
1323  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(globalColsToZero->getLocalViewDevice(Xpetra::Access::ReadOnly), dirichletDomain);
1324  UtilitiesBase<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindNonZeros(myColsToZero->getLocalViewDevice(Xpetra::Access::ReadOnly), dirichletCols);
1325 }
1326 
1327 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1330  typedef Teuchos::ScalarTraits<Scalar> STS;
1332  typedef Teuchos::ScalarTraits<MT> MTS;
1334  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1335  size_t nnz = A.getNumEntriesInLocalRow(row);
1338  A.getLocalRowView(row, indices, vals);
1339 
1340  Scalar rowsum = STS::zero();
1341  Scalar diagval = STS::zero();
1342 
1343  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1344  LocalOrdinal col = indices[colID];
1345  if (row == col)
1346  diagval = vals[colID];
1347  rowsum += vals[colID];
1348  }
1349  // printf("A(%d,:) row_sum(point) = %6.4e\n",row,rowsum);
1350  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1351  // printf("Row %d triggers rowsum\n",(int)row);
1352  dirichletRows[row] = true;
1353  }
1354  }
1355 }
1356 
1357 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1360  typedef Teuchos::ScalarTraits<Scalar> STS;
1362  typedef Teuchos::ScalarTraits<MT> MTS;
1363  RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>> rowmap = A.getRowMap();
1364 
1365  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1366 
1367  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1368  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1369  size_t nnz = A.getNumEntriesInLocalRow(row);
1370  ArrayView<const LocalOrdinal> indices;
1371  ArrayView<const Scalar> vals;
1372  A.getLocalRowView(row, indices, vals);
1373 
1374  Scalar rowsum = STS::zero();
1375  Scalar diagval = STS::zero();
1376  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1377  LocalOrdinal col = indices[colID];
1378  if (row == col)
1379  diagval = vals[colID];
1380  if (block_id[row] == block_id[col])
1381  rowsum += vals[colID];
1382  }
1383 
1384  // printf("A(%d,:) row_sum(block) = %6.4e\n",row,rowsum);
1385  if (rowSumTol < MTS::one() && STS::magnitude(rowsum) > STS::magnitude(diagval) * rowSumTol) {
1386  // printf("Row %d triggers rowsum\n",(int)row);
1387  dirichletRows[row] = true;
1388  }
1389  }
1390 }
1391 
1392 // Applies rowsum criterion
1393 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1395  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1396  Kokkos::View<bool*, memory_space>& dirichletRows) {
1397  typedef Teuchos::ScalarTraits<Scalar> STS;
1399 
1400  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1401  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1402 
1403  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1404  size_t nnz = A.getNumEntriesInLocalRow(row);
1407  A.getLocalRowView(row, indices, vals);
1408 
1409  Scalar rowsum = STS::zero();
1410  Scalar diagval = STS::zero();
1411  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1412  LocalOrdinal col = indices[colID];
1413  if (row == col)
1414  diagval = vals[colID];
1415  rowsum += vals[colID];
1416  }
1417  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1418  dirichletRowsHost(row) = true;
1419  }
1420 
1421  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1422 }
1423 
1424 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1427  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1428  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1429  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, rowSumTol, dirichletRows);
1430 }
1431 
1432 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1435  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1436  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1437  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, rowSumTol, dirichletRows);
1438 }
1439 
1440 // Applies rowsum criterion
1441 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, class memory_space>
1444  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1445  Kokkos::View<bool*, memory_space>& dirichletRows) {
1446  typedef Teuchos::ScalarTraits<Scalar> STS;
1448 
1449  TEUCHOS_TEST_FOR_EXCEPTION(!A.getColMap()->isSameAs(*BlockNumber.getMap()), std::runtime_error, "ApplyRowSumCriterion: BlockNumber must match's A's column map.");
1450 
1451  auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
1452  Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
1453 
1454  Teuchos::ArrayRCP<const LocalOrdinal> block_id = BlockNumber.getData(0);
1455  for (LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
1456  size_t nnz = A.getNumEntriesInLocalRow(row);
1459  A.getLocalRowView(row, indices, vals);
1460 
1461  Scalar rowsum = STS::zero();
1462  Scalar diagval = STS::zero();
1463  for (LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
1464  LocalOrdinal col = indices[colID];
1465  if (row == col)
1466  diagval = vals[colID];
1467  if (block_id[row] == block_id[col])
1468  rowsum += vals[colID];
1469  }
1470  if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
1471  dirichletRowsHost(row) = true;
1472  }
1473 
1474  Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
1475 }
1476 
1477 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1481  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1482  Kokkos::View<bool*, typename Node::device_type::memory_space>& dirichletRows) {
1483  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, typename Node::device_type::memory_space>(A, BlockNumber, rowSumTol, dirichletRows);
1484 }
1485 
1486 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1490  const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
1491  Kokkos::View<bool*, Kokkos::HostSpace>& dirichletRows) {
1492  MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node, Kokkos::HostSpace>(A, BlockNumber, rowSumTol, dirichletRows);
1493 }
1494 
1495 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1499  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1505  myColsToZero->putScalar(zero);
1506  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1507  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1508  if (dirichletRows[i]) {
1511  A.getLocalRowView(i, indices, values);
1512  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1513  myColsToZero->replaceLocalValue(indices[j], 0, one);
1514  }
1515  }
1516 
1518  globalColsToZero->putScalar(zero);
1520  // export to domain map
1521  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1522  // import to column map
1523  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1524  Teuchos::ArrayRCP<const Scalar> myCols = myColsToZero->getData(0);
1525  Teuchos::ArrayRCP<bool> dirichletCols(colMap->getLocalNumElements(), true);
1527  for (size_t i = 0; i < colMap->getLocalNumElements(); i++) {
1528  dirichletCols[i] = Teuchos::ScalarTraits<Scalar>::magnitude(myCols[i]) > 2.0 * eps;
1529  }
1530  return dirichletCols;
1531 }
1532 
1533 template <class SC, class LO, class GO, class NO>
1534 Kokkos::View<bool*, typename NO::device_type>
1537  const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
1538  using ATS = Kokkos::ArithTraits<SC>;
1539  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1540  using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
1541 
1542  SC zero = ATS::zero();
1543 
1544  auto localMatrix = A.getLocalMatrixDevice();
1545  LO numRows = A.getLocalNumRows();
1546 
1548  Teuchos::RCP<const Xpetra::Map<LO, GO, NO>> colMap = A.getColMap();
1550  myColsToZero->putScalar(zero);
1551  auto myColsToZeroView = myColsToZero->getLocalViewDevice(Xpetra::Access::ReadWrite);
1552  // Find all local column indices that are in Dirichlet rows, record in myColsToZero as 1.0
1553  Kokkos::parallel_for(
1554  "MueLu:Utils::DetectDirichletCols1", range_type(0, numRows),
1555  KOKKOS_LAMBDA(const LO row) {
1556  if (dirichletRows(row)) {
1557  auto rowView = localMatrix.row(row);
1558  auto length = rowView.length;
1559 
1560  for (decltype(length) colID = 0; colID < length; colID++)
1561  myColsToZeroView(rowView.colidx(colID), 0) = impl_ATS::one();
1562  }
1563  });
1564 
1566  globalColsToZero->putScalar(zero);
1568  // export to domain map
1569  globalColsToZero->doExport(*myColsToZero, *exporter, Xpetra::ADD);
1570  // import to column map
1571  myColsToZero->doImport(*globalColsToZero, *exporter, Xpetra::INSERT);
1572 
1573  auto myCols = myColsToZero->getLocalViewDevice(Xpetra::Access::ReadOnly);
1574  size_t numColEntries = colMap->getLocalNumElements();
1575  Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing("dirichletCols"), numColEntries);
1576  const typename ATS::magnitudeType eps = 2.0 * ATS::eps();
1577 
1578  Kokkos::parallel_for(
1579  "MueLu:Utils::DetectDirichletCols2", range_type(0, numColEntries),
1580  KOKKOS_LAMBDA(const size_t i) {
1581  dirichletCols(i) = impl_ATS::magnitude(myCols(i, 0)) > eps;
1582  });
1583  return dirichletCols;
1584 }
1585 
1586 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1587 Scalar
1590  // We check only row maps. Column may be different. One would hope that they are the same, as we typically
1591  // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
1592  // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
1593  // simple as couple of elements swapped)
1594  TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
1595  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
1596 
1597  const Map& AColMap = *A.getColMap();
1598  const Map& BColMap = *B.getColMap();
1599 
1602  size_t nnzA = 0, nnzB = 0;
1603 
1604  // We use a simple algorithm
1605  // for each row we fill valBAll array with the values in the corresponding row of B
1606  // as such, it serves as both sorted array and as storage, so we don't need to do a
1607  // tricky problem: "find a value in the row of B corresponding to the specific GID"
1608  // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
1609  // corresponding entries.
1610  // The algorithm should be reasonably cheap, as it does not sort anything, provided
1611  // that getLocalElement and getGlobalElement functions are reasonably effective. It
1612  // *is* possible that the costs are hidden in those functions, but if maps are close
1613  // to linear maps, we should be fine
1614  Teuchos::Array<Scalar> valBAll(BColMap.getLocalNumElements());
1615 
1617  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero(), f = zero, gf;
1618  size_t numRows = A.getLocalNumRows();
1619  for (size_t i = 0; i < numRows; i++) {
1620  A.getLocalRowView(i, indA, valA);
1621  B.getLocalRowView(i, indB, valB);
1622  nnzA = indA.size();
1623  nnzB = indB.size();
1624 
1625  // Set up array values
1626  for (size_t j = 0; j < nnzB; j++)
1627  valBAll[indB[j]] = valB[j];
1628 
1629  for (size_t j = 0; j < nnzA; j++) {
1630  // The cost of the whole Frobenius dot product function depends on the
1631  // cost of the getLocalElement and getGlobalElement functions here.
1632  LocalOrdinal ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
1633  if (ind != invalid)
1634  f += valBAll[ind] * valA[j];
1635  }
1636 
1637  // Clean up array values
1638  for (size_t j = 0; j < nnzB; j++)
1639  valBAll[indB[j]] = zero;
1640  }
1641 
1642  MueLu_sumAll(AColMap.getComm(), f, gf);
1643 
1644  return gf;
1645 }
1646 
1647 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1650  // Distribute the seeds evenly in [1,maxint-1]. This guarantees nothing
1651  // about where in random number stream we are, but avoids overflow situations
1652  // in parallel when multiplying by a PID. It would be better to use
1653  // a good parallel random number generator.
1654  double one = 1.0;
1655  int maxint = INT_MAX; //= 2^31-1 = 2147483647 for 32-bit integers
1656  int mySeed = Teuchos::as<int>((maxint - 1) * (one - (comm.getRank() + 1) / (comm.getSize() + one)));
1657  if (mySeed < 1 || mySeed == maxint) {
1658  std::ostringstream errStr;
1659  errStr << "Error detected with random seed = " << mySeed << ". It should be in the interval [1,2^31-2].";
1660  throw Exceptions::RuntimeError(errStr.str());
1661  }
1662  std::srand(mySeed);
1663  // For Tpetra, we could use Kokkos' random number generator here.
1665  // Epetra
1666  // MultiVector::Random() -> Epetra_Util::RandomDouble() -> Epetra_Utils::RandomInt()
1667  // Its own random number generator, based on Seed_. Seed_ is initialized in Epetra_Util constructor with std::rand()
1668  // So our setting std::srand() affects that too
1669 }
1670 
1671 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1674  std::vector<LocalOrdinal>& dirichletRows, bool count_twos_as_dirichlet) {
1676  dirichletRows.resize(0);
1677  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1680  A->getLocalRowView(i, indices, values);
1681  int nnz = 0;
1682  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1684  nnz++;
1685  }
1686  }
1687  if (nnz == 1 || (count_twos_as_dirichlet && nnz == 2)) {
1688  dirichletRows.push_back(i);
1689  }
1690  }
1691 }
1692 
1693 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1696  const std::vector<LocalOrdinal>& dirichletRows) {
1697  RCP<const Map> Rmap = A->getRowMap();
1698  RCP<const Map> Cmap = A->getColMap();
1701 
1702  for (size_t i = 0; i < dirichletRows.size(); i++) {
1703  GlobalOrdinal row_gid = Rmap->getGlobalElement(dirichletRows[i]);
1704 
1707  A->getLocalRowView(dirichletRows[i], indices, values);
1708  // NOTE: This won't work with fancy node types.
1709  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1710  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1711  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1712  valuesNC[j] = one;
1713  else
1714  valuesNC[j] = zero;
1715  }
1716  }
1717 }
1718 
1719 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1722  const Teuchos::ArrayRCP<const bool>& dirichletRows) {
1724  RCP<const Map> domMap = A->getDomainMap();
1725  RCP<const Map> ranMap = A->getRangeMap();
1726  RCP<const Map> Rmap = A->getRowMap();
1727  RCP<const Map> Cmap = A->getColMap();
1728  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1731  A->resumeFill();
1732  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1733  if (dirichletRows[i]) {
1734  GlobalOrdinal row_gid = Rmap->getGlobalElement(i);
1735 
1738  A->getLocalRowView(i, indices, values);
1739 
1740  Teuchos::ArrayRCP<Scalar> valuesNC(values.size());
1741  for (size_t j = 0; j < (size_t)indices.size(); j++) {
1742  if (Cmap->getGlobalElement(indices[j]) == row_gid)
1743  valuesNC[j] = one;
1744  else
1745  valuesNC[j] = zero;
1746  }
1747  A->replaceLocalValues(i, indices, valuesNC());
1748  }
1749  }
1750  A->fillComplete(domMap, ranMap);
1751 }
1752 
1753 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1756  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
1758  using ATS = Kokkos::ArithTraits<Scalar>;
1759  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1760  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1761 
1766 
1767  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
1768 
1769  auto localMatrix = A->getLocalMatrixDevice();
1770  auto localRmap = Rmap->getLocalMap();
1771  auto localCmap = Cmap->getLocalMap();
1772 
1773  Kokkos::parallel_for(
1774  "MueLu::Utils::ApplyOAZ", range_type(0, dirichletRows.extent(0)),
1775  KOKKOS_LAMBDA(const LocalOrdinal row) {
1776  if (dirichletRows(row)) {
1777  auto rowView = localMatrix.row(row);
1778  auto length = rowView.length;
1779  auto row_gid = localRmap.getGlobalElement(row);
1780  auto row_lid = localCmap.getLocalElement(row_gid);
1781 
1782  for (decltype(length) colID = 0; colID < length; colID++)
1783  if (rowView.colidx(colID) == row_lid)
1784  rowView.value(colID) = impl_ATS::one();
1785  else
1786  rowView.value(colID) = impl_ATS::zero();
1787  }
1788  });
1789 }
1790 
1791 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1794  const std::vector<LocalOrdinal>& dirichletRows,
1795  Scalar replaceWith) {
1796  for (size_t i = 0; i < dirichletRows.size(); i++) {
1799  A->getLocalRowView(dirichletRows[i], indices, values);
1800  // NOTE: This won't work with fancy node types.
1801  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1802  for (size_t j = 0; j < (size_t)indices.size(); j++)
1803  valuesNC[j] = replaceWith;
1804  }
1805 }
1806 
1807 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1810  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1811  Scalar replaceWith) {
1812  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == A->getRowMap()->getLocalNumElements());
1813  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1814  if (dirichletRows[i]) {
1817  A->getLocalRowView(i, indices, values);
1818  // NOTE: This won't work with fancy node types.
1819  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1820  for (size_t j = 0; j < (size_t)indices.size(); j++)
1821  valuesNC[j] = replaceWith;
1822  }
1823  }
1824 }
1825 
1826 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1829  const Teuchos::ArrayRCP<const bool>& dirichletRows,
1830  Scalar replaceWith) {
1831  TEUCHOS_ASSERT(static_cast<size_t>(dirichletRows.size()) == X->getMap()->getLocalNumElements());
1832  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1833  if (dirichletRows[i]) {
1834  for (size_t j = 0; j < X->getNumVectors(); j++)
1835  X->replaceLocalValue(i, j, replaceWith);
1836  }
1837  }
1838 }
1839 
1840 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1843  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1844  Scalar replaceWith) {
1845  using ATS = Kokkos::ArithTraits<Scalar>;
1846  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1847 
1848  typename ATS::val_type impl_replaceWith = replaceWith;
1849 
1850  auto localMatrix = A->getLocalMatrixDevice();
1851  LocalOrdinal numRows = A->getLocalNumRows();
1852 
1853  Kokkos::parallel_for(
1854  "MueLu:Utils::ZeroDirichletRows", range_type(0, numRows),
1855  KOKKOS_LAMBDA(const LocalOrdinal row) {
1856  if (dirichletRows(row)) {
1857  auto rowView = localMatrix.row(row);
1858  auto length = rowView.length;
1859  for (decltype(length) colID = 0; colID < length; colID++)
1860  rowView.value(colID) = impl_replaceWith;
1861  }
1862  });
1863 }
1864 
1865 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1868  const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
1869  Scalar replaceWith) {
1870  using ATS = Kokkos::ArithTraits<Scalar>;
1871  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1872 
1873  typename ATS::val_type impl_replaceWith = replaceWith;
1874 
1875  auto myCols = X->getLocalViewDevice(Xpetra::Access::ReadWrite);
1876  size_t numVecs = X->getNumVectors();
1877  Kokkos::parallel_for(
1878  "MueLu:Utils::ZeroDirichletRows_MV", range_type(0, dirichletRows.size()),
1879  KOKKOS_LAMBDA(const size_t i) {
1880  if (dirichletRows(i)) {
1881  for (size_t j = 0; j < numVecs; j++)
1882  myCols(i, j) = impl_replaceWith;
1883  }
1884  });
1885 }
1886 
1887 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1890  const Teuchos::ArrayRCP<const bool>& dirichletCols,
1891  Scalar replaceWith) {
1892  TEUCHOS_ASSERT(static_cast<size_t>(dirichletCols.size()) == A->getColMap()->getLocalNumElements());
1893  for (size_t i = 0; i < A->getLocalNumRows(); i++) {
1896  A->getLocalRowView(i, indices, values);
1897  // NOTE: This won't work with fancy node types.
1898  Scalar* valuesNC = const_cast<Scalar*>(values.getRawPtr());
1899  for (size_t j = 0; j < static_cast<size_t>(indices.size()); j++)
1900  if (dirichletCols[indices[j]])
1901  valuesNC[j] = replaceWith;
1902  }
1903 }
1904 
1905 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1908  const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
1909  Scalar replaceWith) {
1910  using ATS = Kokkos::ArithTraits<Scalar>;
1911  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1912 
1913  typename ATS::val_type impl_replaceWith = replaceWith;
1914 
1915  auto localMatrix = A->getLocalMatrixDevice();
1916  LocalOrdinal numRows = A->getLocalNumRows();
1917 
1918  Kokkos::parallel_for(
1919  "MueLu:Utils::ZeroDirichletCols", range_type(0, numRows),
1920  KOKKOS_LAMBDA(const LocalOrdinal row) {
1921  auto rowView = localMatrix.row(row);
1922  auto length = rowView.length;
1923  for (decltype(length) colID = 0; colID < length; colID++)
1924  if (dirichletCols(rowView.colidx(colID))) {
1925  rowView.value(colID) = impl_replaceWith;
1926  }
1927  });
1928 }
1929 
1930 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1935  // Make sure A's RowMap == DomainMap
1936  if (!A->getRowMap()->isSameAs(*A->getDomainMap())) {
1937  throw std::runtime_error("UtilitiesBase::FindDirichletRowsAndPropagateToCols row and domain maps must match.");
1938  }
1940  bool has_import = !importer.is_null();
1941 
1942  // Find the Dirichlet rows
1943  std::vector<LocalOrdinal> dirichletRows;
1944  FindDirichletRows(A, dirichletRows);
1945 
1946 #if 0
1947  printf("[%d] DirichletRow Ids = ",A->getRowMap()->getComm()->getRank());
1948  for(size_t i=0; i<(size_t) dirichletRows.size(); i++)
1949  printf("%d ",dirichletRows[i]);
1950  printf("\n");
1951  fflush(stdout);
1952 #endif
1953  // Allocate all as non-Dirichlet
1954  isDirichletRow = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getRowMap(), true);
1955  isDirichletCol = Xpetra::VectorFactory<int, LocalOrdinal, GlobalOrdinal, Node>::Build(A->getColMap(), true);
1956 
1957  {
1958  Teuchos::ArrayRCP<int> dr_rcp = isDirichletRow->getDataNonConst(0);
1959  Teuchos::ArrayView<int> dr = dr_rcp();
1960  Teuchos::ArrayRCP<int> dc_rcp = isDirichletCol->getDataNonConst(0);
1961  Teuchos::ArrayView<int> dc = dc_rcp();
1962  for (size_t i = 0; i < (size_t)dirichletRows.size(); i++) {
1963  dr[dirichletRows[i]] = 1;
1964  if (!has_import) dc[dirichletRows[i]] = 1;
1965  }
1966  }
1967 
1968  if (has_import)
1969  isDirichletCol->doImport(*isDirichletRow, *importer, Xpetra::CombineMode::ADD);
1970 }
1971 
1972 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1976  using ISC = typename Kokkos::ArithTraits<Scalar>::val_type;
1977  using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
1978  using local_matrix_type = typename CrsMatrix::local_matrix_type;
1979  using values_type = typename local_matrix_type::values_type;
1980 
1981  const ISC ONE = Kokkos::ArithTraits<ISC>::one();
1982  const ISC ZERO = Kokkos::ArithTraits<ISC>::zero();
1983 
1984  // Copy the values array of the old matrix to a new array, replacing all the non-zeros with one
1985  auto localMatrix = original->getLocalMatrixDevice();
1986  TEUCHOS_TEST_FOR_EXCEPTION(!original->hasCrsGraph(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: Cannot get CrsGraph");
1987  values_type new_values("values", localMatrix.nnz());
1988 
1989  Kokkos::parallel_for(
1990  "ReplaceNonZerosWithOnes", range_type(0, localMatrix.nnz()), KOKKOS_LAMBDA(const size_t i) {
1991  if (localMatrix.values(i) != ZERO)
1992  new_values(i) = ONE;
1993  else
1994  new_values(i) = ZERO;
1995  });
1996 
1997  // Build the new matrix
1998  RCP<Matrix> NewMatrix = Xpetra::MatrixFactory<SC, LO, GO, NO>::Build(original->getCrsGraph(), new_values);
1999  TEUCHOS_TEST_FOR_EXCEPTION(NewMatrix.is_null(), Exceptions::RuntimeError, "ReplaceNonZerosWithOnes: MatrixFactory::Build() did not return matrix");
2000  NewMatrix->fillComplete(original->getDomainMap(), original->getRangeMap());
2001  return NewMatrix;
2002 }
2003 
2004 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2010  Xpetra::UnderlyingLib lib = sourceBlockedMap.lib();
2011 
2012  // De-stride the map if we have to (might regret this later)
2013  RCP<const Map> fullMap = sourceBlockedMap.getMap();
2014  RCP<const Map> stridedMap = Teuchos::rcp_dynamic_cast<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node>>(fullMap);
2015  if (!stridedMap.is_null()) fullMap = stridedMap->getMap();
2016 
2017  // Initial sanity checking for map compatibil
2018  const size_t numSubMaps = sourceBlockedMap.getNumMaps();
2019  if (!Importer.getSourceMap()->isCompatible(*fullMap))
2020  throw std::runtime_error("GenerateBlockedTargetMap(): Map compatibility error");
2021 
2022  // Build an indicator vector
2024 
2025  for (size_t i = 0; i < numSubMaps; i++) {
2026  RCP<const Map> map = sourceBlockedMap.getMap(i);
2027 
2028  for (size_t j = 0; j < map->getLocalNumElements(); j++) {
2029  LocalOrdinal jj = fullMap->getLocalElement(map->getGlobalElement(j));
2030  block_ids->replaceLocalValue(jj, (int)i);
2031  }
2032  }
2033 
2034  // Get the block ids for the new map
2035  RCP<const Map> targetMap = Importer.getTargetMap();
2037  new_block_ids->doImport(*block_ids, Importer, Xpetra::CombineMode::ADD);
2038  Teuchos::ArrayRCP<const int> dataRCP = new_block_ids->getData(0);
2039  Teuchos::ArrayView<const int> data = dataRCP();
2040 
2041  // Get the GIDs for each subblock
2042  Teuchos::Array<Teuchos::Array<GlobalOrdinal>> elementsInSubMap(numSubMaps);
2043  for (size_t i = 0; i < targetMap->getLocalNumElements(); i++) {
2044  elementsInSubMap[data[i]].push_back(targetMap->getGlobalElement(i));
2045  }
2046 
2047  // Generate the new submaps
2048  std::vector<RCP<const Map>> subMaps(numSubMaps);
2049  for (size_t i = 0; i < numSubMaps; i++) {
2050  subMaps[i] = Xpetra::MapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), elementsInSubMap[i](), targetMap->getIndexBase(), targetMap->getComm());
2051  }
2052 
2053  // Build the BlockedMap
2054  return rcp(new BlockedMap(targetMap, subMaps));
2055 }
2056 
2057 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2060  if ((rowMap.lib() == Xpetra::UseTpetra) && (colMap.lib() == Xpetra::UseTpetra)) {
2061  auto tpRowMap = toTpetra(rowMap);
2062  auto tpColMap = toTpetra(colMap);
2063  return tpColMap.isLocallyFitted(tpRowMap);
2064  }
2065 
2066  ArrayView<const GlobalOrdinal> rowElements = rowMap.getLocalElementList();
2067  ArrayView<const GlobalOrdinal> colElements = colMap.getLocalElementList();
2068 
2069  const size_t numElements = rowElements.size();
2070 
2071  if (size_t(colElements.size()) < numElements)
2072  return false;
2073 
2074  bool goodMap = true;
2075  for (size_t i = 0; i < numElements; i++)
2076  if (rowElements[i] != colElements[i]) {
2077  goodMap = false;
2078  break;
2079  }
2080 
2081  return goodMap;
2082 }
2083 
2084 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2089  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2090  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2091  using device = typename local_graph_type::device_type;
2092  using execution_space = typename local_matrix_type::execution_space;
2093  using ordinal_type = typename local_matrix_type::ordinal_type;
2094 
2095  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2096 
2097  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2098 
2101 
2102  // Copy out and reorder data
2103  auto view1D = Kokkos::subview(retval->getLocalViewDevice(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2104  Kokkos::parallel_for(
2105  "Utilities::ReverseCuthillMcKee",
2106  Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
2107  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2108  view1D(rcmOrder(rowIdx)) = rowIdx;
2109  });
2110  return retval;
2111 }
2112 
2113 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2118  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
2119  using lno_nnz_view_t = typename local_graph_type::entries_type::non_const_type;
2120  using device = typename local_graph_type::device_type;
2121  using execution_space = typename local_matrix_type::execution_space;
2122  using ordinal_type = typename local_matrix_type::ordinal_type;
2123 
2124  local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
2125  LocalOrdinal numRows = localGraph.numRows();
2126 
2127  lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm<device, typename local_graph_type::row_map_type, typename local_graph_type::entries_type, lno_nnz_view_t>(localGraph.row_map, localGraph.entries);
2128 
2131 
2132  // Copy out data
2133  auto view1D = Kokkos::subview(retval->getLocalViewDevice(Xpetra::Access::ReadWrite), Kokkos::ALL(), 0);
2134  // Since KokkosKernels produced RCM, also reverse the order of the view to get CM
2135  Kokkos::parallel_for(
2136  "Utilities::ReverseCuthillMcKee",
2137  Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
2138  KOKKOS_LAMBDA(const ordinal_type rowIdx) {
2139  view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
2140  });
2141  return retval;
2142 }
2143 
2144 } // namespace MueLu
2145 
2146 #define MUELU_UTILITIESBASE_SHORT
2147 #endif // MUELU_UTILITIESBASE_DEF_HPP
2148 
2149 // LocalWords: LocalOrdinal
static void FindNonZeros(const Teuchos::ArrayRCP< const Scalar > vals, Teuchos::ArrayRCP< bool > nonzeros)
Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon.
static RCP< Export< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows)
static Teuchos::RCP< Vector > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Return vector containing: max_{i=k}(-a_ik), for each for i in the matrix.
static void ZeroDirichletCols(Teuchos::RCP< Matrix > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< bool > &dirichletRows, Teuchos::ArrayRCP< bool > dirichletCols, Teuchos::ArrayRCP< bool > dirichletDomain)
Detects Dirichlet columns &amp; domains from a list of Dirichlet rows.
#define MueLu_sumAll(rcpComm, in, out)
Array< T > & append(const T &x)
virtual int getSize() const =0
MueLu::DefaultLocalOrdinal LocalOrdinal
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, memory_space > &dirichletRows)
virtual LO getBlockSize() const override
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Matrix &Op)
static void FindDirichletRowsAndPropagateToCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletRow, Teuchos::RCP< Xpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node >> &isDirichletCol)
virtual int getRank() const =0
static void ApplyRowSumCriterionHost(const Matrix &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, Kokkos::HostSpace > &dirichletRows)
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
static magnitudeType eps()
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > removeSmallEntries(Teuchos::RCP< Xpetra::CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType threshold, const bool keepDiagonal)
static Scalar PowerMethod(const Matrix &A, bool scaleByDiag=true, LocalOrdinal niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Power method.
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
Apply Rowsum Criterion.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
virtual const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
static RCP< Xpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > GetThresholdedGraph(const RCP< Matrix > &A, const Magnitude threshold, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a graph.
size_type size() const
virtual void resumeFill(const RCP< ParameterList > &params=null)=0
LocalOrdinal LO
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > ReplaceNonZerosWithOnes(const RCP< Matrix > &original)
Creates a copy of a matrix where the non-zero entries are replaced by ones.
size_t getLocalNumRows() const override
size_type size() const
Exception throws to report incompatible objects (like maps).
static void SetRandomSeed(const Teuchos::Comm< int > &comm)
Set seed for random number generator.
MueLu::DefaultNode Node
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getTargetMap() const =0
static RCP< Time > getNewTimer(const std::string &name)
void resize(const size_type n, const T &val=T())
virtual void fillComplete(const RCP< const Map > &domainMap, const RCP< const Map > &rangeMap, const RCP< ParameterList > &params=null)=0
virtual void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &diag) const =0
static RCP< MultiVector > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static Teuchos::ArrayRCP< const bool > DetectDirichletCols(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Teuchos::ArrayRCP< const bool > &dirichletRows)
Detect Dirichlet columns based on Dirichlet rows.
virtual void getLocalRowView(LocalOrdinal LocalRow, ArrayView< const LocalOrdinal > &indices, ArrayView< const Scalar > &values) const =0
static void FindDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node >> &A, std::vector< LocalOrdinal > &dirichletRows, bool count_twos_as_dirichlet=false)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
Kokkos::View< bool *, memory_space > DetectDirichletRows_kokkos(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
static RCP< CrsMatrixWrap > GetThresholdedMatrix(const RCP< Matrix > &Ain, const Magnitude threshold, const bool keepDiagonal=true, const GlobalOrdinal expectedNNZperRow=-1)
Threshold a matrix.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
#define I(i, j)
static Teuchos::ArrayRCP< const bool > DetectDirichletRows(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
virtual bool isFillComplete() const =0
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
size_t getNumMaps() const
virtual void replaceLocalValues(LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)=0
static Teuchos::ArrayRCP< Scalar > GetMatrixDiagonal_arcp(const Matrix &A)
Extract Matrix Diagonal.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getMap() const =0
static RCP< const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > > GeneratedBlockedTargetMap(const Xpetra::BlockedMap< LocalOrdinal, GlobalOrdinal, Node > &sourceBlockedMap, const Xpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer)
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
T * getRawPtr() const
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool doLumped=false)
Extract Matrix Diagonal.
static void seedrandom(unsigned int s)
static Teuchos::ArrayRCP< const bool > DetectDirichletRowsExt(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool &bHasZeroDiagonal, const Magnitude &tol=Teuchos::ScalarTraits< Scalar >::zero())
Detect Dirichlet rows (extended version)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
const RCP< const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > getMap(size_t i, bool bThyraMode=false) const
virtual UnderlyingLib lib() const
static magnitudeType magnitude(T a)
TransListIter iter
RCP< const Tpetra::CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > toTpetra(const RCP< const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > &graph)
void push_back(const value_type &x)
virtual RCP< const CrsGraph > getCrsGraph() const =0
static Teuchos::Array< Magnitude > ResidualNorm(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const MultiVector &X, const MultiVector &RHS)
static void EnforceInitialCondition(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS, Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &InitialGuess, const Magnitude &tol=Teuchos::ScalarTraits< Magnitude >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows and copy values from RHS multivector to InitialGuess for Dirichlet rows...
int length() const
size_type size() const
Scalar SC
impl_scalar_type_dualview::t_dev::const_type getValuesDevice(const LO &lclRow) const
virtual Teuchos::RCP< const Map > getRangeMap() const =0
static RCP< Teuchos::FancyOStream > MakeFancy(std::ostream &os)
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
void residual(const Operator< SC, LO, GO, NO > &Aop, const MultiVector< SC, LO, GO, NO > &X_in, const MultiVector< SC, LO, GO, NO > &B_in, MultiVector< SC, LO, GO, NO > &R_in)
Exception throws to report errors in the internal logical of the program.
static Scalar Frobenius(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B)
Frobenius inner product of two matrices.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getSourceMap() const =0
#define TEUCHOS_ASSERT(assertion_test)
#define TEUCHOS_ASSERT_EQUALITY(val1, val2)
static Teuchos::RCP< Vector > GetInverse(Teuchos::RCP< const Vector > v, Magnitude tol=Teuchos::ScalarTraits< Scalar >::eps()*100, Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero())
Return vector containing inverse of input vector.
static Kokkos::View< bool *, typename Kokkos::HostSpace > DetectDirichletRows_kokkos_host(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
virtual size_t getNumVectors() const =0
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
Squared distance between two rows in a multivector.
static Teuchos::RCP< Vector > GetLumpedMatrixDiagonal(Matrix const &A, const bool doReciprocal=false, Magnitude tol=Teuchos::ScalarTraits< Scalar >::magnitude(Teuchos::ScalarTraits< Scalar >::zero()), Scalar valReplacement=Teuchos::ScalarTraits< Scalar >::zero(), const bool replaceSingleEntryRowWithZero=false, const bool useAverageAbsDiagVal=false)
Extract Matrix Diagonal of lumped matrix.
virtual UnderlyingLib lib() const =0
static Kokkos::View< bool *, typename NO::device_type::memory_space > DetectDirichletRows_kokkos(const Matrix &A, const Magnitude &tol=Teuchos::ScalarTraits< typename Teuchos::ScalarTraits< SC >::magnitudeType >::zero(), const bool count_twos_as_dirichlet=false)
Detect Dirichlet rows.
static RCP< Matrix > Crs2Op(RCP< CrsMatrix > Op)
virtual Teuchos::ArrayRCP< const Scalar > getData(size_t j) const =0
virtual Teuchos::RCP< const Map > getDomainMap() const =0
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)
bool is_null() const