MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
48 
49 #include <Xpetra_Map.hpp>
50 #include <Xpetra_Vector.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
54 
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
58 
60 
61 #include "MueLu_Aggregates.hpp"
62 #include "MueLu_LWGraph.hpp"
63 #include "MueLu_Level.hpp"
64 #include "MueLu_MasterList.hpp"
65 #include "MueLu_Monitor.hpp"
66 #include "MueLu_Types.hpp"
67 #include "MueLu_Utilities.hpp"
68 
69 namespace MueLu {
70 
71 namespace NotayUtils {
72 template <class LocalOrdinal>
74  return min + as<LocalOrdinal>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
75 }
76 
77 template <class LocalOrdinal>
79  typedef LocalOrdinal LO;
80  LO n = Teuchos::as<LO>(list.size());
81  for (LO i = 0; i < n - 1; i++)
82  std::swap(list[i], list[RandomOrdinal(i, n - 1)]);
83 }
84 } // namespace NotayUtils
85 
86 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
88  RCP<ParameterList> validParamList = rcp(new ParameterList());
89 
90 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
91  SET_VALID_ENTRY("aggregation: pairwise: size");
92  SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
93  SET_VALID_ENTRY("aggregation: compute aggregate qualities");
94  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
95  SET_VALID_ENTRY("aggregation: ordering");
96 #undef SET_VALID_ENTRY
97 
98  // general variables needed in AggregationFactory
99  validParamList->set<RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
100  validParamList->set<RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
101  validParamList->set<RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
102  validParamList->set<RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
103 
104  return validParamList;
105 }
106 
107 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
109  const ParameterList& pL = GetParameterList();
110 
111  Input(currentLevel, "A");
112  Input(currentLevel, "Graph");
113  Input(currentLevel, "DofsPerNode");
114  if (pL.get<bool>("aggregation: compute aggregate qualities")) {
115  Input(currentLevel, "AggregateQualities");
116  }
117 }
118 
119 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121  FactoryMonitor m(*this, "Build", currentLevel);
122  using STS = Teuchos::ScalarTraits<Scalar>;
123  using MT = typename STS::magnitudeType;
124 
126 
128  if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
129  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
130  out->setShowAllFrontMatter(false).setShowProcRank(true);
131  } else {
132  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
133  }
134 
135  const ParameterList& pL = GetParameterList();
136 
137  const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
138  TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
140  "Pairwise requires kappa > 2"
141  " otherwise all rows are considered as Dirichlet rows.");
142 
143  // Parameters
144  int maxNumIter = 3;
145  if (pL.isParameter("aggregation: pairwise: size"))
146  maxNumIter = pL.get<int>("aggregation: pairwise: size");
147  TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
149  "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
150  " must be a strictly positive integer");
151 
152  RCP<const LWGraph> graph = Get<RCP<LWGraph> >(currentLevel, "Graph");
153  RCP<const Matrix> A = Get<RCP<Matrix> >(currentLevel, "A");
154 
155  // Setup aggregates & aggStat objects
156  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
157  aggregates->setObjectLabel("PW");
158 
159  const LO numRows = graph->GetNodeNumVertices();
160 
161  // construct aggStat information
162  std::vector<unsigned> aggStat(numRows, READY);
163 
164  const int DofsPerNode = Get<int>(currentLevel, "DofsPerNode");
166  "Pairwise only supports one dof per node");
167 
168  // This follows the paper:
169  // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
170  // SISC 34(3), pp. A2288-2316.
171 
172  // Handle Ordering
173  std::string orderingStr = pL.get<std::string>("aggregation: ordering");
174  enum {
175  O_NATURAL,
176  O_RANDOM,
177  O_CUTHILL_MCKEE,
178  } ordering;
179 
180  ordering = O_NATURAL;
181  if (orderingStr == "random")
182  ordering = O_RANDOM;
183  else if (orderingStr == "natural") {
184  } else if (orderingStr == "cuthill-mckee" || orderingStr == "cm")
185  ordering = O_CUTHILL_MCKEE;
186  else {
187  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError, "Invalid ordering type");
188  }
189 
190  // Get an ordering vector
191  // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
192  // will get ignored in the aggregation phases, so we don't need to worry about
193  // running off the end.
194  Array<LO> orderingVector(numRows);
195  for (LO i = 0; i < numRows; i++)
196  orderingVector[i] = i;
197  if (ordering == O_RANDOM)
198  MueLu::NotayUtils::RandomReorder(orderingVector);
199  else if (ordering == O_CUTHILL_MCKEE) {
201  auto localVector = rcmVector->getData(0);
202  for (LO i = 0; i < numRows; i++)
203  orderingVector[i] = localVector[i];
204  }
205 
206  // Get the party stated
207  LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
208  BuildInitialAggregates(pL, A, orderingVector(), kappa,
209  *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
210  TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
211  "Initial pairwise aggregation failed to aggregate all nodes");
212  LO numLocalAggregates = aggregates->GetNumAggregates();
213  GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
214  << A->getLocalNumRows() / numLocalAggregates << std::endl;
215 
216  // Temporary data storage for further aggregation steps
217  local_matrix_type intermediateP;
218  local_matrix_type coarseLocalA;
219 
220  // Compute the on rank part of the local matrix
221  // that the square submatrix that only contains
222  // columns corresponding to local rows.
223  LO numLocalDirichletNodes = numDirichletNodes;
224  Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
225  BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
226  for (LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
227  // Compute the intermediate prolongator
228  BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
229  localVertex2AggId(), intermediateP);
230 
231  // Compute the coarse local matrix and coarse row sum
232  BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
233 
234  // Directly compute rowsum from A, rather than coarseA
235  row_sum_type rowSum("rowSum", numLocalAggregates);
236  {
237  std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
238  auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
239  for (LO i = 0; i < (LO)numRows; i++) {
240  if (aggStat[i] != AGGREGATED)
241  continue;
242  LO agg = vertex2AggId[i];
243  agg2vertex[agg].push_back(i);
244  }
245 
246  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
247  for (LO i = 0; i < numRows; i++) {
248  // If not aggregated already, skip this guy
249  if (aggStat[i] != AGGREGATED)
250  continue;
251  int agg = vertex2AggId[i];
252  std::vector<LO>& myagg = agg2vertex[agg];
253 
254  size_t nnz = A->getNumEntriesInLocalRow(i);
255  ArrayView<const LO> indices;
256  ArrayView<const SC> vals;
257  A->getLocalRowView(i, indices, vals);
258 
260  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
261  bool found = false;
262  if (indices[colidx] < numRows) {
263  for (LO j = 0; j < (LO)myagg.size(); j++)
264  if (vertex2AggId[indices[colidx]] == agg)
265  found = true;
266  }
267  if (!found) {
268  *out << "- ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl;
269  mysum += vals[colidx];
270  } else {
271  *out << "- NOT ADDING col " << indices[colidx] << " = " << vals[colidx] << std::endl;
272  }
273  }
274 
275  rowSum_h[agg] = mysum;
276  }
277  Kokkos::deep_copy(rowSum, rowSum_h);
278  }
279 
280  // Get local orderingVector
281  Array<LO> localOrderingVector(numRows);
282  for (LO i = 0; i < numRows; i++)
283  localOrderingVector[i] = i;
284  if (ordering == O_RANDOM)
285  MueLu::NotayUtils::RandomReorder(localOrderingVector);
286  else if (ordering == O_CUTHILL_MCKEE) {
288  auto localVector = rcmVector->getData(0);
289  for (LO i = 0; i < numRows; i++)
290  localOrderingVector[i] = localVector[i];
291  }
292 
293  // Compute new aggregates
294  numLocalAggregates = 0;
295  numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
296  std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
297  localVertex2AggId.resize(numNonAggregatedNodes, -1);
298  BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
299  localAggStat, localVertex2AggId,
300  numLocalAggregates, numNonAggregatedNodes);
301 
302  // After the first initial pairwise aggregation
303  // the Dirichlet nodes have been removed.
304  numLocalDirichletNodes = 0;
305 
306  // Update the aggregates
307  RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
308  ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
309  for (size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
310  LO oldAggIdx = vertex2AggId[vertexIdx];
311  if (oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
312  vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
313  }
314  }
315 
316  // We could probably print some better statistics at some point
317  GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
318  << A->getLocalNumRows() / numLocalAggregates << std::endl;
319  }
320  aggregates->SetNumAggregates(numLocalAggregates);
321  aggregates->AggregatesCrossProcessors(false);
322  aggregates->ComputeAggregateSizes(true /*forceRecompute*/);
323 
324  // DO stuff
325  Set(currentLevel, "Aggregates", aggregates);
326  GetOStream(Statistics0) << aggregates->description() << std::endl;
327 }
328 
329 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
332  const RCP<const Matrix>& A,
333  const Teuchos::ArrayView<const LO>& orderingVector,
335  Aggregates& aggregates,
336  std::vector<unsigned>& aggStat,
337  LO& numNonAggregatedNodes,
338  LO& numDirichletNodes) const {
339  Monitor m(*this, "BuildInitialAggregates");
340  using STS = Teuchos::ScalarTraits<Scalar>;
341  using MT = typename STS::magnitudeType;
343 
345  if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
346  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
347  out->setShowAllFrontMatter(false).setShowProcRank(true);
348  } else {
349  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
350  }
351 
352  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
353  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
354  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
355  const MT MT_TWO = MT_ONE + MT_ONE;
356  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
357  const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
358 
359  const MT kappa_init = kappa / (kappa - MT_TWO);
360  const LO numRows = aggStat.size();
361  const int myRank = A->getMap()->getComm()->getRank();
362 
363  // For finding "ties" where we fall back to the ordering. Making this larger than
364  // hard zero substantially increases code robustness.
365  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
366  double tie_less = 1.0 - tie_criterion;
367  double tie_more = 1.0 + tie_criterion;
368 
369  // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
370  // and so we're not doing again here.
371  // This should probably be fixed at some point.
372 
373  // Extract diagonal, rowsums, etc
374  // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
378  const ArrayRCP<const SC> D = ghostedDiag->getData(0);
379  const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
380  const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
381 
382  // Aggregates stuff
383  ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
384  ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner()->getDataNonConst(0);
385  ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
386  ArrayView<LO> procWinner = procWinner_rcp();
387 
388  // Algorithm 4.2
389 
390  // 0,1 : Initialize: Flag boundary conditions
391  // Modification: We assume symmetry here aij = aji
392  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
393  MT aii = STS::magnitude(D[row]);
394  MT rowsum = AbsRs[row];
395 
396  if (aii >= kappa_init * rowsum) {
397  *out << "Flagging index " << row << " as dirichlet "
398  "aii >= kappa*rowsum = "
399  << aii << " >= " << kappa_init << " " << rowsum << std::endl;
400  aggStat[row] = IGNORED;
401  --numNonAggregatedNodes;
402  ++numDirichletNodes;
403  }
404  }
405 
406  // 2 : Iteration
407  LO aggIndex = LO_ZERO;
408  for (LO i = 0; i < numRows; i++) {
409  LO current_idx = orderingVector[i];
410  // If we're aggregated already, skip this guy
411  if (aggStat[current_idx] != READY)
412  continue;
413 
414  MT best_mu = MT_ZERO;
415  LO best_idx = LO_INVALID;
416 
417  size_t nnz = A->getNumEntriesInLocalRow(current_idx);
418  ArrayView<const LO> indices;
419  ArrayView<const SC> vals;
420  A->getLocalRowView(current_idx, indices, vals);
421 
422  MT aii = STS::real(D[current_idx]);
423  MT si = STS::real(S[current_idx]);
424  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
425  // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
426  LO col = indices[colidx];
427  SC val = vals[colidx];
428  if (current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO)
429  continue;
430 
431  MT aij = STS::real(val);
432  MT ajj = STS::real(D[col]);
433  MT sj = -STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
434  if (aii - si + ajj - sj >= MT_ZERO) {
435  // Modification: We assume symmetry here aij = aji
436  MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj);
437  MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj));
438  MT mu = mu_top / mu_bottom;
439 
440  // Modification: Explicitly check the tie criterion here
441  if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
442  (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) {
443  best_mu = mu;
444  best_idx = col;
445  *out << "[" << current_idx << "] Column UPDATED " << col << ": "
446  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
447  << " = " << aii - si + ajj - sj << ", aij = " << aij << ", mu = " << mu << std::endl;
448  } else {
449  *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
450  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
451  << " = " << aii - si + ajj - sj << ", aij = " << aij << ", mu = " << mu << std::endl;
452  }
453  } else {
454  *out << "[" << current_idx << "] Column FAILED " << col << ": "
455  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
456  << " = " << aii - si + ajj - sj << std::endl;
457  }
458  } // end column for loop
459 
460  if (best_idx == LO_INVALID) {
461  *out << "No node buddy found for index " << current_idx
462  << " [agg " << aggIndex << "]\n"
463  << std::endl;
464  // We found no potential node-buddy, so let's just make this a singleton
465  // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
466  // the paper
467 
468  aggStat[current_idx] = ONEPT;
469  vertex2AggId[current_idx] = aggIndex;
470  procWinner[current_idx] = myRank;
471  numNonAggregatedNodes--;
472  aggIndex++;
473 
474  } else {
475  // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
476  if (best_mu <= kappa) {
477  *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
478 
479  aggStat[current_idx] = AGGREGATED;
480  aggStat[best_idx] = AGGREGATED;
481  vertex2AggId[current_idx] = aggIndex;
482  vertex2AggId[best_idx] = aggIndex;
483  procWinner[current_idx] = myRank;
484  procWinner[best_idx] = myRank;
485  numNonAggregatedNodes -= 2;
486  aggIndex++;
487 
488  } else {
489  *out << "No buddy found for index " << current_idx << ","
490  " but aggregating as singleton [agg "
491  << aggIndex << "]" << std::endl;
492 
493  aggStat[current_idx] = ONEPT;
494  vertex2AggId[current_idx] = aggIndex;
495  procWinner[current_idx] = myRank;
496  numNonAggregatedNodes--;
497  aggIndex++;
498  } // best_mu
499  } // best_idx
500  } // end Algorithm 4.2
501 
502  *out << "vertex2aggid :";
503  for (int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
504  *out << i << "(" << vertex2AggId[i] << ")";
505  }
506  *out << std::endl;
507 
508  // update aggregate object
509  aggregates.SetNumAggregates(aggIndex);
510 } // BuildInitialAggregates
511 
512 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
515  const RCP<const Matrix>& A,
516  const Teuchos::ArrayView<const LO>& orderingVector,
517  const typename Matrix::local_matrix_type& coarseA,
519  const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
520  Kokkos::LayoutLeft,
521  typename Matrix::local_matrix_type::device_type>& rowSum,
522  std::vector<LocalOrdinal>& localAggStat,
523  Teuchos::Array<LocalOrdinal>& localVertex2AggID,
524  LO& numLocalAggregates,
525  LO& numNonAggregatedNodes) const {
526  Monitor m(*this, "BuildFurtherAggregates");
527 
528  // Set debug outputs based on environment variable
530  if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
531  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
532  out->setShowAllFrontMatter(false).setShowProcRank(true);
533  } else {
534  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
535  }
536 
537  using value_type = typename local_matrix_type::value_type;
538  const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
541  const magnitude_type MT_two = MT_one + MT_one;
542  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
543 
544  // For finding "ties" where we fall back to the ordering. Making this larger than
545  // hard zero substantially increases code robustness.
546  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
547  double tie_less = 1.0 - tie_criterion;
548  double tie_more = 1.0 + tie_criterion;
549 
550  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
551  Kokkos::deep_copy(rowSum_h, rowSum);
552 
553  // Extracting the diagonal of a KokkosSparse::CrsMatrix
554  // is not currently provided in kokkos-kernels so here
555  // is an ugly way to get that done...
556  const LO numRows = static_cast<LO>(coarseA.numRows());
557  typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows);
558  typename local_matrix_type::row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map);
559  Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
560  typename local_matrix_type::index_type::HostMirror entries_h = Kokkos::create_mirror_view(coarseA.graph.entries);
561  Kokkos::deep_copy(entries_h, coarseA.graph.entries);
562  typename local_matrix_type::values_type::HostMirror values_h = Kokkos::create_mirror_view(coarseA.values);
563  Kokkos::deep_copy(values_h, coarseA.values);
564  for (LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
565  for (LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
566  entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
567  ++entryIdx) {
568  if (rowIdx == static_cast<LO>(entries_h(entryIdx))) {
569  diagA_h(rowIdx) = values_h(entryIdx);
570  }
571  }
572  }
573 
574  for (LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
575  if (localAggStat[currentIdx] != READY) {
576  continue;
577  }
578 
581  const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
582  const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
583  for (auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
584  const LO colIdx = static_cast<LO>(entries_h(entryIdx));
585  if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) {
586  continue;
587  }
588 
589  const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
590  const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
591  const magnitude_type sj = -Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
592  if (aii - si + ajj - sj >= MT_zero) {
593  const magnitude_type mu_top = MT_two / (MT_one / aii + MT_one / ajj);
594  const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
595  const magnitude_type mu = mu_top / mu_bottom;
596 
597  // Modification: Explicitly check the tie criterion here
598  if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
599  (mu < best_mu * tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
600  best_mu = mu;
601  bestIdx = colIdx;
602  *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
603  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
604  << " = " << aii - si + ajj - sj << ", aij = " << aij << " mu = " << mu << std::endl;
605  } else {
606  *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
607  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
608  << " = " << aii - si + ajj - sj << ", aij = " << aij << ", mu = " << mu << std::endl;
609  }
610  } else {
611  *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
612  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
613  << " = " << aii - si + ajj - sj << std::endl;
614  }
615  } // end loop over row entries
616 
617  if (bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
618  localAggStat[currentIdx] = ONEPT;
619  localVertex2AggID[currentIdx] = numLocalAggregates;
620  --numNonAggregatedNodes;
621  ++numLocalAggregates;
622  } else {
623  if (best_mu <= kappa) {
624  *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
625 
626  localAggStat[currentIdx] = AGGREGATED;
627  localVertex2AggID[currentIdx] = numLocalAggregates;
628  --numNonAggregatedNodes;
629 
630  localAggStat[bestIdx] = AGGREGATED;
631  localVertex2AggID[bestIdx] = numLocalAggregates;
632  --numNonAggregatedNodes;
633 
634  ++numLocalAggregates;
635  } else {
636  *out << "No buddy found for index " << currentIdx << ","
637  " but aggregating as singleton [agg "
638  << numLocalAggregates << "]" << std::endl;
639 
640  localAggStat[currentIdx] = ONEPT;
641  localVertex2AggID[currentIdx] = numLocalAggregates;
642  --numNonAggregatedNodes;
643  ++numLocalAggregates;
644  }
645  }
646  } // end loop over matrix rows
647 
648 } // BuildFurtherAggregates
649 
650 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
652  BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA,
653  typename Matrix::local_matrix_type& onrankA) const {
654  Monitor m(*this, "BuildOnRankLocalMatrix");
655 
656  // Set debug outputs based on environment variable
658  if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
659  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
660  out->setShowAllFrontMatter(false).setShowProcRank(true);
661  } else {
662  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
663  }
664 
665  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
666  using values_type = typename local_matrix_type::values_type;
667  using size_type = typename local_graph_type::size_type;
668  using col_index_type = typename local_graph_type::data_type;
669  using array_layout = typename local_graph_type::array_layout;
670  using memory_traits = typename local_graph_type::memory_traits;
671  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
672  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
673  // Extract on rank part of A
674  // Simply check that the column index is less than the number of local rows
675  // otherwise remove it.
676 
677  const int numRows = static_cast<int>(localA.numRows());
678  row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
679  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
680  typename local_graph_type::row_map_type::HostMirror origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map);
681  typename local_graph_type::entries_type::HostMirror origColind_h = Kokkos::create_mirror_view(localA.graph.entries);
682  typename values_type::HostMirror origValues_h = Kokkos::create_mirror_view(localA.values);
683  Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
684  Kokkos::deep_copy(origColind_h, localA.graph.entries);
685  Kokkos::deep_copy(origValues_h, localA.values);
686 
687  // Compute the number of nnz entries per row
688  rowPtr_h(0) = 0;
689  for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
690  for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
691  if (origColind_h(entryIdx) < numRows) {
692  rowPtr_h(rowIdx + 1) += 1;
693  }
694  }
695  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
696  }
697  Kokkos::deep_copy(rowPtr, rowPtr_h);
698 
699  const LO nnzOnrankA = rowPtr_h(numRows);
700 
701  // Now use nnz per row to allocate matrix views and store column indices and values
702  col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
703  values_type values("onrankA values", rowPtr_h(numRows));
704  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
705  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
706  int entriesInRow;
707  for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
708  entriesInRow = 0;
709  for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
710  if (origColind_h(entryIdx) < numRows) {
711  colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
712  values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
713  ++entriesInRow;
714  }
715  }
716  }
717  Kokkos::deep_copy(colInd, colInd_h);
718  Kokkos::deep_copy(values, values_h);
719 
720  onrankA = local_matrix_type("onrankA", numRows, numRows,
721  nnzOnrankA, values, rowPtr, colInd);
722 }
723 
724 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
727  const LocalOrdinal numDirichletNodes,
728  const LocalOrdinal numLocalAggregates,
729  const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
730  typename Matrix::local_matrix_type& intermediateP) const {
731  Monitor m(*this, "BuildIntermediateProlongator");
732 
733  // Set debug outputs based on environment variable
735  if (const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
736  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
737  out->setShowAllFrontMatter(false).setShowProcRank(true);
738  } else {
739  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
740  }
741 
742  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
743  using values_type = typename local_matrix_type::values_type;
744  using size_type = typename local_graph_type::size_type;
745  using col_index_type = typename local_graph_type::data_type;
746  using array_layout = typename local_graph_type::array_layout;
747  using memory_traits = typename local_graph_type::memory_traits;
748  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
749  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
750 
751  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
752 
753  const int intermediatePnnz = numRows - numDirichletNodes;
754  row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
755  col_indices_type colInd("intermediateP column indices", intermediatePnnz);
756  values_type values("intermediateP values", intermediatePnnz);
757  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
758  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
759 
760  rowPtr_h(0) = 0;
761  for (int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
762  // Skip Dirichlet nodes
763  if (localVertex2AggID[rowIdx] == LO_INVALID) {
764  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
765  } else {
766  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
767  colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
768  }
769  }
770 
771  Kokkos::deep_copy(rowPtr, rowPtr_h);
772  Kokkos::deep_copy(colInd, colInd_h);
773  Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
774 
775  intermediateP = local_matrix_type("intermediateP",
776  numRows, numLocalAggregates, intermediatePnnz,
777  values, rowPtr, colInd);
778 } // BuildIntermediateProlongator
779 
780 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
782  BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP,
783  typename Matrix::local_matrix_type& coarseA) const {
784  Monitor m(*this, "BuildCoarseLocalMatrix");
785 
786  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
787  using values_type = typename local_matrix_type::values_type;
788  using size_type = typename local_graph_type::size_type;
789  using col_index_type = typename local_graph_type::data_type;
790  using array_layout = typename local_graph_type::array_layout;
791  using memory_traits = typename local_graph_type::memory_traits;
792  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
793  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
794 
796  localSpGEMM(coarseA, intermediateP, "AP", AP);
797 
798  // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
799  // I am not sure but doing it for safety in case it stashes data from the previous
800  // spgemm computation...
801 
802  // Compute Ac = Pt * AP
803  // Two steps needed:
804  // 1. compute Pt
805  // 2. perform multiplication
806 
807  // Step 1 compute Pt
808  // Obviously this requires the same amount of storage as P except for the rowPtr
809  row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
810  intermediateP.numCols() + 1);
811  col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
812  intermediateP.nnz());
813  values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
814  intermediateP.nnz());
815 
816  typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
817  typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
818  Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
819  Kokkos::deep_copy(rowPtrPt_h, 0);
820  for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
821  rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
822  }
823  for (LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
824  rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
825  }
826  Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
827 
828  typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
829  Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
830  typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
831  Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
832  typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
833  Kokkos::deep_copy(valuesP_h, intermediateP.values);
834  typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
835  typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
836  const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
837  Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
838 
839  col_index_type colIdx = 0;
840  for (LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
841  for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
842  colIdx = entries_h(entryIdxP);
843  for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
844  if (colIndPt_h(entryIdxPt) == invalidColumnIndex) {
845  colIndPt_h(entryIdxPt) = rowIdx;
846  valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
847  break;
848  }
849  } // Loop over entries in row of Pt
850  } // Loop over entries in row of P
851  } // Loop over rows of P
852 
853  Kokkos::deep_copy(colIndPt, colIndPt_h);
854  Kokkos::deep_copy(valuesPt, valuesPt_h);
855 
856  local_matrix_type intermediatePt("intermediatePt",
857  intermediateP.numCols(),
858  intermediateP.numRows(),
859  intermediateP.nnz(),
860  valuesPt, rowPtrPt, colIndPt);
861 
862  // Create views for coarseA matrix
863  localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
864 } // BuildCoarseLocalMatrix
865 
866 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
868  localSpGEMM(const typename Matrix::local_matrix_type& A,
869  const typename Matrix::local_matrix_type& B,
870  const std::string matrixLabel,
871  typename Matrix::local_matrix_type& C) const {
872  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
873  using values_type = typename local_matrix_type::values_type;
874  using size_type = typename local_graph_type::size_type;
875  using col_index_type = typename local_graph_type::data_type;
876  using array_layout = typename local_graph_type::array_layout;
877  using memory_space = typename device_type::memory_space;
878  using memory_traits = typename local_graph_type::memory_traits;
879  using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
880  using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
881 
882  // Options
883  int team_work_size = 16;
884  std::string myalg("SPGEMM_KK_MEMORY");
885  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
886  KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
887  typename col_indices_type::const_value_type,
888  typename values_type::const_value_type,
890  memory_space,
891  memory_space>
892  kh;
893  kh.create_spgemm_handle(alg_enum);
894  kh.set_team_work_size(team_work_size);
895 
896  // Create views for AP matrix
897  row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
898  A.numRows() + 1);
899  col_indices_type colIndC;
900  values_type valuesC;
901 
902  // Symbolic multiplication
903  KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
904  B.numRows(), B.numCols(),
905  A.graph.row_map, A.graph.entries, false,
906  B.graph.row_map, B.graph.entries, false,
907  rowPtrC);
908 
909  // allocate column indices and values of AP
910  size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
911  if (nnzC) {
912  colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
913  valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
914  }
915 
916  // Numeric multiplication
917  KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
918  B.numRows(), B.numCols(),
919  A.graph.row_map, A.graph.entries, A.values, false,
920  B.graph.row_map, B.graph.entries, B.values, false,
921  rowPtrC, colIndC, valuesC);
922  kh.destroy_spgemm_handle();
923 
924  C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
925 
926 } // localSpGEMM
927 
928 } // namespace MueLu
929 
930 #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level &currentLevel) const
Build aggregates.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static magnitudeType real(T a)
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
size_type size() const
LocalOrdinal LO
void DeclareInput(Level &currentLevel) const
Input.
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
bool isParameter(const std::string &name) const
Print statistics that do not involve significant additional computation.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
typename device_type::execution_space execution_space
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels&#39; spgemm that takes in CrsMatrix.
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
size_type size() const
#define SET_VALID_ENTRY(name)
Scalar SC
Exception throws to report errors in the internal logical of the program.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.