MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
48 
49 #include <Xpetra_Map.hpp>
50 #include <Xpetra_Vector.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
54 
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
58 
60 
61 #include "MueLu_Aggregates.hpp"
62 #include "MueLu_GraphBase.hpp"
63 #include "MueLu_Level.hpp"
64 #include "MueLu_MasterList.hpp"
65 #include "MueLu_Monitor.hpp"
66 #include "MueLu_Types.hpp"
67 #include "MueLu_Utilities.hpp"
68 
69 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
70 #include "MueLu_Utilities_kokkos.hpp"
71 #endif
72 
73 namespace MueLu {
74 
75  namespace NotayUtils {
76  template <class LocalOrdinal>
78  return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
79  }
80 
81  template <class LocalOrdinal>
83  typedef LocalOrdinal LO;
84  LO n = Teuchos::as<LO>(list.size());
85  for(LO i = 0; i < n-1; i++)
86  std::swap(list[i], list[RandomOrdinal(i,n-1)]);
87  }
88  }
89 
90  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
92  RCP<ParameterList> validParamList = rcp(new ParameterList());
93 
95 
96 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
97  SET_VALID_ENTRY("aggregation: pairwise: size");
98  SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
99  SET_VALID_ENTRY("aggregation: compute aggregate qualities");
100  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
101  SET_VALID_ENTRY("aggregation: ordering");
102 #undef SET_VALID_ENTRY
103 
104  // general variables needed in AggregationFactory
105  validParamList->set< RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
106  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
107  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
108  validParamList->set< RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
109 
110 
111  return validParamList;
112  }
113 
114  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
116  const ParameterList& pL = GetParameterList();
117 
118  Input(currentLevel, "A");
119  Input(currentLevel, "Graph");
120  Input(currentLevel, "DofsPerNode");
121  if (pL.get<bool>("aggregation: compute aggregate qualities")) {
122  Input(currentLevel, "AggregateQualities");
123  }
124 
125 
126  }
127 
128 
129  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
131  FactoryMonitor m(*this, "Build", currentLevel);
132  using STS = Teuchos::ScalarTraits<Scalar>;
133  using MT = typename STS::magnitudeType;
134 
136 
138  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
139  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
140  out->setShowAllFrontMatter(false).setShowProcRank(true);
141  } else {
142  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
143  }
144 
145  const ParameterList& pL = GetParameterList();
146 
147  const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
148  TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
150  "Pairwise requires kappa > 2"
151  " otherwise all rows are considered as Dirichlet rows.");
152 
153  // Parameters
154  int maxNumIter = 3;
155  if (pL.isParameter("aggregation: pairwise: size"))
156  maxNumIter = pL.get<int>("aggregation: pairwise: size");
157  TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
159  "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
160  " must be a strictly positive integer");
161 
162 
163  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");
164  RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
165  local_matrix_type localA = A->getLocalMatrix();
166 
167  // Setup aggregates & aggStat objects
168  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
169  aggregates->setObjectLabel("PW");
170 
171  const LO numRows = graph->GetNodeNumVertices();
172 
173  // construct aggStat information
174  std::vector<unsigned> aggStat(numRows, READY);
175 
176 
177  const int DofsPerNode = Get<int>(currentLevel,"DofsPerNode");
179  "Pairwise only supports one dof per node");
180 
181  // This follows the paper:
182  // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
183  // SISC 34(3), pp. A2288-2316.
184 
185  // Handle Ordering
186  std::string orderingStr = pL.get<std::string>("aggregation: ordering");
187  enum {
188  O_NATURAL,
189  O_RANDOM,
190  O_CUTHILL_MCKEE,
191  } ordering;
192 
193  ordering = O_NATURAL;
194  if (orderingStr == "random" ) ordering = O_RANDOM;
195  else if(orderingStr == "natural") {}
196 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
197  else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE;
198 #endif
199  else {
200  TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type");
201  }
202 
203  // Get an ordering vector
204  // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
205  // will get ignored in the aggregation phases, so we don't need to worry about
206  // running off the end.
207  Array<LO> orderingVector(numRows);
208  for (LO i = 0; i < numRows; i++)
209  orderingVector[i] = i;
210  if (ordering == O_RANDOM)
211  MueLu::NotayUtils::RandomReorder(orderingVector);
212 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
213  else if (ordering == O_CUTHILL_MCKEE) {
215  auto localVector = rcmVector->getData(0);
216  for (LO i = 0; i < numRows; i++)
217  orderingVector[i] = localVector[i];
218  }
219 #endif
220 
221  // Get the party stated
222  LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
223  BuildInitialAggregates(pL, A, orderingVector(), kappa,
224  *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
225  TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
226  "Initial pairwise aggregation failed to aggregate all nodes");
227  LO numLocalAggregates = aggregates->GetNumAggregates();
228  GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
229  << A->getNodeNumRows() / numLocalAggregates << std::endl;
230 
231  // Temporary data storage for further aggregation steps
232  local_matrix_type intermediateP;
233  local_matrix_type coarseLocalA;
234 
235  // Compute the on rank part of the local matrix
236  // that the square submatrix that only contains
237  // columns corresponding to local rows.
238  LO numLocalDirichletNodes = numDirichletNodes;
239  Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
240  BuildOnRankLocalMatrix(A->getLocalMatrix(), coarseLocalA);
241  for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
242  // Compute the intermediate prolongator
243  BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
244  localVertex2AggId(), intermediateP);
245 
246  // Compute the coarse local matrix and coarse row sum
247  BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
248 
249  // Directly compute rowsum from A, rather than coarseA
250  row_sum_type rowSum("rowSum", numLocalAggregates);
251  {
252  std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
253  auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
254  for(LO i=0; i<(LO)numRows; i++) {
255  if(aggStat[i] != AGGREGATED)
256  continue;
257  LO agg=vertex2AggId[i];
258  agg2vertex[agg].push_back(i);
259  }
260 
261  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
262  for(LO i = 0; i < numRows; i++) {
263  // If not aggregated already, skip this guy
264  if(aggStat[i] != AGGREGATED)
265  continue;
266  int agg = vertex2AggId[i];
267  std::vector<LO> & myagg = agg2vertex[agg];
268 
269  size_t nnz = A->getNumEntriesInLocalRow(i);
270  ArrayView<const LO> indices;
271  ArrayView<const SC> vals;
272  A->getLocalRowView(i, indices, vals);
273 
275  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
276  bool found = false;
277  if(indices[colidx] < numRows) {
278  for(LO j=0; j<(LO)myagg.size(); j++)
279  if (vertex2AggId[indices[colidx]] == agg)
280  found=true;
281  }
282  if(!found) {
283  *out << "- ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
284  mysum += vals[colidx];
285  }
286  else {
287  *out << "- NOT ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
288  }
289  }
290 
291  rowSum_h[agg] = mysum;
292  }
293  Kokkos::deep_copy(rowSum, rowSum_h);
294  }
295 
296  // Get local orderingVector
297  Array<LO> localOrderingVector(numRows);
298  for (LO i = 0; i < numRows; i++)
299  localOrderingVector[i] = i;
300  if (ordering == O_RANDOM)
301  MueLu::NotayUtils::RandomReorder(localOrderingVector);
302 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
303  else if (ordering == O_CUTHILL_MCKEE) {
305  auto localVector = rcmVector->getData(0);
306  for (LO i = 0; i < numRows; i++)
307  localOrderingVector[i] = localVector[i];
308  }
309 #endif
310 
311  // Compute new aggregates
312  numLocalAggregates = 0;
313  numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
314  std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
315  localVertex2AggId.resize(numNonAggregatedNodes, -1);
316  BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
317  localAggStat, localVertex2AggId,
318  numLocalAggregates, numNonAggregatedNodes);
319 
320  // After the first initial pairwise aggregation
321  // the Dirichlet nodes have been removed.
322  numLocalDirichletNodes = 0;
323 
324  // Update the aggregates
325  RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
326  ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
327  for(size_t vertexIdx = 0; vertexIdx < A->getNodeNumRows(); ++vertexIdx) {
328  LO oldAggIdx = vertex2AggId[vertexIdx];
329  if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
330  vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
331  }
332  }
333 
334  // We could probably print some better statistics at some point
335  GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
336  << A->getNodeNumRows() / numLocalAggregates << std::endl;
337  }
338  aggregates->SetNumAggregates(numLocalAggregates);
339  aggregates->AggregatesCrossProcessors(false);
340  aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
341 
342  // DO stuff
343  Set(currentLevel, "Aggregates", aggregates);
344  GetOStream(Statistics0) << aggregates->description() << std::endl;
345  }
346 
347 
348  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
351  const RCP<const Matrix>& A,
352  const Teuchos::ArrayView<const LO> & orderingVector,
354  Aggregates& aggregates,
355  std::vector<unsigned>& aggStat,
356  LO& numNonAggregatedNodes,
357  LO& numDirichletNodes) const {
358 
359  Monitor m(*this, "BuildInitialAggregates");
360  using STS = Teuchos::ScalarTraits<Scalar>;
361  using MT = typename STS::magnitudeType;
362  using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
363 
365  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
366  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
367  out->setShowAllFrontMatter(false).setShowProcRank(true);
368  } else {
369  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
370  }
371 
372 
373  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
374  const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
375  const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
376  const MT MT_TWO = MT_ONE + MT_ONE;
377  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
378  const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
379 
380  const MT kappa_init = kappa / (kappa - MT_TWO);
381  const LO numRows = aggStat.size();
382  const int myRank = A->getMap()->getComm()->getRank();
383 
384  // For finding "ties" where we fall back to the ordering. Making this larger than
385  // hard zero substantially increases code robustness.
386  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
387  double tie_less = 1.0 - tie_criterion;
388  double tie_more = 1.0 + tie_criterion;
389 
390  // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
391  // and so we're not doing again here.
392  // This should probably be fixed at some point.
393 
394  // Extract diagonal, rowsums, etc
395  // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
399  const ArrayRCP<const SC> D = ghostedDiag->getData(0);
400  const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
401  const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
402 
403  // Aggregates stuff
404  ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
405  ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
406  ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
407  ArrayView<LO> procWinner = procWinner_rcp();
408 
409  // Algorithm 4.2
410 
411  // 0,1 : Initialize: Flag boundary conditions
412  // Modification: We assume symmetry here aij = aji
413  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
414  MT aii = STS::magnitude(D[row]);
415  MT rowsum = AbsRs[row];
416 
417  if(aii >= kappa_init * rowsum) {
418  *out << "Flagging index " << row << " as dirichlet "
419  "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl;
420  aggStat[row] = IGNORED;
421  --numNonAggregatedNodes;
422  ++numDirichletNodes;
423  }
424  }
425 
426 
427  // 2 : Iteration
428  LO aggIndex = LO_ZERO;
429  for(LO i = 0; i < numRows; i++) {
430  LO current_idx = orderingVector[i];
431  // If we're aggregated already, skip this guy
432  if(aggStat[current_idx] != READY)
433  continue;
434 
435  MT best_mu = MT_ZERO;
436  LO best_idx = LO_INVALID;
437 
438  size_t nnz = A->getNumEntriesInLocalRow(current_idx);
439  ArrayView<const LO> indices;
440  ArrayView<const SC> vals;
441  A->getLocalRowView(current_idx, indices, vals);
442 
443  MT aii = STS::real(D[current_idx]);
444  MT si = STS::real(S[current_idx]);
445  for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
446  // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
447  LO col = indices[colidx];
448  SC val = vals[colidx];
449  if(current_idx == col || aggStat[col] != READY || col > numRows || val == SC_ZERO)
450  continue;
451 
452  MT aij = STS::real(val);
453  MT ajj = STS::real(D[col]);
454  MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
455  if(aii - si + ajj - sj >= MT_ZERO) {
456  // Modification: We assume symmetry here aij = aji
457  MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
458  MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
459  MT mu = mu_top / mu_bottom;
460 
461  // Modification: Explicitly check the tie criterion here
462  if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
463  (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
464  best_mu = mu;
465  best_idx = col;
466  *out << "[" << current_idx << "] Column UPDATED " << col << ": "
467  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
468  << " = " << aii - si + ajj - sj<< ", aij = "<<aij << ", mu = " << mu << std::endl;
469  }
470  else {
471  *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
472  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
473  << " = " << aii - si + ajj - sj << ", aij = "<<aij<< ", mu = " << mu << std::endl;
474  }
475  }
476  else {
477  *out << "[" << current_idx << "] Column FAILED " << col << ": "
478  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
479  << " = " << aii - si + ajj - sj << std::endl;
480  }
481  }// end column for loop
482 
483  if(best_idx == LO_INVALID) {
484  *out << "No node buddy found for index " << current_idx
485  << " [agg " << aggIndex << "]\n" << std::endl;
486  // We found no potential node-buddy, so let's just make this a singleton
487  // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
488  // the paper
489 
490  aggStat[current_idx] = ONEPT;
491  vertex2AggId[current_idx] = aggIndex;
492  procWinner[current_idx] = myRank;
493  numNonAggregatedNodes--;
494  aggIndex++;
495 
496  } else {
497  // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
498  if(best_mu <= kappa) {
499  *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
500 
501  aggStat[current_idx] = AGGREGATED;
502  aggStat[best_idx] = AGGREGATED;
503  vertex2AggId[current_idx] = aggIndex;
504  vertex2AggId[best_idx] = aggIndex;
505  procWinner[current_idx] = myRank;
506  procWinner[best_idx] = myRank;
507  numNonAggregatedNodes-=2;
508  aggIndex++;
509 
510  } else {
511  *out << "No buddy found for index " << current_idx << ","
512  " but aggregating as singleton [agg " << aggIndex << "]" << std::endl;
513 
514  aggStat[current_idx] = ONEPT;
515  vertex2AggId[current_idx] = aggIndex;
516  procWinner[current_idx] = myRank;
517  numNonAggregatedNodes--;
518  aggIndex++;
519  } // best_mu
520  } // best_idx
521  }// end Algorithm 4.2
522 
523  *out << "vertex2aggid :";
524  for(int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
525  *out << i << "(" << vertex2AggId[i] << ")";
526  }
527  *out << std::endl;
528 
529  // update aggregate object
530  aggregates.SetNumAggregates(aggIndex);
531  } // BuildInitialAggregates
532 
533  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
536  const RCP<const Matrix>& A,
537  const Teuchos::ArrayView<const LO> & orderingVector,
538  const typename Matrix::local_matrix_type& coarseA,
540  const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
542  typename Matrix::local_matrix_type::device_type>& rowSum,
543  std::vector<LocalOrdinal>& localAggStat,
544  Teuchos::Array<LocalOrdinal>& localVertex2AggID,
545  LO& numLocalAggregates,
546  LO& numNonAggregatedNodes) const {
547  Monitor m(*this, "BuildFurtherAggregates");
548 
549  // Set debug outputs based on environment variable
551  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
552  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
553  out->setShowAllFrontMatter(false).setShowProcRank(true);
554  } else {
555  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
556  }
557 
558  using value_type = typename local_matrix_type::value_type;
559  const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
562  const magnitude_type MT_two = MT_one + MT_one;
563  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
564 
565  // For finding "ties" where we fall back to the ordering. Making this larger than
566  // hard zero substantially increases code robustness.
567  double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
568  double tie_less = 1.0 - tie_criterion;
569  double tie_more = 1.0 + tie_criterion;
570 
571  typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
572  Kokkos::deep_copy(rowSum_h, rowSum);
573 
574  // Extracting the diagonal of a KokkosSparse::CrsMatrix
575  // is not currently provided in kokkos-kernels so here
576  // is an ugly way to get that done...
577  const LO numRows = static_cast<LO>(coarseA.numRows());
578  typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows);
579  typename local_matrix_type::row_map_type::HostMirror row_map_h
580  = Kokkos::create_mirror_view(coarseA.graph.row_map);
581  Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
582  typename local_matrix_type::index_type::HostMirror entries_h
583  = Kokkos::create_mirror_view(coarseA.graph.entries);
584  Kokkos::deep_copy(entries_h, coarseA.graph.entries);
585  typename local_matrix_type::values_type::HostMirror values_h
586  = Kokkos::create_mirror_view(coarseA.values);
587  Kokkos::deep_copy(values_h, coarseA.values);
588  for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
589  for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
590  entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
591  ++entryIdx) {
592  if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
593  diagA_h(rowIdx) = values_h(entryIdx);
594  }
595  }
596  }
597 
598  for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
599  if(localAggStat[currentIdx] != READY) {
600  continue;
601  }
602 
605  const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
606  const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
607  for(auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
608  const LO colIdx = static_cast<LO>(entries_h(entryIdx));
609  if(currentIdx == colIdx || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero || colIdx > numRows) {
610  continue;
611  }
612 
613  const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
614  const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
615  const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
616  if(aii - si + ajj - sj >= MT_zero) {
617  const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
618  const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
619  const magnitude_type mu = mu_top / mu_bottom;
620 
621  // Modification: Explicitly check the tie criterion here
622  if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
623  (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
624  best_mu = mu;
625  bestIdx = colIdx;
626  *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
627  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
628  << " = " << aii - si + ajj - sj << ", aij = "<<aij<<" mu = " << mu << std::endl;
629  }
630  else {
631  *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
632  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
633  << " = " << aii - si + ajj - sj << ", aij = "<<aij<<", mu = " << mu << std::endl;
634  }
635  } else {
636  *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
637  << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
638  << " = " << aii - si + ajj - sj << std::endl;
639  }
640  } // end loop over row entries
641 
642  if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
643  localAggStat[currentIdx] = ONEPT;
644  localVertex2AggID[currentIdx] = numLocalAggregates;
645  --numNonAggregatedNodes;
646  ++numLocalAggregates;
647  } else {
648  if(best_mu <= kappa) {
649  *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
650 
651  localAggStat[currentIdx] = AGGREGATED;
652  localVertex2AggID[currentIdx] = numLocalAggregates;
653  --numNonAggregatedNodes;
654 
655  localAggStat[bestIdx] = AGGREGATED;
656  localVertex2AggID[bestIdx] = numLocalAggregates;
657  --numNonAggregatedNodes;
658 
659  ++numLocalAggregates;
660  } else {
661  *out << "No buddy found for index " << currentIdx << ","
662  " but aggregating as singleton [agg " << numLocalAggregates << "]" << std::endl;
663 
664  localAggStat[currentIdx] = ONEPT;
665  localVertex2AggID[currentIdx] = numLocalAggregates;
666  --numNonAggregatedNodes;
667  ++numLocalAggregates;
668  }
669  }
670  } // end loop over matrix rows
671 
672  } // BuildFurtherAggregates
673 
674  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
676  BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA,
677  typename Matrix::local_matrix_type& onrankA) const {
678  Monitor m(*this, "BuildOnRankLocalMatrix");
679 
680  // Set debug outputs based on environment variable
682  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
683  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
684  out->setShowAllFrontMatter(false).setShowProcRank(true);
685  } else {
686  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
687  }
688 
689  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
690  using values_type = typename local_matrix_type::values_type;
691  using size_type = typename local_graph_type::size_type;
692  using col_index_type = typename local_graph_type::data_type;
693  using array_layout = typename local_graph_type::array_layout;
694  using memory_traits = typename local_graph_type::memory_traits;
697  // Extract on rank part of A
698  // Simply check that the column index is less than the number of local rows
699  // otherwise remove it.
700 
701  const int numRows = static_cast<int>(localA.numRows());
702  row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
703  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
704  typename local_graph_type::row_map_type::HostMirror origRowPtr_h
705  = Kokkos::create_mirror_view(localA.graph.row_map);
706  typename local_graph_type::entries_type::HostMirror origColind_h
707  = Kokkos::create_mirror_view(localA.graph.entries);
708  typename values_type::HostMirror origValues_h
709  = Kokkos::create_mirror_view(localA.values);
710  Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
711  Kokkos::deep_copy(origColind_h, localA.graph.entries);
712  Kokkos::deep_copy(origValues_h, localA.values);
713 
714  // Compute the number of nnz entries per row
715  rowPtr_h(0) = 0;
716  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
717  for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
718  if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
719  }
720  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
721  }
722  Kokkos::deep_copy(rowPtr, rowPtr_h);
723 
724  const LO nnzOnrankA = rowPtr_h(numRows);
725 
726  // Now use nnz per row to allocate matrix views and store column indices and values
727  col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
728  values_type values("onrankA values", rowPtr_h(numRows));
729  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
730  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
731  int entriesInRow;
732  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
733  entriesInRow = 0;
734  for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
735  if(origColind_h(entryIdx) < numRows) {
736  colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
737  values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
738  ++entriesInRow;
739  }
740  }
741  }
742  Kokkos::deep_copy(colInd, colInd_h);
743  Kokkos::deep_copy(values, values_h);
744 
745  onrankA = local_matrix_type("onrankA", numRows, numRows,
746  nnzOnrankA, values, rowPtr, colInd);
747  }
748 
749  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
752  const LocalOrdinal numDirichletNodes,
753  const LocalOrdinal numLocalAggregates,
754  const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
755  typename Matrix::local_matrix_type& intermediateP) const {
756  Monitor m(*this, "BuildIntermediateProlongator");
757 
758  // Set debug outputs based on environment variable
760  if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
761  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
762  out->setShowAllFrontMatter(false).setShowProcRank(true);
763  } else {
764  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
765  }
766 
767  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
768  using values_type = typename local_matrix_type::values_type;
769  using size_type = typename local_graph_type::size_type;
770  using col_index_type = typename local_graph_type::data_type;
771  using array_layout = typename local_graph_type::array_layout;
772  using memory_traits = typename local_graph_type::memory_traits;
775 
776  const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
777 
778  const int intermediatePnnz = numRows - numDirichletNodes;
779  row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
780  col_indices_type colInd("intermediateP column indices", intermediatePnnz);
781  values_type values("intermediateP values", intermediatePnnz);
782  typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
783  typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
784 
785  rowPtr_h(0) = 0;
786  for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
787  // Skip Dirichlet nodes
788  if(localVertex2AggID[rowIdx] == LO_INVALID) {
789  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
790  } else {
791  rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
792  colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
793  }
794  }
795 
796  Kokkos::deep_copy(rowPtr, rowPtr_h);
797  Kokkos::deep_copy(colInd, colInd_h);
798  Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
799 
800  intermediateP = local_matrix_type("intermediateP",
801  numRows, numLocalAggregates, intermediatePnnz,
802  values, rowPtr, colInd);
803  } // BuildIntermediateProlongator
804 
805  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
807  BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP,
808  typename Matrix::local_matrix_type& coarseA) const {
809  Monitor m(*this, "BuildCoarseLocalMatrix");
810 
811  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
812  using values_type = typename local_matrix_type::values_type;
813  using size_type = typename local_graph_type::size_type;
814  using col_index_type = typename local_graph_type::data_type;
815  using array_layout = typename local_graph_type::array_layout;
816  using memory_traits = typename local_graph_type::memory_traits;
819 
821  localSpGEMM(coarseA, intermediateP, "AP", AP);
822 
823  // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
824  // I am not sure but doing it for safety in case it stashes data from the previous
825  // spgemm computation...
826 
827  // Compute Ac = Pt * AP
828  // Two steps needed:
829  // 1. compute Pt
830  // 2. perform multiplication
831 
832  // Step 1 compute Pt
833  // Obviously this requires the same amount of storage as P except for the rowPtr
834  row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
835  intermediateP.numCols() + 1);
836  col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
837  intermediateP.nnz());
838  values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
839  intermediateP.nnz());
840 
841  typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
842  Kokkos::deep_copy(rowPtrPt_h, 0);
843  for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
844  rowPtrPt_h(intermediateP.graph.entries(entryIdx) + 1) += 1;
845  }
846  for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
847  rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
848  }
849  Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
850 
851  typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
852  Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
853  typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
854  Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
855  typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
856  Kokkos::deep_copy(valuesP_h, intermediateP.values);
857  typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
858  typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
859  const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
860  Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
861 
862  col_index_type colIdx = 0;
863  for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
864  for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
865  colIdx = intermediateP.graph.entries(entryIdxP);
866  for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
867  if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
868  colIndPt_h(entryIdxPt) = rowIdx;
869  valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
870  break;
871  }
872  } // Loop over entries in row of Pt
873  } // Loop over entries in row of P
874  } // Loop over rows of P
875 
876  Kokkos::deep_copy(colIndPt, colIndPt_h);
877  Kokkos::deep_copy(valuesPt, valuesPt_h);
878 
879  local_matrix_type intermediatePt("intermediatePt",
880  intermediateP.numCols(),
881  intermediateP.numRows(),
882  intermediateP.nnz(),
883  valuesPt, rowPtrPt, colIndPt);
884 
885  // Create views for coarseA matrix
886  localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
887  } // BuildCoarseLocalMatrix
888 
889  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
891  localSpGEMM(const typename Matrix::local_matrix_type& A,
892  const typename Matrix::local_matrix_type& B,
893  const std::string matrixLabel,
894  typename Matrix::local_matrix_type& C) const {
895 
896  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
897  using values_type = typename local_matrix_type::values_type;
898  using size_type = typename local_graph_type::size_type;
899  using col_index_type = typename local_graph_type::data_type;
900  using array_layout = typename local_graph_type::array_layout;
901  using memory_space = typename device_type::memory_space;
902  using memory_traits = typename local_graph_type::memory_traits;
905 
906  // Options
907  int team_work_size = 16;
908  std::string myalg("SPGEMM_KK_MEMORY");
909  KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
910  KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
911  typename col_indices_type::const_value_type,
912  typename values_type::const_value_type,
914  memory_space,
915  memory_space> kh;
916  kh.create_spgemm_handle(alg_enum);
917  kh.set_team_work_size(team_work_size);
918 
919  // Create views for AP matrix
920  row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
921  A.numRows() + 1);
922  col_indices_type colIndC;
923  values_type valuesC;
924 
925  // Symbolic multiplication
926  KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
927  B.numRows(), B.numCols(),
928  A.graph.row_map, A.graph.entries, false,
929  B.graph.row_map, B.graph.entries, false,
930  rowPtrC);
931 
932  // allocate column indices and values of AP
933  size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
934  if (nnzC) {
935  colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
936  valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
937  }
938 
939  // Numeric multiplication
940  KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
941  B.numRows(), B.numCols(),
942  A.graph.row_map, A.graph.entries, A.values, false,
943  B.graph.row_map, B.graph.entries, B.values, false,
944  rowPtrC, colIndC, valuesC);
945  kh.destroy_spgemm_handle();
946 
947  C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
948 
949  } // localSpGEMM
950 
951 } //namespace MueLu
952 
953 #endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultLocalOrdinal LocalOrdinal
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
void Build(Level &currentLevel) const
Build aggregates.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
virtual size_t GetNodeNumVertices() const =0
Return number of vertices owned by the calling node.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static magnitudeType real(T a)
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
size_type size() const
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
void DeclareInput(Level &currentLevel) const
Input.
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
Teuchos::ArrayRCP< LO > ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
bool isParameter(const std::string &name) const
Print statistics that do not involve significant additional computation.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
typename device_type::execution_space execution_space
LO GetNumAggregates() const
returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLoc...
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
virtual void setObjectLabel(const std::string &objectLabel)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels&#39; spgemm that takes in CrsMatrix.
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
size_type size() const
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
std::string description() const
Return a simple one-line description of this object.
void AggregatesCrossProcessors(const bool &flag)
Record whether aggregates include DOFs from other processes.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.