MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
11 #define MUELU_CLASSICALPFACTORY_DEF_HPP
12 
13 #include <Xpetra_MultiVectorFactory.hpp>
14 #include <Xpetra_VectorFactory.hpp>
16 #include <Xpetra_Matrix.hpp>
17 #include <Xpetra_Map.hpp>
18 #include <Xpetra_MapFactory.hpp>
19 #include <Xpetra_Vector.hpp>
20 #include <Xpetra_Import.hpp>
21 #include <Xpetra_ImportUtils.hpp>
22 #include <Xpetra_IO.hpp>
23 #include <Xpetra_StridedMapFactory.hpp>
24 
26 
27 #include "MueLu_MasterList.hpp"
28 #include "MueLu_Monitor.hpp"
29 #include "MueLu_PerfUtils.hpp"
31 #include "MueLu_ClassicalMapFactory.hpp"
32 #include "MueLu_AmalgamationInfo.hpp"
33 #include "MueLu_LWGraph.hpp"
34 
35 //#define CMS_DEBUG
36 //#define CMS_DUMP
37 
38 namespace {
39 
40 template <class SC>
41 int Sign(SC val) {
42  using STS = typename Teuchos::ScalarTraits<SC>;
43  typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
44  if (STS::real(val) > MT_ZERO)
45  return 1;
46  else if (STS::real(val) < MT_ZERO)
47  return -1;
48  else
49  return 0;
50 }
51 
52 } // namespace
53 
54 namespace MueLu {
55 
56 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
58  RCP<ParameterList> validParamList = rcp(new ParameterList());
59 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
60  SET_VALID_ENTRY("aggregation: deterministic");
61  SET_VALID_ENTRY("aggregation: coloring algorithm");
62  SET_VALID_ENTRY("aggregation: classical scheme");
63 
64  // To know if we need BlockNumber
65  SET_VALID_ENTRY("aggregation: drop scheme");
66  {
67  validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("direct", "ext+i", "classical modified"))));
68  }
69 
70 #undef SET_VALID_ENTRY
71  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
72  // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
73  validParamList->set<RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
74  validParamList->set<RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
75  validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
76  validParamList->set<RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
77  validParamList->set<RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
78  // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
79 
80  return validParamList;
81 }
82 
83 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
85  Input(fineLevel, "A");
86  Input(fineLevel, "Graph");
87  Input(fineLevel, "DofsPerNode");
88  // Input(fineLevel, "UnAmalgamationInfo");
89  Input(fineLevel, "CoarseMap");
90  Input(fineLevel, "FC Splitting");
91 
92  const ParameterList& pL = GetParameterList();
93  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
94  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
95  Input(fineLevel, "BlockNumber");
96  }
97 }
98 
99 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
101  return BuildP(fineLevel, coarseLevel);
102 }
103 
104 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
106  FactoryMonitor m(*this, "Build", coarseLevel);
107  // using STS = Teuchos::ScalarTraits<SC>;
108 
109  // We start by assuming that someone did a reasonable strength of connection
110  // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
111 
112  // We begin by getting a MIS (from a graph coloring) and then at that point we need
113  // to start generating entries for the prolongator.
114  RCP<const Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
115  RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
116  RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel, "FC Splitting");
117  RCP<const LWGraph> graph = Get<RCP<LWGraph> >(fineLevel, "Graph");
118  // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
119  // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
120  RCP<const Import> Importer = A->getCrsGraph()->getImporter();
121  Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
122 
123  // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
124  RCP<Matrix> P;
125  // SC SC_ZERO = STS::zero();
129  const ParameterList& pL = GetParameterList();
130 
131  // FIXME: This guy doesn't work right now for NumPDEs != 1
132  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "ClassicalPFactory: Multiple PDEs per node not supported yet");
133 
134  // NOTE: Let's hope we never need to deal with this case
135  TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()), Exceptions::RuntimeError, "ClassicalPFactory: MPI Ranks > 1 not supported yet");
136 
137  // Do we need ghosts rows of A and myPointType?
138  std::string scheme = pL.get<std::string>("aggregation: classical scheme");
139  bool need_ghost_rows = false;
140  if (scheme == "ext+i")
141  need_ghost_rows = true;
142  else if (scheme == "direct")
143  need_ghost_rows = false;
144  else if (scheme == "classical modified")
145  need_ghost_rows = true;
146  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
147 
148  if (GetVerbLevel() & Statistics1) {
149  GetOStream(Statistics1) << "ClassicalPFactory: scheme = " << scheme << std::endl;
150  }
151 
152  // Ghost the FC splitting and grab the data (if needed)
153  RCP<const LocalOrdinalVector> fc_splitting;
154  ArrayRCP<const LO> myPointType;
155  if (Importer.is_null()) {
156  fc_splitting = owned_fc_splitting;
157  } else {
158  RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
159  fc_splitting_nonconst->doImport(*owned_fc_splitting, *Importer, Xpetra::INSERT);
160  fc_splitting = fc_splitting_nonconst;
161  }
162  myPointType = fc_splitting->getData(0);
163 
164  /* Ghost A (if needed) */
165  RCP<const Matrix> Aghost;
166  RCP<const LocalOrdinalVector> fc_splitting_ghost;
167  ArrayRCP<const LO> myPointType_ghost;
168  RCP<const Import> remoteOnlyImporter;
169  if (need_ghost_rows && !Importer.is_null()) {
170  ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
171  size_t numRemote = Importer->getNumRemoteIDs();
172  Array<GO> remoteRows(numRemote);
173  for (size_t i = 0; i < numRemote; i++)
174  remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
175 
176  RCP<const Map> remoteRowMap = MapFactory::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
177  A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
178 
179  remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
180  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
181  RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs, *remoteOnlyImporter, A->getDomainMap(), remoteOnlyImporter->getTargetMap());
182  Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
183  // CAG: It seems that this isn't actually needed?
184  // We also may need need to ghost myPointType for Aghost
185  // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
186  // if(Importer2.is_null()) {
187  // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
188  // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
189  // fc_splitting_ghost = fc_splitting_ghost_nonconst;
190  // myPointType_ghost = fc_splitting_ghost->getData(0);
191  // }
192  }
193 
194  /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
195  RCP<const Map> coarseMap;
196  if (Importer.is_null())
197  coarseMap = ownedCoarseMap;
198  else {
199  // Generate a domain vector with the coarse ID's as entries for C points
200  GhostCoarseMap(*A, *Importer, myPointType, ownedCoarseMap, coarseMap);
201  }
202 
203  /* Get the block number, if we need it (and ghost it) */
204  RCP<LocalOrdinalVector> BlockNumber;
205  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
206  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
207  RCP<LocalOrdinalVector> OwnedBlockNumber;
208  OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
209  if (Importer.is_null())
210  BlockNumber = OwnedBlockNumber;
211  else {
212  BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
213  BlockNumber->doImport(*OwnedBlockNumber, *Importer, Xpetra::INSERT);
214  }
215  }
216 
217 #if defined(CMS_DEBUG) || defined(CMS_DUMP)
218  {
219  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
220  int rank = A->getRowMap()->getComm()->getRank();
221  printf("[%d] A local size = %dx%d\n", rank, (int)Acrs->getRowMap()->getLocalNumElements(), (int)Acrs->getColMap()->getLocalNumElements());
222 
223  printf("[%d] graph local size = %dx%d\n", rank, (int)graph->GetDomainMap()->getLocalNumElements(), (int)graph->GetImportMap()->getLocalNumElements());
224 
225  std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat"), std::ofstream::out);
226  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
227  graph->print(*fancy, Debug);
228  std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat");
229  std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat");
230 
231  // We don't support writing LO vectors in Xpetra (boo!) so....
232  using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
233  using RealValuedMultiVector = typename Xpetra::MultiVector<real_type, LO, GO, NO>;
234  typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
235 
236  RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(), 1);
237  ArrayRCP<real_type> mv_data = mv->getDataNonConst(0);
238 
239  // FC Splitting
240  ArrayRCP<const LO> fc_data = fc_splitting->getData(0);
241  for (LO i = 0; i < (LO)fc_data.size(); i++)
242  mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
244 
245  // Block Number
246  if (!BlockNumber.is_null()) {
247  RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(), 1);
248  ArrayRCP<real_type> mv_data2 = mv2->getDataNonConst(0);
249  ArrayRCP<const LO> b_data = BlockNumber->getData(0);
250  for (LO i = 0; i < (LO)b_data.size(); i++) {
251  mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
252  }
254  }
255  }
256 
257 #endif
258 
259  /* Generate reindexing arrays */
260  // Note: cpoint2pcol is ghosted if myPointType is
261  // NOTE: Since the ghosted coarse column map follows the ordering of
262  // the fine column map, this *should* work, because it is in local indices.
263  // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
264  // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
265  Array<LO> cpoint2pcol(myPointType.size(), LO_INVALID);
266  Array<LO> pcol2cpoint(coarseMap->getLocalNumElements(), LO_INVALID);
267  LO num_c_points = 0;
268  LO num_f_points = 0;
269  for (LO i = 0; i < (LO)myPointType.size(); i++) {
270  if (myPointType[i] == C_PT) {
271  cpoint2pcol[i] = num_c_points;
272  num_c_points++;
273  } else if (myPointType[i] == F_PT)
274  num_f_points++;
275  }
276  for (LO i = 0; i < (LO)cpoint2pcol.size(); i++) {
277  if (cpoint2pcol[i] != LO_INVALID)
278  pcol2cpoint[cpoint2pcol[i]] = i;
279  }
280 
281  // Generate edge strength flags (this will make everything easier later)
282  // These do *not* need to be ghosted (unlike A)
283  Teuchos::Array<size_t> eis_rowptr;
284  Teuchos::Array<bool> edgeIsStrong;
285  {
286  SubFactoryMonitor sfm(*this, "Strength Flags", coarseLevel);
287  GenerateStrengthFlags(*A, *graph, eis_rowptr, edgeIsStrong);
288  }
289 
290  // Phase 3: Generate the P matrix
291  RCP<const Map> coarseColMap = coarseMap;
292  RCP<const Map> coarseDomainMap = ownedCoarseMap;
293  if (scheme == "ext+i") {
294  SubFactoryMonitor sfm(*this, "Ext+i Interpolation", coarseLevel);
295  Coarsen_Ext_Plus_I(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, P);
296  } else if (scheme == "direct") {
297  SubFactoryMonitor sfm(*this, "Direct Interpolation", coarseLevel);
298  Coarsen_Direct(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, P);
299  } else if (scheme == "classical modified") {
300  SubFactoryMonitor sfm(*this, "Classical Modified Interpolation", coarseLevel);
301  Coarsen_ClassicalModified(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, remoteOnlyImporter, P);
302  }
303  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
304 
305 #ifdef CMS_DEBUG
306  Xpetra::IO<SC, LO, GO, NO>::Write("classical_p.mat." + std::to_string(coarseLevel.GetLevelID()), *P);
307  // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
308 #endif
309 
310  // Save output
311  Set(coarseLevel, "P", P);
312  RCP<const CrsGraph> pg = P->getCrsGraph();
313  Set(coarseLevel, "P Graph", pg);
314 
315  // RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
316  // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
317  // Set(coarseLevel, "Nullspace", coarseNullspace);
318 
319  if (IsPrint(Statistics1)) {
320  RCP<ParameterList> params = rcp(new ParameterList());
321  params->set("printLoadBalancingInfo", true);
322  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
323  }
324 }
325 /* ************************************************************************* */
326 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
328  Coarsen_ClassicalModified(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<const Import> remoteOnlyImporter, RCP<Matrix>& P) const {
329  /* ============================================================= */
330  /* Phase 3 : Classical Modified Interpolation */
331  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
332  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
333  /* 15:115-139 */
334  /* ============================================================= */
335  /* Definitions: */
336  /* F = F-points */
337  /* C = C-points */
338  /* N_i = non-zero neighbors of node i */
339  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
340  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
341  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
342 
343  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
344  /* This guy has a typo. The paper had a \cap instead of \cup */
345  /* I would note that this set can contain both F-points and */
346  /* C-points. They're just weak neighbors of this guy. */
347  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
348 
349  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
350  /* { a_ij, otherwise */
351 
352  /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
353  /* [set of F-neighbors of i that do not share a strong */
354  /* C-neighbor with i] */
355 
356  /* Rewritten Equation (9) on p. 120 */
357  /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
358  /* */
359  /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
360  /* */
361  /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
362 
363  // const point_type F_PT = ClassicalMapFactory::F_PT;
365  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
366  using STS = typename Teuchos::ScalarTraits<SC>;
368  // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
369  SC SC_ZERO = STS::zero();
370 #ifdef CMS_DEBUG
371  int rank = A.getRowMap()->getComm()->getRank();
372 #endif
373 
374  // Get the block number if we have it.
375  ArrayRCP<const LO> block_id;
376  if (!BlockNumber.is_null())
377  block_id = BlockNumber->getData(0);
378 
379  // Initial (estimated) allocation
380  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
381  // needs to be a copy below.
382  size_t Nrows = A.getLocalNumRows();
383  double c_point_density = (double)num_c_points / (num_c_points + num_f_points);
384  double mean_strong_neighbors_per_row = (double)graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
385  // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
386  double nnz_per_row_est = c_point_density * mean_strong_neighbors_per_row;
387 
388  size_t nnz_est = std::max(Nrows, std::min((size_t)A.getLocalNumEntries(), (size_t)(nnz_per_row_est * Nrows)));
389  Array<size_t> tmp_rowptr(Nrows + 1);
390  Array<LO> tmp_colind(nnz_est);
391 
392  // Algorithm (count+realloc)
393  // For each row, i,
394  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
395  size_t ct = 0;
396  for (LO row = 0; row < (LO)Nrows; row++) {
397  size_t row_start = eis_rowptr[row];
398  ArrayView<const LO> indices;
399  ArrayView<const SC> vals;
400  std::set<LO> C_hat;
401  if (myPointType[row] == DIRICHLET_PT) {
402  // Dirichlet points get ignored completely
403  } else if (myPointType[row] == C_PT) {
404  // C-Points get a single 1 in their row
405  C_hat.insert(cpoint2pcol[row]);
406  } else {
407  // F-Points have a more complicated interpolation
408 
409  // C-neighbors of row
410  A.getLocalRowView(row, indices, vals);
411  for (LO j = 0; j < indices.size(); j++)
412  if (myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
413  C_hat.insert(cpoint2pcol[indices[j]]);
414  } // end else
415 
416  // Realloc if needed
417  if (ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
418  tmp_colind.resize(std::max(ct + (size_t)C_hat.size(), (size_t)2 * tmp_colind.size()));
419  }
420 
421  // Copy
422  std::copy(C_hat.begin(), C_hat.end(), tmp_colind.begin() + ct);
423  ct += C_hat.size();
424  tmp_rowptr[row + 1] = tmp_rowptr[row] + C_hat.size();
425  }
426  // Resize down
427  tmp_colind.resize(tmp_rowptr[Nrows]);
428 
429  RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(), coarseColMap, 0);
430  ArrayRCP<size_t> P_rowptr;
431  ArrayRCP<LO> P_colind;
432  ArrayRCP<SC> P_values;
433 
434  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
435  P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
436  P_colind = Teuchos::arcpFromArray(tmp_colind);
437  P_values.resize(P_rowptr[Nrows]);
438  } else {
439  // Make the matrix and then get the graph out of it (necessary for Epetra)
440  // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
441  // impossible to do the obvious way
442  Pcrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
443  std::copy(tmp_rowptr.begin(), tmp_rowptr.end(), P_rowptr.begin());
444  std::copy(tmp_colind.begin(), tmp_colind.end(), P_colind.begin());
445  Pcrs->setAllValues(P_rowptr, P_colind, P_values);
446  Pcrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
447  }
448 
449  // Generate a remote-ghosted version of the graph (if we're in parallel)
450  RCP<const CrsGraph> Pgraph;
451  RCP<const CrsGraph> Pghost;
452  // TODO: We might want to be more efficient here and actually use
453  // Pgraph in the matrix constructor.
454  ArrayRCP<const size_t> Pghost_rowptr;
455  ArrayRCP<const LO> Pghost_colind;
456  if (!remoteOnlyImporter.is_null()) {
457  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
458  RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(), coarseColMap, P_rowptr, P_colind);
459  tempPgraph->fillComplete(coarseDomainMap, A.getDomainMap());
460  Pgraph = tempPgraph;
461  } else {
462  // Epetra does not have a graph constructor that uses rowptr and colind.
463  Pgraph = Pcrs->getCrsGraph();
464  }
465  TEUCHOS_ASSERT(!Pgraph.is_null());
466  Pghost = CrsGraphFactory::Build(Pgraph, *remoteOnlyImporter, Pgraph->getDomainMap(), remoteOnlyImporter->getTargetMap());
467  Pghost->getAllIndices(Pghost_rowptr, Pghost_colind);
468  }
469 
470  // Gustavson-style perfect hashing
471  ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getLocalNumElements(), LO_INVALID);
472 
473  // Get a quick reindexing array from Pghost LCIDs to P LCIDs
474  ArrayRCP<LO> Pghostcol_to_Pcol;
475  if (!Pghost.is_null()) {
476  Pghostcol_to_Pcol.resize(Pghost->getColMap()->getLocalNumElements(), LO_INVALID);
477  for (LO i = 0; i < (LO)Pghost->getColMap()->getLocalNumElements(); i++)
478  Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
479  } // end Pghost
480 
481  // Get a quick reindexing array from Aghost LCIDs to A LCIDs
482  ArrayRCP<LO> Aghostcol_to_Acol;
483  if (!Aghost.is_null()) {
484  Aghostcol_to_Acol.resize(Aghost->getColMap()->getLocalNumElements(), LO_INVALID);
485  for (LO i = 0; i < (LO)Aghost->getColMap()->getLocalNumElements(); i++)
486  Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
487  } // end Aghost
488 
489  // Algorithm (numeric)
490  for (LO i = 0; i < (LO)Nrows; i++) {
491  if (myPointType[i] == DIRICHLET_PT) {
492  // Dirichlet points get ignored completely
493 #ifdef CMS_DEBUG
494  // DEBUG
495  printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n", rank, i);
496 #endif
497  } else if (myPointType[i] == C_PT) {
498  // C Points get a single 1 in their row
499  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
500 #ifdef CMS_DEBUG
501  // DEBUG
502  printf("[%d] ** A(%d,:) is a C-Point.\n", rank, i);
503 #endif
504  } else {
505  // F Points get all of the fancy stuff
506 #ifdef CMS_DEBUG
507  // DEBUG
508  printf("[%d] ** A(%d,:) is a F-Point.\n", rank, i);
509 #endif
510 
511  // Get all of the relevant information about this row
512  ArrayView<const LO> A_indices_i, A_indices_k;
513  ArrayView<const SC> A_vals_i, A_vals_k;
514  A.getLocalRowView(i, A_indices_i, A_vals_i);
515  size_t row_start = eis_rowptr[i];
516 
517  ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
518  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
519 
520  // FIXME: Do we need this?
521  for (LO j = 0; j < (LO)P_vals_i.size(); j++)
522  P_vals_i[j] = SC_ZERO;
523 
524  // Stash the hash: Flag any strong C-points with their index into P_colind
525  // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
526  for (LO j = 0; j < (LO)P_indices_i.size(); j++) {
527  Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
528  }
529 
530  // Loop over the entries in the row
531  SC first_denominator = SC_ZERO;
532 #ifdef CMS_DEBUG
533  SC a_ii = SC_ZERO;
534 #endif
535  for (LO k0 = 0; k0 < (LO)A_indices_i.size(); k0++) {
536  LO k = A_indices_i[k0];
537  SC a_ik = A_vals_i[k0];
538  LO pcol_k = Acol_to_Pcol[k];
539 
540  if (k == i) {
541  // Case A: Diagonal value (add to first denominator)
542  // FIXME: Add BlockNumber matching here
543  first_denominator += a_ik;
544 #ifdef CMS_DEBUG
545  a_ii = a_ik;
546  printf("- A(%d,%d) is the diagonal\n", i, k);
547 #endif
548 
549  } else if (myPointType[k] == DIRICHLET_PT) {
550  // Case B: Ignore dirichlet connections completely
551 #ifdef CMS_DEBUG
552  printf("- A(%d,%d) is a Dirichlet point\n", i, k);
553 #endif
554 
555  } else if (pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
556  // Case C: a_ik is strong C-Point (goes directly into the weight)
557  P_values[pcol_k] += a_ik;
558 #ifdef CMS_DEBUG
559  printf("- A(%d,%d) is a strong C-Point\n", i, k);
560 #endif
561  } else if (!edgeIsStrong[row_start + k0]) {
562  // Case D: Weak non-Dirichlet neighbor (add to first denominator)
563  if (block_id.size() == 0 || block_id[i] == block_id[k]) {
564  first_denominator += a_ik;
565 #ifdef CMS_DEBUG
566  printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n", i, k, block_id[i], block_id[k], a_ik);
567  } else {
568  printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n", i, k, block_id[i], block_id[k]);
569 #endif
570  }
571 
572  } else { // Case E
573  // Case E: Strong F-Point (adds to the first denominator if we don't share a
574  // a strong C-Point with i; adds to the second denominator otherwise)
575 #ifdef CMS_DEBUG
576  printf("- A(%d,%d) is a strong F-Point\n", i, k);
577 #endif
578 
579  SC a_kk = SC_ZERO;
580  SC second_denominator = SC_ZERO;
581  int sign_akk = 0;
582 
583  if (k < (LO)Nrows) {
584  // Grab the diagonal a_kk
585  A.getLocalRowView(k, A_indices_k, A_vals_k);
586  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
587  LO m = A_indices_k[m0];
588  if (k == m) {
589  a_kk = A_vals_k[m0];
590  break;
591  }
592  } // end for A_indices_k
593 
594  // Compute the second denominator term
595  sign_akk = Sign(a_kk);
596  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
597  LO m = A_indices_k[m0];
598  if (m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
599  SC a_km = A_vals_k[m0];
600  second_denominator += (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
601  }
602  } // end for A_indices_k
603 
604  // Now we have the second denominator, for this particular strong F point.
605  // So we can now add the sum to the w_ij components for the P values
606  if (second_denominator != SC_ZERO) {
607  for (LO j0 = 0; j0 < (LO)A_indices_k.size(); j0++) {
608  LO j = A_indices_k[j0];
609  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
610  // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
611  if (Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
612  SC a_kj = A_vals_k[j0];
613  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
614  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
615 #ifdef CMS_DEBUG
616  printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n", i, j, a_ik * sign_akj_val / second_denominator, P_values[Acol_to_Pcol[j]]);
617 #endif
618  }
619  } // end for A_indices_k
620  } // end if second_denominator != 0
621  else {
622 #ifdef CMS_DEBUG
623  printf("- - A(%d,%d) second denominator is zero\n", i, k);
624 #endif
625  if (block_id.size() == 0 || block_id[i] == block_id[k])
626  first_denominator += a_ik;
627  } // end else second_denominator != 0
628  } // end if k < Nrows
629  else {
630  // Ghost row
631  LO kless = k - Nrows;
632  // Grab the diagonal a_kk
633  // NOTE: ColMap is not locally fitted to the RowMap
634  // so we need to check GIDs here
635  Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
636  GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
637  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
638  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
639  if (k_g == m_g) {
640  a_kk = A_vals_k[m0];
641  break;
642  }
643  } // end for A_indices_k
644 
645  // Compute the second denominator term
646  sign_akk = Sign(a_kk);
647  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
648  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
649  LO mghost = A_indices_k[m0]; // Aghost LCID
650  LO m = Aghostcol_to_Acol[mghost]; // A's LID (could be LO_INVALID)
651  if (m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
652  SC a_km = A_vals_k[m0];
653  second_denominator += (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
654  }
655  } // end for A_indices_k
656 
657  // Now we have the second denominator, for this particular strong F point.
658  // So we can now add the sum to the w_ij components for the P values
659  if (second_denominator != SC_ZERO) {
660  for (LO j0 = 0; j0 < (LO)A_indices_k.size(); j0++) {
661  LO jghost = A_indices_k[j0]; // Aghost LCID
662  LO j = Aghostcol_to_Acol[jghost]; // A's LID (could be LO_INVALID)
663  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
664  if ((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
665  SC a_kj = A_vals_k[j0];
666  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
667  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
668 #ifdef CMS_DEBUG
669  printf("- - Unscaled P(%d,A-%d) += %6.4e\n", i, j, a_ik * sign_akj_val / second_denominator);
670 #endif
671  }
672 
673  } // end for A_indices_k
674  } // end if second_denominator != 0
675  else {
676 #ifdef CMS_DEBUG
677  printf("- - A(%d,%d) second denominator is zero\n", i, k);
678 #endif
679  if (block_id.size() == 0 || block_id[i] == block_id[k])
680  first_denominator += a_ik;
681  } // end else second_denominator != 0
682  } // end else k < Nrows
683  } // end else Case A,...,E
684 
685  } // end for A_indices_i
686 
687  // Now, downscale by the first_denominator
688  if (first_denominator != SC_ZERO) {
689  for (LO j0 = 0; j0 < (LO)P_indices_i.size(); j0++) {
690 #ifdef CMS_DEBUG
691  SC old_pij = P_vals_i[j0];
692  P_vals_i[j0] /= -first_denominator;
693  printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n", i, P_indices_i[j0], P_vals_i[j0], old_pij, a_ii, first_denominator - a_ii);
694 #else
695  P_vals_i[j0] /= -first_denominator;
696 #endif
697  } // end for P_indices_i
698  } // end if first_denominator != 0
699 
700  } // end else C-Point
701 
702  } // end if i < Nrows
703 
704  // Finish up
705  Pcrs->setAllValues(P_rowptr, P_colind, P_values);
706  Pcrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
707  // Wrap from CrsMatrix to Matrix
708  P = rcp(new CrsMatrixWrap(Pcrs));
709 
710 } // end Coarsen_ClassicalModified
711 
712 /* ************************************************************************* */
713 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
715  Coarsen_Direct(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<Matrix>& P) const {
716  /* ============================================================= */
717  /* Phase 3 : Direct Interpolation */
718  /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
719  /* here. Instead we follow: */
720  /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
721  /* with some modifications inspirted by PyAMG */
722  /* ============================================================= */
723  /* Definitions: */
724  /* F = F-points */
725  /* C = C-points */
726  /* N_i = non-zero neighbors of node i */
727  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
728  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
729  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
730  /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
731 
732  /* (A.2.17) from p. 426 */
733  /* a_ij^- = { a_ij, if a_ij < 0 */
734  /* { 0, otherwise */
735  /* a_ij^+ = { a_ij, if a_ij > 0 */
736  /* { 0, otherwise */
737  /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
738  /* [strong C-neighbors with negative edges] */
739  /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
740  /* [strong C-neighbors with positive edges] */
741 
742  /* de Sterck et al., gives us this: */
743  /* Rewritten Equation (6) on p. 119 */
744  /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
745 
746  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
747  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
748  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
749  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
750  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
751  /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
752  /* works. We'll follow the PyAMG implementation in a few important ways. */
753 
755  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
756 
757  // Initial (estimated) allocation
758  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
759  // needs to be a copy below.
760  using STS = typename Teuchos::ScalarTraits<SC>;
761  using MT = typename STS::magnitudeType;
762  using MTS = typename Teuchos::ScalarTraits<MT>;
763  size_t Nrows = A.getLocalNumRows();
764  double c_point_density = (double)num_c_points / (num_c_points + num_f_points);
765  double mean_strong_neighbors_per_row = (double)graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
766  // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
767  double nnz_per_row_est = c_point_density * mean_strong_neighbors_per_row;
768 
769  size_t nnz_est = std::max(Nrows, std::min((size_t)A.getLocalNumEntries(), (size_t)(nnz_per_row_est * Nrows)));
770  SC SC_ZERO = STS::zero();
771  MT MT_ZERO = MTS::zero();
772  Array<size_t> tmp_rowptr(Nrows + 1);
773  Array<LO> tmp_colind(nnz_est);
774 
775  // Algorithm (count+realloc)
776  // For each row, i,
777  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
778  size_t ct = 0;
779  for (LO row = 0; row < (LO)Nrows; row++) {
780  size_t row_start = eis_rowptr[row];
781  ArrayView<const LO> indices;
782  ArrayView<const SC> vals;
783  std::set<LO> C_hat;
784  if (myPointType[row] == DIRICHLET_PT) {
785  // Dirichlet points get ignored completely
786  } else if (myPointType[row] == C_PT) {
787  // C-Points get a single 1 in their row
788  C_hat.insert(cpoint2pcol[row]);
789  } else {
790  // F-Points have a more complicated interpolation
791 
792  // C-neighbors of row
793  A.getLocalRowView(row, indices, vals);
794  for (LO j = 0; j < indices.size(); j++)
795  if (myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
796  C_hat.insert(cpoint2pcol[indices[j]]);
797  } // end else
798 
799  // Realloc if needed
800  if (ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
801  tmp_colind.resize(std::max(ct + (size_t)C_hat.size(), (size_t)2 * tmp_colind.size()));
802  }
803 
804  // Copy
805  std::copy(C_hat.begin(), C_hat.end(), tmp_colind.begin() + ct);
806  ct += C_hat.size();
807  tmp_rowptr[row + 1] = tmp_rowptr[row] + C_hat.size();
808  }
809  // Resize down
810  tmp_colind.resize(tmp_rowptr[Nrows]);
811 
812  // Allocate memory & copy
813  P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
814  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
815  ArrayRCP<size_t> P_rowptr;
816  ArrayRCP<LO> P_colind;
817  ArrayRCP<SC> P_values;
818 
819 #ifdef CMS_DEBUG
820  printf("CMS: Allocating P w/ %d nonzeros\n", (int)tmp_rowptr[Nrows]);
821 #endif
822  PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
823  TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() != P_rowptr.size(), Exceptions::RuntimeError, "ClassicalPFactory: Allocation size error (rowptr)");
824  TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() != P_colind.size(), Exceptions::RuntimeError, "ClassicalPFactory: Allocation size error (colind)");
825  // FIXME: This can be short-circuited for Tpetra, if we decide we care
826  for (LO i = 0; i < (LO)Nrows + 1; i++)
827  P_rowptr[i] = tmp_rowptr[i];
828  for (LO i = 0; i < (LO)tmp_rowptr[Nrows]; i++)
829  P_colind[i] = tmp_colind[i];
830 
831  // Algorithm (numeric)
832  for (LO i = 0; i < (LO)Nrows; i++) {
833  if (myPointType[i] == DIRICHLET_PT) {
834  // Dirichlet points get ignored completely
835 #ifdef CMS_DEBUG
836  // DEBUG
837  printf("** A(%d,:) is a Dirichlet-Point.\n", i);
838 #endif
839  } else if (myPointType[i] == C_PT) {
840  // C Points get a single 1 in their row
841  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
842 #ifdef CMS_DEBUG
843  // DEBUG
844  printf("** A(%d,:) is a C-Point.\n", i);
845 #endif
846  } else {
847  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
848  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
849  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
850  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
851  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
852  ArrayView<const LO> A_indices_i, A_incides_k;
853  ArrayView<const SC> A_vals_i, A_indices_k;
854  A.getLocalRowView(i, A_indices_i, A_vals_i);
855  size_t row_start = eis_rowptr[i];
856 
857  ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
858  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
859 
860 #ifdef CMS_DEBUG
861  // DEBUG
862  {
863  char mylabel[5] = "FUCD";
864  char sw[3] = "ws";
865  printf("** A(%d,:) = ", i);
866  for (LO j = 0; j < (LO)A_indices_i.size(); j++) {
867  printf("%6.4e(%d-%c%c) ", A_vals_i[j], A_indices_i[j], mylabel[1 + myPointType[A_indices_i[j]]], sw[(int)edgeIsStrong[row_start + j]]);
868  }
869  printf("\n");
870  }
871 #endif
872 
873  SC a_ii = SC_ZERO;
874  SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
875  SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
876  // Find the diagonal and compute the sum ratio
877  for (LO j = 0; j < (LO)A_indices_i.size(); j++) {
878  SC a_ik = A_vals_i[j];
879  LO k = A_indices_i[j];
880 
881  // Diagonal
882  if (i == k) {
883  a_ii = a_ik;
884  }
885  // Only strong C-neighbors are in the denomintor
886  if (myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
887  if (STS::real(a_ik) > MT_ZERO)
888  pos_denominator += a_ik;
889  else
890  neg_denominator += a_ik;
891  }
892 
893  // All neighbors are in the numerator
894  // NOTE: As per PyAMG, this does not include the diagonal
895  if (i != k) {
896  if (STS::real(a_ik) > MT_ZERO)
897  pos_numerator += a_ik;
898  else
899  neg_numerator += a_ik;
900  }
901  }
902  SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
903  SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
904  alpha /= -a_ii;
905  beta /= -a_ii;
906 
907  // Loop over the entries
908  for (LO p_j = 0; p_j < (LO)P_indices_i.size(); p_j++) {
909  LO P_col = pcol2cpoint[P_indices_i[p_j]];
910  SC a_ij = SC_ZERO;
911 
912  // Find A_ij (if it is there)
913  // FIXME: We can optimize this if we assume sorting
914  for (LO a_j = 0; a_j < (LO)A_indices_i.size(); a_j++) {
915  if (A_indices_i[a_j] == P_col) {
916  a_ij = A_vals_i[a_j];
917  break;
918  }
919  }
920  SC w_ij = (STS::real(a_ij) < 0) ? (alpha * a_ij) : (beta * a_ij);
921 #ifdef CMS_DEBUG
922  SC alpha_or_beta = (STS::real(a_ij) < 0) ? alpha : beta;
923  printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n", i, P_indices_i[p_j], pcol2cpoint[P_indices_i[p_j]], alpha_or_beta, a_ij, w_ij);
924 #endif
925  P_vals_i[p_j] = w_ij;
926  } // end for A_indices_i
927  } // end else C_PT
928  } // end for Numrows
929 
930  // Finish up
931  PCrs->setAllValues(P_rowptr, P_colind, P_values);
932  PCrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
933 }
934 
935 /* ************************************************************************* */
936 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
938  Coarsen_Ext_Plus_I(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<Matrix>& P) const {
939  /* ============================================================= */
940  /* Phase 3 : Extended+i Interpolation */
941  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
942  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
943  /* 15:115-139 */
944  /* ============================================================= */
945  /* Definitions: */
946  /* F = F-points */
947  /* C = C-points */
948  /* N_i = non-zero neighbors of node i */
949  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
950  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
951  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
952  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
953  /* This guy has a typo. The paper had a \cap instead of \cup */
954  /* I would note that this set can contain both F-points and */
955  /* C-points. They're just weak neighbors of this guy. */
956  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
957 
958  /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
959  /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
960  /* */
961 
962  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
963  /* { a_ij, otherwise */
964 
965  /* Rewritten Equation (19) on p. 123 */
966  /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
967  /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
968  /* for j in \hat{C}_i */
969 
970  /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
971  /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
972  /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
973  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "ClassicalPFactory: Ext+i not implemented");
974 }
975 
976 /* ************************************************************************* */
977 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
979  GenerateStrengthFlags(const Matrix& A, const LWGraph& graph, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong) const {
980  // To make this easier, we'll create a bool array equal to the nnz in the matrix
981  // so we know whether each edge is strong or not. This will save us a bunch of
982  // trying to match the graph and matrix later
983  size_t Nrows = A.getLocalNumRows();
984  eis_rowptr.resize(Nrows + 1);
985 
986  if (edgeIsStrong.size() == 0) {
987  // Preferred
988  edgeIsStrong.resize(A.getLocalNumEntries(), false);
989  } else {
990  edgeIsStrong.resize(A.getLocalNumEntries(), false);
991  edgeIsStrong.assign(A.getLocalNumEntries(), false);
992  }
993 
994  eis_rowptr[0] = 0;
995  for (LO i = 0; i < (LO)Nrows; i++) {
996  LO rowstart = eis_rowptr[i];
997  ArrayView<const LO> A_indices;
998  ArrayView<const SC> A_values;
999  A.getLocalRowView(i, A_indices, A_values);
1000  LO A_size = (LO)A_indices.size();
1001 
1002  auto G_indices = graph.getNeighborVertices(i);
1003  LO G_size = (LO)G_indices.length;
1004 
1005  // Both of these guys should be in the same (sorted) order, but let's check
1006  bool is_ok = true;
1007  for (LO j = 0; j < A_size - 1; j++)
1008  if (A_indices[j] >= A_indices[j + 1]) {
1009  is_ok = false;
1010  break;
1011  }
1012  for (LO j = 0; j < G_size - 1; j++)
1013  if (G_indices(j) >= G_indices(j + 1)) {
1014  is_ok = false;
1015  break;
1016  }
1017  TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError, "ClassicalPFactory: Exected A and Graph to be sorted");
1018 
1019  // Now cycle through and set the flags - if the edge is in G it is strong
1020  for (LO g_idx = 0, a_idx = 0; g_idx < G_size; g_idx++) {
1021  LO col = G_indices(g_idx);
1022  while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1023  if (a_idx == A_size) {
1024  is_ok = false;
1025  break;
1026  }
1027  edgeIsStrong[rowstart + a_idx] = true;
1028  }
1029 
1030  eis_rowptr[i + 1] = eis_rowptr[i] + A_size;
1031  }
1032 }
1033 
1034 /* ************************************************************************* */
1035 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1037  GhostCoarseMap(const Matrix& A, const Import& Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map>& coarseMap, RCP<const Map>& coarseColMap) const {
1039  const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1040  RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1041  ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1042  LO ct = 0;
1043 
1044  for (LO i = 0; i < (LO)d_data.size(); i++) {
1045  if (myPointType[i] == C_PT) {
1046  d_data[i] = coarseMap->getGlobalElement(ct);
1047  ct++;
1048  } else
1049  d_data[i] = GO_INVALID;
1050  }
1051 
1052  // Ghost this guy
1053  RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1054  c_coarseIds->doImport(*d_coarseIds, Importer, Xpetra::INSERT);
1055 
1056  // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1057  // be in Aztec ordering as well, which means we can just condense these guys down
1058  // Overallocate, count and view
1059  ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1060 
1061  Array<GO> c_gids(c_data.size());
1062  LO count = 0;
1063 
1064  for (LO i = 0; i < (LO)c_data.size(); i++) {
1065  if (c_data[i] != GO_INVALID) {
1066  c_gids[count] = c_data[i];
1067  count++;
1068  }
1069  }
1070  // FIXME: Assumes scalar PDE
1071  std::vector<size_t> stridingInfo_(1);
1072  stridingInfo_[0] = 1;
1073  GO domainGIDOffset = 0;
1074 
1075  coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1077  c_gids.view(0, count),
1078  coarseMap->getIndexBase(),
1079  stridingInfo_,
1080  coarseMap->getComm(),
1081  domainGIDOffset);
1082 }
1083 
1084 } // namespace MueLu
1085 
1086 #define MUELU_CLASSICALPFACTORY_SHORT
1087 #endif // MUELU_CLASSICALPFACTORY_DEF_HPP
void GenerateStrengthFlags(const Matrix &A, const LWGraph &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
Print more statistics.
size_type size() const
Print additional debugging information.
LocalOrdinal LO
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
size_type size() const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void resize(size_type new_size, const value_type &x=value_type())
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
iterator end()
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
size_type size() const
#define SET_VALID_ENTRY(name)
Scalar SC
Lightweight MueLu representation of a compressed row storage graph.
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
typename ClassicalMapFactory::point_type point_type
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
void assign(size_type n, const value_type &val)
ParameterEntry & getEntry(const std::string &name)
iterator begin()
iterator begin() const
bool is_null() const
ArrayView< T > view(size_type lowerOffset, size_type size) const