MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
47 #define MUELU_CLASSICALPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MultiVectorFactory.hpp>
50 #include <Xpetra_VectorFactory.hpp>
52 #include <Xpetra_Matrix.hpp>
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_MapFactory.hpp>
55 #include <Xpetra_Vector.hpp>
56 #include <Xpetra_Import.hpp>
57 #include <Xpetra_ImportUtils.hpp>
58 #include <Xpetra_IO.hpp>
59 #include <Xpetra_StridedMapFactory.hpp>
60 
62 
63 #include "MueLu_MasterList.hpp"
64 #include "MueLu_Monitor.hpp"
65 #include "MueLu_PerfUtils.hpp"
67 #include "MueLu_ClassicalMapFactory.hpp"
68 #include "MueLu_AmalgamationInfo.hpp"
69 #include "MueLu_LWGraph.hpp"
70 
71 //#define CMS_DEBUG
72 //#define CMS_DUMP
73 
74 namespace {
75 
76 template <class SC>
77 int Sign(SC val) {
78  using STS = typename Teuchos::ScalarTraits<SC>;
79  typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
80  if (STS::real(val) > MT_ZERO)
81  return 1;
82  else if (STS::real(val) < MT_ZERO)
83  return -1;
84  else
85  return 0;
86 }
87 
88 } // namespace
89 
90 namespace MueLu {
91 
92 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94  RCP<ParameterList> validParamList = rcp(new ParameterList());
95 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
96  SET_VALID_ENTRY("aggregation: deterministic");
97  SET_VALID_ENTRY("aggregation: coloring algorithm");
98  SET_VALID_ENTRY("aggregation: classical scheme");
99 
100  // To know if we need BlockNumber
101  SET_VALID_ENTRY("aggregation: drop scheme");
102  {
103  validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("direct", "ext+i", "classical modified"))));
104  }
105 
106 #undef SET_VALID_ENTRY
107  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
108  // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
109  validParamList->set<RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
110  validParamList->set<RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
111  validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
112  validParamList->set<RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
113  validParamList->set<RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
114  // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
115 
116  return validParamList;
117 }
118 
119 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121  Input(fineLevel, "A");
122  Input(fineLevel, "Graph");
123  Input(fineLevel, "DofsPerNode");
124  // Input(fineLevel, "UnAmalgamationInfo");
125  Input(fineLevel, "CoarseMap");
126  Input(fineLevel, "FC Splitting");
127 
128  const ParameterList& pL = GetParameterList();
129  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
130  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
131  Input(fineLevel, "BlockNumber");
132  }
133 }
134 
135 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
137  return BuildP(fineLevel, coarseLevel);
138 }
139 
140 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
142  FactoryMonitor m(*this, "Build", coarseLevel);
143  // using STS = Teuchos::ScalarTraits<SC>;
144 
145  // We start by assuming that someone did a reasonable strength of connection
146  // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
147 
148  // We begin by getting a MIS (from a graph coloring) and then at that point we need
149  // to start generating entries for the prolongator.
150  RCP<const Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
151  RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
152  RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel, "FC Splitting");
153  RCP<const LWGraph> graph = Get<RCP<LWGraph> >(fineLevel, "Graph");
154  // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
155  // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
156  RCP<const Import> Importer = A->getCrsGraph()->getImporter();
157  Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
158 
159  // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
160  RCP<Matrix> P;
161  // SC SC_ZERO = STS::zero();
165  const ParameterList& pL = GetParameterList();
166 
167  // FIXME: This guy doesn't work right now for NumPDEs != 1
168  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "ClassicalPFactory: Multiple PDEs per node not supported yet");
169 
170  // NOTE: Let's hope we never need to deal with this case
171  TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()), Exceptions::RuntimeError, "ClassicalPFactory: MPI Ranks > 1 not supported yet");
172 
173  // Do we need ghosts rows of A and myPointType?
174  std::string scheme = pL.get<std::string>("aggregation: classical scheme");
175  bool need_ghost_rows = false;
176  if (scheme == "ext+i")
177  need_ghost_rows = true;
178  else if (scheme == "direct")
179  need_ghost_rows = false;
180  else if (scheme == "classical modified")
181  need_ghost_rows = true;
182  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
183 
184  if (GetVerbLevel() & Statistics1) {
185  GetOStream(Statistics1) << "ClassicalPFactory: scheme = " << scheme << std::endl;
186  }
187 
188  // Ghost the FC splitting and grab the data (if needed)
189  RCP<const LocalOrdinalVector> fc_splitting;
190  ArrayRCP<const LO> myPointType;
191  if (Importer.is_null()) {
192  fc_splitting = owned_fc_splitting;
193  } else {
194  RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
195  fc_splitting_nonconst->doImport(*owned_fc_splitting, *Importer, Xpetra::INSERT);
196  fc_splitting = fc_splitting_nonconst;
197  }
198  myPointType = fc_splitting->getData(0);
199 
200  /* Ghost A (if needed) */
201  RCP<const Matrix> Aghost;
202  RCP<const LocalOrdinalVector> fc_splitting_ghost;
203  ArrayRCP<const LO> myPointType_ghost;
204  RCP<const Import> remoteOnlyImporter;
205  if (need_ghost_rows && !Importer.is_null()) {
206  ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
207  size_t numRemote = Importer->getNumRemoteIDs();
208  Array<GO> remoteRows(numRemote);
209  for (size_t i = 0; i < numRemote; i++)
210  remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
211 
212  RCP<const Map> remoteRowMap = MapFactory::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
213  A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
214 
215  remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
216  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
217  RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs, *remoteOnlyImporter, A->getDomainMap(), remoteOnlyImporter->getTargetMap());
218  Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
219  // CAG: It seems that this isn't actually needed?
220  // We also may need need to ghost myPointType for Aghost
221  // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
222  // if(Importer2.is_null()) {
223  // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
224  // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
225  // fc_splitting_ghost = fc_splitting_ghost_nonconst;
226  // myPointType_ghost = fc_splitting_ghost->getData(0);
227  // }
228  }
229 
230  /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
231  RCP<const Map> coarseMap;
232  if (Importer.is_null())
233  coarseMap = ownedCoarseMap;
234  else {
235  // Generate a domain vector with the coarse ID's as entries for C points
236  GhostCoarseMap(*A, *Importer, myPointType, ownedCoarseMap, coarseMap);
237  }
238 
239  /* Get the block number, if we need it (and ghost it) */
240  RCP<LocalOrdinalVector> BlockNumber;
241  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
242  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
243  RCP<LocalOrdinalVector> OwnedBlockNumber;
244  OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
245  if (Importer.is_null())
246  BlockNumber = OwnedBlockNumber;
247  else {
248  BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
249  BlockNumber->doImport(*OwnedBlockNumber, *Importer, Xpetra::INSERT);
250  }
251  }
252 
253 #if defined(CMS_DEBUG) || defined(CMS_DUMP)
254  {
255  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
256  int rank = A->getRowMap()->getComm()->getRank();
257  printf("[%d] A local size = %dx%d\n", rank, (int)Acrs->getRowMap()->getLocalNumElements(), (int)Acrs->getColMap()->getLocalNumElements());
258 
259  printf("[%d] graph local size = %dx%d\n", rank, (int)graph->GetDomainMap()->getLocalNumElements(), (int)graph->GetImportMap()->getLocalNumElements());
260 
261  std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat"), std::ofstream::out);
262  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
263  graph->print(*fancy, Debug);
264  std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat");
265  std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) + std::string("_") + std::to_string(rank) + std::string(".dat");
266 
267  // We don't support writing LO vectors in Xpetra (boo!) so....
268  using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
269  using RealValuedMultiVector = typename Xpetra::MultiVector<real_type, LO, GO, NO>;
270  typedef Xpetra::MultiVectorFactory<real_type, LO, GO, NO> RealValuedMultiVectorFactory;
271 
272  RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(), 1);
273  ArrayRCP<real_type> mv_data = mv->getDataNonConst(0);
274 
275  // FC Splitting
276  ArrayRCP<const LO> fc_data = fc_splitting->getData(0);
277  for (LO i = 0; i < (LO)fc_data.size(); i++)
278  mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
280 
281  // Block Number
282  if (!BlockNumber.is_null()) {
283  RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(), 1);
284  ArrayRCP<real_type> mv_data2 = mv2->getDataNonConst(0);
285  ArrayRCP<const LO> b_data = BlockNumber->getData(0);
286  for (LO i = 0; i < (LO)b_data.size(); i++) {
287  mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
288  }
290  }
291  }
292 
293 #endif
294 
295  /* Generate reindexing arrays */
296  // Note: cpoint2pcol is ghosted if myPointType is
297  // NOTE: Since the ghosted coarse column map follows the ordering of
298  // the fine column map, this *should* work, because it is in local indices.
299  // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
300  // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
301  Array<LO> cpoint2pcol(myPointType.size(), LO_INVALID);
302  Array<LO> pcol2cpoint(coarseMap->getLocalNumElements(), LO_INVALID);
303  LO num_c_points = 0;
304  LO num_f_points = 0;
305  for (LO i = 0; i < (LO)myPointType.size(); i++) {
306  if (myPointType[i] == C_PT) {
307  cpoint2pcol[i] = num_c_points;
308  num_c_points++;
309  } else if (myPointType[i] == F_PT)
310  num_f_points++;
311  }
312  for (LO i = 0; i < (LO)cpoint2pcol.size(); i++) {
313  if (cpoint2pcol[i] != LO_INVALID)
314  pcol2cpoint[cpoint2pcol[i]] = i;
315  }
316 
317  // Generate edge strength flags (this will make everything easier later)
318  // These do *not* need to be ghosted (unlike A)
319  Teuchos::Array<size_t> eis_rowptr;
320  Teuchos::Array<bool> edgeIsStrong;
321  {
322  SubFactoryMonitor sfm(*this, "Strength Flags", coarseLevel);
323  GenerateStrengthFlags(*A, *graph, eis_rowptr, edgeIsStrong);
324  }
325 
326  // Phase 3: Generate the P matrix
327  RCP<const Map> coarseColMap = coarseMap;
328  RCP<const Map> coarseDomainMap = ownedCoarseMap;
329  if (scheme == "ext+i") {
330  SubFactoryMonitor sfm(*this, "Ext+i Interpolation", coarseLevel);
331  Coarsen_Ext_Plus_I(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, P);
332  } else if (scheme == "direct") {
333  SubFactoryMonitor sfm(*this, "Direct Interpolation", coarseLevel);
334  Coarsen_Direct(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, P);
335  } else if (scheme == "classical modified") {
336  SubFactoryMonitor sfm(*this, "Classical Modified Interpolation", coarseLevel);
337  Coarsen_ClassicalModified(*A, Aghost, *graph, coarseColMap, coarseDomainMap, num_c_points, num_f_points, myPointType(), myPointType_ghost(), cpoint2pcol, pcol2cpoint, eis_rowptr, edgeIsStrong, BlockNumber, remoteOnlyImporter, P);
338  }
339  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
340 
341 #ifdef CMS_DEBUG
342  Xpetra::IO<SC, LO, GO, NO>::Write("classical_p.mat." + std::to_string(coarseLevel.GetLevelID()), *P);
343  // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
344 #endif
345 
346  // Save output
347  Set(coarseLevel, "P", P);
348  RCP<const CrsGraph> pg = P->getCrsGraph();
349  Set(coarseLevel, "P Graph", pg);
350 
351  // RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
352  // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
353  // Set(coarseLevel, "Nullspace", coarseNullspace);
354 
355  if (IsPrint(Statistics1)) {
356  RCP<ParameterList> params = rcp(new ParameterList());
357  params->set("printLoadBalancingInfo", true);
358  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
359  }
360 }
361 /* ************************************************************************* */
362 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
364  Coarsen_ClassicalModified(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<const Import> remoteOnlyImporter, RCP<Matrix>& P) const {
365  /* ============================================================= */
366  /* Phase 3 : Classical Modified Interpolation */
367  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
368  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
369  /* 15:115-139 */
370  /* ============================================================= */
371  /* Definitions: */
372  /* F = F-points */
373  /* C = C-points */
374  /* N_i = non-zero neighbors of node i */
375  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
376  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
377  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
378 
379  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
380  /* This guy has a typo. The paper had a \cap instead of \cup */
381  /* I would note that this set can contain both F-points and */
382  /* C-points. They're just weak neighbors of this guy. */
383  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
384 
385  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
386  /* { a_ij, otherwise */
387 
388  /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
389  /* [set of F-neighbors of i that do not share a strong */
390  /* C-neighbor with i] */
391 
392  /* Rewritten Equation (9) on p. 120 */
393  /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
394  /* */
395  /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
396  /* */
397  /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
398 
399  // const point_type F_PT = ClassicalMapFactory::F_PT;
401  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
402  using STS = typename Teuchos::ScalarTraits<SC>;
404  // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
405  SC SC_ZERO = STS::zero();
406 #ifdef CMS_DEBUG
407  int rank = A.getRowMap()->getComm()->getRank();
408 #endif
409 
410  // Get the block number if we have it.
411  ArrayRCP<const LO> block_id;
412  if (!BlockNumber.is_null())
413  block_id = BlockNumber->getData(0);
414 
415  // Initial (estimated) allocation
416  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
417  // needs to be a copy below.
418  size_t Nrows = A.getLocalNumRows();
419  double c_point_density = (double)num_c_points / (num_c_points + num_f_points);
420  double mean_strong_neighbors_per_row = (double)graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
421  // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
422  double nnz_per_row_est = c_point_density * mean_strong_neighbors_per_row;
423 
424  size_t nnz_est = std::max(Nrows, std::min((size_t)A.getLocalNumEntries(), (size_t)(nnz_per_row_est * Nrows)));
425  Array<size_t> tmp_rowptr(Nrows + 1);
426  Array<LO> tmp_colind(nnz_est);
427 
428  // Algorithm (count+realloc)
429  // For each row, i,
430  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
431  size_t ct = 0;
432  for (LO row = 0; row < (LO)Nrows; row++) {
433  size_t row_start = eis_rowptr[row];
434  ArrayView<const LO> indices;
435  ArrayView<const SC> vals;
436  std::set<LO> C_hat;
437  if (myPointType[row] == DIRICHLET_PT) {
438  // Dirichlet points get ignored completely
439  } else if (myPointType[row] == C_PT) {
440  // C-Points get a single 1 in their row
441  C_hat.insert(cpoint2pcol[row]);
442  } else {
443  // F-Points have a more complicated interpolation
444 
445  // C-neighbors of row
446  A.getLocalRowView(row, indices, vals);
447  for (LO j = 0; j < indices.size(); j++)
448  if (myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
449  C_hat.insert(cpoint2pcol[indices[j]]);
450  } // end else
451 
452  // Realloc if needed
453  if (ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
454  tmp_colind.resize(std::max(ct + (size_t)C_hat.size(), (size_t)2 * tmp_colind.size()));
455  }
456 
457  // Copy
458  std::copy(C_hat.begin(), C_hat.end(), tmp_colind.begin() + ct);
459  ct += C_hat.size();
460  tmp_rowptr[row + 1] = tmp_rowptr[row] + C_hat.size();
461  }
462  // Resize down
463  tmp_colind.resize(tmp_rowptr[Nrows]);
464 
465  RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(), coarseColMap, 0);
466  ArrayRCP<size_t> P_rowptr;
467  ArrayRCP<LO> P_colind;
468  ArrayRCP<SC> P_values;
469 
470  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
471  P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
472  P_colind = Teuchos::arcpFromArray(tmp_colind);
473  P_values.resize(P_rowptr[Nrows]);
474  } else {
475  // Make the matrix and then get the graph out of it (necessary for Epetra)
476  // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
477  // impossible to do the obvious way
478  Pcrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
479  std::copy(tmp_rowptr.begin(), tmp_rowptr.end(), P_rowptr.begin());
480  std::copy(tmp_colind.begin(), tmp_colind.end(), P_colind.begin());
481  Pcrs->setAllValues(P_rowptr, P_colind, P_values);
482  Pcrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
483  }
484 
485  // Generate a remote-ghosted version of the graph (if we're in parallel)
486  RCP<const CrsGraph> Pgraph;
487  RCP<const CrsGraph> Pghost;
488  // TODO: We might want to be more efficient here and actually use
489  // Pgraph in the matrix constructor.
490  ArrayRCP<const size_t> Pghost_rowptr;
491  ArrayRCP<const LO> Pghost_colind;
492  if (!remoteOnlyImporter.is_null()) {
493  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
494  RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(), coarseColMap, P_rowptr, P_colind);
495  tempPgraph->fillComplete(coarseDomainMap, A.getDomainMap());
496  Pgraph = tempPgraph;
497  } else {
498  // Epetra does not have a graph constructor that uses rowptr and colind.
499  Pgraph = Pcrs->getCrsGraph();
500  }
501  TEUCHOS_ASSERT(!Pgraph.is_null());
502  Pghost = CrsGraphFactory::Build(Pgraph, *remoteOnlyImporter, Pgraph->getDomainMap(), remoteOnlyImporter->getTargetMap());
503  Pghost->getAllIndices(Pghost_rowptr, Pghost_colind);
504  }
505 
506  // Gustavson-style perfect hashing
507  ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getLocalNumElements(), LO_INVALID);
508 
509  // Get a quick reindexing array from Pghost LCIDs to P LCIDs
510  ArrayRCP<LO> Pghostcol_to_Pcol;
511  if (!Pghost.is_null()) {
512  Pghostcol_to_Pcol.resize(Pghost->getColMap()->getLocalNumElements(), LO_INVALID);
513  for (LO i = 0; i < (LO)Pghost->getColMap()->getLocalNumElements(); i++)
514  Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
515  } // end Pghost
516 
517  // Get a quick reindexing array from Aghost LCIDs to A LCIDs
518  ArrayRCP<LO> Aghostcol_to_Acol;
519  if (!Aghost.is_null()) {
520  Aghostcol_to_Acol.resize(Aghost->getColMap()->getLocalNumElements(), LO_INVALID);
521  for (LO i = 0; i < (LO)Aghost->getColMap()->getLocalNumElements(); i++)
522  Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
523  } // end Aghost
524 
525  // Algorithm (numeric)
526  for (LO i = 0; i < (LO)Nrows; i++) {
527  if (myPointType[i] == DIRICHLET_PT) {
528  // Dirichlet points get ignored completely
529 #ifdef CMS_DEBUG
530  // DEBUG
531  printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n", rank, i);
532 #endif
533  } else if (myPointType[i] == C_PT) {
534  // C Points get a single 1 in their row
535  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
536 #ifdef CMS_DEBUG
537  // DEBUG
538  printf("[%d] ** A(%d,:) is a C-Point.\n", rank, i);
539 #endif
540  } else {
541  // F Points get all of the fancy stuff
542 #ifdef CMS_DEBUG
543  // DEBUG
544  printf("[%d] ** A(%d,:) is a F-Point.\n", rank, i);
545 #endif
546 
547  // Get all of the relevant information about this row
548  ArrayView<const LO> A_indices_i, A_indices_k;
549  ArrayView<const SC> A_vals_i, A_vals_k;
550  A.getLocalRowView(i, A_indices_i, A_vals_i);
551  size_t row_start = eis_rowptr[i];
552 
553  ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
554  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
555 
556  // FIXME: Do we need this?
557  for (LO j = 0; j < (LO)P_vals_i.size(); j++)
558  P_vals_i[j] = SC_ZERO;
559 
560  // Stash the hash: Flag any strong C-points with their index into P_colind
561  // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
562  for (LO j = 0; j < (LO)P_indices_i.size(); j++) {
563  Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
564  }
565 
566  // Loop over the entries in the row
567  SC first_denominator = SC_ZERO;
568 #ifdef CMS_DEBUG
569  SC a_ii = SC_ZERO;
570 #endif
571  for (LO k0 = 0; k0 < (LO)A_indices_i.size(); k0++) {
572  LO k = A_indices_i[k0];
573  SC a_ik = A_vals_i[k0];
574  LO pcol_k = Acol_to_Pcol[k];
575 
576  if (k == i) {
577  // Case A: Diagonal value (add to first denominator)
578  // FIXME: Add BlockNumber matching here
579  first_denominator += a_ik;
580 #ifdef CMS_DEBUG
581  a_ii = a_ik;
582  printf("- A(%d,%d) is the diagonal\n", i, k);
583 #endif
584 
585  } else if (myPointType[k] == DIRICHLET_PT) {
586  // Case B: Ignore dirichlet connections completely
587 #ifdef CMS_DEBUG
588  printf("- A(%d,%d) is a Dirichlet point\n", i, k);
589 #endif
590 
591  } else if (pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
592  // Case C: a_ik is strong C-Point (goes directly into the weight)
593  P_values[pcol_k] += a_ik;
594 #ifdef CMS_DEBUG
595  printf("- A(%d,%d) is a strong C-Point\n", i, k);
596 #endif
597  } else if (!edgeIsStrong[row_start + k0]) {
598  // Case D: Weak non-Dirichlet neighbor (add to first denominator)
599  if (block_id.size() == 0 || block_id[i] == block_id[k]) {
600  first_denominator += a_ik;
601 #ifdef CMS_DEBUG
602  printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n", i, k, block_id[i], block_id[k], a_ik);
603  } else {
604  printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n", i, k, block_id[i], block_id[k]);
605 #endif
606  }
607 
608  } else { // Case E
609  // Case E: Strong F-Point (adds to the first denominator if we don't share a
610  // a strong C-Point with i; adds to the second denominator otherwise)
611 #ifdef CMS_DEBUG
612  printf("- A(%d,%d) is a strong F-Point\n", i, k);
613 #endif
614 
615  SC a_kk = SC_ZERO;
616  SC second_denominator = SC_ZERO;
617  int sign_akk = 0;
618 
619  if (k < (LO)Nrows) {
620  // Grab the diagonal a_kk
621  A.getLocalRowView(k, A_indices_k, A_vals_k);
622  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
623  LO m = A_indices_k[m0];
624  if (k == m) {
625  a_kk = A_vals_k[m0];
626  break;
627  }
628  } // end for A_indices_k
629 
630  // Compute the second denominator term
631  sign_akk = Sign(a_kk);
632  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
633  LO m = A_indices_k[m0];
634  if (m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
635  SC a_km = A_vals_k[m0];
636  second_denominator += (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
637  }
638  } // end for A_indices_k
639 
640  // Now we have the second denominator, for this particular strong F point.
641  // So we can now add the sum to the w_ij components for the P values
642  if (second_denominator != SC_ZERO) {
643  for (LO j0 = 0; j0 < (LO)A_indices_k.size(); j0++) {
644  LO j = A_indices_k[j0];
645  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
646  // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
647  if (Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
648  SC a_kj = A_vals_k[j0];
649  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
650  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
651 #ifdef CMS_DEBUG
652  printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n", i, j, a_ik * sign_akj_val / second_denominator, P_values[Acol_to_Pcol[j]]);
653 #endif
654  }
655  } // end for A_indices_k
656  } // end if second_denominator != 0
657  else {
658 #ifdef CMS_DEBUG
659  printf("- - A(%d,%d) second denominator is zero\n", i, k);
660 #endif
661  if (block_id.size() == 0 || block_id[i] == block_id[k])
662  first_denominator += a_ik;
663  } // end else second_denominator != 0
664  } // end if k < Nrows
665  else {
666  // Ghost row
667  LO kless = k - Nrows;
668  // Grab the diagonal a_kk
669  // NOTE: ColMap is not locally fitted to the RowMap
670  // so we need to check GIDs here
671  Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
672  GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
673  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
674  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
675  if (k_g == m_g) {
676  a_kk = A_vals_k[m0];
677  break;
678  }
679  } // end for A_indices_k
680 
681  // Compute the second denominator term
682  sign_akk = Sign(a_kk);
683  for (LO m0 = 0; m0 < (LO)A_indices_k.size(); m0++) {
684  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
685  LO mghost = A_indices_k[m0]; // Aghost LCID
686  LO m = Aghostcol_to_Acol[mghost]; // A's LID (could be LO_INVALID)
687  if (m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
688  SC a_km = A_vals_k[m0];
689  second_denominator += (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
690  }
691  } // end for A_indices_k
692 
693  // Now we have the second denominator, for this particular strong F point.
694  // So we can now add the sum to the w_ij components for the P values
695  if (second_denominator != SC_ZERO) {
696  for (LO j0 = 0; j0 < (LO)A_indices_k.size(); j0++) {
697  LO jghost = A_indices_k[j0]; // Aghost LCID
698  LO j = Aghostcol_to_Acol[jghost]; // A's LID (could be LO_INVALID)
699  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
700  if ((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
701  SC a_kj = A_vals_k[j0];
702  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
703  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
704 #ifdef CMS_DEBUG
705  printf("- - Unscaled P(%d,A-%d) += %6.4e\n", i, j, a_ik * sign_akj_val / second_denominator);
706 #endif
707  }
708 
709  } // end for A_indices_k
710  } // end if second_denominator != 0
711  else {
712 #ifdef CMS_DEBUG
713  printf("- - A(%d,%d) second denominator is zero\n", i, k);
714 #endif
715  if (block_id.size() == 0 || block_id[i] == block_id[k])
716  first_denominator += a_ik;
717  } // end else second_denominator != 0
718  } // end else k < Nrows
719  } // end else Case A,...,E
720 
721  } // end for A_indices_i
722 
723  // Now, downscale by the first_denominator
724  if (first_denominator != SC_ZERO) {
725  for (LO j0 = 0; j0 < (LO)P_indices_i.size(); j0++) {
726 #ifdef CMS_DEBUG
727  SC old_pij = P_vals_i[j0];
728  P_vals_i[j0] /= -first_denominator;
729  printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n", i, P_indices_i[j0], P_vals_i[j0], old_pij, a_ii, first_denominator - a_ii);
730 #else
731  P_vals_i[j0] /= -first_denominator;
732 #endif
733  } // end for P_indices_i
734  } // end if first_denominator != 0
735 
736  } // end else C-Point
737 
738  } // end if i < Nrows
739 
740  // Finish up
741  Pcrs->setAllValues(P_rowptr, P_colind, P_values);
742  Pcrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
743  // Wrap from CrsMatrix to Matrix
744  P = rcp(new CrsMatrixWrap(Pcrs));
745 
746 } // end Coarsen_ClassicalModified
747 
748 /* ************************************************************************* */
749 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
751  Coarsen_Direct(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<Matrix>& P) const {
752  /* ============================================================= */
753  /* Phase 3 : Direct Interpolation */
754  /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
755  /* here. Instead we follow: */
756  /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
757  /* with some modifications inspirted by PyAMG */
758  /* ============================================================= */
759  /* Definitions: */
760  /* F = F-points */
761  /* C = C-points */
762  /* N_i = non-zero neighbors of node i */
763  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
764  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
765  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
766  /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
767 
768  /* (A.2.17) from p. 426 */
769  /* a_ij^- = { a_ij, if a_ij < 0 */
770  /* { 0, otherwise */
771  /* a_ij^+ = { a_ij, if a_ij > 0 */
772  /* { 0, otherwise */
773  /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
774  /* [strong C-neighbors with negative edges] */
775  /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
776  /* [strong C-neighbors with positive edges] */
777 
778  /* de Sterck et al., gives us this: */
779  /* Rewritten Equation (6) on p. 119 */
780  /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
781 
782  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
783  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
784  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
785  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
786  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
787  /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
788  /* works. We'll follow the PyAMG implementation in a few important ways. */
789 
791  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
792 
793  // Initial (estimated) allocation
794  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
795  // needs to be a copy below.
796  using STS = typename Teuchos::ScalarTraits<SC>;
797  using MT = typename STS::magnitudeType;
798  using MTS = typename Teuchos::ScalarTraits<MT>;
799  size_t Nrows = A.getLocalNumRows();
800  double c_point_density = (double)num_c_points / (num_c_points + num_f_points);
801  double mean_strong_neighbors_per_row = (double)graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
802  // double mean_neighbors_per_row = (double)A.getLocalNumEntries() / Nrows;
803  double nnz_per_row_est = c_point_density * mean_strong_neighbors_per_row;
804 
805  size_t nnz_est = std::max(Nrows, std::min((size_t)A.getLocalNumEntries(), (size_t)(nnz_per_row_est * Nrows)));
806  SC SC_ZERO = STS::zero();
807  MT MT_ZERO = MTS::zero();
808  Array<size_t> tmp_rowptr(Nrows + 1);
809  Array<LO> tmp_colind(nnz_est);
810 
811  // Algorithm (count+realloc)
812  // For each row, i,
813  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
814  size_t ct = 0;
815  for (LO row = 0; row < (LO)Nrows; row++) {
816  size_t row_start = eis_rowptr[row];
817  ArrayView<const LO> indices;
818  ArrayView<const SC> vals;
819  std::set<LO> C_hat;
820  if (myPointType[row] == DIRICHLET_PT) {
821  // Dirichlet points get ignored completely
822  } else if (myPointType[row] == C_PT) {
823  // C-Points get a single 1 in their row
824  C_hat.insert(cpoint2pcol[row]);
825  } else {
826  // F-Points have a more complicated interpolation
827 
828  // C-neighbors of row
829  A.getLocalRowView(row, indices, vals);
830  for (LO j = 0; j < indices.size(); j++)
831  if (myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
832  C_hat.insert(cpoint2pcol[indices[j]]);
833  } // end else
834 
835  // Realloc if needed
836  if (ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
837  tmp_colind.resize(std::max(ct + (size_t)C_hat.size(), (size_t)2 * tmp_colind.size()));
838  }
839 
840  // Copy
841  std::copy(C_hat.begin(), C_hat.end(), tmp_colind.begin() + ct);
842  ct += C_hat.size();
843  tmp_rowptr[row + 1] = tmp_rowptr[row] + C_hat.size();
844  }
845  // Resize down
846  tmp_colind.resize(tmp_rowptr[Nrows]);
847 
848  // Allocate memory & copy
849  P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
850  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
851  ArrayRCP<size_t> P_rowptr;
852  ArrayRCP<LO> P_colind;
853  ArrayRCP<SC> P_values;
854 
855 #ifdef CMS_DEBUG
856  printf("CMS: Allocating P w/ %d nonzeros\n", (int)tmp_rowptr[Nrows]);
857 #endif
858  PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
859  TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() != P_rowptr.size(), Exceptions::RuntimeError, "ClassicalPFactory: Allocation size error (rowptr)");
860  TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() != P_colind.size(), Exceptions::RuntimeError, "ClassicalPFactory: Allocation size error (colind)");
861  // FIXME: This can be short-circuited for Tpetra, if we decide we care
862  for (LO i = 0; i < (LO)Nrows + 1; i++)
863  P_rowptr[i] = tmp_rowptr[i];
864  for (LO i = 0; i < (LO)tmp_rowptr[Nrows]; i++)
865  P_colind[i] = tmp_colind[i];
866 
867  // Algorithm (numeric)
868  for (LO i = 0; i < (LO)Nrows; i++) {
869  if (myPointType[i] == DIRICHLET_PT) {
870  // Dirichlet points get ignored completely
871 #ifdef CMS_DEBUG
872  // DEBUG
873  printf("** A(%d,:) is a Dirichlet-Point.\n", i);
874 #endif
875  } else if (myPointType[i] == C_PT) {
876  // C Points get a single 1 in their row
877  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
878 #ifdef CMS_DEBUG
879  // DEBUG
880  printf("** A(%d,:) is a C-Point.\n", i);
881 #endif
882  } else {
883  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
884  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
885  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
886  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
887  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
888  ArrayView<const LO> A_indices_i, A_incides_k;
889  ArrayView<const SC> A_vals_i, A_indices_k;
890  A.getLocalRowView(i, A_indices_i, A_vals_i);
891  size_t row_start = eis_rowptr[i];
892 
893  ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
894  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i], P_rowptr[i + 1] - P_rowptr[i]);
895 
896 #ifdef CMS_DEBUG
897  // DEBUG
898  {
899  char mylabel[5] = "FUCD";
900  char sw[3] = "ws";
901  printf("** A(%d,:) = ", i);
902  for (LO j = 0; j < (LO)A_indices_i.size(); j++) {
903  printf("%6.4e(%d-%c%c) ", A_vals_i[j], A_indices_i[j], mylabel[1 + myPointType[A_indices_i[j]]], sw[(int)edgeIsStrong[row_start + j]]);
904  }
905  printf("\n");
906  }
907 #endif
908 
909  SC a_ii = SC_ZERO;
910  SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
911  SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
912  // Find the diagonal and compute the sum ratio
913  for (LO j = 0; j < (LO)A_indices_i.size(); j++) {
914  SC a_ik = A_vals_i[j];
915  LO k = A_indices_i[j];
916 
917  // Diagonal
918  if (i == k) {
919  a_ii = a_ik;
920  }
921  // Only strong C-neighbors are in the denomintor
922  if (myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
923  if (STS::real(a_ik) > MT_ZERO)
924  pos_denominator += a_ik;
925  else
926  neg_denominator += a_ik;
927  }
928 
929  // All neighbors are in the numerator
930  // NOTE: As per PyAMG, this does not include the diagonal
931  if (i != k) {
932  if (STS::real(a_ik) > MT_ZERO)
933  pos_numerator += a_ik;
934  else
935  neg_numerator += a_ik;
936  }
937  }
938  SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
939  SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
940  alpha /= -a_ii;
941  beta /= -a_ii;
942 
943  // Loop over the entries
944  for (LO p_j = 0; p_j < (LO)P_indices_i.size(); p_j++) {
945  LO P_col = pcol2cpoint[P_indices_i[p_j]];
946  SC a_ij = SC_ZERO;
947 
948  // Find A_ij (if it is there)
949  // FIXME: We can optimize this if we assume sorting
950  for (LO a_j = 0; a_j < (LO)A_indices_i.size(); a_j++) {
951  if (A_indices_i[a_j] == P_col) {
952  a_ij = A_vals_i[a_j];
953  break;
954  }
955  }
956  SC w_ij = (STS::real(a_ij) < 0) ? (alpha * a_ij) : (beta * a_ij);
957 #ifdef CMS_DEBUG
958  SC alpha_or_beta = (STS::real(a_ij) < 0) ? alpha : beta;
959  printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n", i, P_indices_i[p_j], pcol2cpoint[P_indices_i[p_j]], alpha_or_beta, a_ij, w_ij);
960 #endif
961  P_vals_i[p_j] = w_ij;
962  } // end for A_indices_i
963  } // end else C_PT
964  } // end for Numrows
965 
966  // Finish up
967  PCrs->setAllValues(P_rowptr, P_colind, P_values);
968  PCrs->expertStaticFillComplete(/*domain*/ coarseDomainMap, /*range*/ A.getDomainMap());
969 }
970 
971 /* ************************************************************************* */
972 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
974  Coarsen_Ext_Plus_I(const Matrix& A, const RCP<const Matrix>& Aghost, const LWGraph& graph, RCP<const Map>& coarseColMap, RCP<const Map>& coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO>& myPointType, const Teuchos::ArrayView<const LO>& myPointType_ghost, const Teuchos::Array<LO>& cpoint2pcol, const Teuchos::Array<LO>& pcol2cpoint, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong, RCP<LocalOrdinalVector>& BlockNumber, RCP<Matrix>& P) const {
975  /* ============================================================= */
976  /* Phase 3 : Extended+i Interpolation */
977  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
978  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
979  /* 15:115-139 */
980  /* ============================================================= */
981  /* Definitions: */
982  /* F = F-points */
983  /* C = C-points */
984  /* N_i = non-zero neighbors of node i */
985  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
986  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
987  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
988  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
989  /* This guy has a typo. The paper had a \cap instead of \cup */
990  /* I would note that this set can contain both F-points and */
991  /* C-points. They're just weak neighbors of this guy. */
992  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
993 
994  /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
995  /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
996  /* */
997 
998  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
999  /* { a_ij, otherwise */
1000 
1001  /* Rewritten Equation (19) on p. 123 */
1002  /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1003  /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
1004  /* for j in \hat{C}_i */
1005 
1006  /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
1007  /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1008  /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
1009  TEUCHOS_TEST_FOR_EXCEPTION(1, std::runtime_error, "ClassicalPFactory: Ext+i not implemented");
1010 }
1011 
1012 /* ************************************************************************* */
1013 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1015  GenerateStrengthFlags(const Matrix& A, const LWGraph& graph, Teuchos::Array<size_t>& eis_rowptr, Teuchos::Array<bool>& edgeIsStrong) const {
1016  // To make this easier, we'll create a bool array equal to the nnz in the matrix
1017  // so we know whether each edge is strong or not. This will save us a bunch of
1018  // trying to match the graph and matrix later
1019  size_t Nrows = A.getLocalNumRows();
1020  eis_rowptr.resize(Nrows + 1);
1021 
1022  if (edgeIsStrong.size() == 0) {
1023  // Preferred
1024  edgeIsStrong.resize(A.getLocalNumEntries(), false);
1025  } else {
1026  edgeIsStrong.resize(A.getLocalNumEntries(), false);
1027  edgeIsStrong.assign(A.getLocalNumEntries(), false);
1028  }
1029 
1030  eis_rowptr[0] = 0;
1031  for (LO i = 0; i < (LO)Nrows; i++) {
1032  LO rowstart = eis_rowptr[i];
1033  ArrayView<const LO> A_indices;
1034  ArrayView<const SC> A_values;
1035  A.getLocalRowView(i, A_indices, A_values);
1036  LO A_size = (LO)A_indices.size();
1037 
1038  auto G_indices = graph.getNeighborVertices(i);
1039  LO G_size = (LO)G_indices.length;
1040 
1041  // Both of these guys should be in the same (sorted) order, but let's check
1042  bool is_ok = true;
1043  for (LO j = 0; j < A_size - 1; j++)
1044  if (A_indices[j] >= A_indices[j + 1]) {
1045  is_ok = false;
1046  break;
1047  }
1048  for (LO j = 0; j < G_size - 1; j++)
1049  if (G_indices(j) >= G_indices(j + 1)) {
1050  is_ok = false;
1051  break;
1052  }
1053  TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError, "ClassicalPFactory: Exected A and Graph to be sorted");
1054 
1055  // Now cycle through and set the flags - if the edge is in G it is strong
1056  for (LO g_idx = 0, a_idx = 0; g_idx < G_size; g_idx++) {
1057  LO col = G_indices(g_idx);
1058  while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1059  if (a_idx == A_size) {
1060  is_ok = false;
1061  break;
1062  }
1063  edgeIsStrong[rowstart + a_idx] = true;
1064  }
1065 
1066  eis_rowptr[i + 1] = eis_rowptr[i] + A_size;
1067  }
1068 }
1069 
1070 /* ************************************************************************* */
1071 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1073  GhostCoarseMap(const Matrix& A, const Import& Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map>& coarseMap, RCP<const Map>& coarseColMap) const {
1075  const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1076  RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1077  ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1078  LO ct = 0;
1079 
1080  for (LO i = 0; i < (LO)d_data.size(); i++) {
1081  if (myPointType[i] == C_PT) {
1082  d_data[i] = coarseMap->getGlobalElement(ct);
1083  ct++;
1084  } else
1085  d_data[i] = GO_INVALID;
1086  }
1087 
1088  // Ghost this guy
1089  RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1090  c_coarseIds->doImport(*d_coarseIds, Importer, Xpetra::INSERT);
1091 
1092  // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1093  // be in Aztec ordering as well, which means we can just condense these guys down
1094  // Overallocate, count and view
1095  ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1096 
1097  Array<GO> c_gids(c_data.size());
1098  LO count = 0;
1099 
1100  for (LO i = 0; i < (LO)c_data.size(); i++) {
1101  if (c_data[i] != GO_INVALID) {
1102  c_gids[count] = c_data[i];
1103  count++;
1104  }
1105  }
1106  // FIXME: Assumes scalar PDE
1107  std::vector<size_t> stridingInfo_(1);
1108  stridingInfo_[0] = 1;
1109  GO domainGIDOffset = 0;
1110 
1111  coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1113  c_gids.view(0, count),
1114  coarseMap->getIndexBase(),
1115  stridingInfo_,
1116  coarseMap->getComm(),
1117  domainGIDOffset);
1118 }
1119 
1120 } // namespace MueLu
1121 
1122 #define MUELU_CLASSICALPFACTORY_SHORT
1123 #endif // MUELU_CLASSICALPFACTORY_DEF_HPP
void GenerateStrengthFlags(const Matrix &A, const LWGraph &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
void setValidator(RCP< const ParameterEntryValidator > const &validator)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
static void Write(const std::string &fileName, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &M)
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
Print more statistics.
size_type size() const
Print additional debugging information.
LocalOrdinal LO
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumEdges() const
Return number of graph edges.
size_type size() const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void resize(const size_type n, const T &val=T())
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void resize(size_type new_size, const value_type &x=value_type())
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
iterator end()
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex &#39;v&#39;.
size_type size() const
#define SET_VALID_ENTRY(name)
Scalar SC
Lightweight MueLu representation of a compressed row storage graph.
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const LWGraph &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
typename ClassicalMapFactory::point_type point_type
Exception throws to report errors in the internal logical of the program.
#define TEUCHOS_ASSERT(assertion_test)
void assign(size_type n, const value_type &val)
ParameterEntry & getEntry(const std::string &name)
iterator begin()
iterator begin() const
bool is_null() const
ArrayView< T > view(size_type lowerOffset, size_type size) const