MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
11 #define MUELU_TENTATIVEPFACTORY_DEF_HPP
12 
13 #include <Xpetra_MapFactory.hpp>
14 #include <Xpetra_Map.hpp>
15 #include <Xpetra_CrsMatrix.hpp>
17 #include <Xpetra_Matrix.hpp>
18 #include <Xpetra_MatrixMatrix.hpp>
19 #include <Xpetra_MultiVector.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
21 #include <Xpetra_VectorFactory.hpp>
22 #include <Xpetra_Import.hpp>
23 #include <Xpetra_ImportFactory.hpp>
24 #include <Xpetra_CrsMatrixWrap.hpp>
25 #include <Xpetra_StridedMap.hpp>
26 #include <Xpetra_StridedMapFactory.hpp>
27 #include <Xpetra_IO.hpp>
28 
29 #include "Xpetra_TpetraBlockCrsMatrix.hpp"
30 
32 
33 #include "MueLu_Aggregates.hpp"
34 #include "MueLu_AmalgamationInfo.hpp"
35 #include "MueLu_MasterList.hpp"
36 #include "MueLu_Monitor.hpp"
37 #include "MueLu_PerfUtils.hpp"
38 #include "MueLu_Utilities.hpp"
39 
40 namespace MueLu {
41 
42 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
44  RCP<ParameterList> validParamList = rcp(new ParameterList());
45 
46 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
47  SET_VALID_ENTRY("tentative: calculate qr");
48  SET_VALID_ENTRY("tentative: build coarse coordinates");
49  SET_VALID_ENTRY("tentative: constant column sums");
50 #undef SET_VALID_ENTRY
51  validParamList->set<std::string>("Nullspace name", "Nullspace", "Name for the input nullspace");
52 
53  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
54  validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
55  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
56  validParamList->set<RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
57  validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
58  validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
59  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
60  validParamList->set<RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
61 
62  // Make sure we don't recursively validate options for the matrixmatrix kernels
63  ParameterList norecurse;
64  norecurse.disableRecursiveValidation();
65  validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
66 
67  return validParamList;
68 }
69 
70 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
72  const ParameterList& pL = GetParameterList();
73  // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
74  std::string nspName = "Nullspace";
75  if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
76 
77  Input(fineLevel, "A");
78  Input(fineLevel, "Aggregates");
79  Input(fineLevel, nspName);
80  Input(fineLevel, "UnAmalgamationInfo");
81  Input(fineLevel, "CoarseMap");
82  if (fineLevel.GetLevelID() == 0 &&
83  fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
84  pL.get<bool>("tentative: build coarse coordinates")) { // and we want coordinates on other levels
85  bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
86  Input(fineLevel, "Coordinates");
87  } else if (bTransferCoordinates_) {
88  Input(fineLevel, "Coordinates");
89  }
90 }
91 
92 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94  return BuildP(fineLevel, coarseLevel);
95 }
96 
97 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99  FactoryMonitor m(*this, "Build", coarseLevel);
100  typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
101  typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
102  typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
103 
104  const ParameterList& pL = GetParameterList();
105  std::string nspName = "Nullspace";
106  if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
107 
108  RCP<Matrix> Ptentative;
109  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
110  RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel, "Aggregates");
111  // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
112  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
113  if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
114  Ptentative = Teuchos::null;
115  Set(coarseLevel, "P", Ptentative);
116  return;
117  }
118 
119  RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
120  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
121  RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
122  RCP<RealValuedMultiVector> fineCoords;
123  if (bTransferCoordinates_) {
124  fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
125  }
126 
127  // FIXME: We should remove the NodeComm on levels past the threshold
128  if (fineLevel.IsAvailable("Node Comm")) {
129  RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel, "Node Comm");
130  Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
131  }
132 
133  // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
134  TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
135  Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
136 
137  RCP<MultiVector> coarseNullspace;
138  RCP<RealValuedMultiVector> coarseCoords;
139 
140  if (bTransferCoordinates_) {
141  //*** Create the coarse coordinates ***
142  // First create the coarse map and coarse multivector
143  ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
144  LO blkSize = 1;
145  if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
146  blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
147  }
148  GO indexBase = coarseMap->getIndexBase();
149  LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
150  Array<GO> nodeList(numCoarseNodes);
151  const int numDimensions = fineCoords->getNumVectors();
152 
153  for (LO i = 0; i < numCoarseNodes; i++) {
154  nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
155  }
156  RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
158  nodeList,
159  indexBase,
160  fineCoords->getMap()->getComm());
161  coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
162 
163  // Create overlapped fine coordinates to reduce global communication
164  RCP<RealValuedMultiVector> ghostedCoords;
165  if (aggregates->AggregatesCrossProcessors()) {
166  RCP<const Map> aggMap = aggregates->GetMap();
167  RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
168 
169  ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
170  ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
171  } else {
172  ghostedCoords = fineCoords;
173  }
174 
175  // Get some info about aggregates
176  int myPID = coarseCoordsMap->getComm()->getRank();
177  LO numAggs = aggregates->GetNumAggregates();
178  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
179  const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
180  const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
181 
182  // Fill in coarse coordinates
183  for (int dim = 0; dim < numDimensions; ++dim) {
184  ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
185  ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
186 
187  for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
188  if (procWinner[lnode] == myPID &&
189  lnode < fineCoordsData.size() &&
190  vertex2AggID[lnode] < coarseCoordsData.size() &&
191  Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
192  coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
193  }
194  }
195  for (LO agg = 0; agg < numAggs; agg++) {
196  coarseCoordsData[agg] /= aggSizes[agg];
197  }
198  }
199  }
200 
201  if (!aggregates->AggregatesCrossProcessors()) {
203  BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
204  } else {
205  BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
206  }
207  } else
208  BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
209 
210  // If available, use striding information of fine level matrix A for range
211  // map and coarseMap as domain map; otherwise use plain range map of
212  // Ptent = plain range map of A for range map and coarseMap as domain map.
213  // NOTE:
214  // The latter is not really safe, since there is no striding information
215  // for the range map. This is not really a problem, since striding
216  // information is always available on the intermedium levels and the
217  // coarsest levels.
218  if (A->IsView("stridedMaps") == true)
219  Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
220  else
221  Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
222 
223  if (bTransferCoordinates_) {
224  Set(coarseLevel, "Coordinates", coarseCoords);
225  }
226  Set(coarseLevel, "Nullspace", coarseNullspace);
227  Set(coarseLevel, "P", Ptentative);
228 
229  if (IsPrint(Statistics2)) {
230  RCP<ParameterList> params = rcp(new ParameterList());
231  params->set("printLoadBalancingInfo", true);
232  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
233  }
234 }
235 
236 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
239  RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
240  /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
241  be generalized later, if we ever need to do so:
242  1) Null space dimension === block size of matrix: So no elasticity right now
243  2) QR is not supported: Under assumption #1, this shouldn't cause problems.
244  3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
245 
246  These assumptions keep our code way simpler and still support the use cases we actually care about.
247  */
248 
249  RCP<const Map> rowMap = A->getRowMap();
250  RCP<const Map> rangeMap = A->getRangeMap();
251  RCP<const Map> colMap = A->getColMap();
252  // const size_t numFinePointRows = rangeMap->getLocalNumElements();
253  const size_t numFineBlockRows = rowMap->getLocalNumElements();
254 
255  typedef Teuchos::ScalarTraits<SC> STS;
256  // typedef typename STS::magnitudeType Magnitude;
257  const SC zero = STS::zero();
258  const SC one = STS::one();
259  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
260 
261  const GO numAggs = aggregates->GetNumAggregates();
262  const size_t NSDim = fineNullspace->getNumVectors();
263  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
264 
265  // Need to generate the coarse block map
266  // NOTE: We assume NSDim == block size here
267  // NOTE: We also assume that coarseMap has contiguous GIDs
268  // const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
269  const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
270  RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
272  numCoarseBlockRows,
273  coarsePointMap->getIndexBase(),
274  coarsePointMap->getComm());
275  // Sanity checking
276  const ParameterList& pL = GetParameterList();
277  const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
278  const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
279 
280  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
281  "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
282 
283  // The aggregates use the amalgamated column map, which in this case is what we want
284 
285  // Aggregates map is based on the amalgamated column map
286  // We can skip global-to-local conversion if LIDs in row map are
287  // same as LIDs in column map
288  bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
289 
290  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
291  // aggStart is a pointer into aggToRowMapLO
292  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
293  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
294  ArrayRCP<LO> aggStart;
295  ArrayRCP<LO> aggToRowMapLO;
296  ArrayRCP<GO> aggToRowMapGO;
297  if (goodMap) {
298  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
299  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
300  } else {
301  throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
302  }
303 
304  coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
305 
306  // Pull out the nullspace vectors so that we can have random access.
309  for (size_t i = 0; i < NSDim; i++) {
310  fineNS[i] = fineNullspace->getData(i);
311  if (coarsePointMap->getLocalNumElements() > 0)
312  coarseNS[i] = coarseNullspace->getDataNonConst(i);
313  }
314 
315  // BlockCrs requires that we build the (block) graph first, so let's do that...
316  // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
317  // block non-zero per row in the matrix;
318  RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
319  ArrayRCP<size_t> iaPtent;
320  ArrayRCP<LO> jaPtent;
321  BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
322  ArrayView<size_t> ia = iaPtent();
323  ArrayView<LO> ja = jaPtent();
324 
325  for (size_t i = 0; i < numFineBlockRows; i++) {
326  ia[i] = i;
327  ja[i] = INVALID;
328  }
329  ia[numCoarseBlockRows] = numCoarseBlockRows;
330 
331  for (GO agg = 0; agg < numAggs; agg++) {
332  LO aggSize = aggStart[agg + 1] - aggStart[agg];
333  Xpetra::global_size_t offset = agg;
334 
335  for (LO j = 0; j < aggSize; j++) {
336  // FIXME: Allow for bad maps
337  const LO localRow = aggToRowMapLO[aggStart[agg] + j];
338  const size_t rowStart = ia[localRow];
339  ja[rowStart] = offset;
340  }
341  }
342 
343  // Compress storage (remove all INVALID, which happen when we skip zeros)
344  // We do that in-place
345  size_t ia_tmp = 0, nnz = 0;
346  for (size_t i = 0; i < numFineBlockRows; i++) {
347  for (size_t j = ia_tmp; j < ia[i + 1]; j++)
348  if (ja[j] != INVALID) {
349  ja[nnz] = ja[j];
350  nnz++;
351  }
352  ia_tmp = ia[i + 1];
353  ia[i + 1] = nnz;
354  }
355 
356  if (rowMap->lib() == Xpetra::UseTpetra) {
357  // - Cannot resize for Epetra, as it checks for same pointers
358  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
359  // NOTE: these invalidate ja and val views
360  jaPtent.resize(nnz);
361  }
362 
363  GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
364  BlockGraph->setAllIndices(iaPtent, jaPtent);
365 
366  // Managing labels & constants for ESFC
367  {
368  RCP<ParameterList> FCparams;
369  if (pL.isSublist("matrixmatrix: kernel params"))
370  FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
371  else
372  FCparams = rcp(new ParameterList);
373  // By default, we don't need global constants for TentativeP, but we do want it for the graph
374  // if we're printing statistics, so let's leave it on for now.
375  FCparams->set("compute global constants", FCparams->get("compute global constants", true));
376  std::string levelIDs = toString(levelID);
377  FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
378  RCP<const Export> dummy_e;
379  RCP<const Import> dummy_i;
380  BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
381  }
382 
383  // Now let's make a BlockCrs Matrix
384  // NOTE: Assumes block size== NSDim
385  RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
387  if (P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
388  RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
389 
391  // "no-QR" option //
393  // Local Q factor is just the fine nullspace support over the current aggregate.
394  // Local R factor is the identity.
395  // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
396  // NOTE: "goodMap" case only
397  Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
398  Teuchos::Array<LO> bcol(1);
399 
400  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
401  for (LO agg = 0; agg < numAggs; agg++) {
402  bcol[0] = agg;
403  const LO aggSize = aggStart[agg + 1] - aggStart[agg];
404  Xpetra::global_size_t offset = agg * NSDim;
405 
406  // Process each row in the local Q factor
407  // NOTE: Blocks are in row-major order
408  for (LO j = 0; j < aggSize; j++) {
409  const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
410 
411  for (size_t r = 0; r < NSDim; r++) {
412  LO localPointRow = localBlockRow * NSDim + r;
413  for (size_t c = 0; c < NSDim; c++)
414  block[r * NSDim + c] = fineNS[c][localPointRow];
415  }
416  // NOTE: Assumes columns==aggs and are ordered sequentially
417  P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
418 
419  } // end aggSize
420 
421  for (size_t j = 0; j < NSDim; j++)
422  coarseNS[j][offset + j] = one;
423 
424  } // for (GO agg = 0; agg < numAggs; agg++)
425 
426  Ptentative = P_wrap;
427 }
428 
429 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
432  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
433  typedef Teuchos::ScalarTraits<SC> STS;
434  typedef typename STS::magnitudeType Magnitude;
435  const SC zero = STS::zero();
436  const SC one = STS::one();
437 
438  // number of aggregates
439  GO numAggs = aggregates->GetNumAggregates();
440 
441  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
442  // aggStart is a pointer into aggToRowMap
443  // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
444  // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
445  ArrayRCP<LO> aggStart;
446  ArrayRCP<GO> aggToRowMap;
447  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
448 
449  // find size of the largest aggregate
450  LO maxAggSize = 0;
451  for (GO i = 0; i < numAggs; ++i) {
452  LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
453  if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
454  }
455 
456  // dimension of fine level nullspace
457  const size_t NSDim = fineNullspace->getNumVectors();
458 
459  // index base for coarse Dof map (usually 0)
460  GO indexBase = A->getRowMap()->getIndexBase();
461 
462  const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
463  const RCP<const Map> uniqueMap = A->getDomainMap();
464  RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
465  RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
466  fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
467 
468  // Pull out the nullspace vectors so that we can have random access.
470  for (size_t i = 0; i < NSDim; ++i)
471  fineNS[i] = fineNullspaceWithOverlap->getData(i);
472 
473  // Allocate storage for the coarse nullspace.
474  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
475 
477  for (size_t i = 0; i < NSDim; ++i)
478  if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
479 
480  // This makes the rowmap of Ptent the same as that of A->
481  // This requires moving some parts of some local Q's to other processors
482  // because aggregates can span processors.
483  RCP<const Map> rowMapForPtent = A->getRowMap();
484  const Map& rowMapForPtentRef = *rowMapForPtent;
485 
486  // Set up storage for the rows of the local Qs that belong to other processors.
487  // FIXME This is inefficient and could be done within the main loop below with std::vector's.
488  RCP<const Map> colMap = A->getColMap();
489 
490  RCP<const Map> ghostQMap;
491  RCP<MultiVector> ghostQvalues;
493  RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
494  ArrayRCP<ArrayRCP<SC> > ghostQvals;
495  ArrayRCP<ArrayRCP<GO> > ghostQcols;
496  ArrayRCP<GO> ghostQrows;
497 
498  Array<GO> ghostGIDs;
499  for (LO j = 0; j < numAggs; ++j) {
500  for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
501  if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
502  ghostGIDs.push_back(aggToRowMap[k]);
503  }
504  }
505  }
506  ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
508  ghostGIDs,
509  indexBase, A->getRowMap()->getComm()); // JG:Xpetra::global_size_t>?
510  // Vector to hold bits of Q that go to other processors.
511  ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
512  // Note that Epetra does not support MultiVectors templated on Scalar != double.
513  // So to work around this, we allocate an array of Vectors. This shouldn't be too
514  // expensive, as the number of Vectors is NSDim.
515  ghostQcolumns.resize(NSDim);
516  for (size_t i = 0; i < NSDim; ++i)
517  ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
518  ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
519  if (ghostQvalues->getLocalLength() > 0) {
520  ghostQvals.resize(NSDim);
521  ghostQcols.resize(NSDim);
522  for (size_t i = 0; i < NSDim; ++i) {
523  ghostQvals[i] = ghostQvalues->getDataNonConst(i);
524  ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
525  }
526  ghostQrows = ghostQrowNums->getDataNonConst(0);
527  }
528 
529  // importer to handle moving Q
530  importer = ImportFactory::Build(ghostQMap, A->getRowMap());
531 
532  // Dense QR solver
534 
535  // Allocate temporary storage for the tentative prolongator.
536  Array<GO> globalColPtr(maxAggSize * NSDim, 0);
537  Array<LO> localColPtr(maxAggSize * NSDim, 0);
538  Array<SC> valPtr(maxAggSize * NSDim, 0.);
539 
540  // Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
541  const Map& coarseMapRef = *coarseMap;
542 
543  // For the 3-arrays constructor
544  ArrayRCP<size_t> ptent_rowptr;
545  ArrayRCP<LO> ptent_colind;
546  ArrayRCP<Scalar> ptent_values;
547 
548  // Because ArrayRCPs are slow...
549  ArrayView<size_t> rowptr_v;
550  ArrayView<LO> colind_v;
551  ArrayView<Scalar> values_v;
552 
553  // For temporary usage
554  Array<size_t> rowptr_temp;
555  Array<LO> colind_temp;
556  Array<Scalar> values_temp;
557 
558  RCP<CrsMatrix> PtentCrs;
559 
560  RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
561  PtentCrs = PtentCrsWrap->getCrsMatrix();
562  Ptentative = PtentCrsWrap;
563 
564  //*****************************************************************
565  // Loop over all aggregates and calculate local QR decompositions.
566  //*****************************************************************
567  GO qctr = 0; // for indexing into Ptent data vectors
568  const Map& nonUniqueMapRef = *nonUniqueMap;
569 
570  size_t total_nnz_count = 0;
571 
572  for (GO agg = 0; agg < numAggs; ++agg) {
573  LO myAggSize = aggStart[agg + 1] - aggStart[agg];
574  // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
575  // "localQR" (in column major format) for the QR routine.
576  Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
577  for (size_t j = 0; j < NSDim; ++j) {
578  bool bIsZeroNSColumn = true;
579  for (LO k = 0; k < myAggSize; ++k) {
580  // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
581  // fineNS[j][n] is the nth entry in the jth NS vector
582  try {
583  SC nsVal = fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])]; // extract information from fine level NS
584  localQR(k, j) = nsVal;
585  if (nsVal != zero) bIsZeroNSColumn = false;
586  } catch (...) {
587  GetOStream(Runtime1, -1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
588  GetOStream(Runtime1, -1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
589  GetOStream(Runtime1, -1) << "(local?) aggId=" << agg << std::endl;
590  GetOStream(Runtime1, -1) << "aggSize=" << myAggSize << std::endl;
591  GetOStream(Runtime1, -1) << "agg DOF=" << k << std::endl;
592  GetOStream(Runtime1, -1) << "NS vector j=" << j << std::endl;
593  GetOStream(Runtime1, -1) << "j*myAggSize + k = " << j * myAggSize + k << std::endl;
594  GetOStream(Runtime1, -1) << "aggToRowMap[" << agg << "][" << k << "] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
595  GetOStream(Runtime1, -1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] << " is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
596  GetOStream(Runtime1, -1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
597  GetOStream(Runtime1, -1) << "fineNS...=" << fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
598  GetOStream(Errors, -1) << "caught an error!" << std::endl;
599  }
600  } // for (LO k=0 ...
601  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
602  } // for (LO j=0 ...
603 
604  Xpetra::global_size_t offset = agg * NSDim;
605 
606  if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
607  // calculate QR decomposition (standard)
608  // R is stored in localQR (size: myAggSize x NSDim)
609 
610  // Householder multiplier
611  SC tau = localQR(0, 0);
612 
613  if (NSDim == 1) {
614  // Only one nullspace vector, so normalize by hand
615  Magnitude dtemp = 0;
616  for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
617  Magnitude tmag = STS::magnitude(localQR(k, 0));
618  dtemp += tmag * tmag;
619  }
621  tau = localQR(0, 0);
622  localQR(0, 0) = dtemp;
623  } else {
624  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
625  qrSolver.factor();
626  }
627 
628  // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
629  // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
630  // This stores the (offset+k)th entry only if it is local according to the coarseMap.
631  for (size_t j = 0; j < NSDim; ++j) {
632  for (size_t k = 0; k <= j; ++k) {
633  try {
634  if (coarseMapRef.isNodeLocalElement(offset + k)) {
635  coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
636  }
637  } catch (...) {
638  GetOStream(Errors, -1) << "caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k << std::endl;
639  }
640  }
641  }
642 
643  // Calculate Q, the tentative prolongator.
644  // The Lapack GEQRF call only works for myAggsize >= NSDim
645 
646  if (NSDim == 1) {
647  // Only one nullspace vector, so calculate Q by hand
648  Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
649  localQR(0, 0) = tau;
650  dtemp = 1 / dtemp;
651  for (LocalOrdinal i = 0; i < myAggSize; ++i) {
652  localQR(i, 0) *= dtemp;
653  }
654  } else {
655  qrSolver.formQ();
656  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
657  for (size_t j = 0; j < NSDim; j++) {
658  for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
659  localQR(i, j) = (*qFactor)(i, j);
660  }
661  }
662  }
663 
664  // end default case (myAggSize >= NSDim)
665  } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
666  // See comments for the uncoupled case
667 
668  // R = extended (by adding identity rows) localQR
669  for (size_t j = 0; j < NSDim; j++)
670  for (size_t k = 0; k < NSDim; k++) {
671  TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset + k), Exceptions::RuntimeError,
672  "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k);
673 
674  if (k < as<size_t>(myAggSize))
675  coarseNS[j][offset + k] = localQR(k, j);
676  else
677  coarseNS[j][offset + k] = (k == j ? one : zero);
678  }
679 
680  // Q = I (rectangular)
681  for (size_t i = 0; i < as<size_t>(myAggSize); i++)
682  for (size_t j = 0; j < NSDim; j++)
683  localQR(i, j) = (j == i ? one : zero);
684  } // end else (special handling for 1pt aggregates)
685 
686  // Process each row in the local Q factor. If the row is local to the current processor
687  // according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
688  // to be communicated later to the owning processor.
689  // FIXME -- what happens if maps are blocked?
690  for (GO j = 0; j < myAggSize; ++j) {
691  // This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
692  // If it is, the row is inserted. If not, the row number, columns, and values are saved in
693  // MultiVectors that will be sent to other processors.
694  GO globalRow = aggToRowMap[aggStart[agg] + j];
695 
696  // TODO is the use of Xpetra::global_size_t below correct?
697  if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false) {
698  ghostQrows[qctr] = globalRow;
699  for (size_t k = 0; k < NSDim; ++k) {
700  ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
701  ghostQvals[k][qctr] = localQR(j, k);
702  }
703  ++qctr;
704  } else {
705  size_t nnz = 0;
706  for (size_t k = 0; k < NSDim; ++k) {
707  try {
708  if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
709  localColPtr[nnz] = agg * NSDim + k;
710  globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
711  valPtr[nnz] = localQR(j, k);
712  ++total_nnz_count;
713  ++nnz;
714  }
715  } catch (...) {
716  GetOStream(Errors, -1) << "caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
717  }
718  } // for (size_t k=0; k<NSDim; ++k)
719 
720  try {
721  Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
722  } catch (...) {
723  GetOStream(Errors, -1) << "pid " << A->getRowMap()->getComm()->getRank()
724  << "caught error during Ptent row insertion, global row "
725  << globalRow << std::endl;
726  }
727  }
728  } // for (GO j=0; j<myAggSize; ++j)
729 
730  } // for (LO agg=0; agg<numAggs; ++agg)
731 
732  // ***********************************************************
733  // ************* end of aggregate-wise QR ********************
734  // ***********************************************************
735  GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
736  // Import ghost parts of Q factors and insert into Ptentative.
737  // First import just the global row numbers.
739  targetQrowNums->putScalar(-1);
740  targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
741  ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
742 
743  // Now create map based on just the row numbers imported.
744  Array<GO> gidsToImport;
745  gidsToImport.reserve(targetQrows.size());
746  for (typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
747  if (*r > -1) {
748  gidsToImport.push_back(*r);
749  }
750  }
751  RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
753  gidsToImport, indexBase, A->getRowMap()->getComm());
754 
755  // Import using the row numbers that this processor will receive.
756  importer = ImportFactory::Build(ghostQMap, reducedMap);
757 
758  Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
759  for (size_t i = 0; i < NSDim; ++i) {
760  targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
761  targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
762  }
763  RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
764  targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
765 
766  ArrayRCP<ArrayRCP<SC> > targetQvals;
767  ArrayRCP<ArrayRCP<GO> > targetQcols;
768  if (targetQvalues->getLocalLength() > 0) {
769  targetQvals.resize(NSDim);
770  targetQcols.resize(NSDim);
771  for (size_t i = 0; i < NSDim; ++i) {
772  targetQvals[i] = targetQvalues->getDataNonConst(i);
773  targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
774  }
775  }
776 
777  valPtr = Array<SC>(NSDim, 0.);
778  globalColPtr = Array<GO>(NSDim, 0);
779  for (typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
780  if (targetQvalues->getLocalLength() > 0) {
781  for (size_t j = 0; j < NSDim; ++j) {
782  valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
783  globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
784  }
785  Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
786  } // if (targetQvalues->getLocalLength() > 0)
787  }
788 
789  Ptentative->fillComplete(coarseMap, A->getDomainMap());
790 }
791 
792 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
795  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
796  RCP<const Map> rowMap = A->getRowMap();
797  RCP<const Map> colMap = A->getColMap();
798  const size_t numRows = rowMap->getLocalNumElements();
799 
800  typedef Teuchos::ScalarTraits<SC> STS;
801  typedef typename STS::magnitudeType Magnitude;
802  const SC zero = STS::zero();
803  const SC one = STS::one();
804  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
805 
806  const GO numAggs = aggregates->GetNumAggregates();
807  const size_t NSDim = fineNullspace->getNumVectors();
808  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
809 
810  // Sanity checking
811  const ParameterList& pL = GetParameterList();
812  const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
813  const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
814 
815  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
816  "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
817 
818  // Aggregates map is based on the amalgamated column map
819  // We can skip global-to-local conversion if LIDs in row map are
820  // same as LIDs in column map
821  bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
822 
823  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
824  // aggStart is a pointer into aggToRowMapLO
825  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
826  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
827  ArrayRCP<LO> aggStart;
828  ArrayRCP<LO> aggToRowMapLO;
829  ArrayRCP<GO> aggToRowMapGO;
830  if (goodMap) {
831  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
832  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
833 
834  } else {
835  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
836  GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
837  << "using GO->LO conversion with performance penalty" << std::endl;
838  }
839  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
840 
841  // Pull out the nullspace vectors so that we can have random access.
844  for (size_t i = 0; i < NSDim; i++) {
845  fineNS[i] = fineNullspace->getData(i);
846  if (coarseMap->getLocalNumElements() > 0)
847  coarseNS[i] = coarseNullspace->getDataNonConst(i);
848  }
849 
850  size_t nnzEstimate = numRows * NSDim;
851 
852  // Time to construct the matrix and fill in the values
853  Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
854  RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
855 
856  ArrayRCP<size_t> iaPtent;
857  ArrayRCP<LO> jaPtent;
858  ArrayRCP<SC> valPtent;
859 
860  PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
861 
862  ArrayView<size_t> ia = iaPtent();
863  ArrayView<LO> ja = jaPtent();
864  ArrayView<SC> val = valPtent();
865 
866  ia[0] = 0;
867  for (size_t i = 1; i <= numRows; i++)
868  ia[i] = ia[i - 1] + NSDim;
869 
870  for (size_t j = 0; j < nnzEstimate; j++) {
871  ja[j] = INVALID;
872  val[j] = zero;
873  }
874 
875  if (doQRStep) {
877  // Standard aggregate-wise QR //
879  for (GO agg = 0; agg < numAggs; agg++) {
880  LO aggSize = aggStart[agg + 1] - aggStart[agg];
881 
882  Xpetra::global_size_t offset = agg * NSDim;
883 
884  // Extract the piece of the nullspace corresponding to the aggregate, and
885  // put it in the flat array, "localQR" (in column major format) for the
886  // QR routine.
887  Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
888  if (goodMap) {
889  for (size_t j = 0; j < NSDim; j++)
890  for (LO k = 0; k < aggSize; k++)
891  localQR(k, j) = fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
892  } else {
893  for (size_t j = 0; j < NSDim; j++)
894  for (LO k = 0; k < aggSize; k++)
895  localQR(k, j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
896  }
897 
898  // Test for zero columns
899  for (size_t j = 0; j < NSDim; j++) {
900  bool bIsZeroNSColumn = true;
901 
902  for (LO k = 0; k < aggSize; k++)
903  if (localQR(k, j) != zero)
904  bIsZeroNSColumn = false;
905 
906  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
907  "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
908  }
909 
910  // Calculate QR decomposition (standard)
911  // NOTE: Q is stored in localQR and R is stored in coarseNS
912  if (aggSize >= Teuchos::as<LO>(NSDim)) {
913  if (NSDim == 1) {
914  // Only one nullspace vector, calculate Q and R by hand
915  Magnitude norm = STS::magnitude(zero);
916  for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
917  norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
919 
920  // R = norm
921  coarseNS[0][offset] = norm;
922 
923  // Q = localQR(:,0)/norm
924  for (LO i = 0; i < aggSize; i++)
925  localQR(i, 0) /= norm;
926 
927  } else {
929  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
930  qrSolver.factor();
931 
932  // R = upper triangular part of localQR
933  for (size_t j = 0; j < NSDim; j++)
934  for (size_t k = 0; k <= j; k++)
935  coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
936 
937  // Calculate Q, the tentative prolongator.
938  // The Lapack GEQRF call only works for myAggsize >= NSDim
939  qrSolver.formQ();
941  for (size_t j = 0; j < NSDim; j++)
942  for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
943  localQR(i, j) = (*qFactor)(i, j);
944  }
945 
946  } else {
947  // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
948 
949  // The local QR decomposition is not possible in the "overconstrained"
950  // case (i.e. number of columns in localQR > number of rows), which
951  // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
952  // is only possible for single node aggregates in structural mechanics.
953  // (Similar problems may arise in discontinuous Galerkin problems...)
954  // We bypass the QR decomposition and use an identity block in the
955  // tentative prolongator for the single node aggregate and transfer the
956  // corresponding fine level null space information 1-to-1 to the coarse
957  // level null space part.
958 
959  // NOTE: The resulting tentative prolongation operator has
960  // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
961  // coarse level operator A. To deal with that one has the following
962  // options:
963  // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
964  // false) to add some identity block to the diagonal of the zero rows
965  // in the coarse level operator A, such that standard level smoothers
966  // can be used again.
967  // - Use special (projection-based) level smoothers, which can deal
968  // with singular matrices (very application specific)
969  // - Adapt the code below to avoid zero columns. However, we do not
970  // support a variable number of DOFs per node in MueLu/Xpetra which
971  // makes the implementation really hard.
972 
973  // R = extended (by adding identity rows) localQR
974  for (size_t j = 0; j < NSDim; j++)
975  for (size_t k = 0; k < NSDim; k++)
976  if (k < as<size_t>(aggSize))
977  coarseNS[j][offset + k] = localQR(k, j);
978  else
979  coarseNS[j][offset + k] = (k == j ? one : zero);
980 
981  // Q = I (rectangular)
982  for (size_t i = 0; i < as<size_t>(aggSize); i++)
983  for (size_t j = 0; j < NSDim; j++)
984  localQR(i, j) = (j == i ? one : zero);
985  }
986 
987  // Process each row in the local Q factor
988  // FIXME: What happens if maps are blocked?
989  for (LO j = 0; j < aggSize; j++) {
990  LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
991 
992  size_t rowStart = ia[localRow];
993  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
994  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
995  if (localQR(j, k) != zero) {
996  ja[rowStart + lnnz] = offset + k;
997  val[rowStart + lnnz] = localQR(j, k);
998  lnnz++;
999  }
1000  }
1001  }
1002  }
1003 
1004  } else {
1005  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1006  if (NSDim > 1)
1007  GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1009  // "no-QR" option //
1011  // Local Q factor is just the fine nullspace support over the current aggregate.
1012  // Local R factor is the identity.
1013  // TODO I have not implemented any special handling for aggregates that are too
1014  // TODO small to locally support the nullspace, as is done in the standard QR
1015  // TODO case above.
1016  if (goodMap) {
1017  for (GO agg = 0; agg < numAggs; agg++) {
1018  const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1019  Xpetra::global_size_t offset = agg * NSDim;
1020 
1021  // Process each row in the local Q factor
1022  // FIXME: What happens if maps are blocked?
1023  for (LO j = 0; j < aggSize; j++) {
1024  // TODO Here I do not check for a zero nullspace column on the aggregate.
1025  // as is done in the standard QR case.
1026 
1027  const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1028 
1029  const size_t rowStart = ia[localRow];
1030 
1031  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1032  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1033  SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1034  if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1035  if (qr_jk != zero) {
1036  ja[rowStart + lnnz] = offset + k;
1037  val[rowStart + lnnz] = qr_jk;
1038  lnnz++;
1039  }
1040  }
1041  }
1042  for (size_t j = 0; j < NSDim; j++)
1043  coarseNS[j][offset + j] = one;
1044  } // for (GO agg = 0; agg < numAggs; agg++)
1045 
1046  } else {
1047  for (GO agg = 0; agg < numAggs; agg++) {
1048  const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1049  Xpetra::global_size_t offset = agg * NSDim;
1050  for (LO j = 0; j < aggSize; j++) {
1051  const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1052 
1053  const size_t rowStart = ia[localRow];
1054 
1055  for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1056  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1057  SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1058  if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1059  if (qr_jk != zero) {
1060  ja[rowStart + lnnz] = offset + k;
1061  val[rowStart + lnnz] = qr_jk;
1062  lnnz++;
1063  }
1064  }
1065  }
1066  for (size_t j = 0; j < NSDim; j++)
1067  coarseNS[j][offset + j] = one;
1068  } // for (GO agg = 0; agg < numAggs; agg++)
1069 
1070  } // if (goodmap) else ...
1071 
1072  } // if doQRStep ... else
1073 
1074  // Compress storage (remove all INVALID, which happen when we skip zeros)
1075  // We do that in-place
1076  size_t ia_tmp = 0, nnz = 0;
1077  for (size_t i = 0; i < numRows; i++) {
1078  for (size_t j = ia_tmp; j < ia[i + 1]; j++)
1079  if (ja[j] != INVALID) {
1080  ja[nnz] = ja[j];
1081  val[nnz] = val[j];
1082  nnz++;
1083  }
1084  ia_tmp = ia[i + 1];
1085  ia[i + 1] = nnz;
1086  }
1087  if (rowMap->lib() == Xpetra::UseTpetra) {
1088  // - Cannot resize for Epetra, as it checks for same pointers
1089  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1090  // NOTE: these invalidate ja and val views
1091  jaPtent.resize(nnz);
1092  valPtent.resize(nnz);
1093  }
1094 
1095  GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1096 
1097  PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1098 
1099  // Managing labels & constants for ESFC
1100  RCP<ParameterList> FCparams;
1101  if (pL.isSublist("matrixmatrix: kernel params"))
1102  FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1103  else
1104  FCparams = rcp(new ParameterList);
1105  // By default, we don't need global constants for TentativeP
1106  FCparams->set("compute global constants", FCparams->get("compute global constants", false));
1107  std::string levelIDs = toString(levelID);
1108  FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
1109  RCP<const Export> dummy_e;
1110  RCP<const Import> dummy_i;
1111 
1112  PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);
1113 }
1114 
1115 } // namespace MueLu
1116 
1117 // TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1118 
1119 #define MUELU_TENTATIVEPFACTORY_SHORT
1120 #endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
Important warning messages (one line)
void reserve(size_type n)
MueLu::DefaultLocalOrdinal LocalOrdinal
int setMatrix(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &A)
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
void UnamalgamateAggregates(const Aggregates &aggregates, Teuchos::ArrayRCP< LocalOrdinal > &aggStart, Teuchos::ArrayRCP< GlobalOrdinal > &aggToRowMap) const
UnamalgamateAggregates.
ArrayView< T > view(size_type offset, size_type size)
static T squareroot(T x)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
void UnamalgamateAggregatesLO(const Aggregates &aggregates, Teuchos::ArrayRCP< LocalOrdinal > &aggStart, Teuchos::ArrayRCP< LO > &aggToRowMap) const
LocalOrdinal LO
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
size_type size() const
static const NoFactory * get()
Print even more statistics.
#define SET_VALID_ENTRY(name)
void resize(const size_type n, const T &val=T())
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Teuchos::RCP< Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > ComputeUnamalgamatedImportDofMap(const Aggregates &aggregates) const
ComputeUnamalgamatedImportDofMap build overlapping dof row map from aggregates needed for overlapping...
bool isSublist(const std::string &name) const
Teuchos::ArrayRCP< LocalOrdinal > ComputeAggregateSizesArrayRCP(bool forceRecompute=false) const
Compute sizes of aggregates.
void resize(size_type new_size, const value_type &x=value_type())
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
size_t global_size_t
static bool isnaninf(const T &x)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
iterator end()
static magnitudeType magnitude(T a)
void push_back(const value_type &x)
Scalar SC
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
RCP< SerialDenseMatrix< OrdinalType, ScalarType > > getQ() const
Exception throws to report errors in the internal logical of the program.
iterator end() const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Description of what is happening (more verbose)
iterator begin()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
bool is_null() const