MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
47 #define MUELU_TENTATIVEPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MapFactory.hpp>
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_CrsMatrix.hpp>
53 #include <Xpetra_Matrix.hpp>
54 #include <Xpetra_MatrixMatrix.hpp>
55 #include <Xpetra_MultiVector.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 #include <Xpetra_Import.hpp>
59 #include <Xpetra_ImportFactory.hpp>
60 #include <Xpetra_CrsMatrixWrap.hpp>
61 #include <Xpetra_StridedMap.hpp>
62 #include <Xpetra_StridedMapFactory.hpp>
63 #include <Xpetra_IO.hpp>
64 
65 #include "Xpetra_TpetraBlockCrsMatrix.hpp"
66 
68 
69 #include "MueLu_Aggregates.hpp"
70 #include "MueLu_AmalgamationInfo.hpp"
71 #include "MueLu_MasterList.hpp"
72 #include "MueLu_Monitor.hpp"
73 #include "MueLu_PerfUtils.hpp"
74 #include "MueLu_Utilities.hpp"
75 
76 namespace MueLu {
77 
78 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
80  RCP<ParameterList> validParamList = rcp(new ParameterList());
81 
82 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
83  SET_VALID_ENTRY("tentative: calculate qr");
84  SET_VALID_ENTRY("tentative: build coarse coordinates");
85  SET_VALID_ENTRY("tentative: constant column sums");
86 #undef SET_VALID_ENTRY
87  validParamList->set<std::string>("Nullspace name", "Nullspace", "Name for the input nullspace");
88 
89  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
90  validParamList->set<RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
91  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
92  validParamList->set<RCP<const FactoryBase> >("Scaled Nullspace", Teuchos::null, "Generating factory of the scaled nullspace");
93  validParamList->set<RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
94  validParamList->set<RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
95  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory of the coordinates");
96  validParamList->set<RCP<const FactoryBase> >("Node Comm", Teuchos::null, "Generating factory of the node level communicator");
97 
98  // Make sure we don't recursively validate options for the matrixmatrix kernels
99  ParameterList norecurse;
100  norecurse.disableRecursiveValidation();
101  validParamList->set<ParameterList>("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
102 
103  return validParamList;
104 }
105 
106 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
108  const ParameterList& pL = GetParameterList();
109  // NOTE: This guy can only either be 'Nullspace' or 'Scaled Nullspace' or else the validator above will cause issues
110  std::string nspName = "Nullspace";
111  if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
112 
113  Input(fineLevel, "A");
114  Input(fineLevel, "Aggregates");
115  Input(fineLevel, nspName);
116  Input(fineLevel, "UnAmalgamationInfo");
117  Input(fineLevel, "CoarseMap");
118  if (fineLevel.GetLevelID() == 0 &&
119  fineLevel.IsAvailable("Coordinates", NoFactory::get()) && // we have coordinates (provided by user app)
120  pL.get<bool>("tentative: build coarse coordinates")) { // and we want coordinates on other levels
121  bTransferCoordinates_ = true; // then set the transfer coordinates flag to true
122  Input(fineLevel, "Coordinates");
123  } else if (bTransferCoordinates_) {
124  Input(fineLevel, "Coordinates");
125  }
126 }
127 
128 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
130  return BuildP(fineLevel, coarseLevel);
131 }
132 
133 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
135  FactoryMonitor m(*this, "Build", coarseLevel);
136  typedef typename Teuchos::ScalarTraits<Scalar>::coordinateType coordinate_type;
137  typedef Xpetra::MultiVector<coordinate_type, LO, GO, NO> RealValuedMultiVector;
138  typedef Xpetra::MultiVectorFactory<coordinate_type, LO, GO, NO> RealValuedMultiVectorFactory;
139 
140  const ParameterList& pL = GetParameterList();
141  std::string nspName = "Nullspace";
142  if (pL.isParameter("Nullspace name")) nspName = pL.get<std::string>("Nullspace name");
143 
144  RCP<Matrix> Ptentative;
145  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
146  RCP<Aggregates> aggregates = Get<RCP<Aggregates> >(fineLevel, "Aggregates");
147  // No coarse DoFs so we need to bail by setting Ptentattive to null and returning
148  // This level will ultimately be removed in MueLu_Hierarchy_defs.h via a resize()
149  if (aggregates->GetNumGlobalAggregatesComputeIfNeeded() == 0) {
150  Ptentative = Teuchos::null;
151  Set(coarseLevel, "P", Ptentative);
152  return;
153  }
154 
155  RCP<AmalgamationInfo> amalgInfo = Get<RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
156  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, nspName);
157  RCP<const Map> coarseMap = Get<RCP<const Map> >(fineLevel, "CoarseMap");
158  RCP<RealValuedMultiVector> fineCoords;
159  if (bTransferCoordinates_) {
160  fineCoords = Get<RCP<RealValuedMultiVector> >(fineLevel, "Coordinates");
161  }
162 
163  // FIXME: We should remove the NodeComm on levels past the threshold
164  if (fineLevel.IsAvailable("Node Comm")) {
165  RCP<const Teuchos::Comm<int> > nodeComm = Get<RCP<const Teuchos::Comm<int> > >(fineLevel, "Node Comm");
166  Set<RCP<const Teuchos::Comm<int> > >(coarseLevel, "Node Comm", nodeComm);
167  }
168 
169  // NOTE: We check DomainMap here rather than RowMap because those are different for BlockCrs matrices
170  TEUCHOS_TEST_FOR_EXCEPTION(A->getDomainMap()->getLocalNumElements() != fineNullspace->getMap()->getLocalNumElements(),
171  Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: Size mismatch between A and Nullspace");
172 
173  RCP<MultiVector> coarseNullspace;
174  RCP<RealValuedMultiVector> coarseCoords;
175 
176  if (bTransferCoordinates_) {
177  //*** Create the coarse coordinates ***
178  // First create the coarse map and coarse multivector
179  ArrayView<const GO> elementAList = coarseMap->getLocalElementList();
180  LO blkSize = 1;
181  if (rcp_dynamic_cast<const StridedMap>(coarseMap) != Teuchos::null) {
182  blkSize = rcp_dynamic_cast<const StridedMap>(coarseMap)->getFixedBlockSize();
183  }
184  GO indexBase = coarseMap->getIndexBase();
185  LO numCoarseNodes = Teuchos::as<LO>(elementAList.size() / blkSize);
186  Array<GO> nodeList(numCoarseNodes);
187  const int numDimensions = fineCoords->getNumVectors();
188 
189  for (LO i = 0; i < numCoarseNodes; i++) {
190  nodeList[i] = (elementAList[i * blkSize] - indexBase) / blkSize + indexBase;
191  }
192  RCP<const Map> coarseCoordsMap = MapFactory::Build(fineCoords->getMap()->lib(),
194  nodeList,
195  indexBase,
196  fineCoords->getMap()->getComm());
197  coarseCoords = RealValuedMultiVectorFactory::Build(coarseCoordsMap, numDimensions);
198 
199  // Create overlapped fine coordinates to reduce global communication
200  RCP<RealValuedMultiVector> ghostedCoords;
201  if (aggregates->AggregatesCrossProcessors()) {
202  RCP<const Map> aggMap = aggregates->GetMap();
203  RCP<const Import> importer = ImportFactory::Build(fineCoords->getMap(), aggMap);
204 
205  ghostedCoords = RealValuedMultiVectorFactory::Build(aggMap, numDimensions);
206  ghostedCoords->doImport(*fineCoords, *importer, Xpetra::INSERT);
207  } else {
208  ghostedCoords = fineCoords;
209  }
210 
211  // Get some info about aggregates
212  int myPID = coarseCoordsMap->getComm()->getRank();
213  LO numAggs = aggregates->GetNumAggregates();
214  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
215  const ArrayRCP<const LO> vertex2AggID = aggregates->GetVertex2AggId()->getData(0);
216  const ArrayRCP<const LO> procWinner = aggregates->GetProcWinner()->getData(0);
217 
218  // Fill in coarse coordinates
219  for (int dim = 0; dim < numDimensions; ++dim) {
220  ArrayRCP<const coordinate_type> fineCoordsData = ghostedCoords->getData(dim);
221  ArrayRCP<coordinate_type> coarseCoordsData = coarseCoords->getDataNonConst(dim);
222 
223  for (LO lnode = 0; lnode < Teuchos::as<LO>(vertex2AggID.size()); lnode++) {
224  if (procWinner[lnode] == myPID &&
225  lnode < fineCoordsData.size() &&
226  vertex2AggID[lnode] < coarseCoordsData.size() &&
227  Teuchos::ScalarTraits<coordinate_type>::isnaninf(fineCoordsData[lnode]) == false) {
228  coarseCoordsData[vertex2AggID[lnode]] += fineCoordsData[lnode];
229  }
230  }
231  for (LO agg = 0; agg < numAggs; agg++) {
232  coarseCoordsData[agg] /= aggSizes[agg];
233  }
234  }
235  }
236 
237  if (!aggregates->AggregatesCrossProcessors()) {
239  BuildPuncoupledBlockCrs(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
240  } else {
241  BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace, coarseLevel.GetLevelID());
242  }
243  } else
244  BuildPcoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
245 
246  // If available, use striding information of fine level matrix A for range
247  // map and coarseMap as domain map; otherwise use plain range map of
248  // Ptent = plain range map of A for range map and coarseMap as domain map.
249  // NOTE:
250  // The latter is not really safe, since there is no striding information
251  // for the range map. This is not really a problem, since striding
252  // information is always available on the intermedium levels and the
253  // coarsest levels.
254  if (A->IsView("stridedMaps") == true)
255  Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
256  else
257  Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
258 
259  if (bTransferCoordinates_) {
260  Set(coarseLevel, "Coordinates", coarseCoords);
261  }
262  Set(coarseLevel, "Nullspace", coarseNullspace);
263  Set(coarseLevel, "P", Ptentative);
264 
265  if (IsPrint(Statistics2)) {
266  RCP<ParameterList> params = rcp(new ParameterList());
267  params->set("printLoadBalancingInfo", true);
268  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
269  }
270 }
271 
272 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
275  RCP<const Map> coarsePointMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
276  /* This routine generates a BlockCrs P for a BlockCrs A. There are a few assumptions here, which meet the use cases we care about, but could
277  be generalized later, if we ever need to do so:
278  1) Null space dimension === block size of matrix: So no elasticity right now
279  2) QR is not supported: Under assumption #1, this shouldn't cause problems.
280  3) Maps are "good": Aka the first chunk of the ColMap is the RowMap.
281 
282  These assumptions keep our code way simpler and still support the use cases we actually care about.
283  */
284 
285  RCP<const Map> rowMap = A->getRowMap();
286  RCP<const Map> rangeMap = A->getRangeMap();
287  RCP<const Map> colMap = A->getColMap();
288  // const size_t numFinePointRows = rangeMap->getLocalNumElements();
289  const size_t numFineBlockRows = rowMap->getLocalNumElements();
290 
291  typedef Teuchos::ScalarTraits<SC> STS;
292  // typedef typename STS::magnitudeType Magnitude;
293  const SC zero = STS::zero();
294  const SC one = STS::one();
295  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
296 
297  const GO numAggs = aggregates->GetNumAggregates();
298  const size_t NSDim = fineNullspace->getNumVectors();
299  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
300 
301  // Need to generate the coarse block map
302  // NOTE: We assume NSDim == block size here
303  // NOTE: We also assume that coarseMap has contiguous GIDs
304  // const size_t numCoarsePointRows = coarsePointMap->getLocalNumElements();
305  const size_t numCoarseBlockRows = coarsePointMap->getLocalNumElements() / NSDim;
306  RCP<const Map> coarseBlockMap = MapFactory::Build(coarsePointMap->lib(),
308  numCoarseBlockRows,
309  coarsePointMap->getIndexBase(),
310  coarsePointMap->getComm());
311  // Sanity checking
312  const ParameterList& pL = GetParameterList();
313  const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
314  const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
315 
316  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
317  "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
318 
319  // The aggregates use the amalgamated column map, which in this case is what we want
320 
321  // Aggregates map is based on the amalgamated column map
322  // We can skip global-to-local conversion if LIDs in row map are
323  // same as LIDs in column map
324  bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
325 
326  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
327  // aggStart is a pointer into aggToRowMapLO
328  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
329  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
330  ArrayRCP<LO> aggStart;
331  ArrayRCP<LO> aggToRowMapLO;
332  ArrayRCP<GO> aggToRowMapGO;
333  if (goodMap) {
334  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
335  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
336  } else {
337  throw std::runtime_error("TentativePFactory::PuncoupledBlockCrs: Inconsistent maps not currently supported");
338  }
339 
340  coarseNullspace = MultiVectorFactory::Build(coarsePointMap, NSDim);
341 
342  // Pull out the nullspace vectors so that we can have random access.
345  for (size_t i = 0; i < NSDim; i++) {
346  fineNS[i] = fineNullspace->getData(i);
347  if (coarsePointMap->getLocalNumElements() > 0)
348  coarseNS[i] = coarseNullspace->getDataNonConst(i);
349  }
350 
351  // BlockCrs requires that we build the (block) graph first, so let's do that...
352  // NOTE: Because we're assuming that the NSDim == BlockSize, we only have one
353  // block non-zero per row in the matrix;
354  RCP<CrsGraph> BlockGraph = CrsGraphFactory::Build(rowMap, coarseBlockMap, 0);
355  ArrayRCP<size_t> iaPtent;
356  ArrayRCP<LO> jaPtent;
357  BlockGraph->allocateAllIndices(numFineBlockRows, iaPtent, jaPtent);
358  ArrayView<size_t> ia = iaPtent();
359  ArrayView<LO> ja = jaPtent();
360 
361  for (size_t i = 0; i < numFineBlockRows; i++) {
362  ia[i] = i;
363  ja[i] = INVALID;
364  }
365  ia[numCoarseBlockRows] = numCoarseBlockRows;
366 
367  for (GO agg = 0; agg < numAggs; agg++) {
368  LO aggSize = aggStart[agg + 1] - aggStart[agg];
369  Xpetra::global_size_t offset = agg;
370 
371  for (LO j = 0; j < aggSize; j++) {
372  // FIXME: Allow for bad maps
373  const LO localRow = aggToRowMapLO[aggStart[agg] + j];
374  const size_t rowStart = ia[localRow];
375  ja[rowStart] = offset;
376  }
377  }
378 
379  // Compress storage (remove all INVALID, which happen when we skip zeros)
380  // We do that in-place
381  size_t ia_tmp = 0, nnz = 0;
382  for (size_t i = 0; i < numFineBlockRows; i++) {
383  for (size_t j = ia_tmp; j < ia[i + 1]; j++)
384  if (ja[j] != INVALID) {
385  ja[nnz] = ja[j];
386  nnz++;
387  }
388  ia_tmp = ia[i + 1];
389  ia[i + 1] = nnz;
390  }
391 
392  if (rowMap->lib() == Xpetra::UseTpetra) {
393  // - Cannot resize for Epetra, as it checks for same pointers
394  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
395  // NOTE: these invalidate ja and val views
396  jaPtent.resize(nnz);
397  }
398 
399  GetOStream(Runtime1) << "TentativePFactory : generating block graph" << std::endl;
400  BlockGraph->setAllIndices(iaPtent, jaPtent);
401 
402  // Managing labels & constants for ESFC
403  {
404  RCP<ParameterList> FCparams;
405  if (pL.isSublist("matrixmatrix: kernel params"))
406  FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
407  else
408  FCparams = rcp(new ParameterList);
409  // By default, we don't need global constants for TentativeP, but we do want it for the graph
410  // if we're printing statistics, so let's leave it on for now.
411  FCparams->set("compute global constants", FCparams->get("compute global constants", true));
412  std::string levelIDs = toString(levelID);
413  FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
414  RCP<const Export> dummy_e;
415  RCP<const Import> dummy_i;
416  BlockGraph->expertStaticFillComplete(coarseBlockMap, rowMap, dummy_i, dummy_e, FCparams);
417  }
418 
419  // Now let's make a BlockCrs Matrix
420  // NOTE: Assumes block size== NSDim
421  RCP<Xpetra::CrsMatrix<SC, LO, GO, NO> > P_xpetra = Xpetra::CrsMatrixFactory<SC, LO, GO, NO>::BuildBlock(BlockGraph, coarsePointMap, rangeMap, NSDim);
423  if (P_tpetra.is_null()) throw std::runtime_error("BuildPUncoupled: Matrix factory did not return a Tpetra::BlockCrsMatrix");
424  RCP<CrsMatrixWrap> P_wrap = rcp(new CrsMatrixWrap(P_xpetra));
425 
427  // "no-QR" option //
429  // Local Q factor is just the fine nullspace support over the current aggregate.
430  // Local R factor is the identity.
431  // NOTE: We're not going to do a QR here as we're assuming that blocksize == NSDim
432  // NOTE: "goodMap" case only
433  Teuchos::Array<Scalar> block(NSDim * NSDim, zero);
434  Teuchos::Array<LO> bcol(1);
435 
436  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
437  for (LO agg = 0; agg < numAggs; agg++) {
438  bcol[0] = agg;
439  const LO aggSize = aggStart[agg + 1] - aggStart[agg];
440  Xpetra::global_size_t offset = agg * NSDim;
441 
442  // Process each row in the local Q factor
443  // NOTE: Blocks are in row-major order
444  for (LO j = 0; j < aggSize; j++) {
445  const LO localBlockRow = aggToRowMapLO[aggStart[agg] + j];
446 
447  for (size_t r = 0; r < NSDim; r++) {
448  LO localPointRow = localBlockRow * NSDim + r;
449  for (size_t c = 0; c < NSDim; c++)
450  block[r * NSDim + c] = fineNS[c][localPointRow];
451  }
452  // NOTE: Assumes columns==aggs and are ordered sequentially
453  P_tpetra->replaceLocalValues(localBlockRow, bcol(), block());
454 
455  } // end aggSize
456 
457  for (size_t j = 0; j < NSDim; j++)
458  coarseNS[j][offset + j] = one;
459 
460  } // for (GO agg = 0; agg < numAggs; agg++)
461 
462  Ptentative = P_wrap;
463 }
464 
465 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
468  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
469  typedef Teuchos::ScalarTraits<SC> STS;
470  typedef typename STS::magnitudeType Magnitude;
471  const SC zero = STS::zero();
472  const SC one = STS::one();
473 
474  // number of aggregates
475  GO numAggs = aggregates->GetNumAggregates();
476 
477  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
478  // aggStart is a pointer into aggToRowMap
479  // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
480  // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
481  ArrayRCP<LO> aggStart;
482  ArrayRCP<GO> aggToRowMap;
483  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
484 
485  // find size of the largest aggregate
486  LO maxAggSize = 0;
487  for (GO i = 0; i < numAggs; ++i) {
488  LO sizeOfThisAgg = aggStart[i + 1] - aggStart[i];
489  if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
490  }
491 
492  // dimension of fine level nullspace
493  const size_t NSDim = fineNullspace->getNumVectors();
494 
495  // index base for coarse Dof map (usually 0)
496  GO indexBase = A->getRowMap()->getIndexBase();
497 
498  const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
499  const RCP<const Map> uniqueMap = A->getDomainMap();
500  RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
501  RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap, NSDim);
502  fineNullspaceWithOverlap->doImport(*fineNullspace, *importer, Xpetra::INSERT);
503 
504  // Pull out the nullspace vectors so that we can have random access.
506  for (size_t i = 0; i < NSDim; ++i)
507  fineNS[i] = fineNullspaceWithOverlap->getData(i);
508 
509  // Allocate storage for the coarse nullspace.
510  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
511 
513  for (size_t i = 0; i < NSDim; ++i)
514  if (coarseMap->getLocalNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
515 
516  // This makes the rowmap of Ptent the same as that of A->
517  // This requires moving some parts of some local Q's to other processors
518  // because aggregates can span processors.
519  RCP<const Map> rowMapForPtent = A->getRowMap();
520  const Map& rowMapForPtentRef = *rowMapForPtent;
521 
522  // Set up storage for the rows of the local Qs that belong to other processors.
523  // FIXME This is inefficient and could be done within the main loop below with std::vector's.
524  RCP<const Map> colMap = A->getColMap();
525 
526  RCP<const Map> ghostQMap;
527  RCP<MultiVector> ghostQvalues;
529  RCP<Xpetra::Vector<GO, LO, GO, Node> > ghostQrowNums;
530  ArrayRCP<ArrayRCP<SC> > ghostQvals;
531  ArrayRCP<ArrayRCP<GO> > ghostQcols;
532  ArrayRCP<GO> ghostQrows;
533 
534  Array<GO> ghostGIDs;
535  for (LO j = 0; j < numAggs; ++j) {
536  for (LO k = aggStart[j]; k < aggStart[j + 1]; ++k) {
537  if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
538  ghostGIDs.push_back(aggToRowMap[k]);
539  }
540  }
541  }
542  ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
544  ghostGIDs,
545  indexBase, A->getRowMap()->getComm()); // JG:Xpetra::global_size_t>?
546  // Vector to hold bits of Q that go to other processors.
547  ghostQvalues = MultiVectorFactory::Build(ghostQMap, NSDim);
548  // Note that Epetra does not support MultiVectors templated on Scalar != double.
549  // So to work around this, we allocate an array of Vectors. This shouldn't be too
550  // expensive, as the number of Vectors is NSDim.
551  ghostQcolumns.resize(NSDim);
552  for (size_t i = 0; i < NSDim; ++i)
553  ghostQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
554  ghostQrowNums = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(ghostQMap);
555  if (ghostQvalues->getLocalLength() > 0) {
556  ghostQvals.resize(NSDim);
557  ghostQcols.resize(NSDim);
558  for (size_t i = 0; i < NSDim; ++i) {
559  ghostQvals[i] = ghostQvalues->getDataNonConst(i);
560  ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
561  }
562  ghostQrows = ghostQrowNums->getDataNonConst(0);
563  }
564 
565  // importer to handle moving Q
566  importer = ImportFactory::Build(ghostQMap, A->getRowMap());
567 
568  // Dense QR solver
570 
571  // Allocate temporary storage for the tentative prolongator.
572  Array<GO> globalColPtr(maxAggSize * NSDim, 0);
573  Array<LO> localColPtr(maxAggSize * NSDim, 0);
574  Array<SC> valPtr(maxAggSize * NSDim, 0.);
575 
576  // Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
577  const Map& coarseMapRef = *coarseMap;
578 
579  // For the 3-arrays constructor
580  ArrayRCP<size_t> ptent_rowptr;
581  ArrayRCP<LO> ptent_colind;
582  ArrayRCP<Scalar> ptent_values;
583 
584  // Because ArrayRCPs are slow...
585  ArrayView<size_t> rowptr_v;
586  ArrayView<LO> colind_v;
587  ArrayView<Scalar> values_v;
588 
589  // For temporary usage
590  Array<size_t> rowptr_temp;
591  Array<LO> colind_temp;
592  Array<Scalar> values_temp;
593 
594  RCP<CrsMatrix> PtentCrs;
595 
596  RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim));
597  PtentCrs = PtentCrsWrap->getCrsMatrix();
598  Ptentative = PtentCrsWrap;
599 
600  //*****************************************************************
601  // Loop over all aggregates and calculate local QR decompositions.
602  //*****************************************************************
603  GO qctr = 0; // for indexing into Ptent data vectors
604  const Map& nonUniqueMapRef = *nonUniqueMap;
605 
606  size_t total_nnz_count = 0;
607 
608  for (GO agg = 0; agg < numAggs; ++agg) {
609  LO myAggSize = aggStart[agg + 1] - aggStart[agg];
610  // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
611  // "localQR" (in column major format) for the QR routine.
612  Teuchos::SerialDenseMatrix<LO, SC> localQR(myAggSize, NSDim);
613  for (size_t j = 0; j < NSDim; ++j) {
614  bool bIsZeroNSColumn = true;
615  for (LO k = 0; k < myAggSize; ++k) {
616  // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
617  // fineNS[j][n] is the nth entry in the jth NS vector
618  try {
619  SC nsVal = fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])]; // extract information from fine level NS
620  localQR(k, j) = nsVal;
621  if (nsVal != zero) bIsZeroNSColumn = false;
622  } catch (...) {
623  GetOStream(Runtime1, -1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
624  GetOStream(Runtime1, -1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
625  GetOStream(Runtime1, -1) << "(local?) aggId=" << agg << std::endl;
626  GetOStream(Runtime1, -1) << "aggSize=" << myAggSize << std::endl;
627  GetOStream(Runtime1, -1) << "agg DOF=" << k << std::endl;
628  GetOStream(Runtime1, -1) << "NS vector j=" << j << std::endl;
629  GetOStream(Runtime1, -1) << "j*myAggSize + k = " << j * myAggSize + k << std::endl;
630  GetOStream(Runtime1, -1) << "aggToRowMap[" << agg << "][" << k << "] = " << aggToRowMap[aggStart[agg] + k] << std::endl;
631  GetOStream(Runtime1, -1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg] + k] << " is global element in nonUniqueMap = " << nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
632  GetOStream(Runtime1, -1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k]) << std::endl;
633  GetOStream(Runtime1, -1) << "fineNS...=" << fineNS[j][nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg] + k])] << std::endl;
634  GetOStream(Errors, -1) << "caught an error!" << std::endl;
635  }
636  } // for (LO k=0 ...
637  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
638  } // for (LO j=0 ...
639 
640  Xpetra::global_size_t offset = agg * NSDim;
641 
642  if (myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
643  // calculate QR decomposition (standard)
644  // R is stored in localQR (size: myAggSize x NSDim)
645 
646  // Householder multiplier
647  SC tau = localQR(0, 0);
648 
649  if (NSDim == 1) {
650  // Only one nullspace vector, so normalize by hand
651  Magnitude dtemp = 0;
652  for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
653  Magnitude tmag = STS::magnitude(localQR(k, 0));
654  dtemp += tmag * tmag;
655  }
657  tau = localQR(0, 0);
658  localQR(0, 0) = dtemp;
659  } else {
660  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
661  qrSolver.factor();
662  }
663 
664  // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
665  // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
666  // This stores the (offset+k)th entry only if it is local according to the coarseMap.
667  for (size_t j = 0; j < NSDim; ++j) {
668  for (size_t k = 0; k <= j; ++k) {
669  try {
670  if (coarseMapRef.isNodeLocalElement(offset + k)) {
671  coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
672  }
673  } catch (...) {
674  GetOStream(Errors, -1) << "caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k << std::endl;
675  }
676  }
677  }
678 
679  // Calculate Q, the tentative prolongator.
680  // The Lapack GEQRF call only works for myAggsize >= NSDim
681 
682  if (NSDim == 1) {
683  // Only one nullspace vector, so calculate Q by hand
684  Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0, 0));
685  localQR(0, 0) = tau;
686  dtemp = 1 / dtemp;
687  for (LocalOrdinal i = 0; i < myAggSize; ++i) {
688  localQR(i, 0) *= dtemp;
689  }
690  } else {
691  qrSolver.formQ();
692  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO, SC> > qFactor = qrSolver.getQ();
693  for (size_t j = 0; j < NSDim; j++) {
694  for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
695  localQR(i, j) = (*qFactor)(i, j);
696  }
697  }
698  }
699 
700  // end default case (myAggSize >= NSDim)
701  } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
702  // See comments for the uncoupled case
703 
704  // R = extended (by adding identity rows) localQR
705  for (size_t j = 0; j < NSDim; j++)
706  for (size_t k = 0; k < NSDim; k++) {
707  TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset + k), Exceptions::RuntimeError,
708  "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset + k);
709 
710  if (k < as<size_t>(myAggSize))
711  coarseNS[j][offset + k] = localQR(k, j);
712  else
713  coarseNS[j][offset + k] = (k == j ? one : zero);
714  }
715 
716  // Q = I (rectangular)
717  for (size_t i = 0; i < as<size_t>(myAggSize); i++)
718  for (size_t j = 0; j < NSDim; j++)
719  localQR(i, j) = (j == i ? one : zero);
720  } // end else (special handling for 1pt aggregates)
721 
722  // Process each row in the local Q factor. If the row is local to the current processor
723  // according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
724  // to be communicated later to the owning processor.
725  // FIXME -- what happens if maps are blocked?
726  for (GO j = 0; j < myAggSize; ++j) {
727  // This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
728  // If it is, the row is inserted. If not, the row number, columns, and values are saved in
729  // MultiVectors that will be sent to other processors.
730  GO globalRow = aggToRowMap[aggStart[agg] + j];
731 
732  // TODO is the use of Xpetra::global_size_t below correct?
733  if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false) {
734  ghostQrows[qctr] = globalRow;
735  for (size_t k = 0; k < NSDim; ++k) {
736  ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg * NSDim + k);
737  ghostQvals[k][qctr] = localQR(j, k);
738  }
739  ++qctr;
740  } else {
741  size_t nnz = 0;
742  for (size_t k = 0; k < NSDim; ++k) {
743  try {
744  if (localQR(j, k) != Teuchos::ScalarTraits<SC>::zero()) {
745  localColPtr[nnz] = agg * NSDim + k;
746  globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
747  valPtr[nnz] = localQR(j, k);
748  ++total_nnz_count;
749  ++nnz;
750  }
751  } catch (...) {
752  GetOStream(Errors, -1) << "caught error in colPtr/valPtr insert, current index=" << nnz << std::endl;
753  }
754  } // for (size_t k=0; k<NSDim; ++k)
755 
756  try {
757  Ptentative->insertGlobalValues(globalRow, globalColPtr.view(0, nnz), valPtr.view(0, nnz));
758  } catch (...) {
759  GetOStream(Errors, -1) << "pid " << A->getRowMap()->getComm()->getRank()
760  << "caught error during Ptent row insertion, global row "
761  << globalRow << std::endl;
762  }
763  }
764  } // for (GO j=0; j<myAggSize; ++j)
765 
766  } // for (LO agg=0; agg<numAggs; ++agg)
767 
768  // ***********************************************************
769  // ************* end of aggregate-wise QR ********************
770  // ***********************************************************
771  GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
772  // Import ghost parts of Q factors and insert into Ptentative.
773  // First import just the global row numbers.
775  targetQrowNums->putScalar(-1);
776  targetQrowNums->doImport(*ghostQrowNums, *importer, Xpetra::INSERT);
777  ArrayRCP<GO> targetQrows = targetQrowNums->getDataNonConst(0);
778 
779  // Now create map based on just the row numbers imported.
780  Array<GO> gidsToImport;
781  gidsToImport.reserve(targetQrows.size());
782  for (typename ArrayRCP<GO>::iterator r = targetQrows.begin(); r != targetQrows.end(); ++r) {
783  if (*r > -1) {
784  gidsToImport.push_back(*r);
785  }
786  }
787  RCP<const Map> reducedMap = MapFactory::Build(A->getRowMap()->lib(),
789  gidsToImport, indexBase, A->getRowMap()->getComm());
790 
791  // Import using the row numbers that this processor will receive.
792  importer = ImportFactory::Build(ghostQMap, reducedMap);
793 
794  Array<RCP<Xpetra::Vector<GO, LO, GO, Node> > > targetQcolumns(NSDim);
795  for (size_t i = 0; i < NSDim; ++i) {
796  targetQcolumns[i] = Xpetra::VectorFactory<GO, LO, GO, Node>::Build(reducedMap);
797  targetQcolumns[i]->doImport(*(ghostQcolumns[i]), *importer, Xpetra::INSERT);
798  }
799  RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap, NSDim);
800  targetQvalues->doImport(*ghostQvalues, *importer, Xpetra::INSERT);
801 
802  ArrayRCP<ArrayRCP<SC> > targetQvals;
803  ArrayRCP<ArrayRCP<GO> > targetQcols;
804  if (targetQvalues->getLocalLength() > 0) {
805  targetQvals.resize(NSDim);
806  targetQcols.resize(NSDim);
807  for (size_t i = 0; i < NSDim; ++i) {
808  targetQvals[i] = targetQvalues->getDataNonConst(i);
809  targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
810  }
811  }
812 
813  valPtr = Array<SC>(NSDim, 0.);
814  globalColPtr = Array<GO>(NSDim, 0);
815  for (typename Array<GO>::iterator r = gidsToImport.begin(); r != gidsToImport.end(); ++r) {
816  if (targetQvalues->getLocalLength() > 0) {
817  for (size_t j = 0; j < NSDim; ++j) {
818  valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
819  globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
820  }
821  Ptentative->insertGlobalValues(*r, globalColPtr.view(0, NSDim), valPtr.view(0, NSDim));
822  } // if (targetQvalues->getLocalLength() > 0)
823  }
824 
825  Ptentative->fillComplete(coarseMap, A->getDomainMap());
826 }
827 
828 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
831  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
832  RCP<const Map> rowMap = A->getRowMap();
833  RCP<const Map> colMap = A->getColMap();
834  const size_t numRows = rowMap->getLocalNumElements();
835 
836  typedef Teuchos::ScalarTraits<SC> STS;
837  typedef typename STS::magnitudeType Magnitude;
838  const SC zero = STS::zero();
839  const SC one = STS::one();
840  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
841 
842  const GO numAggs = aggregates->GetNumAggregates();
843  const size_t NSDim = fineNullspace->getNumVectors();
844  ArrayRCP<LO> aggSizes = aggregates->ComputeAggregateSizesArrayRCP();
845 
846  // Sanity checking
847  const ParameterList& pL = GetParameterList();
848  const bool& doQRStep = pL.get<bool>("tentative: calculate qr");
849  const bool& constantColSums = pL.get<bool>("tentative: constant column sums");
850 
851  TEUCHOS_TEST_FOR_EXCEPTION(doQRStep && constantColSums, Exceptions::RuntimeError,
852  "MueLu::TentativePFactory::MakeTentative: cannot use 'constant column sums' and 'calculate qr' at the same time");
853 
854  // Aggregates map is based on the amalgamated column map
855  // We can skip global-to-local conversion if LIDs in row map are
856  // same as LIDs in column map
857  bool goodMap = MueLu::Utilities<SC, LO, GO, NO>::MapsAreNested(*rowMap, *colMap);
858 
859  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
860  // aggStart is a pointer into aggToRowMapLO
861  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
862  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
863  ArrayRCP<LO> aggStart;
864  ArrayRCP<LO> aggToRowMapLO;
865  ArrayRCP<GO> aggToRowMapGO;
866  if (goodMap) {
867  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
868  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
869 
870  } else {
871  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
872  GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
873  << "using GO->LO conversion with performance penalty" << std::endl;
874  }
875  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
876 
877  // Pull out the nullspace vectors so that we can have random access.
880  for (size_t i = 0; i < NSDim; i++) {
881  fineNS[i] = fineNullspace->getData(i);
882  if (coarseMap->getLocalNumElements() > 0)
883  coarseNS[i] = coarseNullspace->getDataNonConst(i);
884  }
885 
886  size_t nnzEstimate = numRows * NSDim;
887 
888  // Time to construct the matrix and fill in the values
889  Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
890  RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
891 
892  ArrayRCP<size_t> iaPtent;
893  ArrayRCP<LO> jaPtent;
894  ArrayRCP<SC> valPtent;
895 
896  PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
897 
898  ArrayView<size_t> ia = iaPtent();
899  ArrayView<LO> ja = jaPtent();
900  ArrayView<SC> val = valPtent();
901 
902  ia[0] = 0;
903  for (size_t i = 1; i <= numRows; i++)
904  ia[i] = ia[i - 1] + NSDim;
905 
906  for (size_t j = 0; j < nnzEstimate; j++) {
907  ja[j] = INVALID;
908  val[j] = zero;
909  }
910 
911  if (doQRStep) {
913  // Standard aggregate-wise QR //
915  for (GO agg = 0; agg < numAggs; agg++) {
916  LO aggSize = aggStart[agg + 1] - aggStart[agg];
917 
918  Xpetra::global_size_t offset = agg * NSDim;
919 
920  // Extract the piece of the nullspace corresponding to the aggregate, and
921  // put it in the flat array, "localQR" (in column major format) for the
922  // QR routine.
923  Teuchos::SerialDenseMatrix<LO, SC> localQR(aggSize, NSDim);
924  if (goodMap) {
925  for (size_t j = 0; j < NSDim; j++)
926  for (LO k = 0; k < aggSize; k++)
927  localQR(k, j) = fineNS[j][aggToRowMapLO[aggStart[agg] + k]];
928  } else {
929  for (size_t j = 0; j < NSDim; j++)
930  for (LO k = 0; k < aggSize; k++)
931  localQR(k, j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + k])];
932  }
933 
934  // Test for zero columns
935  for (size_t j = 0; j < NSDim; j++) {
936  bool bIsZeroNSColumn = true;
937 
938  for (LO k = 0; k < aggSize; k++)
939  if (localQR(k, j) != zero)
940  bIsZeroNSColumn = false;
941 
942  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
943  "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column in NS column " << j);
944  }
945 
946  // Calculate QR decomposition (standard)
947  // NOTE: Q is stored in localQR and R is stored in coarseNS
948  if (aggSize >= Teuchos::as<LO>(NSDim)) {
949  if (NSDim == 1) {
950  // Only one nullspace vector, calculate Q and R by hand
951  Magnitude norm = STS::magnitude(zero);
952  for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
953  norm += STS::magnitude(localQR(k, 0) * localQR(k, 0));
955 
956  // R = norm
957  coarseNS[0][offset] = norm;
958 
959  // Q = localQR(:,0)/norm
960  for (LO i = 0; i < aggSize; i++)
961  localQR(i, 0) /= norm;
962 
963  } else {
965  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
966  qrSolver.factor();
967 
968  // R = upper triangular part of localQR
969  for (size_t j = 0; j < NSDim; j++)
970  for (size_t k = 0; k <= j; k++)
971  coarseNS[j][offset + k] = localQR(k, j); // TODO is offset+k the correct local ID?!
972 
973  // Calculate Q, the tentative prolongator.
974  // The Lapack GEQRF call only works for myAggsize >= NSDim
975  qrSolver.formQ();
977  for (size_t j = 0; j < NSDim; j++)
978  for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
979  localQR(i, j) = (*qFactor)(i, j);
980  }
981 
982  } else {
983  // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
984 
985  // The local QR decomposition is not possible in the "overconstrained"
986  // case (i.e. number of columns in localQR > number of rows), which
987  // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
988  // is only possible for single node aggregates in structural mechanics.
989  // (Similar problems may arise in discontinuous Galerkin problems...)
990  // We bypass the QR decomposition and use an identity block in the
991  // tentative prolongator for the single node aggregate and transfer the
992  // corresponding fine level null space information 1-to-1 to the coarse
993  // level null space part.
994 
995  // NOTE: The resulting tentative prolongation operator has
996  // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
997  // coarse level operator A. To deal with that one has the following
998  // options:
999  // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
1000  // false) to add some identity block to the diagonal of the zero rows
1001  // in the coarse level operator A, such that standard level smoothers
1002  // can be used again.
1003  // - Use special (projection-based) level smoothers, which can deal
1004  // with singular matrices (very application specific)
1005  // - Adapt the code below to avoid zero columns. However, we do not
1006  // support a variable number of DOFs per node in MueLu/Xpetra which
1007  // makes the implementation really hard.
1008 
1009  // R = extended (by adding identity rows) localQR
1010  for (size_t j = 0; j < NSDim; j++)
1011  for (size_t k = 0; k < NSDim; k++)
1012  if (k < as<size_t>(aggSize))
1013  coarseNS[j][offset + k] = localQR(k, j);
1014  else
1015  coarseNS[j][offset + k] = (k == j ? one : zero);
1016 
1017  // Q = I (rectangular)
1018  for (size_t i = 0; i < as<size_t>(aggSize); i++)
1019  for (size_t j = 0; j < NSDim; j++)
1020  localQR(i, j) = (j == i ? one : zero);
1021  }
1022 
1023  // Process each row in the local Q factor
1024  // FIXME: What happens if maps are blocked?
1025  for (LO j = 0; j < aggSize; j++) {
1026  LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg] + j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]));
1027 
1028  size_t rowStart = ia[localRow];
1029  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1030  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1031  if (localQR(j, k) != zero) {
1032  ja[rowStart + lnnz] = offset + k;
1033  val[rowStart + lnnz] = localQR(j, k);
1034  lnnz++;
1035  }
1036  }
1037  }
1038  }
1039 
1040  } else {
1041  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
1042  if (NSDim > 1)
1043  GetOStream(Warnings0) << "TentativePFactory : for nontrivial nullspace, this may degrade performance" << std::endl;
1045  // "no-QR" option //
1047  // Local Q factor is just the fine nullspace support over the current aggregate.
1048  // Local R factor is the identity.
1049  // TODO I have not implemented any special handling for aggregates that are too
1050  // TODO small to locally support the nullspace, as is done in the standard QR
1051  // TODO case above.
1052  if (goodMap) {
1053  for (GO agg = 0; agg < numAggs; agg++) {
1054  const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1055  Xpetra::global_size_t offset = agg * NSDim;
1056 
1057  // Process each row in the local Q factor
1058  // FIXME: What happens if maps are blocked?
1059  for (LO j = 0; j < aggSize; j++) {
1060  // TODO Here I do not check for a zero nullspace column on the aggregate.
1061  // as is done in the standard QR case.
1062 
1063  const LO localRow = aggToRowMapLO[aggStart[agg] + j];
1064 
1065  const size_t rowStart = ia[localRow];
1066 
1067  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
1068  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1069  SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg] + j]];
1070  if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1071  if (qr_jk != zero) {
1072  ja[rowStart + lnnz] = offset + k;
1073  val[rowStart + lnnz] = qr_jk;
1074  lnnz++;
1075  }
1076  }
1077  }
1078  for (size_t j = 0; j < NSDim; j++)
1079  coarseNS[j][offset + j] = one;
1080  } // for (GO agg = 0; agg < numAggs; agg++)
1081 
1082  } else {
1083  for (GO agg = 0; agg < numAggs; agg++) {
1084  const LO aggSize = aggStart[agg + 1] - aggStart[agg];
1085  Xpetra::global_size_t offset = agg * NSDim;
1086  for (LO j = 0; j < aggSize; j++) {
1087  const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j]);
1088 
1089  const size_t rowStart = ia[localRow];
1090 
1091  for (size_t k = 0, lnnz = 0; k < NSDim; ++k) {
1092  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
1093  SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg] + j])];
1094  if (constantColSums) qr_jk = qr_jk / (Magnitude)aggSizes[agg];
1095  if (qr_jk != zero) {
1096  ja[rowStart + lnnz] = offset + k;
1097  val[rowStart + lnnz] = qr_jk;
1098  lnnz++;
1099  }
1100  }
1101  }
1102  for (size_t j = 0; j < NSDim; j++)
1103  coarseNS[j][offset + j] = one;
1104  } // for (GO agg = 0; agg < numAggs; agg++)
1105 
1106  } // if (goodmap) else ...
1107 
1108  } // if doQRStep ... else
1109 
1110  // Compress storage (remove all INVALID, which happen when we skip zeros)
1111  // We do that in-place
1112  size_t ia_tmp = 0, nnz = 0;
1113  for (size_t i = 0; i < numRows; i++) {
1114  for (size_t j = ia_tmp; j < ia[i + 1]; j++)
1115  if (ja[j] != INVALID) {
1116  ja[nnz] = ja[j];
1117  val[nnz] = val[j];
1118  nnz++;
1119  }
1120  ia_tmp = ia[i + 1];
1121  ia[i + 1] = nnz;
1122  }
1123  if (rowMap->lib() == Xpetra::UseTpetra) {
1124  // - Cannot resize for Epetra, as it checks for same pointers
1125  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
1126  // NOTE: these invalidate ja and val views
1127  jaPtent.resize(nnz);
1128  valPtent.resize(nnz);
1129  }
1130 
1131  GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
1132 
1133  PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
1134 
1135  // Managing labels & constants for ESFC
1136  RCP<ParameterList> FCparams;
1137  if (pL.isSublist("matrixmatrix: kernel params"))
1138  FCparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
1139  else
1140  FCparams = rcp(new ParameterList);
1141  // By default, we don't need global constants for TentativeP
1142  FCparams->set("compute global constants", FCparams->get("compute global constants", false));
1143  std::string levelIDs = toString(levelID);
1144  FCparams->set("Timer Label", std::string("MueLu::TentativeP-") + levelIDs);
1145  RCP<const Export> dummy_e;
1146  RCP<const Import> dummy_i;
1147 
1148  PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(), dummy_i, dummy_e, FCparams);
1149 }
1150 
1151 } // namespace MueLu
1152 
1153 // TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
1154 
1155 #define MUELU_TENTATIVEPFACTORY_SHORT
1156 #endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
Important warning messages (one line)
void reserve(size_type n)
MueLu::DefaultLocalOrdinal LocalOrdinal
int setMatrix(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &A)
static bool MapsAreNested(const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &rowMap, const Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > &colMap)
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
void UnamalgamateAggregates(const Aggregates &aggregates, Teuchos::ArrayRCP< LocalOrdinal > &aggStart, Teuchos::ArrayRCP< GlobalOrdinal > &aggToRowMap) const
UnamalgamateAggregates.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
ArrayView< T > view(size_type offset, size_type size)
static T squareroot(T x)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
size_type size() const
void UnamalgamateAggregatesLO(const Aggregates &aggregates, Teuchos::ArrayRCP< LocalOrdinal > &aggStart, Teuchos::ArrayRCP< LO > &aggToRowMap) const
LocalOrdinal LO
size_type size() const
static const NoFactory * get()
Print even more statistics.
#define SET_VALID_ENTRY(name)
void resize(const size_type n, const T &val=T())
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Teuchos::RCP< Xpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > ComputeUnamalgamatedImportDofMap(const Aggregates &aggregates) const
ComputeUnamalgamatedImportDofMap build overlapping dof row map from aggregates needed for overlapping...
bool isSublist(const std::string &name) const
Teuchos::ArrayRCP< LocalOrdinal > ComputeAggregateSizesArrayRCP(bool forceRecompute=false) const
Compute sizes of aggregates.
void resize(size_type new_size, const value_type &x=value_type())
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
size_t global_size_t
static bool isnaninf(const T &x)
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
iterator end()
static magnitudeType magnitude(T a)
void push_back(const value_type &x)
Scalar SC
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
void BuildPuncoupledBlockCrs(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
RCP< SerialDenseMatrix< OrdinalType, ScalarType > > getQ() const
Exception throws to report errors in the internal logical of the program.
iterator end() const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Description of what is happening (more verbose)
iterator begin()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
bool is_null() const