MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
11 #define MUELU_SEMICOARSENPFACTORY_DEF_HPP
12 
13 #include <stdlib.h>
14 
15 #include <Teuchos_LAPACK.hpp>
16 
17 #include <Xpetra_CrsMatrixWrap.hpp>
18 #include <Xpetra_ImportFactory.hpp>
19 #include <Xpetra_Matrix.hpp>
20 #include <Xpetra_MultiVectorFactory.hpp>
21 #include <Xpetra_VectorFactory.hpp>
22 
25 
26 #include "MueLu_MasterList.hpp"
27 #include "MueLu_Monitor.hpp"
28 
29 namespace MueLu {
30 
31 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
33  RCP<ParameterList> validParamList = rcp(new ParameterList());
34 
35 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
36  SET_VALID_ENTRY("semicoarsen: piecewise constant");
37  SET_VALID_ENTRY("semicoarsen: piecewise linear");
38  SET_VALID_ENTRY("semicoarsen: coarsen rate");
39  SET_VALID_ENTRY("semicoarsen: calculate nonsym restriction");
40 #undef SET_VALID_ENTRY
41  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
42  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
43  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");
44 
45  validParamList->set<RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection vertical line ids");
46  validParamList->set<RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection layer ids");
47  validParamList->set<RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for number of coarse z-layers");
48 
49  return validParamList;
50 }
51 
52 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
54  Input(fineLevel, "A");
55  Input(fineLevel, "Nullspace");
56 
57  Input(fineLevel, "LineDetection_VertLineIds");
58  Input(fineLevel, "LineDetection_Layers");
59  Input(fineLevel, "CoarseNumZLayers");
60 
61  // check whether fine level coordinate information is available.
62  // If yes, request the fine level coordinates and generate coarse coordinates
63  // during the Build call
64  if (fineLevel.GetLevelID() == 0) {
65  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
66  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
67  bTransferCoordinates_ = true;
68  }
69  } else if (bTransferCoordinates_ == true) {
70  // on coarser levels we check the default factory providing "Coordinates"
71  // or the factory declared to provide "Coordinates"
72  // first, check which factory is providing coordinate information
73  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
74  if (myCoordsFact == Teuchos::null) {
75  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
76  }
77  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
78  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
79  }
80  }
81 }
82 
83 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
85  return BuildP(fineLevel, coarseLevel);
86 }
87 
88 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
90  FactoryMonitor m(*this, "Build", coarseLevel);
91 
92  // obtain general variables
93  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
94  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
95 
96  // get user-provided coarsening rate parameter (constant over all levels)
97  const ParameterList& pL = GetParameterList();
98  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
99  bool buildRestriction = pL.get<bool>("semicoarsen: calculate nonsym restriction");
100  bool doLinear = pL.get<bool>("semicoarsen: piecewise linear");
101 
102  // collect general input data
103  LO BlkSize = A->GetFixedBlockSize();
104  RCP<const Map> rowMap = A->getRowMap();
105  LO Ndofs = rowMap->getLocalNumElements();
106  LO Nnodes = Ndofs / BlkSize;
107 
108  // collect line detection information generated by the LineDetectionFactory instance
109  LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
110  Teuchos::ArrayRCP<LO> TVertLineId = Get<Teuchos::ArrayRCP<LO> >(fineLevel, "LineDetection_VertLineIds");
111  Teuchos::ArrayRCP<LO> TLayerId = Get<Teuchos::ArrayRCP<LO> >(fineLevel, "LineDetection_Layers");
112  LO* VertLineId = TVertLineId.getRawPtr();
113  LO* LayerId = TLayerId.getRawPtr();
114 
115  // generate transfer operator with semicoarsening
116  RCP<const Map> theCoarseMap;
117  RCP<Matrix> P, R;
118  RCP<MultiVector> coarseNullspace;
119  GO Ncoarse = MakeSemiCoarsenP(Nnodes, FineNumZLayers, CoarsenRate, LayerId, VertLineId,
120  BlkSize, A, P, theCoarseMap, fineNullspace, coarseNullspace, R, buildRestriction, doLinear);
121 
122  // set StridingInformation of P
123  if (A->IsView("stridedMaps") == true)
124  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
125  else
126  P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
127 
128  if (buildRestriction) {
129  if (A->IsView("stridedMaps") == true)
130  R->CreateView("stridedMaps", theCoarseMap, A->getRowMap("stridedMaps"));
131  else
132  R->CreateView("stridedMaps", theCoarseMap, R->getDomainMap());
133  }
134  if (pL.get<bool>("semicoarsen: piecewise constant")) {
135  TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise constant.");
136  RevertToPieceWiseConstant(P, BlkSize);
137  }
138  if (pL.get<bool>("semicoarsen: piecewise linear")) {
139  TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise linear.");
140  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("semicoarsen: piecewise constant"), Exceptions::RuntimeError, "Cannot use piecewise constant with piecewise linear.");
141  }
142 
143  // Store number of coarse z-layers on the coarse level container
144  // This information is used by the LineDetectionAlgorithm
145  // TODO get rid of the NoFactory
146 
147  LO CoarseNumZLayers = FineNumZLayers * Ncoarse;
148  CoarseNumZLayers /= Ndofs;
149  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
150 
151  // store semicoarsening transfer on coarse level
152  Set(coarseLevel, "P", P);
153  if (buildRestriction) Set(coarseLevel, "RfromPfactory", R);
154 
155  Set(coarseLevel, "Nullspace", coarseNullspace);
156 
157  // transfer coordinates
158  if (bTransferCoordinates_) {
160  RCP<xdMV> fineCoords = Teuchos::null;
161  if (fineLevel.GetLevelID() == 0 &&
162  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
163  fineCoords = fineLevel.Get<RCP<xdMV> >("Coordinates", NoFactory::get());
164  } else {
165  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
166  if (myCoordsFact == Teuchos::null) {
167  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
168  }
169  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
170  fineCoords = fineLevel.Get<RCP<xdMV> >("Coordinates", myCoordsFact.get());
171  }
172  }
173 
174  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
175 
176  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
177  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
178  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
179  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
180 
181  // determine the maximum and minimum z coordinate value on the current processor.
184  for (auto it = z.begin(); it != z.end(); ++it) {
185  if (*it > zval_max) zval_max = *it;
186  if (*it < zval_min) zval_min = *it;
187  }
188 
189  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
190 
191  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
192  if (myCoarseZLayers == 1) {
193  myZLayerCoords[0] = zval_min;
194  } else {
195  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max - zval_min) / (myCoarseZLayers - 1);
196  for (LO k = 0; k < myCoarseZLayers; ++k) {
197  myZLayerCoords[k] = k * dz;
198  }
199  }
200 
201  // Note, that the coarse level node coordinates have to be in vertical ordering according
202  // to the numbering of the vertical lines
203 
204  // number of vertical lines on current node:
205  LO numVertLines = Nnodes / FineNumZLayers;
206  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
207 
208  RCP<const Map> coarseCoordMap =
209  MapFactory::Build(fineCoords->getMap()->lib(),
211  Teuchos::as<size_t>(numLocalCoarseNodes),
212  fineCoords->getMap()->getIndexBase(),
213  fineCoords->getMap()->getComm());
214  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
215  coarseCoords->putScalar(-1.0);
216  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
217  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
218  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
219 
220  // loop over all vert line indices (stop as soon as possible)
221  LO cntCoarseNodes = 0;
222  for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
223  // vertical line id in *vt
224  LO curVertLineId = TVertLineId[vt];
225 
226  if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
227  cy[curVertLineId * myCoarseZLayers] == -1.0) {
228  // loop over all local myCoarseZLayers
229  for (LO n = 0; n < myCoarseZLayers; ++n) {
230  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
231  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
232  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
233  }
234  cntCoarseNodes += myCoarseZLayers;
235  }
236 
237  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
238  if (cntCoarseNodes == numLocalCoarseNodes) break;
239  }
240 
241  // set coarse level coordinates
242  Set(coarseLevel, "Coordinates", coarseCoords);
243  } /* end bool bTransferCoordinates */
244 }
245 
246 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
248  /*
249  * Given the number of points in the z direction (PtsPerLine) and a
250  * coarsening rate (CoarsenRate), determine which z-points will serve
251  * as Cpts and return the total number of Cpts.
252  *
253  * Input
254  * PtsPerLine: Number of fine level points in the z direction
255  *
256  * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
257  *
258  * Thin: Must be either 0 or 1. Thin decides what to do when
259  * (PtsPerLine+1)/CoarsenRate is not an integer.
260  *
261  * Thin == 0 ==> ceil() the above fraction
262  * Thin == 1 ==> floor() the above fraction
263  *
264  * Output
265  * LayerCpts Array where LayerCpts[i] indicates that the
266  * LayerCpts[i]th fine level layer is a Cpt Layer.
267  * Note: fine level layers are assumed to be numbered starting
268  * a one.
269  */
270  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
271  LO NCpts, i;
272  LO NCLayers = -1;
273  LO FirstStride;
274 
275  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(PtsPerLine + 1)) / ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(CoarsenRate)) - 1.0;
276  if (Thin == 1)
277  NCpts = (LO)ceil(temp);
278  else
279  NCpts = (LO)floor(temp);
280 
281  TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
282 
283  if (NCpts < 1) NCpts = 1;
284 
285  FirstStride = (LO)ceil(((typename Teuchos::ScalarTraits<Scalar>::coordinateType)PtsPerLine + 1) / ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(NCpts + 1)));
286  RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(PtsPerLine - FirstStride + 1)) / ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)NCpts);
287 
288  NCLayers = (LO)floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(PtsPerLine - FirstStride + 1)) / RestStride) + .00001);
289 
290  TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
291 
292  di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType)FirstStride;
293  for (i = 1; i <= NCpts; i++) {
294  (*LayerCpts)[i] = (LO)floor(di);
295  di += RestStride;
296  }
297 
298  return (NCLayers);
299 }
300 
301 #define MaxHorNeighborNodes 75
302 
303 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
305  MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
306  LO const VertLineId[], LO const DofsPerNode, RCP<Matrix>& Amat, RCP<Matrix>& P,
307  RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
308  RCP<MultiVector>& coarseNullspace, RCP<Matrix>& R, bool buildRestriction, bool doLinear) const {
309  /*
310  * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
311  * describing the z-layer and vertical line (LayerId and VertLineId)
312  * of each matrix block row, a coarsening rate, and dofs/node information,
313  * construct a prolongator that coarsening to semicoarsening in the z-direction
314  * using something like an operator-dependent grid transfer. In particular,
315  * matrix stencils are collapsed to vertical lines. Thus, each vertical line
316  * gives rise to a block tridiagonal matrix. BlkRows corresponding to
317  * Cpts are replaced by identity matrices. This tridiagonal is solved
318  * to determine each interpolation basis functions. Each Blk Rhs corresponds
319  * to all zeros except at the corresponding C-pt which has an identity
320  *
321  * On termination, return the number of local prolongator columns owned by
322  * this processor.
323  *
324  * Note: This code was adapted from a matlab code where offsets/arrays
325  * start from 1. In most parts of the code, this 1 offset is kept
326  * (in some cases wasting the first element of the array). The
327  * input and output matrices of this function has been changed to
328  * have offsets/rows/columns which start from 0. LayerId[] and
329  * VertLineId[] currently start from 1.
330  *
331  * Input
332  * =====
333  * Ntotal Number of fine level Blk Rows owned by this processor
334  *
335  * nz Number of vertical layers. Note: partitioning must be done
336  * so that each processor owns an entire vertical line. This
337  * means that nz is the global number of layers, which should
338  * be equal to the local number of layers.
339  * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
340  * would correspond to CoarsenRate = 3.
341  * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
342  * layer number associated with the dofs within BlkRow.
343  * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
344  * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
345  * BlkRows associated with nodes along the same vertical line
346  * in the mesh should have the same LineId.
347  * DofsPerNode Number of degrees-of-freedom per mesh node.
348  *
349  * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
350  * OrigAcols,
351  * OrigAvals
352  *
353  * Output
354  * =====
355  * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
356  * ParamPcols,
357  * ParamsPvals
358  */
359  int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
360  int *InvLineLayer = NULL, *CptLayers = NULL, StartLayer, NStencilNodes;
361  int BlkRow = -1, dof_j, node_k, *Sub2FullMap = NULL, RowLeng;
362  int i, j, iii, col, count, index, loc, PtRow, PtCol;
363  SC *BandSol = NULL, *BandMat = NULL, TheSum, *RestrictBandMat = NULL, *RestrictBandSol = NULL;
364  int *IPIV = NULL, KL, KU, KLU, N, NRHS, LDAB, INFO;
365  int* Pcols;
366  size_t* Pptr;
367  SC* Pvals;
368  LO* Rcols = NULL;
369  size_t* Rptr = NULL;
370  SC* Rvals = NULL;
371  int MaxStencilSize, MaxNnzPerRow;
372  LO* LayDiff = NULL;
373  int CurRow, LastGuy = -1, NewPtr, RLastGuy = -1;
374  int Ndofs;
375  int Nghost;
376  LO *Layerdofs = NULL, *Col2Dof = NULL;
377 
379 
380  char notrans[3];
381  notrans[0] = 'N';
382  notrans[1] = 'N';
383  char trans[3];
384  trans[0] = 'T';
385  trans[1] = 'T';
386 
387  MaxNnzPerRow = MaxHorNeighborNodes * DofsPerNode * 3;
388  Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1 + MaxNnzPerRow);
389  LayDiff = TLayDiff.getRawPtr();
390 
391  Nghost = Amat->getColMap()->getLocalNumElements() - Amat->getDomainMap()->getLocalNumElements();
392  if (Nghost < 0) Nghost = 0;
393  Teuchos::ArrayRCP<LO> TLayerdofs = Teuchos::arcp<LO>(Ntotal * DofsPerNode + Nghost + 1);
394  Layerdofs = TLayerdofs.getRawPtr();
395  Teuchos::ArrayRCP<LO> TCol2Dof = Teuchos::arcp<LO>(Ntotal * DofsPerNode + Nghost + 1);
396  Col2Dof = TCol2Dof.getRawPtr();
397 
400  ArrayRCP<LO> valptr = localdtemp->getDataNonConst(0);
401 
402  for (i = 0; i < Ntotal * DofsPerNode; i++)
403  valptr[i] = LayerId[i / DofsPerNode];
404  valptr = ArrayRCP<LO>();
405 
406  RCP<const Import> importer;
407  importer = Amat->getCrsGraph()->getImporter();
408  if (importer == Teuchos::null) {
409  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
410  }
411  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
412 
413  valptr = dtemp->getDataNonConst(0);
414  for (i = 0; i < Ntotal * DofsPerNode + Nghost; i++) Layerdofs[i] = valptr[i];
415  valptr = localdtemp->getDataNonConst(0);
416  for (i = 0; i < Ntotal * DofsPerNode; i++) valptr[i] = i % DofsPerNode;
417  valptr = ArrayRCP<LO>();
418  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
419 
420  valptr = dtemp->getDataNonConst(0);
421  for (i = 0; i < Ntotal * DofsPerNode + Nghost; i++) Col2Dof[i] = valptr[i];
422  valptr = ArrayRCP<LO>();
423 
424  if (Ntotal != 0) {
425  NLayers = LayerId[0];
426  NVertLines = VertLineId[0];
427  } else {
428  NLayers = -1;
429  NVertLines = -1;
430  }
431 
432  for (i = 1; i < Ntotal; i++) {
433  if (VertLineId[i] > NVertLines) NVertLines = VertLineId[i];
434  if (LayerId[i] > NLayers) NLayers = LayerId[i];
435  }
436  NLayers++;
437  NVertLines++;
438 
439  /*
440  * Make an inverse map so that we can quickly find the dof
441  * associated with a particular vertical line and layer.
442  */
443 
444  Teuchos::ArrayRCP<LO> TInvLineLayer = Teuchos::arcp<LO>(1 + NVertLines * NLayers);
445  InvLineLayer = TInvLineLayer.getRawPtr();
446  for (i = 0; i < Ntotal; i++) {
447  InvLineLayer[VertLineId[i] + 1 + LayerId[i] * NVertLines] = i;
448  }
449 
450  /*
451  * Determine coarse layers where injection will be applied.
452  */
453 
454  Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz + 1);
455  CptLayers = TCptLayers.getRawPtr();
456  NCLayers = FindCpts(nz, CoarsenRate, 0, &CptLayers);
457 
458  /*
459  * Compute the largest possible interpolation stencil width based
460  * on the location of the Clayers. This stencil width is actually
461  * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
462  * one needs to multiply this by DofsPerNode.
463  */
464 
465  if (NCLayers < 2)
466  MaxStencilSize = nz;
467  else
468  MaxStencilSize = CptLayers[2];
469 
470  for (i = 3; i <= NCLayers; i++) {
471  if (MaxStencilSize < CptLayers[i] - CptLayers[i - 2])
472  MaxStencilSize = CptLayers[i] - CptLayers[i - 2];
473  }
474  if (NCLayers > 1) {
475  if (MaxStencilSize < nz - CptLayers[NCLayers - 1] + 1)
476  MaxStencilSize = nz - CptLayers[NCLayers - 1] + 1;
477  }
478 
479  /*
480  * Allocate storage associated with solving a banded sub-matrix needed to
481  * determine the interpolation stencil. Note: we compute interpolation stencils
482  * for all dofs within a node at the same time, and so the banded solution
483  * must be large enough to hold all DofsPerNode simultaneously.
484  */
485 
486  Teuchos::ArrayRCP<LO> TSub2FullMap = Teuchos::arcp<LO>((MaxStencilSize + 1) * DofsPerNode);
487  Sub2FullMap = TSub2FullMap.getRawPtr();
488  Teuchos::ArrayRCP<SC> TBandSol = Teuchos::arcp<SC>((MaxStencilSize + 1) * DofsPerNode * DofsPerNode);
489  BandSol = TBandSol.getRawPtr();
490  Teuchos::ArrayRCP<SC> TResBandSol;
491  if (buildRestriction) {
492  TResBandSol = Teuchos::arcp<SC>((MaxStencilSize + 1) * DofsPerNode * DofsPerNode);
493  RestrictBandSol = TResBandSol.getRawPtr();
494  }
495  /*
496  * Lapack variables. See comments for dgbsv().
497  */
498  KL = 2 * DofsPerNode - 1;
499  KU = 2 * DofsPerNode - 1;
500  KLU = KL + KU;
501  LDAB = 2 * KL + KU + 1;
502  NRHS = DofsPerNode;
503  Teuchos::ArrayRCP<SC> TBandMat = Teuchos::arcp<SC>(LDAB * MaxStencilSize * DofsPerNode + 1);
504  BandMat = TBandMat.getRawPtr();
505  Teuchos::ArrayRCP<LO> TIPIV = Teuchos::arcp<LO>((MaxStencilSize + 1) * DofsPerNode);
506  IPIV = TIPIV.getRawPtr();
507 
508  Teuchos::ArrayRCP<SC> TResBandMat;
509  if (buildRestriction) {
510  TResBandMat = Teuchos::arcp<SC>(LDAB * MaxStencilSize * DofsPerNode + 1);
511  RestrictBandMat = TResBandMat.getRawPtr();
512  }
513 
514  /*
515  * Allocate storage for the final interpolation matrix. Note: each prolongator
516  * row might have entries corresponding to at most two nodes.
517  * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
518  * nnz per prolongator row is DofsPerNode*2.
519  */
520 
521  Ndofs = DofsPerNode * Ntotal;
522  MaxNnz = 2 * DofsPerNode * Ndofs;
523 
524  RCP<const Map> rowMap = Amat->getRowMap();
525  Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
526 
527  std::vector<size_t> stridingInfo_;
528  stridingInfo_.push_back(DofsPerNode);
529 
530  Xpetra::global_size_t itemp = GNdofs / nz;
531  coarseMap = StridedMapFactory::Build(rowMap->lib(),
532  NCLayers * itemp,
533  NCLayers * NVertLines * DofsPerNode,
534  0, /* index base */
535  stridingInfo_,
536  rowMap->getComm(),
537  -1, /* strided block id */
538  0 /* domain gid offset */);
539 
540  // coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
541 
542  P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
543  coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
544 
545  Teuchos::ArrayRCP<SC> TPvals = Teuchos::arcp<SC>(1 + MaxNnz);
546  Pvals = TPvals.getRawPtr();
547  Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode * (2 + Ntotal));
548  Pptr = TPptr.getRawPtr();
549  Teuchos::ArrayRCP<LO> TPcols = Teuchos::arcp<LO>(1 + MaxNnz);
550  Pcols = TPcols.getRawPtr();
551 
552  TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
553  Pptr[0] = 0;
554  Pptr[1] = 0;
555 
556  Teuchos::ArrayRCP<SC> TRvals;
558  Teuchos::ArrayRCP<LO> TRcols;
559  LO RmaxNnz = -1, RmaxPerRow = -1;
560  if (buildRestriction) {
561  RmaxPerRow = ((LO)ceil(((double)(MaxNnz + 7)) / ((double)(coarseMap->getLocalNumElements()))));
562  RmaxNnz = RmaxPerRow * coarseMap->getLocalNumElements();
563  TRvals = Teuchos::arcp<SC>(1 + RmaxNnz);
564  Rvals = TRvals.getRawPtr();
565  TRptr = Teuchos::arcp<size_t>((2 + coarseMap->getLocalNumElements()));
566  Rptr = TRptr.getRawPtr();
567  TRcols = Teuchos::arcp<LO>(1 + RmaxNnz);
568  Rcols = TRcols.getRawPtr();
569  TEUCHOS_TEST_FOR_EXCEPTION(Rcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
570  Rptr[0] = 0;
571  Rptr[1] = 0;
572  }
573  /*
574  * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
575  * This will be useful while filling up P, and then later we will squeeze out
576  * the unused nonzeros locations.
577  */
578 
579  for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
580  count = 1;
581  for (i = 1; i <= DofsPerNode * Ntotal + 1; i++) {
582  Pptr[i] = count;
583  count += (2 * DofsPerNode);
584  }
585  if (buildRestriction) {
586  for (i = 1; i <= RmaxNnz; i++) Rcols[i] = -1; /* mark all entries as unused */
587  count = 1;
588  for (i = 1; i <= (int)(coarseMap->getLocalNumElements() + 1); i++) {
589  Rptr[i] = count;
590  count += RmaxPerRow;
591  }
592  }
593 
594  /*
595  * Build P column by column. The 1st block column corresponds to the 1st coarse
596  * layer and the first line. The 2nd block column corresponds to the 2nd coarse
597  * layer and the first line. The NCLayers+1 block column corresponds to the
598  * 1st coarse layer and the 2nd line, etc.
599  */
600 
601  col = 0;
602  for (MyLine = 1; MyLine <= NVertLines; MyLine += 1) {
603  for (iii = 1; iii <= NCLayers; iii += 1) {
604  col = col + 1;
605  MyLayer = CptLayers[iii];
606 
607  /*
608  * StartLayer gives the layer number of the lowest layer that
609  * is nonzero in the interpolation stencil that is currently
610  * being computed. Normally, if we are not near a boundary this
611  * is simply CptsLayers[iii-1]+1.
612  *
613  * NStencilNodes indicates the number of nonzero nodes in the
614  * interpolation stencil that is currently being computed. Normally,
615  * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
616  */
617 
618  if (iii != 1)
619  StartLayer = CptLayers[iii - 1] + 1;
620  else
621  StartLayer = 1;
622 
623  if (iii != NCLayers)
624  NStencilNodes = CptLayers[iii + 1] - StartLayer;
625  else
626  NStencilNodes = NLayers - StartLayer + 1;
627 
628  N = NStencilNodes * DofsPerNode;
629 
630  /*
631  * dgbsv() does not require that the first KL rows be initialized,
632  * so we could avoid zeroing out some entries?
633  */
634 
635  for (i = 0; i < NStencilNodes * DofsPerNode * DofsPerNode; i++) BandSol[i] = 0.0;
636  for (i = 0; i < LDAB * N; i++) BandMat[i] = 0.0;
637  if (buildRestriction) {
638  for (i = 0; i < NStencilNodes * DofsPerNode * DofsPerNode; i++) RestrictBandSol[i] = 0.0;
639  for (i = 0; i < LDAB * N; i++) RestrictBandMat[i] = 0.0;
640  }
641 
642  /*
643  * Fill BandMat and BandSol (which is initially the rhs) for each
644  * node in the interpolation stencil that is being computed.
645  */
646 
647  if (!doLinear) {
648  for (node_k = 1; node_k <= NStencilNodes; node_k++) {
649  /* Map a Line and Layer number to a BlkRow in the fine level matrix
650  * and record the mapping from the sub-system to the BlkRow of the
651  * fine level matrix.
652  */
653  BlkRow = InvLineLayer[MyLine + (StartLayer + node_k - 2) * NVertLines] + 1;
654  Sub2FullMap[node_k] = BlkRow;
655 
656  /* Two cases:
657  * 1) the current layer is not a Cpoint layer. In this case we
658  * want to basically stick the matrix couplings to other
659  * nonzero stencil rows into the band matrix. One way to do
660  * this is to include couplings associated with only MyLine
661  * and ignore all the other couplings. However, what we do
662  * instead is to sum all the coupling at each layer participating
663  * in this interpolation stencil and stick this sum into BandMat.
664  * 2) the current layer is a Cpoint layer and so we
665  * stick an identity block in BandMat and rhs.
666  */
667  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
668  j = (BlkRow - 1) * DofsPerNode + dof_i;
669  ArrayView<const LO> AAcols;
670  ArrayView<const SC> AAvals;
671  Amat->getLocalRowView(j, AAcols, AAvals);
672  const int* Acols = AAcols.getRawPtr();
673  const SC* Avals = AAvals.getRawPtr();
674  RowLeng = AAvals.size();
675 
676  TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
677 
678  for (i = 0; i < RowLeng; i++) {
679  LayDiff[i] = Layerdofs[Acols[i]] - StartLayer - node_k + 2;
680 
681  /* This is the main spot where there might be off- */
682  /* processor communication. That is, when we */
683  /* average the stencil in the horizontal direction,*/
684  /* we need to know the layer id of some */
685  /* neighbors that might reside off-processor. */
686  }
687  PtRow = (node_k - 1) * DofsPerNode + dof_i + 1;
688  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
689  PtCol = (node_k - 1) * DofsPerNode + dof_j + 1;
690  /* Stick in entry corresponding to Mat(PtRow,PtCol) */
691  /* see dgbsv() comments for matrix format. */
692  TheSum = 0.0;
693  for (i = 0; i < RowLeng; i++) {
694  if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
695  TheSum += Avals[i];
696  }
697  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
698  BandMat[index] = TheSum;
699  if (buildRestriction) RestrictBandMat[index] = TheSum;
700  if (node_k != NStencilNodes) {
701  /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
702  /* see dgbsv() comments for matrix format. */
703  TheSum = 0.0;
704  for (i = 0; i < RowLeng; i++) {
705  if ((LayDiff[i] == 1) && (Col2Dof[Acols[i]] == dof_j))
706  TheSum += Avals[i];
707  }
708  j = PtCol + DofsPerNode;
709  index = LDAB * (j - 1) + KLU + PtRow - j;
710  BandMat[index] = TheSum;
711  if (buildRestriction) RestrictBandMat[index] = TheSum;
712  }
713  if (node_k != 1) {
714  /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
715  /* see dgbsv() comments for matrix format. */
716  TheSum = 0.0;
717  for (i = 0; i < RowLeng; i++) {
718  if ((LayDiff[i] == -1) && (Col2Dof[Acols[i]] == dof_j))
719  TheSum += Avals[i];
720  }
721  j = PtCol - DofsPerNode;
722  index = LDAB * (j - 1) + KLU + PtRow - j;
723  BandMat[index] = TheSum;
724  if (buildRestriction) RestrictBandMat[index] = TheSum;
725  }
726  }
727  }
728  }
729  node_k = MyLayer - StartLayer + 1;
730  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
731  /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
732  /* see dgbsv() comments for matrix format. */
733  PtRow = (node_k - 1) * DofsPerNode + dof_i + 1;
734  BandSol[(dof_i)*DofsPerNode * NStencilNodes + PtRow - 1] = 1.;
735  if (buildRestriction) RestrictBandSol[(dof_i)*DofsPerNode * NStencilNodes + PtRow - 1] = 1.;
736 
737  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
738  PtCol = (node_k - 1) * DofsPerNode + dof_j + 1;
739  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
740  if (PtCol == PtRow)
741  BandMat[index] = 1.0;
742  else
743  BandMat[index] = 0.0;
744  if (buildRestriction) {
745  index = LDAB * (PtRow - 1) + KLU + PtCol - PtRow;
746  if (PtCol == PtRow)
747  RestrictBandMat[index] = 1.0;
748  else
749  RestrictBandMat[index] = 0.0;
750  }
751  if (node_k != NStencilNodes) {
752  PtCol = (node_k)*DofsPerNode + dof_j + 1;
753  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
754  BandMat[index] = 0.0;
755  if (buildRestriction) {
756  index = LDAB * (PtRow - 1) + KLU + PtCol - PtRow;
757  RestrictBandMat[index] = 0.0;
758  }
759  }
760  if (node_k != 1) {
761  PtCol = (node_k - 2) * DofsPerNode + dof_j + 1;
762  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
763  BandMat[index] = 0.0;
764  if (buildRestriction) {
765  index = LDAB * (PtRow - 1) + KLU + PtCol - PtRow;
766  RestrictBandMat[index] = 0.0;
767  }
768  }
769  }
770  }
771 
772  /* Solve banded system and then stick result in Pmatrix arrays */
773 
774  lapack.GBTRF(N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
775 
776  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
777 
778  lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
779  BandSol, N, &INFO);
780 
781  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
782 
783  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
784  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
785  for (i = 1; i <= NStencilNodes; i++) {
786  index = (Sub2FullMap[i] - 1) * DofsPerNode + dof_i + 1;
787  loc = Pptr[index];
788  Pcols[loc] = (col - 1) * DofsPerNode + dof_j + 1;
789  Pvals[loc] = BandSol[dof_j * DofsPerNode * NStencilNodes +
790  (i - 1) * DofsPerNode + dof_i];
791  Pptr[index] = Pptr[index] + 1;
792  }
793  }
794  }
795  if (buildRestriction) {
796  lapack.GBTRF(N, N, KL, KU, RestrictBandMat, LDAB, IPIV, &INFO);
797  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
798  lapack.GBTRS(trans[0], N, KL, KU, NRHS, RestrictBandMat, LDAB, IPIV, RestrictBandSol, N, &INFO);
799  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
800  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
801  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
802  for (i = 1; i <= NStencilNodes; i++) {
803  index = (col - 1) * DofsPerNode + dof_j + 1;
804  loc = Rptr[index];
805  Rcols[loc] = (Sub2FullMap[i] - 1) * DofsPerNode + dof_i + 1;
806  Rvals[loc] = RestrictBandSol[dof_j * DofsPerNode * NStencilNodes +
807  (i - 1) * DofsPerNode + dof_i];
808  Rptr[index] = Rptr[index] + 1;
809  }
810  }
811  }
812  }
813  } else {
814  int denom1 = MyLayer - StartLayer + 1;
815  int denom2 = StartLayer + NStencilNodes - MyLayer;
816  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
817  for (i = 1; i <= NStencilNodes; i++) {
818  index = (InvLineLayer[MyLine + (StartLayer + i - 2) * NVertLines]) * DofsPerNode + dof_i + 1;
819  loc = Pptr[index];
820  Pcols[loc] = (col - 1) * DofsPerNode + dof_i + 1;
821  if (i > denom1)
822  Pvals[loc] = 1.0 + ((double)(denom1 - i)) / ((double)denom2);
823  else
824  Pvals[loc] = ((double)(i)) / ((double)denom1);
825  Pptr[index] = Pptr[index] + 1;
826  }
827  }
828  }
829  /* inject the null space */
830  // int FineStride = Ntotal*DofsPerNode;
831  // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
832 
833  BlkRow = InvLineLayer[MyLine + (MyLayer - 1) * NVertLines] + 1;
834  for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
835  Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
836  Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
837  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
838  OneCNull[(col - 1) * DofsPerNode + dof_i] = OneFNull[(BlkRow - 1) * DofsPerNode + dof_i];
839  }
840  }
841  }
842  }
843 
844  /*
845  * Squeeze the -1's out of the columns. At the same time convert Pcols
846  * so that now the first column is numbered '0' as opposed to '1'.
847  * Also, the arrays Pcols and Pvals should now use the zeroth element
848  * as opposed to just starting with the first element. Pptr will be
849  * fixed in the for loop below so that Pptr[0] = 0, etc.
850  */
851  CurRow = 1;
852  NewPtr = 1;
853  for (size_t ii = 1; ii <= Pptr[Ntotal * DofsPerNode] - 1; ii++) {
854  if (ii == Pptr[CurRow]) {
855  Pptr[CurRow] = LastGuy;
856  CurRow++;
857  while (ii > Pptr[CurRow]) {
858  Pptr[CurRow] = LastGuy;
859  CurRow++;
860  }
861  }
862  if (Pcols[ii] != -1) {
863  Pcols[NewPtr - 1] = Pcols[ii] - 1; /* these -1's fix the offset and */
864  Pvals[NewPtr - 1] = Pvals[ii]; /* start using the zeroth element */
865  LastGuy = NewPtr;
866  NewPtr++;
867  }
868  }
869  for (i = CurRow; i <= Ntotal * DofsPerNode; i++) Pptr[CurRow] = LastGuy;
870 
871  /* Now move the pointers so that they now point to the beginning of each
872  * row as opposed to the end of each row
873  */
874  for (i = -Ntotal * DofsPerNode + 1; i >= 2; i--) {
875  Pptr[i - 1] = Pptr[i - 2]; /* extra -1 added to start from 0 */
876  }
877  Pptr[0] = 0;
878 
879  // do the same for R
880  if (buildRestriction) {
881  CurRow = 1;
882  NewPtr = 1;
883  for (size_t ii = 1; ii <= Rptr[coarseMap->getLocalNumElements()] - 1; ii++) {
884  if (ii == Rptr[CurRow]) {
885  Rptr[CurRow] = RLastGuy;
886  CurRow++;
887  while (ii > Rptr[CurRow]) {
888  Rptr[CurRow] = RLastGuy;
889  CurRow++;
890  }
891  }
892  if (Rcols[ii] != -1) {
893  Rcols[NewPtr - 1] = Rcols[ii] - 1; /* these -1's fix the offset and */
894  Rvals[NewPtr - 1] = Rvals[ii]; /* start using the zeroth element */
895  RLastGuy = NewPtr;
896  NewPtr++;
897  }
898  }
899  for (i = CurRow; i <= ((int)coarseMap->getLocalNumElements()); i++) Rptr[CurRow] = RLastGuy;
900  for (i = -coarseMap->getLocalNumElements() + 1; i >= 2; i--) {
901  Rptr[i - 1] = Rptr[i - 2]; /* extra -1 added to start from 0 */
902  }
903  Rptr[0] = 0;
904  }
905  ArrayRCP<size_t> rcpRowPtr;
906  ArrayRCP<LO> rcpColumns;
907  ArrayRCP<SC> rcpValues;
908 
909  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
910  LO nnz = Pptr[Ndofs];
911  PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
912 
913  ArrayView<size_t> rowPtr = rcpRowPtr();
914  ArrayView<LO> columns = rcpColumns();
915  ArrayView<SC> values = rcpValues();
916 
917  // copy data over
918 
919  for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
920  for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
921  for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
922  PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
923  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
924  ArrayRCP<size_t> RrcpRowPtr;
925  ArrayRCP<LO> RrcpColumns;
926  ArrayRCP<SC> RrcpValues;
927  RCP<CrsMatrix> RCrs;
928  if (buildRestriction) {
929  R = rcp(new CrsMatrixWrap(coarseMap, rowMap, 0));
930  RCrs = rcp_dynamic_cast<CrsMatrixWrap>(R)->getCrsMatrix();
931  nnz = Rptr[coarseMap->getLocalNumElements()];
932  RCrs->allocateAllValues(nnz, RrcpRowPtr, RrcpColumns, RrcpValues);
933 
934  ArrayView<size_t> RrowPtr = RrcpRowPtr();
935  ArrayView<LO> Rcolumns = RrcpColumns();
936  ArrayView<SC> Rvalues = RrcpValues();
937 
938  // copy data over
939 
940  for (LO ii = 0; ii <= ((LO)coarseMap->getLocalNumElements()); ii++) RrowPtr[ii] = Rptr[ii];
941  for (LO ii = 0; ii < nnz; ii++) Rcolumns[ii] = Rcols[ii];
942  for (LO ii = 0; ii < nnz; ii++) Rvalues[ii] = Rvals[ii];
943  RCrs->setAllValues(RrcpRowPtr, RrcpColumns, RrcpValues);
944  RCrs->expertStaticFillComplete(Amat->getRangeMap(), coarseMap);
945  }
946 
947  return NCLayers * NVertLines * DofsPerNode;
948 }
949 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
951  // This function is a bit of a hack. We basically compute the semi-coarsening transfers and then throw
952  // away all the interpolation coefficients, instead replacing them by piecewise constants. The reason for this
953  // is that SemiCoarsening has no notion of aggregates so defining piecewise constants in the "usual way" is
954  // not possible. Instead, we look for the largest entry in each row, make it a one, and zero out the other
955  // non-zero entries
956 
959  ArrayView<Scalar> vals;
962 
963  for (size_t i = 0; i < P->getRowMap()->getLocalNumElements(); i++) {
964  P->getLocalRowView(i, inds, vals1);
965 
966  size_t nnz = inds.size();
967  if (nnz != 0) vals = ArrayView<Scalar>(const_cast<Scalar*>(vals1.getRawPtr()), nnz);
968 
969  LO largestIndex = -1;
970  Scalar largestValue = ZERO;
971  /* find largest value in row and change that one to a 1 while the others are set to 0 */
972 
973  LO rowDof = i % BlkSize;
974  for (size_t j = 0; j < nnz; j++) {
976  if (inds[j] % BlkSize == rowDof) {
977  largestValue = vals[j];
978  largestIndex = (int)j;
979  }
980  }
981  vals[j] = ZERO;
982  }
983  if (largestIndex != -1)
984  vals[largestIndex] = ONE;
985  else
986  TEUCHOS_TEST_FOR_EXCEPTION(nnz > 0, Exceptions::RuntimeError, "no nonzero column associated with a proper dof within node.");
987 
988  if (Teuchos::ScalarTraits<SC>::magnitude(largestValue) == Teuchos::ScalarTraits<SC>::magnitude(ZERO)) vals[largestIndex] = ZERO;
989  }
990 }
991 
992 } // namespace MueLu
993 
994 #define MUELU_SEMICOARSENPFACTORY_SHORT
995 #endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
GlobalOrdinal GO
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
T * getRawPtr() const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
size_type size() const
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
LocalOrdinal LO
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace, RCP< Matrix > &R, bool buildRestriction, bool doLinear) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static const NoFactory * get()
void RevertToPieceWiseConstant(RCP< Matrix > &P, LO BlkSize) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
T * getRawPtr() const
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
#define SET_VALID_ENTRY(name)
#define MaxHorNeighborNodes
Scalar SC
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:73
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception throws to report errors in the internal logical of the program.
iterator end() const
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.