MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47 #define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 
51 #include <Teuchos_LAPACK.hpp>
52 
53 #include <Xpetra_CrsMatrixWrap.hpp>
54 #include <Xpetra_ImportFactory.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 
61 
62 #include "MueLu_MasterList.hpp"
63 #include "MueLu_Monitor.hpp"
64 
65 namespace MueLu {
66 
67 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  RCP<ParameterList> validParamList = rcp(new ParameterList());
70 
71 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72  SET_VALID_ENTRY("semicoarsen: piecewise constant");
73  SET_VALID_ENTRY("semicoarsen: piecewise linear");
74  SET_VALID_ENTRY("semicoarsen: coarsen rate");
75  SET_VALID_ENTRY("semicoarsen: calculate nonsym restriction");
76 #undef SET_VALID_ENTRY
77  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
78  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
79  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");
80 
81  validParamList->set<RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection vertical line ids");
82  validParamList->set<RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection layer ids");
83  validParamList->set<RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for number of coarse z-layers");
84 
85  return validParamList;
86 }
87 
88 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
90  Input(fineLevel, "A");
91  Input(fineLevel, "Nullspace");
92 
93  Input(fineLevel, "LineDetection_VertLineIds");
94  Input(fineLevel, "LineDetection_Layers");
95  Input(fineLevel, "CoarseNumZLayers");
96 
97  // check whether fine level coordinate information is available.
98  // If yes, request the fine level coordinates and generate coarse coordinates
99  // during the Build call
100  if (fineLevel.GetLevelID() == 0) {
101  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
102  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
103  bTransferCoordinates_ = true;
104  }
105  } else if (bTransferCoordinates_ == true) {
106  // on coarser levels we check the default factory providing "Coordinates"
107  // or the factory declared to provide "Coordinates"
108  // first, check which factory is providing coordinate information
109  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
110  if (myCoordsFact == Teuchos::null) {
111  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
112  }
113  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
114  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
115  }
116  }
117 }
118 
119 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
121  return BuildP(fineLevel, coarseLevel);
122 }
123 
124 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
126  FactoryMonitor m(*this, "Build", coarseLevel);
127 
128  // obtain general variables
129  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
130  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
131 
132  // get user-provided coarsening rate parameter (constant over all levels)
133  const ParameterList& pL = GetParameterList();
134  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
135  bool buildRestriction = pL.get<bool>("semicoarsen: calculate nonsym restriction");
136  bool doLinear = pL.get<bool>("semicoarsen: piecewise linear");
137 
138  // collect general input data
139  LO BlkSize = A->GetFixedBlockSize();
140  RCP<const Map> rowMap = A->getRowMap();
141  LO Ndofs = rowMap->getLocalNumElements();
142  LO Nnodes = Ndofs / BlkSize;
143 
144  // collect line detection information generated by the LineDetectionFactory instance
145  LO FineNumZLayers = Get<LO>(fineLevel, "CoarseNumZLayers");
146  Teuchos::ArrayRCP<LO> TVertLineId = Get<Teuchos::ArrayRCP<LO> >(fineLevel, "LineDetection_VertLineIds");
147  Teuchos::ArrayRCP<LO> TLayerId = Get<Teuchos::ArrayRCP<LO> >(fineLevel, "LineDetection_Layers");
148  LO* VertLineId = TVertLineId.getRawPtr();
149  LO* LayerId = TLayerId.getRawPtr();
150 
151  // generate transfer operator with semicoarsening
152  RCP<const Map> theCoarseMap;
153  RCP<Matrix> P, R;
154  RCP<MultiVector> coarseNullspace;
155  GO Ncoarse = MakeSemiCoarsenP(Nnodes, FineNumZLayers, CoarsenRate, LayerId, VertLineId,
156  BlkSize, A, P, theCoarseMap, fineNullspace, coarseNullspace, R, buildRestriction, doLinear);
157 
158  // set StridingInformation of P
159  if (A->IsView("stridedMaps") == true)
160  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
161  else
162  P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
163 
164  if (buildRestriction) {
165  if (A->IsView("stridedMaps") == true)
166  R->CreateView("stridedMaps", theCoarseMap, A->getRowMap("stridedMaps"));
167  else
168  R->CreateView("stridedMaps", theCoarseMap, R->getDomainMap());
169  }
170  if (pL.get<bool>("semicoarsen: piecewise constant")) {
171  TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise constant.");
172  RevertToPieceWiseConstant(P, BlkSize);
173  }
174  if (pL.get<bool>("semicoarsen: piecewise linear")) {
175  TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise linear.");
176  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("semicoarsen: piecewise constant"), Exceptions::RuntimeError, "Cannot use piecewise constant with piecewise linear.");
177  }
178 
179  // Store number of coarse z-layers on the coarse level container
180  // This information is used by the LineDetectionAlgorithm
181  // TODO get rid of the NoFactory
182 
183  LO CoarseNumZLayers = FineNumZLayers * Ncoarse;
184  CoarseNumZLayers /= Ndofs;
185  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
186 
187  // store semicoarsening transfer on coarse level
188  Set(coarseLevel, "P", P);
189  if (buildRestriction) Set(coarseLevel, "RfromPfactory", R);
190 
191  Set(coarseLevel, "Nullspace", coarseNullspace);
192 
193  // transfer coordinates
194  if (bTransferCoordinates_) {
196  RCP<xdMV> fineCoords = Teuchos::null;
197  if (fineLevel.GetLevelID() == 0 &&
198  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
199  fineCoords = fineLevel.Get<RCP<xdMV> >("Coordinates", NoFactory::get());
200  } else {
201  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
202  if (myCoordsFact == Teuchos::null) {
203  myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates");
204  }
205  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
206  fineCoords = fineLevel.Get<RCP<xdMV> >("Coordinates", myCoordsFact.get());
207  }
208  }
209 
210  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
211 
212  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
213  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
214  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
215  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
216 
217  // determine the maximum and minimum z coordinate value on the current processor.
220  for (auto it = z.begin(); it != z.end(); ++it) {
221  if (*it > zval_max) zval_max = *it;
222  if (*it < zval_min) zval_min = *it;
223  }
224 
225  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
226 
227  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
228  if (myCoarseZLayers == 1) {
229  myZLayerCoords[0] = zval_min;
230  } else {
231  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max - zval_min) / (myCoarseZLayers - 1);
232  for (LO k = 0; k < myCoarseZLayers; ++k) {
233  myZLayerCoords[k] = k * dz;
234  }
235  }
236 
237  // Note, that the coarse level node coordinates have to be in vertical ordering according
238  // to the numbering of the vertical lines
239 
240  // number of vertical lines on current node:
241  LO numVertLines = Nnodes / FineNumZLayers;
242  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
243 
244  RCP<const Map> coarseCoordMap =
245  MapFactory::Build(fineCoords->getMap()->lib(),
247  Teuchos::as<size_t>(numLocalCoarseNodes),
248  fineCoords->getMap()->getIndexBase(),
249  fineCoords->getMap()->getComm());
250  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
251  coarseCoords->putScalar(-1.0);
252  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
253  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
254  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
255 
256  // loop over all vert line indices (stop as soon as possible)
257  LO cntCoarseNodes = 0;
258  for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
259  // vertical line id in *vt
260  LO curVertLineId = TVertLineId[vt];
261 
262  if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
263  cy[curVertLineId * myCoarseZLayers] == -1.0) {
264  // loop over all local myCoarseZLayers
265  for (LO n = 0; n < myCoarseZLayers; ++n) {
266  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
267  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
268  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
269  }
270  cntCoarseNodes += myCoarseZLayers;
271  }
272 
273  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
274  if (cntCoarseNodes == numLocalCoarseNodes) break;
275  }
276 
277  // set coarse level coordinates
278  Set(coarseLevel, "Coordinates", coarseCoords);
279  } /* end bool bTransferCoordinates */
280 }
281 
282 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
284  /*
285  * Given the number of points in the z direction (PtsPerLine) and a
286  * coarsening rate (CoarsenRate), determine which z-points will serve
287  * as Cpts and return the total number of Cpts.
288  *
289  * Input
290  * PtsPerLine: Number of fine level points in the z direction
291  *
292  * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
293  *
294  * Thin: Must be either 0 or 1. Thin decides what to do when
295  * (PtsPerLine+1)/CoarsenRate is not an integer.
296  *
297  * Thin == 0 ==> ceil() the above fraction
298  * Thin == 1 ==> floor() the above fraction
299  *
300  * Output
301  * LayerCpts Array where LayerCpts[i] indicates that the
302  * LayerCpts[i]th fine level layer is a Cpt Layer.
303  * Note: fine level layers are assumed to be numbered starting
304  * a one.
305  */
306  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
307  LO NCpts, i;
308  LO NCLayers = -1;
309  LO FirstStride;
310 
311  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(PtsPerLine + 1)) / ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(CoarsenRate)) - 1.0;
312  if (Thin == 1)
313  NCpts = (LO)ceil(temp);
314  else
315  NCpts = (LO)floor(temp);
316 
317  TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
318 
319  if (NCpts < 1) NCpts = 1;
320 
321  FirstStride = (LO)ceil(((typename Teuchos::ScalarTraits<Scalar>::coordinateType)PtsPerLine + 1) / ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(NCpts + 1)));
322  RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(PtsPerLine - FirstStride + 1)) / ((typename Teuchos::ScalarTraits<Scalar>::coordinateType)NCpts);
323 
324  NCLayers = (LO)floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType)(PtsPerLine - FirstStride + 1)) / RestStride) + .00001);
325 
326  TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
327 
328  di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType)FirstStride;
329  for (i = 1; i <= NCpts; i++) {
330  (*LayerCpts)[i] = (LO)floor(di);
331  di += RestStride;
332  }
333 
334  return (NCLayers);
335 }
336 
337 #define MaxHorNeighborNodes 75
338 
339 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
341  MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
342  LO const VertLineId[], LO const DofsPerNode, RCP<Matrix>& Amat, RCP<Matrix>& P,
343  RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
344  RCP<MultiVector>& coarseNullspace, RCP<Matrix>& R, bool buildRestriction, bool doLinear) const {
345  /*
346  * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
347  * describing the z-layer and vertical line (LayerId and VertLineId)
348  * of each matrix block row, a coarsening rate, and dofs/node information,
349  * construct a prolongator that coarsening to semicoarsening in the z-direction
350  * using something like an operator-dependent grid transfer. In particular,
351  * matrix stencils are collapsed to vertical lines. Thus, each vertical line
352  * gives rise to a block tridiagonal matrix. BlkRows corresponding to
353  * Cpts are replaced by identity matrices. This tridiagonal is solved
354  * to determine each interpolation basis functions. Each Blk Rhs corresponds
355  * to all zeros except at the corresponding C-pt which has an identity
356  *
357  * On termination, return the number of local prolongator columns owned by
358  * this processor.
359  *
360  * Note: This code was adapted from a matlab code where offsets/arrays
361  * start from 1. In most parts of the code, this 1 offset is kept
362  * (in some cases wasting the first element of the array). The
363  * input and output matrices of this function has been changed to
364  * have offsets/rows/columns which start from 0. LayerId[] and
365  * VertLineId[] currently start from 1.
366  *
367  * Input
368  * =====
369  * Ntotal Number of fine level Blk Rows owned by this processor
370  *
371  * nz Number of vertical layers. Note: partitioning must be done
372  * so that each processor owns an entire vertical line. This
373  * means that nz is the global number of layers, which should
374  * be equal to the local number of layers.
375  * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
376  * would correspond to CoarsenRate = 3.
377  * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
378  * layer number associated with the dofs within BlkRow.
379  * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
380  * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
381  * BlkRows associated with nodes along the same vertical line
382  * in the mesh should have the same LineId.
383  * DofsPerNode Number of degrees-of-freedom per mesh node.
384  *
385  * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
386  * OrigAcols,
387  * OrigAvals
388  *
389  * Output
390  * =====
391  * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
392  * ParamPcols,
393  * ParamsPvals
394  */
395  int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
396  int *InvLineLayer = NULL, *CptLayers = NULL, StartLayer, NStencilNodes;
397  int BlkRow = -1, dof_j, node_k, *Sub2FullMap = NULL, RowLeng;
398  int i, j, iii, col, count, index, loc, PtRow, PtCol;
399  SC *BandSol = NULL, *BandMat = NULL, TheSum, *RestrictBandMat = NULL, *RestrictBandSol = NULL;
400  int *IPIV = NULL, KL, KU, KLU, N, NRHS, LDAB, INFO;
401  int* Pcols;
402  size_t* Pptr;
403  SC* Pvals;
404  LO* Rcols = NULL;
405  size_t* Rptr = NULL;
406  SC* Rvals = NULL;
407  int MaxStencilSize, MaxNnzPerRow;
408  LO* LayDiff = NULL;
409  int CurRow, LastGuy = -1, NewPtr, RLastGuy = -1;
410  int Ndofs;
411  int Nghost;
412  LO *Layerdofs = NULL, *Col2Dof = NULL;
413 
415 
416  char notrans[3];
417  notrans[0] = 'N';
418  notrans[1] = 'N';
419  char trans[3];
420  trans[0] = 'T';
421  trans[1] = 'T';
422 
423  MaxNnzPerRow = MaxHorNeighborNodes * DofsPerNode * 3;
424  Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1 + MaxNnzPerRow);
425  LayDiff = TLayDiff.getRawPtr();
426 
427  Nghost = Amat->getColMap()->getLocalNumElements() - Amat->getDomainMap()->getLocalNumElements();
428  if (Nghost < 0) Nghost = 0;
429  Teuchos::ArrayRCP<LO> TLayerdofs = Teuchos::arcp<LO>(Ntotal * DofsPerNode + Nghost + 1);
430  Layerdofs = TLayerdofs.getRawPtr();
431  Teuchos::ArrayRCP<LO> TCol2Dof = Teuchos::arcp<LO>(Ntotal * DofsPerNode + Nghost + 1);
432  Col2Dof = TCol2Dof.getRawPtr();
433 
436  ArrayRCP<LO> valptr = localdtemp->getDataNonConst(0);
437 
438  for (i = 0; i < Ntotal * DofsPerNode; i++)
439  valptr[i] = LayerId[i / DofsPerNode];
440  valptr = ArrayRCP<LO>();
441 
442  RCP<const Import> importer;
443  importer = Amat->getCrsGraph()->getImporter();
444  if (importer == Teuchos::null) {
445  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
446  }
447  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
448 
449  valptr = dtemp->getDataNonConst(0);
450  for (i = 0; i < Ntotal * DofsPerNode + Nghost; i++) Layerdofs[i] = valptr[i];
451  valptr = localdtemp->getDataNonConst(0);
452  for (i = 0; i < Ntotal * DofsPerNode; i++) valptr[i] = i % DofsPerNode;
453  valptr = ArrayRCP<LO>();
454  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
455 
456  valptr = dtemp->getDataNonConst(0);
457  for (i = 0; i < Ntotal * DofsPerNode + Nghost; i++) Col2Dof[i] = valptr[i];
458  valptr = ArrayRCP<LO>();
459 
460  if (Ntotal != 0) {
461  NLayers = LayerId[0];
462  NVertLines = VertLineId[0];
463  } else {
464  NLayers = -1;
465  NVertLines = -1;
466  }
467 
468  for (i = 1; i < Ntotal; i++) {
469  if (VertLineId[i] > NVertLines) NVertLines = VertLineId[i];
470  if (LayerId[i] > NLayers) NLayers = LayerId[i];
471  }
472  NLayers++;
473  NVertLines++;
474 
475  /*
476  * Make an inverse map so that we can quickly find the dof
477  * associated with a particular vertical line and layer.
478  */
479 
480  Teuchos::ArrayRCP<LO> TInvLineLayer = Teuchos::arcp<LO>(1 + NVertLines * NLayers);
481  InvLineLayer = TInvLineLayer.getRawPtr();
482  for (i = 0; i < Ntotal; i++) {
483  InvLineLayer[VertLineId[i] + 1 + LayerId[i] * NVertLines] = i;
484  }
485 
486  /*
487  * Determine coarse layers where injection will be applied.
488  */
489 
490  Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz + 1);
491  CptLayers = TCptLayers.getRawPtr();
492  NCLayers = FindCpts(nz, CoarsenRate, 0, &CptLayers);
493 
494  /*
495  * Compute the largest possible interpolation stencil width based
496  * on the location of the Clayers. This stencil width is actually
497  * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
498  * one needs to multiply this by DofsPerNode.
499  */
500 
501  if (NCLayers < 2)
502  MaxStencilSize = nz;
503  else
504  MaxStencilSize = CptLayers[2];
505 
506  for (i = 3; i <= NCLayers; i++) {
507  if (MaxStencilSize < CptLayers[i] - CptLayers[i - 2])
508  MaxStencilSize = CptLayers[i] - CptLayers[i - 2];
509  }
510  if (NCLayers > 1) {
511  if (MaxStencilSize < nz - CptLayers[NCLayers - 1] + 1)
512  MaxStencilSize = nz - CptLayers[NCLayers - 1] + 1;
513  }
514 
515  /*
516  * Allocate storage associated with solving a banded sub-matrix needed to
517  * determine the interpolation stencil. Note: we compute interpolation stencils
518  * for all dofs within a node at the same time, and so the banded solution
519  * must be large enough to hold all DofsPerNode simultaneously.
520  */
521 
522  Teuchos::ArrayRCP<LO> TSub2FullMap = Teuchos::arcp<LO>((MaxStencilSize + 1) * DofsPerNode);
523  Sub2FullMap = TSub2FullMap.getRawPtr();
524  Teuchos::ArrayRCP<SC> TBandSol = Teuchos::arcp<SC>((MaxStencilSize + 1) * DofsPerNode * DofsPerNode);
525  BandSol = TBandSol.getRawPtr();
526  Teuchos::ArrayRCP<SC> TResBandSol;
527  if (buildRestriction) {
528  TResBandSol = Teuchos::arcp<SC>((MaxStencilSize + 1) * DofsPerNode * DofsPerNode);
529  RestrictBandSol = TResBandSol.getRawPtr();
530  }
531  /*
532  * Lapack variables. See comments for dgbsv().
533  */
534  KL = 2 * DofsPerNode - 1;
535  KU = 2 * DofsPerNode - 1;
536  KLU = KL + KU;
537  LDAB = 2 * KL + KU + 1;
538  NRHS = DofsPerNode;
539  Teuchos::ArrayRCP<SC> TBandMat = Teuchos::arcp<SC>(LDAB * MaxStencilSize * DofsPerNode + 1);
540  BandMat = TBandMat.getRawPtr();
541  Teuchos::ArrayRCP<LO> TIPIV = Teuchos::arcp<LO>((MaxStencilSize + 1) * DofsPerNode);
542  IPIV = TIPIV.getRawPtr();
543 
544  Teuchos::ArrayRCP<SC> TResBandMat;
545  if (buildRestriction) {
546  TResBandMat = Teuchos::arcp<SC>(LDAB * MaxStencilSize * DofsPerNode + 1);
547  RestrictBandMat = TResBandMat.getRawPtr();
548  }
549 
550  /*
551  * Allocate storage for the final interpolation matrix. Note: each prolongator
552  * row might have entries corresponding to at most two nodes.
553  * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
554  * nnz per prolongator row is DofsPerNode*2.
555  */
556 
557  Ndofs = DofsPerNode * Ntotal;
558  MaxNnz = 2 * DofsPerNode * Ndofs;
559 
560  RCP<const Map> rowMap = Amat->getRowMap();
561  Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
562 
563  std::vector<size_t> stridingInfo_;
564  stridingInfo_.push_back(DofsPerNode);
565 
566  Xpetra::global_size_t itemp = GNdofs / nz;
567  coarseMap = StridedMapFactory::Build(rowMap->lib(),
568  NCLayers * itemp,
569  NCLayers * NVertLines * DofsPerNode,
570  0, /* index base */
571  stridingInfo_,
572  rowMap->getComm(),
573  -1, /* strided block id */
574  0 /* domain gid offset */);
575 
576  // coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
577 
578  P = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0));
579  coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
580 
581  Teuchos::ArrayRCP<SC> TPvals = Teuchos::arcp<SC>(1 + MaxNnz);
582  Pvals = TPvals.getRawPtr();
583  Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode * (2 + Ntotal));
584  Pptr = TPptr.getRawPtr();
585  Teuchos::ArrayRCP<LO> TPcols = Teuchos::arcp<LO>(1 + MaxNnz);
586  Pcols = TPcols.getRawPtr();
587 
588  TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
589  Pptr[0] = 0;
590  Pptr[1] = 0;
591 
592  Teuchos::ArrayRCP<SC> TRvals;
594  Teuchos::ArrayRCP<LO> TRcols;
595  LO RmaxNnz = -1, RmaxPerRow = -1;
596  if (buildRestriction) {
597  RmaxPerRow = ((LO)ceil(((double)(MaxNnz + 7)) / ((double)(coarseMap->getLocalNumElements()))));
598  RmaxNnz = RmaxPerRow * coarseMap->getLocalNumElements();
599  TRvals = Teuchos::arcp<SC>(1 + RmaxNnz);
600  Rvals = TRvals.getRawPtr();
601  TRptr = Teuchos::arcp<size_t>((2 + coarseMap->getLocalNumElements()));
602  Rptr = TRptr.getRawPtr();
603  TRcols = Teuchos::arcp<LO>(1 + RmaxNnz);
604  Rcols = TRcols.getRawPtr();
605  TEUCHOS_TEST_FOR_EXCEPTION(Rcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
606  Rptr[0] = 0;
607  Rptr[1] = 0;
608  }
609  /*
610  * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
611  * This will be useful while filling up P, and then later we will squeeze out
612  * the unused nonzeros locations.
613  */
614 
615  for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
616  count = 1;
617  for (i = 1; i <= DofsPerNode * Ntotal + 1; i++) {
618  Pptr[i] = count;
619  count += (2 * DofsPerNode);
620  }
621  if (buildRestriction) {
622  for (i = 1; i <= RmaxNnz; i++) Rcols[i] = -1; /* mark all entries as unused */
623  count = 1;
624  for (i = 1; i <= (int)(coarseMap->getLocalNumElements() + 1); i++) {
625  Rptr[i] = count;
626  count += RmaxPerRow;
627  }
628  }
629 
630  /*
631  * Build P column by column. The 1st block column corresponds to the 1st coarse
632  * layer and the first line. The 2nd block column corresponds to the 2nd coarse
633  * layer and the first line. The NCLayers+1 block column corresponds to the
634  * 1st coarse layer and the 2nd line, etc.
635  */
636 
637  col = 0;
638  for (MyLine = 1; MyLine <= NVertLines; MyLine += 1) {
639  for (iii = 1; iii <= NCLayers; iii += 1) {
640  col = col + 1;
641  MyLayer = CptLayers[iii];
642 
643  /*
644  * StartLayer gives the layer number of the lowest layer that
645  * is nonzero in the interpolation stencil that is currently
646  * being computed. Normally, if we are not near a boundary this
647  * is simply CptsLayers[iii-1]+1.
648  *
649  * NStencilNodes indicates the number of nonzero nodes in the
650  * interpolation stencil that is currently being computed. Normally,
651  * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
652  */
653 
654  if (iii != 1)
655  StartLayer = CptLayers[iii - 1] + 1;
656  else
657  StartLayer = 1;
658 
659  if (iii != NCLayers)
660  NStencilNodes = CptLayers[iii + 1] - StartLayer;
661  else
662  NStencilNodes = NLayers - StartLayer + 1;
663 
664  N = NStencilNodes * DofsPerNode;
665 
666  /*
667  * dgbsv() does not require that the first KL rows be initialized,
668  * so we could avoid zeroing out some entries?
669  */
670 
671  for (i = 0; i < NStencilNodes * DofsPerNode * DofsPerNode; i++) BandSol[i] = 0.0;
672  for (i = 0; i < LDAB * N; i++) BandMat[i] = 0.0;
673  if (buildRestriction) {
674  for (i = 0; i < NStencilNodes * DofsPerNode * DofsPerNode; i++) RestrictBandSol[i] = 0.0;
675  for (i = 0; i < LDAB * N; i++) RestrictBandMat[i] = 0.0;
676  }
677 
678  /*
679  * Fill BandMat and BandSol (which is initially the rhs) for each
680  * node in the interpolation stencil that is being computed.
681  */
682 
683  if (!doLinear) {
684  for (node_k = 1; node_k <= NStencilNodes; node_k++) {
685  /* Map a Line and Layer number to a BlkRow in the fine level matrix
686  * and record the mapping from the sub-system to the BlkRow of the
687  * fine level matrix.
688  */
689  BlkRow = InvLineLayer[MyLine + (StartLayer + node_k - 2) * NVertLines] + 1;
690  Sub2FullMap[node_k] = BlkRow;
691 
692  /* Two cases:
693  * 1) the current layer is not a Cpoint layer. In this case we
694  * want to basically stick the matrix couplings to other
695  * nonzero stencil rows into the band matrix. One way to do
696  * this is to include couplings associated with only MyLine
697  * and ignore all the other couplings. However, what we do
698  * instead is to sum all the coupling at each layer participating
699  * in this interpolation stencil and stick this sum into BandMat.
700  * 2) the current layer is a Cpoint layer and so we
701  * stick an identity block in BandMat and rhs.
702  */
703  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
704  j = (BlkRow - 1) * DofsPerNode + dof_i;
705  ArrayView<const LO> AAcols;
706  ArrayView<const SC> AAvals;
707  Amat->getLocalRowView(j, AAcols, AAvals);
708  const int* Acols = AAcols.getRawPtr();
709  const SC* Avals = AAvals.getRawPtr();
710  RowLeng = AAvals.size();
711 
712  TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
713 
714  for (i = 0; i < RowLeng; i++) {
715  LayDiff[i] = Layerdofs[Acols[i]] - StartLayer - node_k + 2;
716 
717  /* This is the main spot where there might be off- */
718  /* processor communication. That is, when we */
719  /* average the stencil in the horizontal direction,*/
720  /* we need to know the layer id of some */
721  /* neighbors that might reside off-processor. */
722  }
723  PtRow = (node_k - 1) * DofsPerNode + dof_i + 1;
724  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
725  PtCol = (node_k - 1) * DofsPerNode + dof_j + 1;
726  /* Stick in entry corresponding to Mat(PtRow,PtCol) */
727  /* see dgbsv() comments for matrix format. */
728  TheSum = 0.0;
729  for (i = 0; i < RowLeng; i++) {
730  if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
731  TheSum += Avals[i];
732  }
733  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
734  BandMat[index] = TheSum;
735  if (buildRestriction) RestrictBandMat[index] = TheSum;
736  if (node_k != NStencilNodes) {
737  /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
738  /* see dgbsv() comments for matrix format. */
739  TheSum = 0.0;
740  for (i = 0; i < RowLeng; i++) {
741  if ((LayDiff[i] == 1) && (Col2Dof[Acols[i]] == dof_j))
742  TheSum += Avals[i];
743  }
744  j = PtCol + DofsPerNode;
745  index = LDAB * (j - 1) + KLU + PtRow - j;
746  BandMat[index] = TheSum;
747  if (buildRestriction) RestrictBandMat[index] = TheSum;
748  }
749  if (node_k != 1) {
750  /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
751  /* see dgbsv() comments for matrix format. */
752  TheSum = 0.0;
753  for (i = 0; i < RowLeng; i++) {
754  if ((LayDiff[i] == -1) && (Col2Dof[Acols[i]] == dof_j))
755  TheSum += Avals[i];
756  }
757  j = PtCol - DofsPerNode;
758  index = LDAB * (j - 1) + KLU + PtRow - j;
759  BandMat[index] = TheSum;
760  if (buildRestriction) RestrictBandMat[index] = TheSum;
761  }
762  }
763  }
764  }
765  node_k = MyLayer - StartLayer + 1;
766  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
767  /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
768  /* see dgbsv() comments for matrix format. */
769  PtRow = (node_k - 1) * DofsPerNode + dof_i + 1;
770  BandSol[(dof_i)*DofsPerNode * NStencilNodes + PtRow - 1] = 1.;
771  if (buildRestriction) RestrictBandSol[(dof_i)*DofsPerNode * NStencilNodes + PtRow - 1] = 1.;
772 
773  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
774  PtCol = (node_k - 1) * DofsPerNode + dof_j + 1;
775  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
776  if (PtCol == PtRow)
777  BandMat[index] = 1.0;
778  else
779  BandMat[index] = 0.0;
780  if (buildRestriction) {
781  index = LDAB * (PtRow - 1) + KLU + PtCol - PtRow;
782  if (PtCol == PtRow)
783  RestrictBandMat[index] = 1.0;
784  else
785  RestrictBandMat[index] = 0.0;
786  }
787  if (node_k != NStencilNodes) {
788  PtCol = (node_k)*DofsPerNode + dof_j + 1;
789  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
790  BandMat[index] = 0.0;
791  if (buildRestriction) {
792  index = LDAB * (PtRow - 1) + KLU + PtCol - PtRow;
793  RestrictBandMat[index] = 0.0;
794  }
795  }
796  if (node_k != 1) {
797  PtCol = (node_k - 2) * DofsPerNode + dof_j + 1;
798  index = LDAB * (PtCol - 1) + KLU + PtRow - PtCol;
799  BandMat[index] = 0.0;
800  if (buildRestriction) {
801  index = LDAB * (PtRow - 1) + KLU + PtCol - PtRow;
802  RestrictBandMat[index] = 0.0;
803  }
804  }
805  }
806  }
807 
808  /* Solve banded system and then stick result in Pmatrix arrays */
809 
810  lapack.GBTRF(N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
811 
812  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
813 
814  lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
815  BandSol, N, &INFO);
816 
817  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
818 
819  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
820  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
821  for (i = 1; i <= NStencilNodes; i++) {
822  index = (Sub2FullMap[i] - 1) * DofsPerNode + dof_i + 1;
823  loc = Pptr[index];
824  Pcols[loc] = (col - 1) * DofsPerNode + dof_j + 1;
825  Pvals[loc] = BandSol[dof_j * DofsPerNode * NStencilNodes +
826  (i - 1) * DofsPerNode + dof_i];
827  Pptr[index] = Pptr[index] + 1;
828  }
829  }
830  }
831  if (buildRestriction) {
832  lapack.GBTRF(N, N, KL, KU, RestrictBandMat, LDAB, IPIV, &INFO);
833  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
834  lapack.GBTRS(trans[0], N, KL, KU, NRHS, RestrictBandMat, LDAB, IPIV, RestrictBandSol, N, &INFO);
835  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
836  for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
837  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
838  for (i = 1; i <= NStencilNodes; i++) {
839  index = (col - 1) * DofsPerNode + dof_j + 1;
840  loc = Rptr[index];
841  Rcols[loc] = (Sub2FullMap[i] - 1) * DofsPerNode + dof_i + 1;
842  Rvals[loc] = RestrictBandSol[dof_j * DofsPerNode * NStencilNodes +
843  (i - 1) * DofsPerNode + dof_i];
844  Rptr[index] = Rptr[index] + 1;
845  }
846  }
847  }
848  }
849  } else {
850  int denom1 = MyLayer - StartLayer + 1;
851  int denom2 = StartLayer + NStencilNodes - MyLayer;
852  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
853  for (i = 1; i <= NStencilNodes; i++) {
854  index = (InvLineLayer[MyLine + (StartLayer + i - 2) * NVertLines]) * DofsPerNode + dof_i + 1;
855  loc = Pptr[index];
856  Pcols[loc] = (col - 1) * DofsPerNode + dof_i + 1;
857  if (i > denom1)
858  Pvals[loc] = 1.0 + ((double)(denom1 - i)) / ((double)denom2);
859  else
860  Pvals[loc] = ((double)(i)) / ((double)denom1);
861  Pptr[index] = Pptr[index] + 1;
862  }
863  }
864  }
865  /* inject the null space */
866  // int FineStride = Ntotal*DofsPerNode;
867  // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
868 
869  BlkRow = InvLineLayer[MyLine + (MyLayer - 1) * NVertLines] + 1;
870  for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
871  Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
872  Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
873  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
874  OneCNull[(col - 1) * DofsPerNode + dof_i] = OneFNull[(BlkRow - 1) * DofsPerNode + dof_i];
875  }
876  }
877  }
878  }
879 
880  /*
881  * Squeeze the -1's out of the columns. At the same time convert Pcols
882  * so that now the first column is numbered '0' as opposed to '1'.
883  * Also, the arrays Pcols and Pvals should now use the zeroth element
884  * as opposed to just starting with the first element. Pptr will be
885  * fixed in the for loop below so that Pptr[0] = 0, etc.
886  */
887  CurRow = 1;
888  NewPtr = 1;
889  for (size_t ii = 1; ii <= Pptr[Ntotal * DofsPerNode] - 1; ii++) {
890  if (ii == Pptr[CurRow]) {
891  Pptr[CurRow] = LastGuy;
892  CurRow++;
893  while (ii > Pptr[CurRow]) {
894  Pptr[CurRow] = LastGuy;
895  CurRow++;
896  }
897  }
898  if (Pcols[ii] != -1) {
899  Pcols[NewPtr - 1] = Pcols[ii] - 1; /* these -1's fix the offset and */
900  Pvals[NewPtr - 1] = Pvals[ii]; /* start using the zeroth element */
901  LastGuy = NewPtr;
902  NewPtr++;
903  }
904  }
905  for (i = CurRow; i <= Ntotal * DofsPerNode; i++) Pptr[CurRow] = LastGuy;
906 
907  /* Now move the pointers so that they now point to the beginning of each
908  * row as opposed to the end of each row
909  */
910  for (i = -Ntotal * DofsPerNode + 1; i >= 2; i--) {
911  Pptr[i - 1] = Pptr[i - 2]; /* extra -1 added to start from 0 */
912  }
913  Pptr[0] = 0;
914 
915  // do the same for R
916  if (buildRestriction) {
917  CurRow = 1;
918  NewPtr = 1;
919  for (size_t ii = 1; ii <= Rptr[coarseMap->getLocalNumElements()] - 1; ii++) {
920  if (ii == Rptr[CurRow]) {
921  Rptr[CurRow] = RLastGuy;
922  CurRow++;
923  while (ii > Rptr[CurRow]) {
924  Rptr[CurRow] = RLastGuy;
925  CurRow++;
926  }
927  }
928  if (Rcols[ii] != -1) {
929  Rcols[NewPtr - 1] = Rcols[ii] - 1; /* these -1's fix the offset and */
930  Rvals[NewPtr - 1] = Rvals[ii]; /* start using the zeroth element */
931  RLastGuy = NewPtr;
932  NewPtr++;
933  }
934  }
935  for (i = CurRow; i <= ((int)coarseMap->getLocalNumElements()); i++) Rptr[CurRow] = RLastGuy;
936  for (i = -coarseMap->getLocalNumElements() + 1; i >= 2; i--) {
937  Rptr[i - 1] = Rptr[i - 2]; /* extra -1 added to start from 0 */
938  }
939  Rptr[0] = 0;
940  }
941  ArrayRCP<size_t> rcpRowPtr;
942  ArrayRCP<LO> rcpColumns;
943  ArrayRCP<SC> rcpValues;
944 
945  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
946  LO nnz = Pptr[Ndofs];
947  PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
948 
949  ArrayView<size_t> rowPtr = rcpRowPtr();
950  ArrayView<LO> columns = rcpColumns();
951  ArrayView<SC> values = rcpValues();
952 
953  // copy data over
954 
955  for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
956  for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
957  for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
958  PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
959  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
960  ArrayRCP<size_t> RrcpRowPtr;
961  ArrayRCP<LO> RrcpColumns;
962  ArrayRCP<SC> RrcpValues;
963  RCP<CrsMatrix> RCrs;
964  if (buildRestriction) {
965  R = rcp(new CrsMatrixWrap(coarseMap, rowMap, 0));
966  RCrs = rcp_dynamic_cast<CrsMatrixWrap>(R)->getCrsMatrix();
967  nnz = Rptr[coarseMap->getLocalNumElements()];
968  RCrs->allocateAllValues(nnz, RrcpRowPtr, RrcpColumns, RrcpValues);
969 
970  ArrayView<size_t> RrowPtr = RrcpRowPtr();
971  ArrayView<LO> Rcolumns = RrcpColumns();
972  ArrayView<SC> Rvalues = RrcpValues();
973 
974  // copy data over
975 
976  for (LO ii = 0; ii <= ((LO)coarseMap->getLocalNumElements()); ii++) RrowPtr[ii] = Rptr[ii];
977  for (LO ii = 0; ii < nnz; ii++) Rcolumns[ii] = Rcols[ii];
978  for (LO ii = 0; ii < nnz; ii++) Rvalues[ii] = Rvals[ii];
979  RCrs->setAllValues(RrcpRowPtr, RrcpColumns, RrcpValues);
980  RCrs->expertStaticFillComplete(Amat->getRangeMap(), coarseMap);
981  }
982 
983  return NCLayers * NVertLines * DofsPerNode;
984 }
985 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
987  // This function is a bit of a hack. We basically compute the semi-coarsening transfers and then throw
988  // away all the interpolation coefficients, instead replacing them by piecewise constants. The reason for this
989  // is that SemiCoarsening has no notion of aggregates so defining piecewise constants in the "usual way" is
990  // not possible. Instead, we look for the largest entry in each row, make it a one, and zero out the other
991  // non-zero entries
992 
995  ArrayView<Scalar> vals;
998 
999  for (size_t i = 0; i < P->getRowMap()->getLocalNumElements(); i++) {
1000  P->getLocalRowView(i, inds, vals1);
1001 
1002  size_t nnz = inds.size();
1003  if (nnz != 0) vals = ArrayView<Scalar>(const_cast<Scalar*>(vals1.getRawPtr()), nnz);
1004 
1005  LO largestIndex = -1;
1006  Scalar largestValue = ZERO;
1007  /* find largest value in row and change that one to a 1 while the others are set to 0 */
1008 
1009  LO rowDof = i % BlkSize;
1010  for (size_t j = 0; j < nnz; j++) {
1012  if (inds[j] % BlkSize == rowDof) {
1013  largestValue = vals[j];
1014  largestIndex = (int)j;
1015  }
1016  }
1017  vals[j] = ZERO;
1018  }
1019  if (largestIndex != -1)
1020  vals[largestIndex] = ONE;
1021  else
1022  TEUCHOS_TEST_FOR_EXCEPTION(nnz > 0, Exceptions::RuntimeError, "no nonzero column associated with a proper dof within node.");
1023 
1024  if (Teuchos::ScalarTraits<SC>::magnitude(largestValue) == Teuchos::ScalarTraits<SC>::magnitude(ZERO)) vals[largestIndex] = ZERO;
1025  }
1026 }
1027 
1028 } // namespace MueLu
1029 
1030 #define MUELU_SEMICOARSENPFACTORY_SHORT
1031 #endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
MueLu::DefaultLocalOrdinal LocalOrdinal
GlobalOrdinal GO
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
T * getRawPtr() const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
size_type size() const
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
LocalOrdinal LO
T * get() const
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace, RCP< Matrix > &R, bool buildRestriction, bool doLinear) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static const NoFactory * get()
void RevertToPieceWiseConstant(RCP< Matrix > &P, LO BlkSize) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultScalar Scalar
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
T * getRawPtr() const
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
#define SET_VALID_ENTRY(name)
#define MaxHorNeighborNodes
Scalar SC
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
Exception throws to report errors in the internal logical of the program.
iterator end() const
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.