MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47 #define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 
51 #include <Teuchos_LAPACK.hpp>
52 
53 #include <Xpetra_CrsMatrixWrap.hpp>
54 #include <Xpetra_ImportFactory.hpp>
55 #include <Xpetra_Matrix.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 
61 
62 #include "MueLu_MasterList.hpp"
63 #include "MueLu_Monitor.hpp"
64 
65 namespace MueLu {
66 
67  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  RCP<ParameterList> validParamList = rcp(new ParameterList());
70 
71 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72  SET_VALID_ENTRY("semicoarsen: coarsen rate");
73 #undef SET_VALID_ENTRY
74  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
75  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
76  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
77 
78  validParamList->set< RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection information");
79  validParamList->set< RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection information");
80  validParamList->set< RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for LineDetection information");
81 
82  return validParamList;
83  }
84 
85  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
87  Input(fineLevel, "A");
88  Input(fineLevel, "Nullspace");
89 
90  Input(fineLevel, "LineDetection_VertLineIds");
91  Input(fineLevel, "LineDetection_Layers");
92  Input(fineLevel, "CoarseNumZLayers");
93 
94  // check whether fine level coordinate information is available.
95  // If yes, request the fine level coordinates and generate coarse coordinates
96  // during the Build call
97  if (fineLevel.GetLevelID() == 0) {
98  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
99  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
100  bTransferCoordinates_ = true;
101  }
102  } else if (bTransferCoordinates_ == true){
103  // on coarser levels we check the default factory providing "Coordinates"
104  // or the factory declared to provide "Coordinates"
105  // first, check which factory is providing coordinate information
106  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
107  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
108  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
109  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
110  bTransferCoordinates_ = true;
111  }
112  }
113  }
114 
115  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117  return BuildP(fineLevel, coarseLevel);
118  }
119 
120  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
122  FactoryMonitor m(*this, "Build", coarseLevel);
123 
124  // obtain general variables
125  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
126  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
127 
128  // get user-provided coarsening rate parameter (constant over all levels)
129  const ParameterList& pL = GetParameterList();
130  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
131 
132  // collect general input data
133  LO BlkSize = A->GetFixedBlockSize();
134  RCP<const Map> rowMap = A->getRowMap();
135  LO Ndofs = rowMap->getNodeNumElements();
136  LO Nnodes = Ndofs/BlkSize;
137 
138  // collect line detection information generated by the LineDetectionFactory instance
139  LO FineNumZLayers = Get< LO >(fineLevel, "CoarseNumZLayers");
140  Teuchos::ArrayRCP<LO> TVertLineId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_VertLineIds");
141  Teuchos::ArrayRCP<LO> TLayerId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_Layers");
142  LO* VertLineId = TVertLineId.getRawPtr();
143  LO* LayerId = TLayerId.getRawPtr();
144 
145  // generate transfer operator with semicoarsening
146  RCP<const Map> theCoarseMap;
147  RCP<Matrix> P;
148  RCP<MultiVector> coarseNullspace;
149  GO Ncoarse = MakeSemiCoarsenP(Nnodes,FineNumZLayers,CoarsenRate,LayerId,VertLineId,
150  BlkSize, A, P, theCoarseMap, fineNullspace,coarseNullspace);
151 
152  // set StridingInformation of P
153  if (A->IsView("stridedMaps") == true)
154  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
155  else
156  P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
157 
158  // Store number of coarse z-layers on the coarse level container
159  // This information is used by the LineDetectionAlgorithm
160  // TODO get rid of the NoFactory
161 
162  LO CoarseNumZLayers = FineNumZLayers*Ncoarse;
163  CoarseNumZLayers /= Ndofs;
164  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
165 
166  // store semicoarsening transfer on coarse level
167  Set(coarseLevel, "P", P);
168 
169  Set(coarseLevel, "Nullspace", coarseNullspace);
170 
171  // transfer coordinates
172  if(bTransferCoordinates_) {
173  //Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
175  RCP<xdMV> fineCoords = Teuchos::null;
176  if (fineLevel.GetLevelID() == 0 &&
177  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
178  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", NoFactory::get());
179  } else {
180  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
181  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
182  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
183  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", myCoordsFact.get());
184  }
185  }
186 
187  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords==Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
188 
189  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
190  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
191  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
192  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
193 
194  // determine the maximum and minimum z coordinate value on the current processor.
197  for ( auto it = z.begin(); it != z.end(); ++it) {
198  if(*it > zval_max) zval_max = *it;
199  if(*it < zval_min) zval_min = *it;
200  }
201 
202  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
203 
204  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
205  if(myCoarseZLayers == 1) {
206  myZLayerCoords[0] = zval_min;
207  } else {
208  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max-zval_min)/(myCoarseZLayers-1);
209  for(LO k = 0; k<myCoarseZLayers; ++k) {
210  myZLayerCoords[k] = k*dz;
211  }
212  }
213 
214  // Note, that the coarse level node coordinates have to be in vertical ordering according
215  // to the numbering of the vertical lines
216 
217  // number of vertical lines on current node:
218  LO numVertLines = Nnodes / FineNumZLayers;
219  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
220 
221  //std::cout << "rowMap elements: " << rowMap->getNodeNumElements() << std::endl;
222  //std::cout << "fineCoords: " << fineCoords->getNodeNumElements() << std::endl;
223  //std::cout << "TVertLineId.size(): " << TVertLineId.size() << std::endl;
224  //std::cout << "numVertLines=" << numVertLines << std::endl;
225  //std::cout << "numLocalCoarseNodes=" << numLocalCoarseNodes << std::endl;
226 
227  RCP<const Map> coarseCoordMap =
228  MapFactory::Build (fineCoords->getMap()->lib(),
230  Teuchos::as<size_t>(numLocalCoarseNodes),
231  fineCoords->getMap()->getIndexBase(),
232  fineCoords->getMap()->getComm());
233  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
234  coarseCoords->putScalar(-1.0);
235  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
236  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
237  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
238 
239  // loop over all vert line indices (stop as soon as possible)
240  LO cntCoarseNodes = 0;
241  for( LO vt = 0; vt < TVertLineId.size(); ++vt) {
242  //vertical line id in *vt
243  LO curVertLineId = TVertLineId[vt];
244 
245  if(cx[curVertLineId * myCoarseZLayers] == -1.0 &&
246  cy[curVertLineId * myCoarseZLayers] == -1.0) {
247  // loop over all local myCoarseZLayers
248  for (LO n=0; n<myCoarseZLayers; ++n) {
249  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
250  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
251  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
252  }
253  cntCoarseNodes += myCoarseZLayers;
254  }
255 
256  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
257  if(cntCoarseNodes == numLocalCoarseNodes) break;
258  }
259 
260  // set coarse level coordinates
261  Set(coarseLevel, "Coordinates", coarseCoords);
262  } /* end bool bTransferCoordinates */
263 
264  }
265 
266  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
267  LocalOrdinal SemiCoarsenPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::FindCpts(LocalOrdinal const PtsPerLine, LocalOrdinal const CoarsenRate, LocalOrdinal const Thin, LocalOrdinal **LayerCpts) const {
268  /*
269  * Given the number of points in the z direction (PtsPerLine) and a
270  * coarsening rate (CoarsenRate), determine which z-points will serve
271  * as Cpts and return the total number of Cpts.
272  *
273  * Input
274  * PtsPerLine: Number of fine level points in the z direction
275  *
276  * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
277  *
278  * Thin: Must be either 0 or 1. Thin decides what to do when
279  * (PtsPerLine+1)/CoarsenRate is not an integer.
280  *
281  * Thin == 0 ==> ceil() the above fraction
282  * Thin == 1 ==> floor() the above fraction
283  *
284  * Output
285  * LayerCpts Array where LayerCpts[i] indicates that the
286  * LayerCpts[i]th fine level layer is a Cpt Layer.
287  * Note: fine level layers are assumed to be numbered starting
288  * a one.
289  */
290  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
291  LO NCpts, i;
292  LO NCLayers = -1;
293  LO FirstStride;
294 
295  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (CoarsenRate)) - 1.0;
296  if (Thin == 1) NCpts = (LO) ceil(temp);
297  else NCpts = (LO) floor(temp);
298 
299  TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
300 
301  if (NCpts < 1) NCpts = 1;
302 
303  FirstStride= (LO) ceil( ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) PtsPerLine+1)/( (typename Teuchos::ScalarTraits<Scalar>::coordinateType) (NCpts+1)));
304  RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) NCpts);
305 
306  NCLayers = (LO) floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/RestStride)+.00001);
307 
308  TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
309 
310  di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType) FirstStride;
311  for (i = 1; i <= NCpts; i++) {
312  (*LayerCpts)[i] = (LO) floor(di);
313  di += RestStride;
314  }
315 
316  return(NCLayers);
317  }
318 
319 #define MaxHorNeighborNodes 75
320 
321  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
323  MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
324  LO const VertLineId[], LO const DofsPerNode, RCP<Matrix> & Amat, RCP<Matrix>& P,
325  RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
326  RCP<MultiVector>& coarseNullspace ) const {
327 
328  /*
329  * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
330  * describing the z-layer and vertical line (LayerId and VertLineId)
331  * of each matrix block row, a coarsening rate, and dofs/node information,
332  * construct a prolongator that coarsening to semicoarsening in the z-direction
333  * using something like an operator-dependent grid transfer. In particular,
334  * matrix stencils are collapsed to vertical lines. Thus, each vertical line
335  * gives rise to a block tridiagonal matrix. BlkRows corresponding to
336  * Cpts are replaced by identity matrices. This tridiagonal is solved
337  * to determine each interpolation basis functions. Each Blk Rhs corresponds
338  * to all zeros except at the corresponding C-pt which has an identity
339  *
340  * On termination, return the number of local prolongator columns owned by
341  * this processor.
342  *
343  * Note: This code was adapted from a matlab code where offsets/arrays
344  * start from 1. In most parts of the code, this 1 offset is kept
345  * (in some cases wasting the first element of the array). The
346  * input and output matrices of this function has been changed to
347  * have offsets/rows/columns which start from 0. LayerId[] and
348  * VertLineId[] currently start from 1.
349  *
350  * Input
351  * =====
352  * Ntotal Number of fine level Blk Rows owned by this processor
353  *
354  * nz Number of vertical layers. Note: partitioning must be done
355  * so that each processor owns an entire vertical line. This
356  * means that nz is the global number of layers, which should
357  * be equal to the local number of layers.
358  * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
359  * would correspond to CoarsenRate = 3.
360  * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
361  * layer number associated with the dofs within BlkRow.
362  * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
363  * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
364  * BlkRows associated with nodes along the same vertical line
365  * in the mesh should have the same LineId.
366  * DofsPerNode Number of degrees-of-freedom per mesh node.
367  *
368  * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
369  * OrigAcols,
370  * OrigAvals
371  *
372  * Output
373  * =====
374  * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
375  * ParamPcols,
376  * ParamsPvals
377  */
378  int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
379  int *InvLineLayer=NULL, *CptLayers=NULL, StartLayer, NStencilNodes;
380  int BlkRow, dof_j, node_k, *Sub2FullMap=NULL, RowLeng;
381  int i, j, iii, col, count, index, loc, PtRow, PtCol;
382  SC *BandSol=NULL, *BandMat=NULL, TheSum;
383  int *IPIV=NULL, KL, KU, KLU, N, NRHS, LDAB,INFO;
384  int *Pcols;
385  size_t *Pptr;
386  SC *Pvals;
387  int MaxStencilSize, MaxNnzPerRow;
388  LO *LayDiff=NULL;
389  int CurRow, LastGuy = -1, NewPtr;
390  int Ndofs;
391  int Nghost;
392  LO *Layerdofs = NULL, *Col2Dof = NULL;
393 
394  Teuchos::LAPACK<LO,SC> lapack;
395 
396  char notrans[3];
397  notrans[0] = 'N';
398  notrans[1] = 'N';
399 
400 
401  MaxNnzPerRow = MaxHorNeighborNodes*DofsPerNode*3;
402  Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1+MaxNnzPerRow); LayDiff = TLayDiff.getRawPtr();
403 
404  Nghost = Amat->getColMap()->getNodeNumElements() - Amat->getDomainMap()->getNodeNumElements();
405  if (Nghost < 0) Nghost = 0;
406  Teuchos::ArrayRCP<LO> TLayerdofs= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Layerdofs = TLayerdofs.getRawPtr();
407  Teuchos::ArrayRCP<LO> TCol2Dof= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Col2Dof= TCol2Dof.getRawPtr();
408 
411  ArrayRCP<LO> valptr= localdtemp->getDataNonConst(0);
412 
413  for (i = 0; i < Ntotal*DofsPerNode; i++)
414  valptr[i]= LayerId[i/DofsPerNode];
415 
416  RCP< const Import> importer;
417  importer = Amat->getCrsGraph()->getImporter();
418  if (importer == Teuchos::null) {
419  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
420  }
421  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
422 
423  valptr= dtemp->getDataNonConst(0);
424  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Layerdofs[i]= valptr[i];
425  valptr= localdtemp->getDataNonConst(0);
426  for (i = 0; i < Ntotal*DofsPerNode; i++) valptr[i]= i%DofsPerNode;
427  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
428  valptr= dtemp->getDataNonConst(0);
429  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Col2Dof[i]= valptr[i];
430 
431  if (Ntotal != 0) {
432  NLayers = LayerId[0];
433  NVertLines= VertLineId[0];
434  }
435  else { NLayers = -1; NVertLines = -1; }
436 
437  for (i = 1; i < Ntotal; i++) {
438  if ( VertLineId[i] > NVertLines ) NVertLines = VertLineId[i];
439  if ( LayerId[i] > NLayers ) NLayers = LayerId[i];
440  }
441  NLayers++;
442  NVertLines++;
443 
444  /*
445  * Make an inverse map so that we can quickly find the dof
446  * associated with a particular vertical line and layer.
447  */
448 
449  Teuchos::ArrayRCP<LO> TInvLineLayer= Teuchos::arcp<LO>(1+NVertLines*NLayers); InvLineLayer = TInvLineLayer.getRawPtr();
450  for (i=0; i < Ntotal; i++) {
451  InvLineLayer[ VertLineId[i]+1+LayerId[i]*NVertLines ] = i;
452  }
453 
454  /*
455  * Determine coarse layers where injection will be applied.
456  */
457 
458  Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz+1); CptLayers = TCptLayers.getRawPtr();
459  NCLayers = FindCpts(nz,CoarsenRate,0, &CptLayers);
460 
461  /*
462  * Compute the largest possible interpolation stencil width based
463  * on the location of the Clayers. This stencil width is actually
464  * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
465  * one needs to multiply this by DofsPerNode.
466  */
467 
468  if (NCLayers < 2) MaxStencilSize = nz;
469  else MaxStencilSize = CptLayers[2];
470 
471  for (i = 3; i <= NCLayers; i++) {
472  if (MaxStencilSize < CptLayers[i]- CptLayers[i-2])
473  MaxStencilSize = CptLayers[i]- CptLayers[i-2];
474  }
475  if (NCLayers > 1) {
476  if (MaxStencilSize < nz - CptLayers[NCLayers-1]+1)
477  MaxStencilSize = nz - CptLayers[NCLayers-1]+1;
478  }
479 
480  /*
481  * Allocate storage associated with solving a banded sub-matrix needed to
482  * determine the interpolation stencil. Note: we compute interpolation stencils
483  * for all dofs within a node at the same time, and so the banded solution
484  * must be large enough to hold all DofsPerNode simultaneously.
485  */
486 
487  Teuchos::ArrayRCP<LO> TSub2FullMap= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); Sub2FullMap= TSub2FullMap.getRawPtr();
488  Teuchos::ArrayRCP<SC> TBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); BandSol = TBandSol.getRawPtr();
489  /*
490  * Lapack variables. See comments for dgbsv().
491  */
492  KL = 2*DofsPerNode-1;
493  KU = 2*DofsPerNode-1;
494  KLU = KL+KU;
495  LDAB = 2*KL+KU+1;
496  NRHS = DofsPerNode;
497  Teuchos::ArrayRCP<SC> TBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); BandMat = TBandMat.getRawPtr();
498  Teuchos::ArrayRCP<LO> TIPIV= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); IPIV = TIPIV.getRawPtr();
499 
500  /*
501  * Allocate storage for the final interpolation matrix. Note: each prolongator
502  * row might have entries corresponding to at most two nodes.
503  * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
504  * nnz per prolongator row is DofsPerNode*2.
505  */
506 
507  Ndofs = DofsPerNode*Ntotal;
508  MaxNnz = 2*DofsPerNode*Ndofs;
509 
510  RCP<const Map> rowMap = Amat->getRowMap();
511  Xpetra::global_size_t GNdofs= rowMap->getGlobalNumElements();
512 
513  std::vector<size_t> stridingInfo_;
514  stridingInfo_.push_back(DofsPerNode);
515 
516  Xpetra::global_size_t itemp = GNdofs/nz;
517  coarseMap = StridedMapFactory::Build(rowMap->lib(),
518  NCLayers*itemp,
519  NCLayers*NVertLines*DofsPerNode,
520  0, /* index base */
521  stridingInfo_,
522  rowMap->getComm(),
523  -1, /* strided block id */
524  0 /* domain gid offset */);
525 
526 
527  //coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
528 
529 
530  P = rcp(new CrsMatrixWrap(rowMap, coarseMap , 0, Xpetra::StaticProfile));
531  coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
532 
533 
534  Teuchos::ArrayRCP<SC> TPvals= Teuchos::arcp<SC>(1+MaxNnz); Pvals= TPvals.getRawPtr();
535  Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode*(2+Ntotal)); Pptr = TPptr.getRawPtr();
536  Teuchos::ArrayRCP<LO> TPcols= Teuchos::arcp<LO>(1+MaxNnz); Pcols= TPcols.getRawPtr();
537 
538  Pptr[0] = 0; Pptr[1] = 0;
539 
540  TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
541 
542  /*
543  * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
544  * This will be useful while filling up P, and then later we will squeeze out
545  * the unused nonzeros locations.
546  */
547 
548  for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
549  count = 1;
550  for (i = 1; i <= DofsPerNode*Ntotal+1; i++) {
551  Pptr[i] = count;
552  count += (2*DofsPerNode);
553  }
554 
555  /*
556  * Build P column by column. The 1st block column corresponds to the 1st coarse
557  * layer and the first line. The 2nd block column corresponds to the 2nd coarse
558  * layer and the first line. The NCLayers+1 block column corresponds to the
559  * 1st coarse layer and the 2nd line, etc.
560  */
561 
562 
563  col = 0;
564  for (MyLine=1; MyLine <= NVertLines; MyLine += 1) {
565  for (iii=1; iii <= NCLayers; iii+= 1) {
566  col = col+1;
567  MyLayer = CptLayers[iii];
568 
569  /*
570  * StartLayer gives the layer number of the lowest layer that
571  * is nonzero in the interpolation stencil that is currently
572  * being computed. Normally, if we are not near a boundary this
573  * is simply CptsLayers[iii-1]+1.
574  *
575  * NStencilNodes indicates the number of nonzero nodes in the
576  * interpolation stencil that is currently being computed. Normally,
577  * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
578  */
579 
580  if (iii != 1 ) StartLayer = CptLayers[iii-1]+1;
581  else StartLayer = 1;
582 
583  if (iii != NCLayers) NStencilNodes= CptLayers[iii+1]-StartLayer;
584  else NStencilNodes= NLayers - StartLayer + 1;
585 
586 
587  N = NStencilNodes*DofsPerNode;
588 
589  /*
590  * dgbsv() does not require that the first KL rows be initialized,
591  * so we could avoid zeroing out some entries?
592  */
593 
594  for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++)
595  BandSol[ i] = 0.0;
596  for (i = 0; i < LDAB*N; i++) BandMat[ i] = 0.0;
597 
598  /*
599  * Fill BandMat and BandSol (which is initially the rhs) for each
600  * node in the interpolation stencil that is being computed.
601  */
602 
603  for (node_k=1; node_k <= NStencilNodes ; node_k++) {
604 
605  /* Map a Line and Layer number to a BlkRow in the fine level matrix
606  * and record the mapping from the sub-system to the BlkRow of the
607  * fine level matrix.
608  */
609  BlkRow = InvLineLayer[MyLine+(StartLayer+node_k-2)*NVertLines]+1;
610  Sub2FullMap[node_k] = BlkRow;
611 
612  /* Two cases:
613  * 1) the current layer is not a Cpoint layer. In this case we
614  * want to basically stick the matrix couplings to other
615  * nonzero stencil rows into the band matrix. One way to do
616  * this is to include couplings associated with only MyLine
617  * and ignore all the other couplings. However, what we do
618  * instead is to sum all the coupling at each layer participating
619  * in this interpolation stencil and stick this sum into BandMat.
620  * 2) the current layer is a Cpoint layer and so we
621  * stick an identity block in BandMat and rhs.
622  */
623  if (StartLayer+node_k-1 != MyLayer) {
624  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
625 
626  j = (BlkRow-1)*DofsPerNode+dof_i;
627  ArrayView<const LO> AAcols;
628  ArrayView<const SC> AAvals;
629  Amat->getLocalRowView(j, AAcols, AAvals);
630  const int *Acols = AAcols.getRawPtr();
631  const SC *Avals = AAvals.getRawPtr();
632  RowLeng = AAvals.size();
633 
634  TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
635 
636  for (i = 0; i < RowLeng; i++) {
637  LayDiff[i] = Layerdofs[Acols[i]]-StartLayer-node_k+2;
638 
639  /* This is the main spot where there might be off- */
640  /* processor communication. That is, when we */
641  /* average the stencil in the horizontal direction,*/
642  /* we need to know the layer id of some */
643  /* neighbors that might reside off-processor. */
644  }
645  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
646  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
647  PtCol = (node_k-1)*DofsPerNode+dof_j + 1;
648  /* Stick in entry corresponding to Mat(PtRow,PtCol) */
649  /* see dgbsv() comments for matrix format. */
650  TheSum = 0.0;
651  for (i = 0; i < RowLeng; i++) {
652  if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
653  TheSum += Avals[i];
654  }
655  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
656  BandMat[index] = TheSum;
657  if (node_k != NStencilNodes) {
658  /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
659  /* see dgbsv() comments for matrix format. */
660  TheSum = 0.0;
661  for (i = 0; i < RowLeng; i++) {
662  if ((LayDiff[i] == 1) &&(Col2Dof[Acols[i]]== dof_j))
663  TheSum += Avals[i];
664  }
665  j = PtCol+DofsPerNode;
666  index=LDAB*(j-1)+KLU+PtRow-j;
667  BandMat[index] = TheSum;
668  }
669  if (node_k != 1) {
670  /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
671  /* see dgbsv() comments for matrix format. */
672  TheSum = 0.0;
673  for (i = 0; i < RowLeng; i++) {
674  if ((LayDiff[i]== -1) &&(Col2Dof[Acols[i]]== dof_j))
675  TheSum += Avals[i];
676  }
677  j = PtCol-DofsPerNode;
678  index=LDAB*(j-1)+KLU+PtRow-j;
679  BandMat[index] = TheSum;
680  }
681  }
682  }
683  }
684  else {
685 
686  /* inject the null space */
687  // int FineStride = Ntotal*DofsPerNode;
688  // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
689  for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
690  Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
691  Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
692  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
693  OneCNull[( col-1)*DofsPerNode+dof_i] = OneFNull[ (BlkRow-1)*DofsPerNode+dof_i];
694  }
695  }
696 
697  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
698  /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
699  /* see dgbsv() comments for matrix format. */
700  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
701  index = LDAB*(PtRow-1)+KLU;
702  BandMat[index] = 1.0;
703  BandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
704  }
705  }
706  }
707 
708  /* Solve banded system and then stick result in Pmatrix arrays */
709 
710  lapack.GBTRF( N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
711 
712  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
713 
714  lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
715  BandSol, N, &INFO );
716 
717  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
718 
719  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
720  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
721  for (i =1; i <= NStencilNodes ; i++) {
722  index = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
723  loc = Pptr[index];
724  Pcols[loc] = (col-1)*DofsPerNode+dof_j+1;
725  Pvals[loc] = BandSol[dof_j*DofsPerNode*NStencilNodes +
726  (i-1)*DofsPerNode + dof_i];
727  Pptr[index]= Pptr[index] + 1;
728  }
729  }
730  }
731  }
732  }
733 
734  /*
735  * Squeeze the -1's out of the columns. At the same time convert Pcols
736  * so that now the first column is numbered '0' as opposed to '1'.
737  * Also, the arrays Pcols and Pvals should now use the zeroth element
738  * as opposed to just starting with the first element. Pptr will be
739  * fixed in the for loop below so that Pptr[0] = 0, etc.
740  */
741  CurRow = 1;
742  NewPtr = 1;
743  for (size_t ii=1; ii <= Pptr[Ntotal*DofsPerNode]-1; ii++) {
744  if (ii == Pptr[CurRow]) {
745  Pptr[CurRow] = LastGuy;
746  CurRow++;
747  while (ii > Pptr[CurRow]) {
748  Pptr[CurRow] = LastGuy;
749  CurRow++;
750  }
751  }
752  if (Pcols[ii] != -1) {
753  Pcols[NewPtr-1] = Pcols[ii]-1; /* these -1's fix the offset and */
754  Pvals[NewPtr-1] = Pvals[ii]; /* start using the zeroth element */
755  LastGuy = NewPtr;
756  NewPtr++;
757  }
758  }
759  for (i = CurRow; i <= Ntotal*DofsPerNode; i++) Pptr[CurRow] = LastGuy;
760 
761  /* Now move the pointers so that they now point to the beginning of each
762  * row as opposed to the end of each row
763  */
764  for (i=-Ntotal*DofsPerNode+1; i>= 2 ; i--) {
765  Pptr[i-1] = Pptr[i-2]; /* extra -1 added to start from 0 */
766  }
767  Pptr[0] = 0;
768 
769  ArrayRCP<size_t> rcpRowPtr;
770  ArrayRCP<LO> rcpColumns;
771  ArrayRCP<SC> rcpValues;
772 
773  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
774  LO nnz = Pptr[Ndofs];
775  PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
776 
777  ArrayView<size_t> rowPtr = rcpRowPtr();
778  ArrayView<LO> columns = rcpColumns();
779  ArrayView<SC> values = rcpValues();
780 
781  // copy data over
782 
783  for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
784  for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
785  for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
786  PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
787  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
788 
789 
790  return NCLayers*NVertLines*DofsPerNode;
791  }
792 } //namespace MueLu
793 
794 #define MUELU_SEMICOARSENPFACTORY_SHORT
795 #endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
GlobalOrdinal GO
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
T * getRawPtr() const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
size_type size() const
void GBTRF(const OrdinalType &m, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, ScalarType *A, const OrdinalType &lda, OrdinalType *IPIV, OrdinalType *info) const
LocalOrdinal LO
T * get() const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
static const NoFactory * get()
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
T * getRawPtr() const
size_t global_size_t
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static RCP< Vector > Build(const Teuchos::RCP< const Map > &map, bool zeroOut=true)
void GBTRS(const char &TRANS, const OrdinalType &n, const OrdinalType &kl, const OrdinalType &ku, const OrdinalType &nrhs, const ScalarType *A, const OrdinalType &lda, const OrdinalType *IPIV, ScalarType *B, const OrdinalType &ldb, OrdinalType *info) const
#define SET_VALID_ENTRY(name)
#define MaxHorNeighborNodes
Scalar SC
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace) const
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
Exception throws to report errors in the internal logical of the program.
iterator end() const
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
iterator begin() const
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.