MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
47 #define MUELU_BLACKBOXPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 #include <iomanip>
51 
52 
53 // #include <Teuchos_LAPACK.hpp>
57 
59 #include <Xpetra_CrsMatrixWrap.hpp>
60 #include <Xpetra_ImportFactory.hpp>
61 #include <Xpetra_Matrix.hpp>
62 #include <Xpetra_MapFactory.hpp>
64 #include <Xpetra_VectorFactory.hpp>
65 
66 #include <Xpetra_IO.hpp>
67 
70 
71 #include "MueLu_MasterList.hpp"
72 #include "MueLu_Monitor.hpp"
73 
74 namespace MueLu {
75 
76  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
78  RCP<ParameterList> validParamList = rcp(new ParameterList());
79 
80  // Coarsen can come in two forms, either a single char that will be interpreted as an integer
81  // which is used as the coarsening rate in every spatial dimentions,
82  // or it can be a longer string that will then be interpreted as an array of integers.
83  // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
84  // is the default setting!
85  validParamList->set<std::string > ("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
86  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
87  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
88  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
89  validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
90  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
91  validParamList->set<std::string> ("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
92  validParamList->set<std::string> ("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
93 
94  return validParamList;
95  }
96 
97  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
99  Level& /* coarseLevel */)
100  const {
101  Input(fineLevel, "A");
102  Input(fineLevel, "Nullspace");
103  Input(fineLevel, "Coordinates");
104  // Request the global number of nodes per dimensions
105  if(fineLevel.GetLevelID() == 0) {
106  if(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
107  fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
108  } else {
109  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
111  "gNodesPerDim was not provided by the user on level0!");
112  }
113  } else {
114  Input(fineLevel, "gNodesPerDim");
115  }
116 
117  // Request the local number of nodes per dimensions
118  if(fineLevel.GetLevelID() == 0) {
119  if(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
120  fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
121  } else {
122  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
124  "lNodesPerDim was not provided by the user on level0!");
125  }
126  } else {
127  Input(fineLevel, "lNodesPerDim");
128  }
129  }
130 
131  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
133  Level& coarseLevel) const{
134  return BuildP(fineLevel, coarseLevel);
135 
136  }
137 
138  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140  Level& coarseLevel)const{
141  FactoryMonitor m(*this, "Build", coarseLevel);
142 
143  // Get parameter list
144  const ParameterList& pL = GetParameterList();
145 
146  // obtain general variables
147  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
148  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
150  Get< RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > >(fineLevel, "Coordinates");
151  LO numDimensions = coordinates->getNumVectors();
152  LO BlkSize = A->GetFixedBlockSize();
153 
154  // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
155  Array<GO> gFineNodesPerDir(3);
156  Array<LO> lFineNodesPerDir(3);
157  // Get the number of points in each direction
158  if(fineLevel.GetLevelID() == 0) {
159  gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
160  lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
161  } else {
162  // Loading global number of nodes per diretions
163  gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
164 
165  // Loading local number of nodes per diretions
166  lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
167  }
168  for(LO i = 0; i < 3; ++i) {
169  if(gFineNodesPerDir[i] == 0) {
170  GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
171  << std::endl;
172  gFineNodesPerDir[i] = 1;
173  }
174  if(lFineNodesPerDir[i] == 0) {
175  GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
176  << std::endl;
177  lFineNodesPerDir[i] = 1;
178  }
179  }
180  GO gNumFineNodes = gFineNodesPerDir[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
181  LO lNumFineNodes = lFineNodesPerDir[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0];
182 
183  // Get the coarsening rate
184  std::string coarsenRate = pL.get<std::string>("Coarsen");
185  Array<LO> coarseRate(3);
186  {
187  Teuchos::Array<LO> crates;
188  try {
189  crates = Teuchos::fromStringToArray<LO>(coarsenRate);
191  GetOStream(Errors,-1) << " *** Coarsen must be a string convertible into an array! *** "
192  << std::endl;
193  throw e;
194  }
195  TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
197  "Coarsen must have at least as many components as the number of"
198  " spatial dimensions in the problem.");
199  for(LO i = 0; i < 3; ++i) {
200  if(i < numDimensions) {
201  if(crates.size() == 1) {
202  coarseRate[i] = crates[0];
203  } else if(i < crates.size()) {
204  coarseRate[i] = crates[i];
205  } else {
206  GetOStream(Errors,-1) << " *** Coarsen must be at least as long as the number of"
207  " spatial dimensions! *** " << std::endl;
208  throw Exceptions::RuntimeError(" *** Coarsen must be at least as long as the number of"
209  " spatial dimensions! *** \n");
210  }
211  } else {
212  coarseRate[i] = 1;
213  }
214  }
215  } // End of scope for crates
216 
217  // Get the stencil type used for discretization
218  const std::string stencilType = pL.get<std::string>("stencil type");
219  if(stencilType != "full" && stencilType != "reduced") {
220  GetOStream(Errors,-1) << " *** stencil type must be set to: full or reduced, any other value "
221  "is trated as an error! *** " << std::endl;
222  throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
223  }
224 
225  // Get the strategy for PDE systems
226  const std::string blockStrategy = pL.get<std::string>("block strategy");
227  if(blockStrategy != "coupled" && blockStrategy != "uncoupled") {
228  GetOStream(Errors,-1) << " *** block strategy must be set to: coupled or uncoupled, any other "
229  "value is trated as an error! *** " << std::endl;
230  throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
231  }
232 
233  GO gNumCoarseNodes = 0;
234  LO lNumCoarseNodes = 0;
235  Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
236  Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
237  Array<bool> ghostInterface(6);
238  Array<int> boundaryFlags(3);
241  for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim)();}
242 
243  // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
244  RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
245 
246  GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize,// inputs
247  gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
248  lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
249  gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
250  ghostedCoarseNodes);
251 
252  // Create the MultiVector of coarse coordinates
253  Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
254  RCP<const Map> coarseCoordsMap = MapFactory::Build (lib,
255  gNumCoarseNodes,
256  coarseNodesGIDs.view(0, lNumCoarseNodes),
257  coordinates->getMap()->getIndexBase(),
258  coordinates->getMap()->getComm());
260  for(LO dim = 0; dim < numDimensions; ++dim) {
261  coarseCoords[dim] = coarseNodes[dim]();
262  }
263  RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coarseCoordinates =
264  Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(coarseCoordsMap, coarseCoords(),
265  numDimensions);
266 
267  // Now create a new matrix: Aghost that contains all the data
268  // locally needed to compute the local part of the prolongator.
269  // Here assuming that all the coarse nodes o and fine nodes +
270  // are local then all the data associated with the coarse
271  // nodes O and the fine nodes * needs to be imported.
272  //
273  // *--*--*--*--*--*--*--*
274  // | | | | | | | |
275  // o--+--+--o--+--+--O--*
276  // | | | | | | | |
277  // +--+--+--+--+--+--*--*
278  // | | | | | | | |
279  // +--+--+--+--+--+--*--*
280  // | | | | | | | |
281  // o--+--+--o--+--+--O--*
282  // | | | | | | | |
283  // +--+--+--+--+--+--*--*
284  // | | | | | | | |
285  // *--*--*--*--*--*--*--*
286  // | | | | | | | |
287  // O--*--*--O--*--*--O--*
288  //
289  // Creating that local matrix is easy enough using proper range
290  // and domain maps to import data from A. Note that with this
291  // approach we reorder the local entries using the domain map and
292  // can subsequently compute the prolongator without reordering.
293  // As usual we need to be careful about any coarsening rate
294  // change at the boundary!
295 
296  // The ingredients needed are an importer, a range map and a domain map
297  Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
298  nodeSteps[0] = 1;
299  nodeSteps[1] = gFineNodesPerDir[0];
300  nodeSteps[2] = gFineNodesPerDir[0]*gFineNodesPerDir[1];
301  Array<LO> glFineNodesPerDir(3);
302  GO startingGID = A->getRowMap()->getMinGlobalIndex();
303  for(LO dim = 0; dim < 3; ++dim) {
304  LO numCoarseNodes = 0;
305  if(dim < numDimensions) {
306  startingGID -= myOffset[dim]*nodeSteps[dim];
307  numCoarseNodes = lCoarseNodesPerDir[dim];
308  if(ghostInterface[2*dim]) {++numCoarseNodes;}
309  if(ghostInterface[2*dim+1]) {++numCoarseNodes;}
310  if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
311  glFineNodesPerDir[dim] = (numCoarseNodes - 2)*coarseRate[dim] + endRate[dim] + 1;
312  } else {
313  glFineNodesPerDir[dim] = (numCoarseNodes - 1)*coarseRate[dim] + 1;
314  }
315  } else {
316  glFineNodesPerDir[dim] = 1;
317  }
318  }
319  ghostRowGIDs.resize(glFineNodesPerDir[0]*glFineNodesPerDir[1]*glFineNodesPerDir[2]*BlkSize);
320  for(LO k = 0; k < glFineNodesPerDir[2]; ++k) {
321  for(LO j = 0; j < glFineNodesPerDir[1]; ++j) {
322  for(LO i = 0; i < glFineNodesPerDir[0]; ++i) {
323  for(LO l = 0; l < BlkSize; ++l) {
324  ghostRowGIDs[(k*glFineNodesPerDir[1]*glFineNodesPerDir[0]
325  + j*glFineNodesPerDir[0] + i)*BlkSize + l] = startingGID
326  + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
327  }
328  }
329  }
330  }
331 
332  // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
333  Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
334  Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
335  GO colMinGID = 0;
336  Array<LO> colRange(numDimensions);
337  dimStride[0] = 1;
338  for(int dim = 1; dim < numDimensions; ++dim) {
339  dimStride[dim] = dimStride[dim - 1]*gFineNodesPerDir[dim - 1];
340  }
341  {
342  GO tmp = startingGID;
343  for(int dim = numDimensions; dim > 0; --dim) {
344  startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
345  tmp = tmp % dimStride[dim - 1];
346 
347  if(startingGlobalIndices[dim - 1] > 0) {
348  startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
349  }
350  if(startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]){
351  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
352  + glFineNodesPerDir[dim - 1];
353  } else {
354  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
355  + glFineNodesPerDir[dim - 1] - 1;
356  }
357  colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
358  colMinGID += startingColIndices[dim - 1]*dimStride[dim - 1];
359  }
360  }
361  ghostColGIDs.resize(colRange[0]*colRange[1]*colRange[2]*BlkSize);
362  for(LO k = 0; k < colRange[2]; ++k) {
363  for(LO j = 0; j < colRange[1]; ++j) {
364  for(LO i = 0; i < colRange[0]; ++i) {
365  for(LO l = 0; l < BlkSize; ++l) {
366  ghostColGIDs[(k*colRange[1]*colRange[0] + j*colRange[0] + i)*BlkSize + l] = colMinGID
367  + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
368  }
369  }
370  }
371  }
372 
373  RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
375  ghostRowGIDs(),
376  A->getRowMap()->getIndexBase(),
377  A->getRowMap()->getComm());
378  RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
380  ghostColGIDs(),
381  A->getRowMap()->getIndexBase(),
382  A->getRowMap()->getComm());
383  RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO,GO,NO>::Build(A->getRowMap(),
384  ghostedRowMap);
386  ghostedRowMap,
387  ghostedRowMap);
388 
389  // Create the maps and data structures for the projection matrix
390  RCP<const Map> rowMapP = A->getDomainMap();
391 
392  RCP<const Map> domainMapP;
393 
394  RCP<const Map> colMapP;
395 
396  // At this point we need to create the column map which is a delicate operation.
397  // The entries in that map need to be ordered as follows:
398  // 1) first owned entries ordered by LID
399  // 2) second order the remaining entries by PID
400  // 3) entries with the same remote PID are ordered by GID.
401  // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
402  // is the number of remote nodes. The sorting can be limited to remote nodes
403  // as the owned ones are alreaded ordered by LID!
404 
405  LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
406  {
407  std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
408  for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
409  colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
410  if(ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
411  colMapOrdering[ind].PID = -1;
412  } else {
413  colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
414  }
415  colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
416  colMapOrdering[ind].lexiInd = ind;
417  }
418  std::sort(colMapOrdering.begin(), colMapOrdering.end(),
419  [](NodeID a, NodeID b)->bool{
420  return ( (a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)) );
421  });
422 
423  colGIDs.resize(BlkSize*lNumGhostedNodes);
424  for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
425  // Save the permutation calculated to go from Lexicographic indexing to column map indexing
426  ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
427  for(LO dof = 0; dof < BlkSize; ++dof) {
428  colGIDs[BlkSize*ind + dof] = BlkSize*colMapOrdering[ind].GID + dof;
429  }
430  }
431  domainMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
432  BlkSize*gNumCoarseNodes,
433  colGIDs.view(0,BlkSize*lNumCoarseNodes),
434  rowMapP->getIndexBase(),
435  rowMapP->getComm());
436  colMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
438  colGIDs.view(0, colGIDs.size()),
439  rowMapP->getIndexBase(),
440  rowMapP->getComm());
441  } // End of scope for colMapOrdering and colGIDs
442 
443  std::vector<size_t> strideInfo(1);
444  strideInfo[0] = BlkSize;
445  RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO,GO,NO>::Build(domainMapP,
446  strideInfo);
447 
448  GO gnnzP = 0;
449  LO lnnzP = 0;
450  // coarse points have one nnz per row
451  gnnzP += gNumCoarseNodes;
452  lnnzP += lNumCoarseNodes;
453  // add all other points multiplying by 2^numDimensions
454  gnnzP += (gNumFineNodes - gNumCoarseNodes)*std::pow(2, numDimensions);
455  lnnzP += (lNumFineNodes - lNumCoarseNodes)*std::pow(2, numDimensions);
456  // finally multiply by the number of dofs per node
457  gnnzP = gnnzP*BlkSize;
458  lnnzP = lnnzP*BlkSize;
459 
460  // Create the matrix itself using the above maps
461  RCP<Matrix> P;
462  P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0, Xpetra::StaticProfile));
463  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
464 
465  ArrayRCP<size_t> iaP;
466  ArrayRCP<LO> jaP;
467  ArrayRCP<SC> valP;
468 
469  PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
470 
471  ArrayView<size_t> ia = iaP();
472  ArrayView<LO> ja = jaP();
473  ArrayView<SC> val = valP();
474  ia[0] = 0;
475 
476 
477  LO numCoarseElements = 1;
478  Array<LO> lCoarseElementsPerDir(3);
479  for(LO dim = 0; dim < numDimensions; ++dim) {
480  lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
481  if(ghostInterface[2*dim]) {++lCoarseElementsPerDir[dim];}
482  if(!ghostInterface[2*dim+1]) {--lCoarseElementsPerDir[dim];}
483  numCoarseElements = numCoarseElements*lCoarseElementsPerDir[dim];
484  }
485 
486  for(LO dim = numDimensions; dim < 3; ++dim) {
487  lCoarseElementsPerDir[dim] = 1;
488  }
489 
490  // Loop over the coarse elements
491  Array<int> elementFlags(3);
492  Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
493  Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
494  const int numCoarseNodesInElement = std::pow(2, numDimensions);
495  const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
496  const int numRowsPerPoint = BlkSize;
497  for(elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
498  for(elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
499  for(elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
500  elementFlags[0] = 0; elementFlags[1] = 0; elementFlags[2] = 0;
501  for(int dim = 0; dim < 3; ++dim) {
502  // Detect boundary conditions on the element and set corresponding flags.
503  if(elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
504  elementFlags[dim] = boundaryFlags[dim];
505  } else if(elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
506  elementFlags[dim] += 1;
507  } else if((elemInds[dim] == lCoarseElementsPerDir[dim] - 1)
508  && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
509  elementFlags[dim] += 2;
510  } else {
511  elementFlags[dim] = 0;
512  }
513 
514  // Compute the number of nodes in the current element.
515  if(dim < numDimensions) {
516  if((elemInds[dim] == lCoarseElementsPerDir[dim])
517  && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
518  elementNodesPerDir[dim] = endRate[dim] + 1;
519  } else {
520  elementNodesPerDir[dim] = coarseRate[dim] + 1;
521  }
522  } else {
523  elementNodesPerDir[dim] = 1;
524  }
525 
526  // Get the lowest tuple of the element using the ghosted local coordinate system
527  glElementRefTuple[dim] = elemInds[dim]*coarseRate[dim];
528  glElementRefTupleCG[dim] = elemInds[dim];
529  }
530 
531  // Now get the column map indices corresponding to the dofs associated with the current
532  // element's coarse nodes.
533  for(typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
534  glElementCoarseNodeCG[elem]
535  = glElementRefTupleCG[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
536  + glElementRefTupleCG[1]*glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
537  }
538  glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
539  glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
540  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
541  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
542 
543  glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
544  glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
545  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
546  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
547 
548  glElementCoarseNodeCG[1] += 1;
549  glElementCoarseNodeCG[3] += 1;
550  glElementCoarseNodeCG[5] += 1;
551  glElementCoarseNodeCG[7] += 1;
552 
553  LO numNodesInElement = elementNodesPerDir[0]*elementNodesPerDir[1]*elementNodesPerDir[2];
554  // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
555  // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
556 
557  // Compute the element prolongator
559  Array<LO> dofType(numNodesInElement*BlkSize), lDofInd(numNodesInElement*BlkSize);
560  ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
561  numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
562  lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
563  blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
564  Pi, Pf, Pe, dofType, lDofInd);
565 
566  // Find ghosted LID associated with nodes in the element and eventually which of these
567  // nodes are ghosts, this information is used to fill the local prolongator.
568  Array<LO> lNodeLIDs(numNodesInElement);
569  {
570  Array<LO> lNodeTuple(3), nodeInd(3);
571  for(nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
572  for(nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
573  for(nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
574  int stencilLength = 0;
575  if((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
576  (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
577  (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
578  stencilLength = 1;
579  } else {
580  stencilLength = std::pow(2, numDimensions);
581  }
582  LO nodeElementInd = nodeInd[2]*elementNodesPerDir[1]*elementNodesPerDir[1]
583  + nodeInd[1]*elementNodesPerDir[0] + nodeInd[0];
584  for(int dim = 0; dim < 3; ++dim) {
585  lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
586  }
587  if(lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
588  lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
589  lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
590  lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
591  // This flags the ghosts nodes used for prolongator calculation but for which
592  // we do not own the row, hence we won't fill these values on this rank.
593  lNodeLIDs[nodeElementInd] = -1;
594  } else if ((nodeInd[0] == 0 && elemInds[0] !=0) ||
595  (nodeInd[1] == 0 && elemInds[1] !=0) ||
596  (nodeInd[2] == 0 && elemInds[2] !=0)) {
597  // This flags nodes that are owned but common to two coarse elements and that
598  // were already filled by another element, we don't want to fill them twice so
599  // we skip them
600  lNodeLIDs[nodeElementInd] = -2;
601  } else {
602  // The remaining nodes are locally owned and we need to fill the coresponding
603  // rows of the prolongator
604 
605  // First we need to find which row needs to be filled
606  lNodeLIDs[nodeElementInd] = lNodeTuple[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0]
607  + lNodeTuple[1]*lFineNodesPerDir[0] + lNodeTuple[0];
608 
609  // We also compute the row offset using arithmetic to ensure that we can loop
610  // easily over the nodes in a macro-element as well as facilitate on-node
611  // parallelization. The node serial code could be rewritten with two loops over
612  // the local part of the mesh to avoid the costly integer divisions...
613  Array<LO> refCoarsePointTuple(3);
614  for(int dim = 2; dim > -1; --dim) {
615  if(dim == 0) {
616  refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
617  if(myOffset[dim] == 0) {
618  ++refCoarsePointTuple[dim];
619  }
620  } else {
621  // Note: no need for magnitudeType here, just use double because these things are LO's
622  refCoarsePointTuple[dim] =
623  std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim])
624  / coarseRate[dim]);
625  }
626  if((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {break;}
627  }
628  const LO numCoarsePoints = refCoarsePointTuple[0]
629  + refCoarsePointTuple[1]*lCoarseNodesPerDir[0]
630  + refCoarsePointTuple[2]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0];
631  const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
632 
633  // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
634  // fill row 'n' to 'n+BlkSize'.
635  size_t rowPtr = (numFinePoints - numCoarsePoints)*numRowsPerPoint
636  *numCoarseNodesInElement*nnzPerCoarseNode + numCoarsePoints*numRowsPerPoint;
637  if(dofType[nodeElementInd*BlkSize] == 0) {
638  // Check if current node is a coarse point
639  rowPtr = rowPtr - numRowsPerPoint;
640  } else {
641  rowPtr = rowPtr - numRowsPerPoint*numCoarseNodesInElement*nnzPerCoarseNode;
642  }
643 
644  for(int dof = 0; dof < BlkSize; ++dof) {
645 
646  // Now we loop over the stencil and fill the column indices and row values
647  int cornerInd = 0;
648  switch(dofType[nodeElementInd*BlkSize + dof]) {
649  case 0: // Corner node
650  if(nodeInd[2] == elementNodesPerDir[2] - 1) {cornerInd += 4;}
651  if(nodeInd[1] == elementNodesPerDir[1] - 1) {cornerInd += 2;}
652  if(nodeInd[0] == elementNodesPerDir[0] - 1) {cornerInd += 1;}
653  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1] = rowPtr + dof + 1;
654  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]]*BlkSize + dof;
655  val[rowPtr + dof] = 1.0;
656  break;
657 
658  case 1: // Edge node
659  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
660  = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
661  for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
662  if(blockStrategy == "coupled") {
663  for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
664  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
665  + ind1*BlkSize + ind2;
666  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
667  val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
668  ind1*BlkSize + ind2);
669  }
670  } else if(blockStrategy == "uncoupled") {
671  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
672  + ind1;
673  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
674  val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
675  ind1*BlkSize + dof);
676  }
677  }
678  break;
679 
680  case 2: // Face node
681  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
682  = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
683  for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
684  if(blockStrategy == "coupled") {
685  for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
686  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
687  + ind1*BlkSize + ind2;
688  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
689  val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
690  ind1*BlkSize + ind2);
691  }
692  } else if(blockStrategy == "uncoupled") {
693  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
694  + ind1;
695  // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
696  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
697  val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
698  ind1*BlkSize + dof);
699  }
700  }
701  break;
702 
703  case 3: // Interior node
704  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
705  = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
706  for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
707  if(blockStrategy == "coupled") {
708  for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
709  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
710  + ind1*BlkSize + ind2;
711  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
712  val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
713  ind1*BlkSize + ind2);
714  }
715  } else if(blockStrategy == "uncoupled") {
716  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
717  + ind1;
718  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
719  val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
720  ind1*BlkSize + dof);
721  }
722  }
723  break;
724  }
725  }
726  }
727  }
728  }
729  }
730  } // End of scopt for lNodeTuple and nodeInd
731  }
732  }
733  }
734 
735  // Sort all row's column indicies and entries by LID
736  Xpetra::CrsMatrixUtils<SC,LO,GO,NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
737 
738  // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
739  PCrs->setAllValues(iaP, jaP, valP);
740  PCrs->expertStaticFillComplete(domainMapP,rowMapP);
741 
742  // set StridingInformation of P
743  if (A->IsView("stridedMaps") == true) {
744  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
745  } else {
746  P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
747  }
748 
749  // store the transfer operator and node coordinates on coarse level
750  Set(coarseLevel, "P", P);
751  Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
752  Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
753  Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
754 
755  }
756 
757  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
760  const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
761  const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
762  Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
763  Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
764  Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
765  Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
766  ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
767  RCP<NodesIDs> ghostedCoarseNodes) const {
768  // This function is extracting the geometric information from the coordinates
769  // and creates the necessary data/formatting to perform locally the calculation
770  // of the pronlongator.
771  //
772  // inputs:
773 
774  RCP<const Map> coordinatesMap = coordinates->getMap();
775  LO numDimensions = coordinates->getNumVectors();
776 
777  // Using the coarsening rate and the fine level data,
778  // compute coarse level data
779 
780  // Phase 1 //
781  // ------------------------------------------------------------------ //
782  // We first start by finding small informations on the mesh such as //
783  // the number of coarse nodes (local and global) and the number of //
784  // ghost nodes / the end rate of coarsening. //
785  // ------------------------------------------------------------------ //
786  GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
787  {
788  GO tmp;
789  gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
790  tmp = minGlobalIndex % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
791  gIndices[1] = tmp / gFineNodesPerDir[0];
792  gIndices[0] = tmp % gFineNodesPerDir[0];
793 
794  myOffset[2] = gIndices[2] % coarseRate[2];
795  myOffset[1] = gIndices[1] % coarseRate[1];
796  myOffset[0] = gIndices[0] % coarseRate[0];
797  }
798 
799  for(int dim = 0; dim < 3; ++dim) {
800  if(gIndices[dim] == 0) {
801  boundaryFlags[dim] += 1;
802  }
803  if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
804  boundaryFlags[dim] += 2;
805  }
806  }
807 
808  // Check whether ghost nodes are needed in each direction
809  for(LO i=0; i < numDimensions; ++i) {
810  if((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
811  ghostInterface[2*i] = true;
812  }
813  if(((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i])
814  && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
815  ghostInterface[2*i + 1] = true;
816  }
817  }
818 
819  for(LO i = 0; i < 3; ++i) {
820  if(i < numDimensions) {
821  lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
822  if(myOffset[i] == 0) { ++lCoarseNodesPerDir[i]; }
823  gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
824  endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
825  if(endRate[i] == 0) {
826  ++gCoarseNodesPerDir[i];
827  endRate[i] = coarseRate[i];
828  }
829  } else {
830  // Most quantities need to be set to 1 for extra dimensions
831  // this is rather logical, an x-y plane is like a single layer
832  // of nodes in the z direction...
833  gCoarseNodesPerDir[i] = 1;
834  lCoarseNodesPerDir[i] = 1;
835  endRate[i] = 1;
836  }
837  }
838 
839  gNumCoarseNodes = gCoarseNodesPerDir[0]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[2];
840  lNumCoarseNodes = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];
841 
842  // For each direction, determine how many ghost points are required.
843  LO lNumGhostNodes = 0;
844  Array<GO> startGhostedCoarseNode(3);
845  {
846  const int complementaryIndices[3][2] = {{1,2}, {0,2}, {0,1}};
847  LO tmp = 0;
848  for(LO i = 0; i < 3; ++i) {
849  // The first branch of this if-statement will be used if the rank contains only one layer
850  // of nodes in direction i, that layer must also coincide with the boundary of the mesh
851  // and coarseRate[i] == endRate[i]...
852  if((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
853  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
854  } else {
855  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
856  }
857  // First we load the number of locally owned coarse nodes
858  glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
859 
860  // Check whether a face in direction i needs ghost nodes
861  if(ghostInterface[2*i] || ghostInterface[2*i+1]) {
862  ++glCoarseNodesPerDir[i];
863  if(i == 0) {tmp = lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];}
864  if(i == 1) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[2];}
865  if(i == 2) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1];}
866  }
867  // If both faces in direction i need nodes, double the number of ghost nodes
868  if(ghostInterface[2*i] && ghostInterface[2*i+1]) {
869  ++glCoarseNodesPerDir[i];
870  tmp = 2*tmp;
871  }
872  lNumGhostNodes += tmp;
873 
874  // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
875  for(LO j = 0; j < 2; ++j) {
876  for(LO k = 0; k < 2; ++k) {
877  // Check if two adjoining faces need ghost nodes and then add their common edge
878  if(ghostInterface[2*complementaryIndices[i][0]+j]
879  && ghostInterface[2*complementaryIndices[i][1]+k]) {
880  lNumGhostNodes += lCoarseNodesPerDir[i];
881  // Add corners if three adjoining faces need ghost nodes,
882  // but add them only once! Hence when i == 0.
883  if(ghostInterface[2*i] && (i == 0)) { lNumGhostNodes += 1; }
884  if(ghostInterface[2*i+1] && (i == 0)) { lNumGhostNodes += 1; }
885  }
886  }
887  }
888  tmp = 0;
889  }
890  } // end of scope for tmp and complementaryIndices
891 
892  const LO lNumGhostedNodes = glCoarseNodesPerDir[0]*glCoarseNodesPerDir[1]
893  *glCoarseNodesPerDir[2];
894  ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
895  ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
896  ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
897  ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
898  ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
899 
900  // We loop over all ghosted coarse nodes by increasing global lexicographic order
901  Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
902  LO currentIndex = -1;
903  for(ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
904  for(ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
905  for(ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
906  currentIndex = ijk[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
907  + ijk[1]*glCoarseNodesPerDir[0] + ijk[0];
908  coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
909  coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0]*coarseRate[0];
910  if(coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
911  coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
912  }
913  coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
914  coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1]*coarseRate[1];
915  if(coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
916  coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
917  }
918  coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
919  coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2]*coarseRate[2];
920  if(coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
921  coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
922  }
923  GO myGID = 0, myCoarseGID = -1;
924  GO factor[3] = {};
925  factor[2] = gFineNodesPerDir[1]*gFineNodesPerDir[0];
926  factor[1] = gFineNodesPerDir[0];
927  factor[0] = 1;
928  for(int dim = 0; dim < 3; ++dim) {
929  if(dim < numDimensions) {
930  if(gIndices[dim] - myOffset[dim] + ijk[dim]*coarseRate[dim]
931  < gFineNodesPerDir[dim] - 1) {
932  myGID += (gIndices[dim] - myOffset[dim]
933  + ijk[dim]*coarseRate[dim])*factor[dim];
934  } else {
935  myGID += (gIndices[dim] - myOffset[dim]
936  + (ijk[dim] - 1)*coarseRate[dim] + endRate[dim])*factor[dim];
937  }
938  }
939  }
940  myCoarseGID = coarseNodeCoarseIndices[0]
941  + coarseNodeCoarseIndices[1]*gCoarseNodesPerDir[0]
942  + coarseNodeCoarseIndices[2]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
943  ghostedCoarseNodes->GIDs[currentIndex] = myGID;
944  ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
945  }
946  }
947  }
948  coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
949  ghostedCoarseNodes->PIDs(),
950  ghostedCoarseNodes->LIDs());
951 
952  // Phase 2 //
953  // ------------------------------------------------------------------ //
954  // Build a list of GIDs to import the required ghost nodes. //
955  // The ordering of the ghosts nodes will be as natural as possible, //
956  // i.e. it should follow the GID ordering of the mesh. //
957  // ------------------------------------------------------------------ //
958 
959  // Saddly we have to more or less redo what was just done to figure out the value of
960  // lNumGhostNodes, there might be some optimization possibility here...
961  ghostGIDs.resize(lNumGhostNodes);
962  LO countGhosts = 0;
963  // Get the GID of the first point on the current partition.
964  GO startingGID = minGlobalIndex;
965  Array<GO> startingIndices(3);
966  // We still want ghost nodes even if have with a 0 offset,
967  // except when we are on a boundary
968  if(ghostInterface[4] && (myOffset[2] == 0)) {
969  if(gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
970  startingGID -= endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
971  } else {
972  startingGID -= coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
973  }
974  } else {
975  startingGID -= myOffset[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
976  }
977  if(ghostInterface[2] && (myOffset[1] == 0)) {
978  if(gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
979  startingGID -= endRate[1]*gFineNodesPerDir[0];
980  } else {
981  startingGID -= coarseRate[1]*gFineNodesPerDir[0];
982  }
983  } else {
984  startingGID -= myOffset[1]*gFineNodesPerDir[0];
985  }
986  if(ghostInterface[0] && (myOffset[0] == 0)) {
987  if(gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
988  startingGID -= endRate[0];
989  } else {
990  startingGID -= coarseRate[0];
991  }
992  } else {
993  startingGID -= myOffset[0];
994  }
995 
996  { // scope for tmp
997  GO tmp;
998  startingIndices[2] = startingGID / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
999  tmp = startingGID % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
1000  startingIndices[1] = tmp / gFineNodesPerDir[0];
1001  startingIndices[0] = tmp % gFineNodesPerDir[0];
1002  }
1003 
1004  GO ghostOffset[3] = {0, 0, 0};
1005  LO lengthZero = lCoarseNodesPerDir[0];
1006  LO lengthOne = lCoarseNodesPerDir[1];
1007  LO lengthTwo = lCoarseNodesPerDir[2];
1008  if(ghostInterface[0]) {++lengthZero;}
1009  if(ghostInterface[1]) {++lengthZero;}
1010  if(ghostInterface[2]) {++lengthOne;}
1011  if(ghostInterface[3]) {++lengthOne;}
1012  if(ghostInterface[4]) {++lengthTwo;}
1013  if(ghostInterface[5]) {++lengthTwo;}
1014 
1015  // First check the bottom face as it will have the lowest GIDs
1016  if(ghostInterface[4]) {
1017  ghostOffset[2] = startingGID;
1018  for(LO j = 0; j < lengthOne; ++j) {
1019  if( (j == lengthOne-1)
1020  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1021  ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1022  } else {
1023  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1024  }
1025  for(LO k = 0; k < lengthZero; ++k) {
1026  if( (k == lengthZero-1)
1027  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1028  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1029  } else {
1030  ghostOffset[0] = k*coarseRate[0];
1031  }
1032  // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1033  // carefully.
1034  // This if statement is repeated each time a ghost node is selected.
1035  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1036  ++countGhosts;
1037  }
1038  }
1039  }
1040 
1041  // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1042  // nodes located on these layers.
1043  for(LO i = 0; i < lengthTwo; ++i) {
1044  // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1045  // swept seperatly for efficiency.
1046  if( !((i == lengthTwo-1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4]) ) {
1047  // Set the ghostOffset in direction 2 taking into account a possible endRate different
1048  // from the regular coarseRate.
1049  if( (i == lengthTwo-1) && !ghostInterface[5] ) {
1050  ghostOffset[2] = startingGID + ((i-1)*coarseRate[2] + endRate[2])
1051  *gFineNodesPerDir[1]*gFineNodesPerDir[0];
1052  } else {
1053  ghostOffset[2] = startingGID + i*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1054  }
1055  for(LO j = 0; j < lengthOne; ++j) {
1056  if( (j == 0) && ghostInterface[2] ) {
1057  for(LO k = 0; k < lengthZero; ++k) {
1058  if( (k == lengthZero-1)
1059  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1060  if(k == 0) {
1061  ghostOffset[0] = endRate[0];
1062  } else {
1063  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1064  }
1065  } else {
1066  ghostOffset[0] = k*coarseRate[0];
1067  }
1068  // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1069  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1070  ++countGhosts;
1071  }
1072  } else if( (j == lengthOne-1) && ghostInterface[3] ) {
1073  // Set the ghostOffset in direction 1 taking into account a possible endRate different
1074  // from the regular coarseRate.
1075  if( (j == lengthOne-1)
1076  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1077  ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1078  } else {
1079  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1080  }
1081  for(LO k = 0; k < lengthZero; ++k) {
1082  if( (k == lengthZero-1)
1083  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1084  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1085  } else {
1086  ghostOffset[0] = k*coarseRate[0];
1087  }
1088  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1089  ++countGhosts;
1090  }
1091  } else {
1092  // Set ghostOffset[1] for side faces sweep
1093  if( (j == lengthOne-1)
1094  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1095  ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1096  } else {
1097  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1098  }
1099 
1100  // Set ghostOffset[0], ghostGIDs and countGhosts
1101  if(ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1102  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1103  ++countGhosts;
1104  }
1105  if(ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1106  if( (startingIndices[0] + (lengthZero-1)*coarseRate[0]) > gFineNodesPerDir[0] - 1 ) {
1107  if(lengthZero > 2) {
1108  ghostOffset[0] = (lengthZero-2)*coarseRate[0] + endRate[0];
1109  } else {
1110  ghostOffset[0] = endRate[0];
1111  }
1112  } else {
1113  ghostOffset[0] = (lengthZero-1)*coarseRate[0];
1114  }
1115  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1116  ++countGhosts;
1117  }
1118  }
1119  }
1120  }
1121  }
1122 
1123  // Finally check the top face
1124  if(ghostInterface[5]) {
1125  if( startingIndices[2] + (lengthTwo-1)*coarseRate[2] + 1 > gFineNodesPerDir[2] ) {
1126  ghostOffset[2] = startingGID
1127  + ((lengthTwo-2)*coarseRate[2] + endRate[2])*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1128  } else {
1129  ghostOffset[2] = startingGID
1130  + (lengthTwo-1)*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1131  }
1132  for(LO j = 0; j < lengthOne; ++j) {
1133  if( (j == lengthOne-1)
1134  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1135  ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1136  } else {
1137  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1138  }
1139  for(LO k = 0; k < lengthZero; ++k) {
1140  if( (k == lengthZero-1)
1141  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1142  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1143  } else {
1144  ghostOffset[0] = k*coarseRate[0];
1145  }
1146  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1147  ++countGhosts;
1148  }
1149  }
1150  }
1151 
1152  // Phase 3 //
1153  // ------------------------------------------------------------------ //
1154  // Final phase of this function, lists are being built to create the //
1155  // column and domain maps of the projection as well as the map and //
1156  // arrays for the coarse coordinates multivector. //
1157  // ------------------------------------------------------------------ //
1158 
1159  Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1160  LO currentNode, offset2, offset1, offset0;
1161  // Find the GIDs of the coarse nodes on the partition.
1162  for(LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1163  if(myOffset[2] == 0) {
1164  offset2 = startingIndices[2] + myOffset[2];
1165  } else {
1166  if(startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1167  offset2 = startingIndices[2] + endRate[2];
1168  } else {
1169  offset2 = startingIndices[2] + coarseRate[2];
1170  }
1171  }
1172  if(offset2 + ind2*coarseRate[2] > gFineNodesPerDir[2] - 1) {
1173  offset2 += (ind2 - 1)*coarseRate[2] + endRate[2];
1174  } else {
1175  offset2 += ind2*coarseRate[2];
1176  }
1177  offset2 = offset2*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1178 
1179  for(LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1180  if(myOffset[1] == 0) {
1181  offset1 = startingIndices[1] + myOffset[1];
1182  } else {
1183  if(startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1184  offset1 = startingIndices[1] + endRate[1];
1185  } else {
1186  offset1 = startingIndices[1] + coarseRate[1];
1187  }
1188  }
1189  if(offset1 + ind1*coarseRate[1] > gFineNodesPerDir[1] - 1) {
1190  offset1 += (ind1 - 1)*coarseRate[1] + endRate[1];
1191  } else {
1192  offset1 += ind1*coarseRate[1];
1193  }
1194  offset1 = offset1*gFineNodesPerDir[0];
1195  for(LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1196  offset0 = startingIndices[0];
1197  if(myOffset[0] == 0) {
1198  offset0 += ind0*coarseRate[0];
1199  } else {
1200  offset0 += (ind0 + 1)*coarseRate[0];
1201  }
1202  if(offset0 > gFineNodesPerDir[0] - 1) {offset0 += endRate[0] - coarseRate[0];}
1203 
1204  currentNode = ind2*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]
1205  + ind1*lCoarseNodesPerDir[0]
1206  + ind0;
1207  gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1208  }
1209  }
1210  }
1211 
1212  // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1213  // and the corresponding dofs that will need to be added to colMapP.
1214  colGIDs.resize(BlkSize*(lNumCoarseNodes+lNumGhostNodes));
1215  coarseNodesGIDs.resize(lNumCoarseNodes);
1216  for(LO i = 0; i < numDimensions; ++i) {coarseNodes[i].resize(lNumCoarseNodes);}
1217  GO fineNodesPerCoarseSlab = coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1218  GO fineNodesEndCoarseSlab = endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1219  GO fineNodesPerCoarsePlane = coarseRate[1]*gFineNodesPerDir[0];
1220  GO fineNodesEndCoarsePlane = endRate[1]*gFineNodesPerDir[0];
1221  GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
1222  GO gCoarseNodeOnCoarseGridGID;
1223  LO gInd[3], lCol;
1224  Array<int> ghostPIDs (lNumGhostNodes);
1225  Array<LO> ghostLIDs (lNumGhostNodes);
1226  Array<LO> ghostPermut(lNumGhostNodes);
1227  for(LO k = 0; k < lNumGhostNodes; ++k) {ghostPermut[k] = k;}
1228  coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1229  sh_sort_permute(ghostPIDs.begin(),ghostPIDs.end(), ghostPermut.begin(),ghostPermut.end());
1230 
1231  { // scope for tmpInds, tmpVars and tmp.
1232  GO tmpInds[3], tmpVars[2];
1233  LO tmp;
1234  // Loop over the coarse nodes of the partition and add them to colGIDs
1235  // that will be used to construct the column and domain maps of P as well
1236  // as to construct the coarse coordinates map.
1237  // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1238  // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1239  LO col = 0;
1240  LO firstCoarseNodeInds[3], currentCoarseNode;
1241  for(LO dim = 0; dim < 3; ++dim) {
1242  if(myOffset[dim] == 0) {
1243  firstCoarseNodeInds[dim] = 0;
1244  } else {
1245  firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1246  }
1247  }
1249  for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim);}
1250  for(LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1251  for(LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1252  for(LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1253  col = k*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0] + j*lCoarseNodesPerDir[0] + i;
1254 
1255  // Check for endRate
1256  currentCoarseNode = 0;
1257  if(firstCoarseNodeInds[0] + i*coarseRate[0] > lFineNodesPerDir[0] - 1) {
1258  currentCoarseNode += firstCoarseNodeInds[0] + (i-1)*coarseRate[0] + endRate[0];
1259  } else {
1260  currentCoarseNode += firstCoarseNodeInds[0] + i*coarseRate[0];
1261  }
1262  if(firstCoarseNodeInds[1] + j*coarseRate[1] > lFineNodesPerDir[1] - 1) {
1263  currentCoarseNode += (firstCoarseNodeInds[1] + (j-1)*coarseRate[1] + endRate[1])
1264  *lFineNodesPerDir[0];
1265  } else {
1266  currentCoarseNode += (firstCoarseNodeInds[1] + j*coarseRate[1])*lFineNodesPerDir[0];
1267  }
1268  if(firstCoarseNodeInds[2] + k*coarseRate[2] > lFineNodesPerDir[2] - 1) {
1269  currentCoarseNode += (firstCoarseNodeInds[2] + (k-1)*coarseRate[2] + endRate[2])
1270  *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1271  } else {
1272  currentCoarseNode += (firstCoarseNodeInds[2] + k*coarseRate[2])
1273  *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1274  }
1275  // Load coordinates
1276  for(LO dim = 0; dim < numDimensions; ++dim) {
1277  coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1278  }
1279 
1280  if((endRate[2] != coarseRate[2])
1281  && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2)*fineNodesPerCoarseSlab
1282  + fineNodesEndCoarseSlab - 1)) {
1283  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1284  tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1)*fineNodesPerCoarseSlab
1285  - fineNodesEndCoarseSlab;
1286  } else {
1287  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1288  tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1289  }
1290  if((endRate[1] != coarseRate[1])
1291  && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1292  + fineNodesEndCoarsePlane - 1)) {
1293  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1294  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1295  - fineNodesEndCoarsePlane;
1296  } else {
1297  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1298  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1299  }
1300  if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1301  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1302  } else {
1303  tmpInds[0] = tmpVars[1] / coarseRate[0];
1304  }
1305  gInd[2] = col / (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1306  tmp = col % (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1307  gInd[1] = tmp / lCoarseNodesPerDir[0];
1308  gInd[0] = tmp % lCoarseNodesPerDir[0];
1309  lCol = gInd[2]*(lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0])
1310  + gInd[1]*lCoarseNodesPerDir[0] + gInd[0];
1311  gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1312  + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1313  coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1314  for(LO dof = 0; dof < BlkSize; ++dof) {
1315  colGIDs[BlkSize*lCol + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1316  }
1317  }
1318  }
1319  }
1320  // Now loop over the ghost nodes of the partition to add them to colGIDs
1321  // since they will need to be included in the column map of P
1322  for(col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1323  if((endRate[2] != coarseRate[2])
1324  && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2)
1325  *fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1326  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1327  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]]
1328  - (tmpInds[2] - 1)*fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1329  } else {
1330  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1331  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1332  }
1333  if((endRate[1] != coarseRate[1])
1334  && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1335  + fineNodesEndCoarsePlane - 1)) {
1336  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1337  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1338  - fineNodesEndCoarsePlane;
1339  } else {
1340  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1341  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1342  }
1343  if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1344  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1345  } else {
1346  tmpInds[0] = tmpVars[1] / coarseRate[0];
1347  }
1348  gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1349  + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1350  for(LO dof = 0; dof < BlkSize; ++dof) {
1351  colGIDs[BlkSize*col + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1352  }
1353  }
1354  } // End of scope for tmpInds, tmpVars and tmp
1355 
1356  } // GetGeometricData()
1357 
1358  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1360  ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1361  const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1362  const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1363  const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1364  const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1365  const Array<bool> ghostInterface, const Array<int> elementFlags,
1366  const std::string stencilType, const std::string /* blockStrategy */,
1367  const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1368  const Array<GO> /* colGIDs */,
1371  Array<LO>& lDofInd) const {
1372 
1373  // First extract data from Aghost and move it to the corresponding dense matrix
1374  // This step requires to compute the number of nodes (resp dofs) in the current
1375  // coarse element, then allocate storage for local dense matrices, finally loop
1376  // over the dofs and extract the corresponding row in Aghost before dispactching
1377  // its data to the dense matrices.
1378  // At the same time we want to compute a mapping from the element indexing to
1379  // group indexing. The groups are the following: corner, edge, face and interior
1380  // nodes. We uses these groups to operate on the dense matrices but need to
1381  // store the nodes in their original order after groupd operations are completed.
1382  LO countInterior=0, countFace=0, countEdge=0, countCorner =0;
1383  Array<LO> collapseDir(numNodesInElement*BlkSize);
1384  for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1385  for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1386  for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1387  // Check for corner node
1388  if((ke == 0 || ke == elementNodesPerDir[2]-1)
1389  && (je == 0 || je == elementNodesPerDir[1]-1)
1390  && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1391  for(LO dof = 0; dof < BlkSize; ++dof) {
1392  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1393  + je*elementNodesPerDir[0] + ie) + dof] = 0;
1394  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1395  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countCorner + dof;
1396  }
1397  ++countCorner;
1398 
1399  // Check for edge node
1400  } else if (((ke == 0 || ke == elementNodesPerDir[2]-1)
1401  && (je == 0 || je == elementNodesPerDir[1]-1))
1402  || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1403  && (ie == 0 || ie == elementNodesPerDir[0]-1))
1404  || ((je == 0 || je == elementNodesPerDir[1]-1)
1405  && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1406  for(LO dof = 0; dof < BlkSize; ++dof) {
1407  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1408  + je*elementNodesPerDir[0] + ie) + dof] = 1;
1409  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1410  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countEdge + dof;
1411  if((ke == 0 || ke == elementNodesPerDir[2]-1)
1412  && (je == 0 || je == elementNodesPerDir[1]-1)) {
1413  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1414  + je*elementNodesPerDir[0] + ie) + dof] = 0;
1415  } else if((ke == 0 || ke == elementNodesPerDir[2]-1)
1416  && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1417  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1418  + je*elementNodesPerDir[0] + ie) + dof] = 1;
1419  } else if((je == 0 || je == elementNodesPerDir[1]-1
1420  ) && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1421  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1422  + je*elementNodesPerDir[0] + ie) + dof] = 2;
1423  }
1424  }
1425  ++countEdge;
1426 
1427  // Check for face node
1428  } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1429  || (je == 0 || je == elementNodesPerDir[1]-1)
1430  || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1431  for(LO dof = 0; dof < BlkSize; ++dof) {
1432  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1433  + je*elementNodesPerDir[0] + ie) + dof] = 2;
1434  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1435  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countFace + dof;
1436  if(ke == 0 || ke == elementNodesPerDir[2]-1) {
1437  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1438  + je*elementNodesPerDir[0] + ie) + dof] = 2;
1439  } else if(je == 0 || je == elementNodesPerDir[1]-1) {
1440  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1441  + je*elementNodesPerDir[0] + ie) + dof] = 1;
1442  } else if(ie == 0 || ie == elementNodesPerDir[0]-1) {
1443  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1444  + je*elementNodesPerDir[0] + ie) + dof] = 0;
1445  }
1446  }
1447  ++countFace;
1448 
1449  // Otherwise it is an interior node
1450  } else {
1451  for(LO dof = 0; dof < BlkSize; ++dof) {
1452  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1453  + je*elementNodesPerDir[0] + ie) + dof] = 3;
1454  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1455  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countInterior +dof;
1456  }
1457  ++countInterior;
1458  }
1459  }
1460  }
1461  }
1462 
1463  LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1464  numInteriorNodes = (elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1465  *(elementNodesPerDir[2] - 2);
1466  numFaceNodes = 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1467  + 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[2] - 2)
1468  + 2*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[2] - 2);
1469  numEdgeNodes = 4*(elementNodesPerDir[0] - 2) + 4*(elementNodesPerDir[1] - 2)
1470  + 4*(elementNodesPerDir[2] - 2);
1471  // Diagonal blocks
1472  Teuchos::SerialDenseMatrix<LO,SC> Aii(BlkSize*numInteriorNodes, BlkSize*numInteriorNodes);
1473  Teuchos::SerialDenseMatrix<LO,SC> Aff(BlkSize*numFaceNodes, BlkSize*numFaceNodes);
1474  Teuchos::SerialDenseMatrix<LO,SC> Aee(BlkSize*numEdgeNodes, BlkSize*numEdgeNodes);
1475  // Upper triangular blocks
1476  Teuchos::SerialDenseMatrix<LO,SC> Aif(BlkSize*numInteriorNodes, BlkSize*numFaceNodes);
1477  Teuchos::SerialDenseMatrix<LO,SC> Aie(BlkSize*numInteriorNodes, BlkSize*numEdgeNodes);
1478  Teuchos::SerialDenseMatrix<LO,SC> Afe(BlkSize*numFaceNodes, BlkSize*numEdgeNodes);
1479  // Coarse nodes "right hand sides" and local prolongators
1480  Teuchos::SerialDenseMatrix<LO,SC> Aic(BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1481  Teuchos::SerialDenseMatrix<LO,SC> Afc(BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1482  Teuchos::SerialDenseMatrix<LO,SC> Aec(BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1483  Pi.shape( BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1484  Pf.shape( BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1485  Pe.shape( BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1486 
1487  ArrayView<const LO> rowIndices;
1488  ArrayView<const SC> rowValues;
1489  LO idof, iInd, jInd;
1490  int iType = 0, jType = 0;
1491  int orientation = -1;
1492  int collapseFlags[3] = {};
1493  Array<SC> stencil((std::pow(3,numDimensions))*BlkSize);
1494  // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1495  // we might want to fuse the first three loops and do integer arithmetic
1496  // to have more optimization during compilation...
1497  for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1498  for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1499  for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1500  collapseFlags[0] = 0; collapseFlags[1] = 0; collapseFlags[2] = 0;
1501  if((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1502  collapseFlags[0] += 1;
1503  }
1504  if((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1505  collapseFlags[0] += 2;
1506  }
1507  if((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1508  collapseFlags[1] += 1;
1509  }
1510  if((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1511  collapseFlags[1] += 2;
1512  }
1513  if((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1514  collapseFlags[2] += 1;
1515  }
1516  if((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1517  collapseFlags[2] += 2;
1518  }
1519 
1520  // Based on (ie, je, ke) we detect the type of node we are dealing with
1521  GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1522  for(LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1523  // Compute the dof index for each dof at point (ie, je, ke)
1524  idof = ((elemInds[2]*coarseRate[2] + ke)*lFineNodesPerDir[1]*lFineNodesPerDir[0]
1525  + (elemInds[1]*coarseRate[1] + je)*lFineNodesPerDir[0]
1526  + elemInds[0]*coarseRate[0] + ie)*BlkSize + dof0;
1527  Aghost->getLocalRowView(idof, rowIndices, rowValues);
1528  FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1529  elementNodesPerDir, collapseFlags, stencilType, stencil);
1530  LO io, jo, ko;
1531  if(iType == 3) {// interior node, no stencil collapse needed
1532  for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1533  // Find (if, jf, kf) the indices associated with the interacting node
1534  ko = ke + (interactingNode / 9 - 1);
1535  {
1536  LO tmp = interactingNode % 9;
1537  jo = je + (tmp / 3 - 1);
1538  io = ie + (tmp % 3 - 1);
1539  int dummy;
1540  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1541  }
1542  if((io > -1 && io < elementNodesPerDir[0]) &&
1543  (jo > -1 && jo < elementNodesPerDir[1]) &&
1544  (ko > -1 && ko < elementNodesPerDir[2])) {
1545  for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1546  if (jType == 3) {
1547  Aii(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1548  stencil[interactingNode*BlkSize + dof1];
1549  } else if(jType == 2) {
1550  Aif(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1551  stencil[interactingNode*BlkSize + dof1];
1552  } else if(jType == 1) {
1553  Aie(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1554  stencil[interactingNode*BlkSize + dof1];
1555  } else if(jType == 0) {
1556  Aic(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1557  -stencil[interactingNode*BlkSize + dof1];
1558  }
1559  }
1560  }
1561  }
1562  } else if(iType == 2) {// Face node, collapse stencil along face normal (*orientation)
1563  CollapseStencil(2, orientation, collapseFlags, stencil);
1564  for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1565  // Find (if, jf, kf) the indices associated with the interacting node
1566  ko = ke + (interactingNode / 9 - 1);
1567  {
1568  LO tmp = interactingNode % 9;
1569  jo = je + (tmp / 3 - 1);
1570  io = ie + (tmp % 3 - 1);
1571  int dummy;
1572  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1573  }
1574  if((io > -1 && io < elementNodesPerDir[0]) &&
1575  (jo > -1 && jo < elementNodesPerDir[1]) &&
1576  (ko > -1 && ko < elementNodesPerDir[2])) {
1577  for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1578  if(jType == 2) {
1579  Aff(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1580  stencil[interactingNode*BlkSize + dof1];
1581  } else if(jType == 1) {
1582  Afe(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1583  stencil[interactingNode*BlkSize + dof1];
1584  } else if(jType == 0) {
1585  Afc(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1586  -stencil[interactingNode*BlkSize + dof1];
1587  }
1588  }
1589  }
1590  }
1591  } else if(iType == 1) {// Edge node, collapse stencil perpendicular to edge direction
1592  CollapseStencil(1, orientation, collapseFlags, stencil);
1593  for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1594  // Find (if, jf, kf) the indices associated with the interacting node
1595  ko = ke + (interactingNode / 9 - 1);
1596  {
1597  LO tmp = interactingNode % 9;
1598  jo = je + (tmp / 3 - 1);
1599  io = ie + (tmp % 3 - 1);
1600  int dummy;
1601  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1602  }
1603  if((io > -1 && io < elementNodesPerDir[0]) &&
1604  (jo > -1 && jo < elementNodesPerDir[1]) &&
1605  (ko > -1 && ko < elementNodesPerDir[2])) {
1606  for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1607  if(jType == 1) {
1608  Aee(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1609  stencil[interactingNode*BlkSize + dof1];
1610  } else if(jType == 0) {
1611  Aec(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1612  -stencil[interactingNode*BlkSize + dof1];
1613  }
1614  } // Check if interacting node is in element or not
1615  } // Pc is the identity so no need to treat iType == 0
1616  } // Loop over interacting nodes within a row
1617  } // Check for current node type
1618  } // Loop over degrees of freedom associated to a given node
1619  } // Loop over i
1620  } // Loop over j
1621  } // Loop over k
1622 
1623  // Compute the projection operators for edge and interior nodes
1624  //
1625  // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1626  // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1627  // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1628  // [P_c] = I_c
1629  //
1630  { // Compute P_e
1631  // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1632  // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1634  problem.setMatrix(Teuchos::rcp(&Aee, false));
1635  problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1636  problem.factorWithEquilibration(true);
1637  problem.solve();
1638  problem.unequilibrateLHS();
1639  }
1640 
1641  { // Compute P_f
1642  // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1643  // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1644  Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1645  // Step two: P_f = A_{ff}^{-1}*A_{fc}
1647  problem.setMatrix(Teuchos::rcp(&Aff, false));
1648  problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1649  problem.factorWithEquilibration(true);
1650  problem.solve();
1651  problem.unequilibrateLHS();
1652  }
1653 
1654  { // Compute P_i
1655  // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1656  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1657  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1658  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1659  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1660  // Step two: P_i = A_{ii}^{-1}*A_{ic}
1662  problem.setMatrix(Teuchos::rcp(&Aii, false));
1663  problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1664  problem.factorWithEquilibration(true);
1665  problem.solve();
1666  problem.unequilibrateLHS();
1667  }
1668 
1669  } // ComputeLocalEntries()
1670 
1671  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1673  CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1674  Array<SC>& stencil) const {
1675 
1676  if(type == 2) {// Face stencil collapse
1677  // Let's hope things vectorize well inside this if statement
1678  if(orientation == 0) {
1679  for(LO i = 0; i < 9; ++i) {
1680  stencil[3*i + 1] = stencil[3*i] + stencil[3*i + 1] + stencil[3*i + 2];
1681  stencil[3*i] = 0;
1682  stencil[3*i + 2] = 0;
1683  }
1684  } else if(orientation == 1) {
1685  for(LO i = 0; i < 3; ++i) {
1686  stencil[9*i + 3] = stencil[9*i + 0] + stencil[9*i + 3] + stencil[9*i + 6];
1687  stencil[9*i + 0] = 0;
1688  stencil[9*i + 6] = 0;
1689  stencil[9*i + 4] = stencil[9*i + 1] + stencil[9*i + 4] + stencil[9*i + 7];
1690  stencil[9*i + 1] = 0;
1691  stencil[9*i + 7] = 0;
1692  stencil[9*i + 5] = stencil[9*i + 2] + stencil[9*i + 5] + stencil[9*i + 8];
1693  stencil[9*i + 2] = 0;
1694  stencil[9*i + 8] = 0;
1695  }
1696  } else if(orientation == 2) {
1697  for(LO i = 0; i < 9; ++i) {
1698  stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1699  stencil[i] = 0;
1700  stencil[i + 18] = 0;
1701  }
1702  }
1703  } else if(type == 1) {// Edge stencil collapse
1704  SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1705  if(orientation == 0) {
1706  for(LO i = 0; i < 9; ++i) {
1707  tmp1 += stencil[0 + 3*i];
1708  stencil[0 + 3*i] = 0;
1709  tmp2 += stencil[1 + 3*i];
1710  stencil[1 + 3*i] = 0;
1711  tmp3 += stencil[2 + 3*i];
1712  stencil[2 + 3*i] = 0;
1713  }
1714  stencil[12] = tmp1;
1715  stencil[13] = tmp2;
1716  stencil[14] = tmp3;
1717  } else if(orientation == 1) {
1718  for(LO i = 0; i < 3; ++i) {
1719  tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1720  stencil[ 0 + i] = 0;
1721  stencil[ 9 + i] = 0;
1722  stencil[18 + i] = 0;
1723  tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1724  stencil[ 3 + i] = 0;
1725  stencil[12 + i] = 0;
1726  stencil[21 + i] = 0;
1727  tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1728  stencil[ 6 + i] = 0;
1729  stencil[15 + i] = 0;
1730  stencil[24 + i] = 0;
1731  }
1732  stencil[10] = tmp1;
1733  stencil[13] = tmp2;
1734  stencil[16] = tmp3;
1735  } else if(orientation == 2) {
1736  for(LO i = 0; i < 9; ++i) {
1737  tmp1 += stencil[i];
1738  stencil[i] = 0;
1739  tmp2 += stencil[i + 9];
1740  stencil[i + 9] = 0;
1741  tmp3 += stencil[i + 18];
1742  stencil[i + 18] = 0;
1743  }
1744  stencil[ 4] = tmp1;
1745  stencil[13] = tmp2;
1746  stencil[22] = tmp3;
1747  }
1748  }
1749  } // CollapseStencil
1750 
1751  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1753  FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1754  const LO /* ke */, const ArrayView<const SC> rowValues,const Array<LO> /* elementNodesPerDir */,
1755  const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1756  const {
1757 
1758  if(stencilType == "reduced") {
1759  Array<int> fullStencilInds(7);
1760  fullStencilInds[0] = 4; fullStencilInds[1] = 10; fullStencilInds[2] = 12;
1761  fullStencilInds[3] = 13; fullStencilInds[4] = 14; fullStencilInds[5] = 16;
1762  fullStencilInds[6] = 22;
1763 
1764  // Create a mask array to automate mesh boundary processing
1765  Array<int> mask(7);
1766  int stencilSize = rowValues.size();
1767  if(collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1768  if(stencilSize == 1) {
1769  // The assumption is made that if only one non-zero exists in the row
1770  // then this represent a Dirichlet BC being enforced.
1771  // One might want to zero out the corresponding row in the prolongator.
1772  mask[0] = 1; mask[1] = 1; mask[2] = 1;
1773  mask[4] = 1; mask[5] = 1; mask[6] = 1;
1774  } else {
1775  // Here we are looking at Neumann like BC where only a flux is prescribed
1776  // and less non-zeros will be present in the corresponding row.
1777  if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1778  mask[0] = 1;
1779  }
1780  if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1781  mask[6] = 1;
1782  }
1783  if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1784  mask[1] = 1;
1785  }
1786  if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1787  mask[5] = 1;
1788  }
1789  if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1790  mask[2] = 1;
1791  }
1792  if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1793  mask[4] = 1;
1794  }
1795  }
1796  }
1797 
1798  int offset = 0;
1799  for(int ind = 0; ind < 7; ++ind) {
1800  if(mask[ind] == 1) {
1801  for(int dof = 0; dof < BlkSize; ++dof) {
1802  stencil[BlkSize*fullStencilInds[ind] + dof] = 0.0;
1803  }
1804  ++offset; // The offset is used to stay within rowValues bounds
1805  } else {
1806  for(int dof = 0; dof < BlkSize; ++dof) {
1807  stencil[BlkSize*fullStencilInds[ind] + dof] = rowValues[BlkSize*(ind - offset) + dof];
1808  }
1809  }
1810  }
1811  } else if(stencilType == "full") {
1812  // Create a mask array to automate mesh boundary processing
1813  Array<int> mask(27);
1814  if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1815  for(int ind = 0; ind < 9; ++ind) {
1816  mask[ind] = 1;
1817  }
1818  }
1819  if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1820  for(int ind = 0; ind < 9; ++ind) {
1821  mask[18 + ind] = 1;
1822  }
1823  }
1824  if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1825  for(int ind1 = 0; ind1 < 3; ++ind1) {
1826  for(int ind2 = 0; ind2 < 3; ++ind2) {
1827  mask[ind1*9 + ind2] = 1;
1828  }
1829  }
1830  }
1831  if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1832  for(int ind1 = 0; ind1 < 3; ++ind1) {
1833  for(int ind2 = 0; ind2 < 3; ++ind2) {
1834  mask[ind1*9 + ind2 + 6] = 1;
1835  }
1836  }
1837  }
1838  if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1839  for(int ind = 0; ind < 9; ++ind) {
1840  mask[3*ind] = 1;
1841  }
1842  }
1843  if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1844  for(int ind = 0; ind < 9; ++ind) {
1845  mask[3*ind + 2] = 1;
1846  }
1847  }
1848 
1849  // Now the stencil is extracted and formated
1850  int offset = 0;
1851  for(LO ind = 0; ind < 27; ++ind) {
1852  if(mask[ind] == 0) {
1853  for(int dof = 0; dof < BlkSize; ++dof) {
1854  stencil[BlkSize*ind + dof] = 0.0;
1855  }
1856  ++offset; // The offset is used to stay within rowValues bounds
1857  } else {
1858  for(int dof = 0; dof < BlkSize; ++dof) {
1859  stencil[BlkSize*ind + dof] = rowValues[BlkSize*(ind - offset) + dof];
1860  }
1861  }
1862  }
1863  } // stencilTpye
1864  } // FormatStencil()
1865 
1866  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1868  const LO ie, const LO je, const LO ke,
1869  const Array<LO> elementNodesPerDir,
1870  int* type, LO& ind, int* orientation) const {
1871 
1872  // Initialize flags with -1 to be able to catch issues easily
1873  *type = -1, ind = 0, *orientation = -1;
1874  if((ke == 0 || ke == elementNodesPerDir[2]-1)
1875  && (je == 0 || je == elementNodesPerDir[1]-1)
1876  && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1877  // Corner node
1878  *type = 0;
1879  ind = 4*ke / (elementNodesPerDir[2]-1) + 2*je / (elementNodesPerDir[1]-1)
1880  + ie / (elementNodesPerDir[0]-1);
1881  } else if(((ke == 0 || ke == elementNodesPerDir[2]-1)
1882  && (je == 0 || je == elementNodesPerDir[1]-1))
1883  || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1884  && (ie == 0 || ie == elementNodesPerDir[0]-1))
1885  || ((je == 0 || je == elementNodesPerDir[1]-1)
1886  && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1887  // Edge node
1888  *type = 1;
1889  if(ke > 0) {ind += 2*(elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);}
1890  if(ke == elementNodesPerDir[2] - 1) {ind += 4*(elementNodesPerDir[2] - 2);}
1891  if((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1892  if(je == 0) {// jlo ie edge
1893  *orientation = 0;
1894  ind += ie - 1;
1895  } else if(je == elementNodesPerDir[1] - 1) {// jhi ie edge
1896  *orientation = 0;
1897  ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1898  } else {// ilo or ihi je edge
1899  *orientation = 1;
1900  ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0] - 1);
1901  }
1902  } else {// ke edges
1903  *orientation = 2;
1904  ind += 4*(ke - 1) + 2*(je/(elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1905  }
1906  } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1907  || (je == 0 || je == elementNodesPerDir[1]-1)
1908  || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1909  // Face node
1910  *type = 2;
1911  if(ke == 0) {// current node is on "bottom" face
1912  *orientation = 2;
1913  ind = (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1914  } else {
1915  // add nodes from "bottom" face
1916  ind += (elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2);
1917  // add nodes from side faces
1918  ind += 2*(ke - 1)*(elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1919  if(ke == elementNodesPerDir[2]-1) {// current node is on "top" face
1920  *orientation = 2;
1921  ind += (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1922  } else {// current node is on a side face
1923  if(je == 0) {
1924  *orientation = 1;
1925  ind += ie - 1;
1926  } else if(je == elementNodesPerDir[1] - 1) {
1927  *orientation = 1;
1928  ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1929  } else {
1930  *orientation = 0;
1931  ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0]-1);
1932  }
1933  }
1934  }
1935  } else {
1936  // Interior node
1937  *type = 3;
1938  ind = (ke - 1)*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2)
1939  + (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1940  }
1941  } // GetNodeInfo()
1942 
1943  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1945  const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1946  const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1947  const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1948  const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const
1949  {
1950  typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1951  DT n = last1 - first1;
1952  DT m = n / 2;
1954  while (m > z)
1955  {
1956  DT max = n - m;
1957  for (DT j = 0; j < max; j++)
1958  {
1959  for (DT k = j; k >= 0; k-=m)
1960  {
1961  if (first1[first2[k+m]] >= first1[first2[k]])
1962  break;
1963  std::swap(first2[k+m], first2[k]);
1964  }
1965  }
1966  m = m/2;
1967  }
1968  }
1969 
1970 } //namespace MueLu
1971 
1972 #define MUELU_BLACKBOXPFACTORY_SHORT
1973 #endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target)
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
static RCP< StridedMap > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, std::vector< size_t > &stridingInfo, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, LocalOrdinal stridedBlockId=-1, GlobalOrdinal offset=0, LocalGlobal lg=Xpetra::GloballyDistributed)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
ArrayView< T > view(size_type offset, size_type size)
int multiply(ETransp transa, ETransp transb, ScalarType alpha, const SerialDenseMatrix< OrdinalType, ScalarType > &A, const SerialDenseMatrix< OrdinalType, ScalarType > &B, ScalarType beta)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
size_type size() const
LocalOrdinal LO
One-liner description of what is happening.
static const NoFactory * get()
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void factorWithEquilibration(bool flag)
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void resize(size_type new_size, const value_type &x=value_type())
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
iterator end()
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
static void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< LocalOrdinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals, const UnderlyingLib lib)
int setVectors(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &X, const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &B)
size_type size() const
Scalar SC
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Node NO
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
int shape(OrdinalType numRows, OrdinalType numCols)
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
std::vector< T >::iterator iterator
iterator begin()
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
int setMatrix(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &A)
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const