MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
11 #define MUELU_BLACKBOXPFACTORY_DEF_HPP
12 
13 #include <stdlib.h>
14 #include <iomanip>
15 
16 // #include <Teuchos_LAPACK.hpp>
20 
22 #include <Xpetra_CrsMatrixWrap.hpp>
23 #include <Xpetra_ImportFactory.hpp>
24 #include <Xpetra_Matrix.hpp>
25 #include <Xpetra_MapFactory.hpp>
26 #include <Xpetra_MultiVectorFactory.hpp>
27 #include <Xpetra_VectorFactory.hpp>
28 
29 #include <Xpetra_IO.hpp>
30 
32 
33 #include "MueLu_Monitor.hpp"
34 
35 namespace MueLu {
36 
37 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
39  RCP<ParameterList> validParamList = rcp(new ParameterList());
40 
41  // Coarsen can come in two forms, either a single char that will be interpreted as an integer
42  // which is used as the coarsening rate in every spatial dimentions,
43  // or it can be a longer string that will then be interpreted as an array of integers.
44  // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
45  // is the default setting!
46  validParamList->set<std::string>("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
47  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
48  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
49  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
50  validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
51  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
52  validParamList->set<std::string>("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
53  validParamList->set<std::string>("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
54 
55  return validParamList;
56 }
57 
58 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
60  Level& /* coarseLevel */)
61  const {
62  Input(fineLevel, "A");
63  Input(fineLevel, "Nullspace");
64  Input(fineLevel, "Coordinates");
65  // Request the global number of nodes per dimensions
66  if (fineLevel.GetLevelID() == 0) {
67  if (fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
68  fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
69  } else {
70  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
72  "gNodesPerDim was not provided by the user on level0!");
73  }
74  } else {
75  Input(fineLevel, "gNodesPerDim");
76  }
77 
78  // Request the local number of nodes per dimensions
79  if (fineLevel.GetLevelID() == 0) {
80  if (fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
81  fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
82  } else {
83  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
85  "lNodesPerDim was not provided by the user on level0!");
86  }
87  } else {
88  Input(fineLevel, "lNodesPerDim");
89  }
90 }
91 
92 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94  Level& coarseLevel) const {
95  return BuildP(fineLevel, coarseLevel);
96 }
97 
98 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
100  Level& coarseLevel) const {
101  FactoryMonitor m(*this, "Build", coarseLevel);
102 
103  // Get parameter list
104  const ParameterList& pL = GetParameterList();
105 
106  // obtain general variables
107  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
108  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
110  Get<RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > >(fineLevel, "Coordinates");
111  LO numDimensions = coordinates->getNumVectors();
112  LO BlkSize = A->GetFixedBlockSize();
113 
114  // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
115  Array<GO> gFineNodesPerDir(3);
116  Array<LO> lFineNodesPerDir(3);
117  // Get the number of points in each direction
118  if (fineLevel.GetLevelID() == 0) {
119  gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
120  lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
121  } else {
122  // Loading global number of nodes per diretions
123  gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
124 
125  // Loading local number of nodes per diretions
126  lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
127  }
128  for (LO i = 0; i < 3; ++i) {
129  if (gFineNodesPerDir[i] == 0) {
130  GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
131  << std::endl;
132  gFineNodesPerDir[i] = 1;
133  }
134  if (lFineNodesPerDir[i] == 0) {
135  GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
136  << std::endl;
137  lFineNodesPerDir[i] = 1;
138  }
139  }
140  GO gNumFineNodes = gFineNodesPerDir[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
141  LO lNumFineNodes = lFineNodesPerDir[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0];
142 
143  // Get the coarsening rate
144  std::string coarsenRate = pL.get<std::string>("Coarsen");
145  Array<LO> coarseRate(3);
146  {
147  Teuchos::Array<LO> crates;
148  try {
149  crates = Teuchos::fromStringToArray<LO>(coarsenRate);
150  } catch (const Teuchos::InvalidArrayStringRepresentation& e) {
151  GetOStream(Errors, -1) << " *** Coarsen must be a string convertible into an array! *** "
152  << std::endl;
153  throw e;
154  }
155  TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
157  "Coarsen must have at least as many components as the number of"
158  " spatial dimensions in the problem.");
159  for (LO i = 0; i < 3; ++i) {
160  if (i < numDimensions) {
161  if (crates.size() == 1) {
162  coarseRate[i] = crates[0];
163  } else if (i < crates.size()) {
164  coarseRate[i] = crates[i];
165  } else {
166  GetOStream(Errors, -1) << " *** Coarsen must be at least as long as the number of"
167  " spatial dimensions! *** "
168  << std::endl;
170  " *** Coarsen must be at least as long as the number of"
171  " spatial dimensions! *** \n");
172  }
173  } else {
174  coarseRate[i] = 1;
175  }
176  }
177  } // End of scope for crates
178 
179  // Get the stencil type used for discretization
180  const std::string stencilType = pL.get<std::string>("stencil type");
181  if (stencilType != "full" && stencilType != "reduced") {
182  GetOStream(Errors, -1) << " *** stencil type must be set to: full or reduced, any other value "
183  "is trated as an error! *** "
184  << std::endl;
185  throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
186  }
187 
188  // Get the strategy for PDE systems
189  const std::string blockStrategy = pL.get<std::string>("block strategy");
190  if (blockStrategy != "coupled" && blockStrategy != "uncoupled") {
191  GetOStream(Errors, -1) << " *** block strategy must be set to: coupled or uncoupled, any other "
192  "value is trated as an error! *** "
193  << std::endl;
194  throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
195  }
196 
197  GO gNumCoarseNodes = 0;
198  LO lNumCoarseNodes = 0;
199  Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
200  Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
201  Array<bool> ghostInterface(6);
202  Array<int> boundaryFlags(3);
205  for (LO dim = 0; dim < numDimensions; ++dim) {
206  fineNodes[dim] = coordinates->getData(dim)();
207  }
208 
209  // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
210  RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
211 
212  GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize, // inputs
213  gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
214  lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
215  gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
216  ghostedCoarseNodes);
217 
218  // Create the MultiVector of coarse coordinates
219  Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
220  RCP<const Map> coarseCoordsMap = MapFactory::Build(lib,
221  gNumCoarseNodes,
222  coarseNodesGIDs.view(0, lNumCoarseNodes),
223  coordinates->getMap()->getIndexBase(),
224  coordinates->getMap()->getComm());
226  for (LO dim = 0; dim < numDimensions; ++dim) {
227  coarseCoords[dim] = coarseNodes[dim]();
228  }
229  RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > coarseCoordinates =
230  Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO>::Build(coarseCoordsMap, coarseCoords(),
231  numDimensions);
232 
233  // Now create a new matrix: Aghost that contains all the data
234  // locally needed to compute the local part of the prolongator.
235  // Here assuming that all the coarse nodes o and fine nodes +
236  // are local then all the data associated with the coarse
237  // nodes O and the fine nodes * needs to be imported.
238  //
239  // *--*--*--*--*--*--*--*
240  // | | | | | | | |
241  // o--+--+--o--+--+--O--*
242  // | | | | | | | |
243  // +--+--+--+--+--+--*--*
244  // | | | | | | | |
245  // +--+--+--+--+--+--*--*
246  // | | | | | | | |
247  // o--+--+--o--+--+--O--*
248  // | | | | | | | |
249  // +--+--+--+--+--+--*--*
250  // | | | | | | | |
251  // *--*--*--*--*--*--*--*
252  // | | | | | | | |
253  // O--*--*--O--*--*--O--*
254  //
255  // Creating that local matrix is easy enough using proper range
256  // and domain maps to import data from A. Note that with this
257  // approach we reorder the local entries using the domain map and
258  // can subsequently compute the prolongator without reordering.
259  // As usual we need to be careful about any coarsening rate
260  // change at the boundary!
261 
262  // The ingredients needed are an importer, a range map and a domain map
263  Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
264  nodeSteps[0] = 1;
265  nodeSteps[1] = gFineNodesPerDir[0];
266  nodeSteps[2] = gFineNodesPerDir[0] * gFineNodesPerDir[1];
267  Array<LO> glFineNodesPerDir(3);
268  GO startingGID = A->getRowMap()->getMinGlobalIndex();
269  for (LO dim = 0; dim < 3; ++dim) {
270  LO numCoarseNodes = 0;
271  if (dim < numDimensions) {
272  startingGID -= myOffset[dim] * nodeSteps[dim];
273  numCoarseNodes = lCoarseNodesPerDir[dim];
274  if (ghostInterface[2 * dim]) {
275  ++numCoarseNodes;
276  }
277  if (ghostInterface[2 * dim + 1]) {
278  ++numCoarseNodes;
279  }
280  if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
281  glFineNodesPerDir[dim] = (numCoarseNodes - 2) * coarseRate[dim] + endRate[dim] + 1;
282  } else {
283  glFineNodesPerDir[dim] = (numCoarseNodes - 1) * coarseRate[dim] + 1;
284  }
285  } else {
286  glFineNodesPerDir[dim] = 1;
287  }
288  }
289  ghostRowGIDs.resize(glFineNodesPerDir[0] * glFineNodesPerDir[1] * glFineNodesPerDir[2] * BlkSize);
290  for (LO k = 0; k < glFineNodesPerDir[2]; ++k) {
291  for (LO j = 0; j < glFineNodesPerDir[1]; ++j) {
292  for (LO i = 0; i < glFineNodesPerDir[0]; ++i) {
293  for (LO l = 0; l < BlkSize; ++l) {
294  ghostRowGIDs[(k * glFineNodesPerDir[1] * glFineNodesPerDir[0] + j * glFineNodesPerDir[0] + i) * BlkSize + l] = startingGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
295  }
296  }
297  }
298  }
299 
300  // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
301  Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
302  Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
303  GO colMinGID = 0;
304  Array<LO> colRange(numDimensions);
305  dimStride[0] = 1;
306  for (int dim = 1; dim < numDimensions; ++dim) {
307  dimStride[dim] = dimStride[dim - 1] * gFineNodesPerDir[dim - 1];
308  }
309  {
310  GO tmp = startingGID;
311  for (int dim = numDimensions; dim > 0; --dim) {
312  startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
313  tmp = tmp % dimStride[dim - 1];
314 
315  if (startingGlobalIndices[dim - 1] > 0) {
316  startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
317  }
318  if (startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]) {
319  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1];
320  } else {
321  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] - 1;
322  }
323  colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
324  colMinGID += startingColIndices[dim - 1] * dimStride[dim - 1];
325  }
326  }
327  ghostColGIDs.resize(colRange[0] * colRange[1] * colRange[2] * BlkSize);
328  for (LO k = 0; k < colRange[2]; ++k) {
329  for (LO j = 0; j < colRange[1]; ++j) {
330  for (LO i = 0; i < colRange[0]; ++i) {
331  for (LO l = 0; l < BlkSize; ++l) {
332  ghostColGIDs[(k * colRange[1] * colRange[0] + j * colRange[0] + i) * BlkSize + l] = colMinGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
333  }
334  }
335  }
336  }
337 
338  RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
340  ghostRowGIDs(),
341  A->getRowMap()->getIndexBase(),
342  A->getRowMap()->getComm());
343  RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
345  ghostColGIDs(),
346  A->getRowMap()->getIndexBase(),
347  A->getRowMap()->getComm());
348  RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO, GO, NO>::Build(A->getRowMap(),
349  ghostedRowMap);
351  ghostedRowMap,
352  ghostedRowMap);
353 
354  // Create the maps and data structures for the projection matrix
355  RCP<const Map> rowMapP = A->getDomainMap();
356 
357  RCP<const Map> domainMapP;
358 
359  RCP<const Map> colMapP;
360 
361  // At this point we need to create the column map which is a delicate operation.
362  // The entries in that map need to be ordered as follows:
363  // 1) first owned entries ordered by LID
364  // 2) second order the remaining entries by PID
365  // 3) entries with the same remote PID are ordered by GID.
366  // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
367  // is the number of remote nodes. The sorting can be limited to remote nodes
368  // as the owned ones are alreaded ordered by LID!
369 
370  LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
371  {
372  std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
373  for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
374  colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
375  if (ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
376  colMapOrdering[ind].PID = -1;
377  } else {
378  colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
379  }
380  colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
381  colMapOrdering[ind].lexiInd = ind;
382  }
383  std::sort(colMapOrdering.begin(), colMapOrdering.end(),
384  [](NodeID a, NodeID b) -> bool {
385  return ((a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)));
386  });
387 
388  colGIDs.resize(BlkSize * lNumGhostedNodes);
389  for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
390  // Save the permutation calculated to go from Lexicographic indexing to column map indexing
391  ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
392  for (LO dof = 0; dof < BlkSize; ++dof) {
393  colGIDs[BlkSize * ind + dof] = BlkSize * colMapOrdering[ind].GID + dof;
394  }
395  }
396  domainMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
397  BlkSize * gNumCoarseNodes,
398  colGIDs.view(0, BlkSize * lNumCoarseNodes),
399  rowMapP->getIndexBase(),
400  rowMapP->getComm());
401  colMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
403  colGIDs.view(0, colGIDs.size()),
404  rowMapP->getIndexBase(),
405  rowMapP->getComm());
406  } // End of scope for colMapOrdering and colGIDs
407 
408  std::vector<size_t> strideInfo(1);
409  strideInfo[0] = BlkSize;
410  RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO, GO, NO>::Build(domainMapP,
411  strideInfo);
412 
413  GO gnnzP = 0;
414  LO lnnzP = 0;
415  // coarse points have one nnz per row
416  gnnzP += gNumCoarseNodes;
417  lnnzP += lNumCoarseNodes;
418  // add all other points multiplying by 2^numDimensions
419  gnnzP += (gNumFineNodes - gNumCoarseNodes) * std::pow(2, numDimensions);
420  lnnzP += (lNumFineNodes - lNumCoarseNodes) * std::pow(2, numDimensions);
421  // finally multiply by the number of dofs per node
422  gnnzP = gnnzP * BlkSize;
423  lnnzP = lnnzP * BlkSize;
424 
425  // Create the matrix itself using the above maps
426  RCP<Matrix> P;
427  P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
428  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
429 
430  ArrayRCP<size_t> iaP;
431  ArrayRCP<LO> jaP;
432  ArrayRCP<SC> valP;
433 
434  PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
435 
436  ArrayView<size_t> ia = iaP();
437  ArrayView<LO> ja = jaP();
438  ArrayView<SC> val = valP();
439  ia[0] = 0;
440 
441  LO numCoarseElements = 1;
442  Array<LO> lCoarseElementsPerDir(3);
443  for (LO dim = 0; dim < numDimensions; ++dim) {
444  lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
445  if (ghostInterface[2 * dim]) {
446  ++lCoarseElementsPerDir[dim];
447  }
448  if (!ghostInterface[2 * dim + 1]) {
449  --lCoarseElementsPerDir[dim];
450  }
451  numCoarseElements = numCoarseElements * lCoarseElementsPerDir[dim];
452  }
453 
454  for (LO dim = numDimensions; dim < 3; ++dim) {
455  lCoarseElementsPerDir[dim] = 1;
456  }
457 
458  // Loop over the coarse elements
459  Array<int> elementFlags(3);
460  Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
461  Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
462  const int numCoarseNodesInElement = std::pow(2, numDimensions);
463  const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
464  const int numRowsPerPoint = BlkSize;
465  for (elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
466  for (elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
467  for (elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
468  elementFlags[0] = 0;
469  elementFlags[1] = 0;
470  elementFlags[2] = 0;
471  for (int dim = 0; dim < 3; ++dim) {
472  // Detect boundary conditions on the element and set corresponding flags.
473  if (elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
474  elementFlags[dim] = boundaryFlags[dim];
475  } else if (elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
476  elementFlags[dim] += 1;
477  } else if ((elemInds[dim] == lCoarseElementsPerDir[dim] - 1) && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
478  elementFlags[dim] += 2;
479  } else {
480  elementFlags[dim] = 0;
481  }
482 
483  // Compute the number of nodes in the current element.
484  if (dim < numDimensions) {
485  if ((elemInds[dim] == lCoarseElementsPerDir[dim]) && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
486  elementNodesPerDir[dim] = endRate[dim] + 1;
487  } else {
488  elementNodesPerDir[dim] = coarseRate[dim] + 1;
489  }
490  } else {
491  elementNodesPerDir[dim] = 1;
492  }
493 
494  // Get the lowest tuple of the element using the ghosted local coordinate system
495  glElementRefTuple[dim] = elemInds[dim] * coarseRate[dim];
496  glElementRefTupleCG[dim] = elemInds[dim];
497  }
498 
499  // Now get the column map indices corresponding to the dofs associated with the current
500  // element's coarse nodes.
501  for (typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
502  glElementCoarseNodeCG[elem] = glElementRefTupleCG[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
503  }
504  glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
505  glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
506  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
507  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
508 
509  glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
510  glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
511  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
512  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
513 
514  glElementCoarseNodeCG[1] += 1;
515  glElementCoarseNodeCG[3] += 1;
516  glElementCoarseNodeCG[5] += 1;
517  glElementCoarseNodeCG[7] += 1;
518 
519  LO numNodesInElement = elementNodesPerDir[0] * elementNodesPerDir[1] * elementNodesPerDir[2];
520  // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
521  // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
522 
523  // Compute the element prolongator
525  Array<LO> dofType(numNodesInElement * BlkSize), lDofInd(numNodesInElement * BlkSize);
526  ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
527  numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
528  lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
529  blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
530  Pi, Pf, Pe, dofType, lDofInd);
531 
532  // Find ghosted LID associated with nodes in the element and eventually which of these
533  // nodes are ghosts, this information is used to fill the local prolongator.
534  Array<LO> lNodeLIDs(numNodesInElement);
535  {
536  Array<LO> lNodeTuple(3), nodeInd(3);
537  for (nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
538  for (nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
539  for (nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
540  int stencilLength = 0;
541  if ((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
542  (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
543  (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
544  stencilLength = 1;
545  } else {
546  stencilLength = std::pow(2, numDimensions);
547  }
548  LO nodeElementInd = nodeInd[2] * elementNodesPerDir[1] * elementNodesPerDir[1] + nodeInd[1] * elementNodesPerDir[0] + nodeInd[0];
549  for (int dim = 0; dim < 3; ++dim) {
550  lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
551  }
552  if (lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
553  lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
554  lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
555  lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
556  // This flags the ghosts nodes used for prolongator calculation but for which
557  // we do not own the row, hence we won't fill these values on this rank.
558  lNodeLIDs[nodeElementInd] = -1;
559  } else if ((nodeInd[0] == 0 && elemInds[0] != 0) ||
560  (nodeInd[1] == 0 && elemInds[1] != 0) ||
561  (nodeInd[2] == 0 && elemInds[2] != 0)) {
562  // This flags nodes that are owned but common to two coarse elements and that
563  // were already filled by another element, we don't want to fill them twice so
564  // we skip them
565  lNodeLIDs[nodeElementInd] = -2;
566  } else {
567  // The remaining nodes are locally owned and we need to fill the coresponding
568  // rows of the prolongator
569 
570  // First we need to find which row needs to be filled
571  lNodeLIDs[nodeElementInd] = lNodeTuple[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0] + lNodeTuple[1] * lFineNodesPerDir[0] + lNodeTuple[0];
572 
573  // We also compute the row offset using arithmetic to ensure that we can loop
574  // easily over the nodes in a macro-element as well as facilitate on-node
575  // parallelization. The node serial code could be rewritten with two loops over
576  // the local part of the mesh to avoid the costly integer divisions...
577  Array<LO> refCoarsePointTuple(3);
578  for (int dim = 2; dim > -1; --dim) {
579  if (dim == 0) {
580  refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
581  if (myOffset[dim] == 0) {
582  ++refCoarsePointTuple[dim];
583  }
584  } else {
585  // Note: no need for magnitudeType here, just use double because these things are LO's
586  refCoarsePointTuple[dim] =
587  std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim]);
588  }
589  if ((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {
590  break;
591  }
592  }
593  const LO numCoarsePoints = refCoarsePointTuple[0] + refCoarsePointTuple[1] * lCoarseNodesPerDir[0] + refCoarsePointTuple[2] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0];
594  const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
595 
596  // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
597  // fill row 'n' to 'n+BlkSize'.
598  size_t rowPtr = (numFinePoints - numCoarsePoints) * numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode + numCoarsePoints * numRowsPerPoint;
599  if (dofType[nodeElementInd * BlkSize] == 0) {
600  // Check if current node is a coarse point
601  rowPtr = rowPtr - numRowsPerPoint;
602  } else {
603  rowPtr = rowPtr - numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode;
604  }
605 
606  for (int dof = 0; dof < BlkSize; ++dof) {
607  // Now we loop over the stencil and fill the column indices and row values
608  int cornerInd = 0;
609  switch (dofType[nodeElementInd * BlkSize + dof]) {
610  case 0: // Corner node
611  if (nodeInd[2] == elementNodesPerDir[2] - 1) {
612  cornerInd += 4;
613  }
614  if (nodeInd[1] == elementNodesPerDir[1] - 1) {
615  cornerInd += 2;
616  }
617  if (nodeInd[0] == elementNodesPerDir[0] - 1) {
618  cornerInd += 1;
619  }
620  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + dof + 1;
621  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]] * BlkSize + dof;
622  val[rowPtr + dof] = 1.0;
623  break;
624 
625  case 1: // Edge node
626  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
627  for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
628  if (blockStrategy == "coupled") {
629  for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
630  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
631  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
632  val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
633  ind1 * BlkSize + ind2);
634  }
635  } else if (blockStrategy == "uncoupled") {
636  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
637  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
638  val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
639  ind1 * BlkSize + dof);
640  }
641  }
642  break;
643 
644  case 2: // Face node
645  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
646  for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
647  if (blockStrategy == "coupled") {
648  for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
649  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
650  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
651  val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
652  ind1 * BlkSize + ind2);
653  }
654  } else if (blockStrategy == "uncoupled") {
655  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
656  // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
657  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
658  val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
659  ind1 * BlkSize + dof);
660  }
661  }
662  break;
663 
664  case 3: // Interior node
665  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
666  for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
667  if (blockStrategy == "coupled") {
668  for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
669  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
670  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
671  val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
672  ind1 * BlkSize + ind2);
673  }
674  } else if (blockStrategy == "uncoupled") {
675  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
676  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
677  val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
678  ind1 * BlkSize + dof);
679  }
680  }
681  break;
682  }
683  }
684  }
685  }
686  }
687  }
688  } // End of scopt for lNodeTuple and nodeInd
689  }
690  }
691  }
692 
693  // Sort all row's column indicies and entries by LID
694  Xpetra::CrsMatrixUtils<SC, LO, GO, NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
695 
696  // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
697  PCrs->setAllValues(iaP, jaP, valP);
698  PCrs->expertStaticFillComplete(domainMapP, rowMapP);
699 
700  // set StridingInformation of P
701  if (A->IsView("stridedMaps") == true) {
702  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
703  } else {
704  P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
705  }
706 
707  // store the transfer operator and node coordinates on coarse level
708  Set(coarseLevel, "P", P);
709  Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
710  Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
711  Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
712 }
713 
714 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
717  const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
718  const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
719  Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
720  Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
721  Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
722  Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
723  ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
724  RCP<NodesIDs> ghostedCoarseNodes) const {
725  // This function is extracting the geometric information from the coordinates
726  // and creates the necessary data/formatting to perform locally the calculation
727  // of the pronlongator.
728  //
729  // inputs:
730 
731  RCP<const Map> coordinatesMap = coordinates->getMap();
732  LO numDimensions = coordinates->getNumVectors();
733 
734  // Using the coarsening rate and the fine level data,
735  // compute coarse level data
736 
737  // Phase 1 //
738  // ------------------------------------------------------------------ //
739  // We first start by finding small informations on the mesh such as //
740  // the number of coarse nodes (local and global) and the number of //
741  // ghost nodes / the end rate of coarsening. //
742  // ------------------------------------------------------------------ //
743  GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
744  {
745  GO tmp;
746  gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
747  tmp = minGlobalIndex % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
748  gIndices[1] = tmp / gFineNodesPerDir[0];
749  gIndices[0] = tmp % gFineNodesPerDir[0];
750 
751  myOffset[2] = gIndices[2] % coarseRate[2];
752  myOffset[1] = gIndices[1] % coarseRate[1];
753  myOffset[0] = gIndices[0] % coarseRate[0];
754  }
755 
756  for (int dim = 0; dim < 3; ++dim) {
757  if (gIndices[dim] == 0) {
758  boundaryFlags[dim] += 1;
759  }
760  if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
761  boundaryFlags[dim] += 2;
762  }
763  }
764 
765  // Check whether ghost nodes are needed in each direction
766  for (LO i = 0; i < numDimensions; ++i) {
767  if ((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
768  ghostInterface[2 * i] = true;
769  }
770  if (((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i]) && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
771  ghostInterface[2 * i + 1] = true;
772  }
773  }
774 
775  for (LO i = 0; i < 3; ++i) {
776  if (i < numDimensions) {
777  lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
778  if (myOffset[i] == 0) {
779  ++lCoarseNodesPerDir[i];
780  }
781  gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
782  endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
783  if (endRate[i] == 0) {
784  ++gCoarseNodesPerDir[i];
785  endRate[i] = coarseRate[i];
786  }
787  } else {
788  // Most quantities need to be set to 1 for extra dimensions
789  // this is rather logical, an x-y plane is like a single layer
790  // of nodes in the z direction...
791  gCoarseNodesPerDir[i] = 1;
792  lCoarseNodesPerDir[i] = 1;
793  endRate[i] = 1;
794  }
795  }
796 
797  gNumCoarseNodes = gCoarseNodesPerDir[0] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[2];
798  lNumCoarseNodes = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
799 
800  // For each direction, determine how many ghost points are required.
801  LO lNumGhostNodes = 0;
802  Array<GO> startGhostedCoarseNode(3);
803  {
804  const int complementaryIndices[3][2] = {{1, 2}, {0, 2}, {0, 1}};
805  LO tmp = 0;
806  for (LO i = 0; i < 3; ++i) {
807  // The first branch of this if-statement will be used if the rank contains only one layer
808  // of nodes in direction i, that layer must also coincide with the boundary of the mesh
809  // and coarseRate[i] == endRate[i]...
810  if ((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
811  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
812  } else {
813  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
814  }
815  // First we load the number of locally owned coarse nodes
816  glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
817 
818  // Check whether a face in direction i needs ghost nodes
819  if (ghostInterface[2 * i] || ghostInterface[2 * i + 1]) {
820  ++glCoarseNodesPerDir[i];
821  if (i == 0) {
822  tmp = lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
823  }
824  if (i == 1) {
825  tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[2];
826  }
827  if (i == 2) {
828  tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1];
829  }
830  }
831  // If both faces in direction i need nodes, double the number of ghost nodes
832  if (ghostInterface[2 * i] && ghostInterface[2 * i + 1]) {
833  ++glCoarseNodesPerDir[i];
834  tmp = 2 * tmp;
835  }
836  lNumGhostNodes += tmp;
837 
838  // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
839  for (LO j = 0; j < 2; ++j) {
840  for (LO k = 0; k < 2; ++k) {
841  // Check if two adjoining faces need ghost nodes and then add their common edge
842  if (ghostInterface[2 * complementaryIndices[i][0] + j] && ghostInterface[2 * complementaryIndices[i][1] + k]) {
843  lNumGhostNodes += lCoarseNodesPerDir[i];
844  // Add corners if three adjoining faces need ghost nodes,
845  // but add them only once! Hence when i == 0.
846  if (ghostInterface[2 * i] && (i == 0)) {
847  lNumGhostNodes += 1;
848  }
849  if (ghostInterface[2 * i + 1] && (i == 0)) {
850  lNumGhostNodes += 1;
851  }
852  }
853  }
854  }
855  tmp = 0;
856  }
857  } // end of scope for tmp and complementaryIndices
858 
859  const LO lNumGhostedNodes = glCoarseNodesPerDir[0] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[2];
860  ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
861  ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
862  ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
863  ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
864  ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
865 
866  // We loop over all ghosted coarse nodes by increasing global lexicographic order
867  Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
868  LO currentIndex = -1;
869  for (ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
870  for (ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
871  for (ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
872  currentIndex = ijk[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + ijk[1] * glCoarseNodesPerDir[0] + ijk[0];
873  coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
874  coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0] * coarseRate[0];
875  if (coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
876  coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
877  }
878  coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
879  coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1] * coarseRate[1];
880  if (coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
881  coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
882  }
883  coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
884  coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2] * coarseRate[2];
885  if (coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
886  coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
887  }
888  GO myGID = 0, myCoarseGID = -1;
889  GO factor[3] = {};
890  factor[2] = gFineNodesPerDir[1] * gFineNodesPerDir[0];
891  factor[1] = gFineNodesPerDir[0];
892  factor[0] = 1;
893  for (int dim = 0; dim < 3; ++dim) {
894  if (dim < numDimensions) {
895  if (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim] < gFineNodesPerDir[dim] - 1) {
896  myGID += (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim]) * factor[dim];
897  } else {
898  myGID += (gIndices[dim] - myOffset[dim] + (ijk[dim] - 1) * coarseRate[dim] + endRate[dim]) * factor[dim];
899  }
900  }
901  }
902  myCoarseGID = coarseNodeCoarseIndices[0] + coarseNodeCoarseIndices[1] * gCoarseNodesPerDir[0] + coarseNodeCoarseIndices[2] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
903  ghostedCoarseNodes->GIDs[currentIndex] = myGID;
904  ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
905  }
906  }
907  }
908  coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
909  ghostedCoarseNodes->PIDs(),
910  ghostedCoarseNodes->LIDs());
911 
912  // Phase 2 //
913  // ------------------------------------------------------------------ //
914  // Build a list of GIDs to import the required ghost nodes. //
915  // The ordering of the ghosts nodes will be as natural as possible, //
916  // i.e. it should follow the GID ordering of the mesh. //
917  // ------------------------------------------------------------------ //
918 
919  // Saddly we have to more or less redo what was just done to figure out the value of
920  // lNumGhostNodes, there might be some optimization possibility here...
921  ghostGIDs.resize(lNumGhostNodes);
922  LO countGhosts = 0;
923  // Get the GID of the first point on the current partition.
924  GO startingGID = minGlobalIndex;
925  Array<GO> startingIndices(3);
926  // We still want ghost nodes even if have with a 0 offset,
927  // except when we are on a boundary
928  if (ghostInterface[4] && (myOffset[2] == 0)) {
929  if (gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
930  startingGID -= endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
931  } else {
932  startingGID -= coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
933  }
934  } else {
935  startingGID -= myOffset[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
936  }
937  if (ghostInterface[2] && (myOffset[1] == 0)) {
938  if (gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
939  startingGID -= endRate[1] * gFineNodesPerDir[0];
940  } else {
941  startingGID -= coarseRate[1] * gFineNodesPerDir[0];
942  }
943  } else {
944  startingGID -= myOffset[1] * gFineNodesPerDir[0];
945  }
946  if (ghostInterface[0] && (myOffset[0] == 0)) {
947  if (gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
948  startingGID -= endRate[0];
949  } else {
950  startingGID -= coarseRate[0];
951  }
952  } else {
953  startingGID -= myOffset[0];
954  }
955 
956  { // scope for tmp
957  GO tmp;
958  startingIndices[2] = startingGID / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
959  tmp = startingGID % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
960  startingIndices[1] = tmp / gFineNodesPerDir[0];
961  startingIndices[0] = tmp % gFineNodesPerDir[0];
962  }
963 
964  GO ghostOffset[3] = {0, 0, 0};
965  LO lengthZero = lCoarseNodesPerDir[0];
966  LO lengthOne = lCoarseNodesPerDir[1];
967  LO lengthTwo = lCoarseNodesPerDir[2];
968  if (ghostInterface[0]) {
969  ++lengthZero;
970  }
971  if (ghostInterface[1]) {
972  ++lengthZero;
973  }
974  if (ghostInterface[2]) {
975  ++lengthOne;
976  }
977  if (ghostInterface[3]) {
978  ++lengthOne;
979  }
980  if (ghostInterface[4]) {
981  ++lengthTwo;
982  }
983  if (ghostInterface[5]) {
984  ++lengthTwo;
985  }
986 
987  // First check the bottom face as it will have the lowest GIDs
988  if (ghostInterface[4]) {
989  ghostOffset[2] = startingGID;
990  for (LO j = 0; j < lengthOne; ++j) {
991  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
992  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
993  } else {
994  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
995  }
996  for (LO k = 0; k < lengthZero; ++k) {
997  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
998  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
999  } else {
1000  ghostOffset[0] = k * coarseRate[0];
1001  }
1002  // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1003  // carefully.
1004  // This if statement is repeated each time a ghost node is selected.
1005  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1006  ++countGhosts;
1007  }
1008  }
1009  }
1010 
1011  // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1012  // nodes located on these layers.
1013  for (LO i = 0; i < lengthTwo; ++i) {
1014  // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1015  // swept seperatly for efficiency.
1016  if (!((i == lengthTwo - 1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4])) {
1017  // Set the ghostOffset in direction 2 taking into account a possible endRate different
1018  // from the regular coarseRate.
1019  if ((i == lengthTwo - 1) && !ghostInterface[5]) {
1020  ghostOffset[2] = startingGID + ((i - 1) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1021  } else {
1022  ghostOffset[2] = startingGID + i * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1023  }
1024  for (LO j = 0; j < lengthOne; ++j) {
1025  if ((j == 0) && ghostInterface[2]) {
1026  for (LO k = 0; k < lengthZero; ++k) {
1027  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1028  if (k == 0) {
1029  ghostOffset[0] = endRate[0];
1030  } else {
1031  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1032  }
1033  } else {
1034  ghostOffset[0] = k * coarseRate[0];
1035  }
1036  // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1037  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1038  ++countGhosts;
1039  }
1040  } else if ((j == lengthOne - 1) && ghostInterface[3]) {
1041  // Set the ghostOffset in direction 1 taking into account a possible endRate different
1042  // from the regular coarseRate.
1043  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1044  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1045  } else {
1046  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1047  }
1048  for (LO k = 0; k < lengthZero; ++k) {
1049  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1050  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1051  } else {
1052  ghostOffset[0] = k * coarseRate[0];
1053  }
1054  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1055  ++countGhosts;
1056  }
1057  } else {
1058  // Set ghostOffset[1] for side faces sweep
1059  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1060  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1061  } else {
1062  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1063  }
1064 
1065  // Set ghostOffset[0], ghostGIDs and countGhosts
1066  if (ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1067  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1068  ++countGhosts;
1069  }
1070  if (ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1071  if ((startingIndices[0] + (lengthZero - 1) * coarseRate[0]) > gFineNodesPerDir[0] - 1) {
1072  if (lengthZero > 2) {
1073  ghostOffset[0] = (lengthZero - 2) * coarseRate[0] + endRate[0];
1074  } else {
1075  ghostOffset[0] = endRate[0];
1076  }
1077  } else {
1078  ghostOffset[0] = (lengthZero - 1) * coarseRate[0];
1079  }
1080  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1081  ++countGhosts;
1082  }
1083  }
1084  }
1085  }
1086  }
1087 
1088  // Finally check the top face
1089  if (ghostInterface[5]) {
1090  if (startingIndices[2] + (lengthTwo - 1) * coarseRate[2] + 1 > gFineNodesPerDir[2]) {
1091  ghostOffset[2] = startingGID + ((lengthTwo - 2) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1092  } else {
1093  ghostOffset[2] = startingGID + (lengthTwo - 1) * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1094  }
1095  for (LO j = 0; j < lengthOne; ++j) {
1096  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1097  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1098  } else {
1099  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1100  }
1101  for (LO k = 0; k < lengthZero; ++k) {
1102  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1103  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1104  } else {
1105  ghostOffset[0] = k * coarseRate[0];
1106  }
1107  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1108  ++countGhosts;
1109  }
1110  }
1111  }
1112 
1113  // Phase 3 //
1114  // ------------------------------------------------------------------ //
1115  // Final phase of this function, lists are being built to create the //
1116  // column and domain maps of the projection as well as the map and //
1117  // arrays for the coarse coordinates multivector. //
1118  // ------------------------------------------------------------------ //
1119 
1120  Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1121  LO currentNode, offset2, offset1, offset0;
1122  // Find the GIDs of the coarse nodes on the partition.
1123  for (LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1124  if (myOffset[2] == 0) {
1125  offset2 = startingIndices[2] + myOffset[2];
1126  } else {
1127  if (startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1128  offset2 = startingIndices[2] + endRate[2];
1129  } else {
1130  offset2 = startingIndices[2] + coarseRate[2];
1131  }
1132  }
1133  if (offset2 + ind2 * coarseRate[2] > gFineNodesPerDir[2] - 1) {
1134  offset2 += (ind2 - 1) * coarseRate[2] + endRate[2];
1135  } else {
1136  offset2 += ind2 * coarseRate[2];
1137  }
1138  offset2 = offset2 * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1139 
1140  for (LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1141  if (myOffset[1] == 0) {
1142  offset1 = startingIndices[1] + myOffset[1];
1143  } else {
1144  if (startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1145  offset1 = startingIndices[1] + endRate[1];
1146  } else {
1147  offset1 = startingIndices[1] + coarseRate[1];
1148  }
1149  }
1150  if (offset1 + ind1 * coarseRate[1] > gFineNodesPerDir[1] - 1) {
1151  offset1 += (ind1 - 1) * coarseRate[1] + endRate[1];
1152  } else {
1153  offset1 += ind1 * coarseRate[1];
1154  }
1155  offset1 = offset1 * gFineNodesPerDir[0];
1156  for (LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1157  offset0 = startingIndices[0];
1158  if (myOffset[0] == 0) {
1159  offset0 += ind0 * coarseRate[0];
1160  } else {
1161  offset0 += (ind0 + 1) * coarseRate[0];
1162  }
1163  if (offset0 > gFineNodesPerDir[0] - 1) {
1164  offset0 += endRate[0] - coarseRate[0];
1165  }
1166 
1167  currentNode = ind2 * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + ind1 * lCoarseNodesPerDir[0] + ind0;
1168  gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1169  }
1170  }
1171  }
1172 
1173  // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1174  // and the corresponding dofs that will need to be added to colMapP.
1175  colGIDs.resize(BlkSize * (lNumCoarseNodes + lNumGhostNodes));
1176  coarseNodesGIDs.resize(lNumCoarseNodes);
1177  for (LO i = 0; i < numDimensions; ++i) {
1178  coarseNodes[i].resize(lNumCoarseNodes);
1179  }
1180  GO fineNodesPerCoarseSlab = coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1181  GO fineNodesEndCoarseSlab = endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1182  GO fineNodesPerCoarsePlane = coarseRate[1] * gFineNodesPerDir[0];
1183  GO fineNodesEndCoarsePlane = endRate[1] * gFineNodesPerDir[0];
1184  GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
1185  GO gCoarseNodeOnCoarseGridGID;
1186  LO gInd[3], lCol;
1187  Array<int> ghostPIDs(lNumGhostNodes);
1188  Array<LO> ghostLIDs(lNumGhostNodes);
1189  Array<LO> ghostPermut(lNumGhostNodes);
1190  for (LO k = 0; k < lNumGhostNodes; ++k) {
1191  ghostPermut[k] = k;
1192  }
1193  coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1194  sh_sort_permute(ghostPIDs.begin(), ghostPIDs.end(), ghostPermut.begin(), ghostPermut.end());
1195 
1196  { // scope for tmpInds, tmpVars and tmp.
1197  GO tmpInds[3], tmpVars[2];
1198  LO tmp;
1199  // Loop over the coarse nodes of the partition and add them to colGIDs
1200  // that will be used to construct the column and domain maps of P as well
1201  // as to construct the coarse coordinates map.
1202  // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1203  // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1204  LO col = 0;
1205  LO firstCoarseNodeInds[3], currentCoarseNode;
1206  for (LO dim = 0; dim < 3; ++dim) {
1207  if (myOffset[dim] == 0) {
1208  firstCoarseNodeInds[dim] = 0;
1209  } else {
1210  firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1211  }
1212  }
1214  for (LO dim = 0; dim < numDimensions; ++dim) {
1215  fineNodes[dim] = coordinates->getData(dim);
1216  }
1217  for (LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1218  for (LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1219  for (LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1220  col = k * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + j * lCoarseNodesPerDir[0] + i;
1221 
1222  // Check for endRate
1223  currentCoarseNode = 0;
1224  if (firstCoarseNodeInds[0] + i * coarseRate[0] > lFineNodesPerDir[0] - 1) {
1225  currentCoarseNode += firstCoarseNodeInds[0] + (i - 1) * coarseRate[0] + endRate[0];
1226  } else {
1227  currentCoarseNode += firstCoarseNodeInds[0] + i * coarseRate[0];
1228  }
1229  if (firstCoarseNodeInds[1] + j * coarseRate[1] > lFineNodesPerDir[1] - 1) {
1230  currentCoarseNode += (firstCoarseNodeInds[1] + (j - 1) * coarseRate[1] + endRate[1]) * lFineNodesPerDir[0];
1231  } else {
1232  currentCoarseNode += (firstCoarseNodeInds[1] + j * coarseRate[1]) * lFineNodesPerDir[0];
1233  }
1234  if (firstCoarseNodeInds[2] + k * coarseRate[2] > lFineNodesPerDir[2] - 1) {
1235  currentCoarseNode += (firstCoarseNodeInds[2] + (k - 1) * coarseRate[2] + endRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1236  } else {
1237  currentCoarseNode += (firstCoarseNodeInds[2] + k * coarseRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1238  }
1239  // Load coordinates
1240  for (LO dim = 0; dim < numDimensions; ++dim) {
1241  coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1242  }
1243 
1244  if ((endRate[2] != coarseRate[2]) && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1245  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1246  tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1247  } else {
1248  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1249  tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1250  }
1251  if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1252  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1253  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1254  } else {
1255  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1256  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1257  }
1258  if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1259  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1260  } else {
1261  tmpInds[0] = tmpVars[1] / coarseRate[0];
1262  }
1263  gInd[2] = col / (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1264  tmp = col % (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1265  gInd[1] = tmp / lCoarseNodesPerDir[0];
1266  gInd[0] = tmp % lCoarseNodesPerDir[0];
1267  lCol = gInd[2] * (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]) + gInd[1] * lCoarseNodesPerDir[0] + gInd[0];
1268  gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1269  coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1270  for (LO dof = 0; dof < BlkSize; ++dof) {
1271  colGIDs[BlkSize * lCol + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1272  }
1273  }
1274  }
1275  }
1276  // Now loop over the ghost nodes of the partition to add them to colGIDs
1277  // since they will need to be included in the column map of P
1278  for (col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1279  if ((endRate[2] != coarseRate[2]) && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1280  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1281  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1282  } else {
1283  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1284  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1285  }
1286  if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1287  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1288  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1289  } else {
1290  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1291  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1292  }
1293  if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1294  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1295  } else {
1296  tmpInds[0] = tmpVars[1] / coarseRate[0];
1297  }
1298  gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1299  for (LO dof = 0; dof < BlkSize; ++dof) {
1300  colGIDs[BlkSize * col + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1301  }
1302  }
1303  } // End of scope for tmpInds, tmpVars and tmp
1304 
1305 } // GetGeometricData()
1306 
1307 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1309  ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1310  const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1311  const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1312  const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1313  const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1314  const Array<bool> ghostInterface, const Array<int> elementFlags,
1315  const std::string stencilType, const std::string /* blockStrategy */,
1316  const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1317  const Array<GO> /* colGIDs */,
1320  Array<LO>& lDofInd) const {
1321  // First extract data from Aghost and move it to the corresponding dense matrix
1322  // This step requires to compute the number of nodes (resp dofs) in the current
1323  // coarse element, then allocate storage for local dense matrices, finally loop
1324  // over the dofs and extract the corresponding row in Aghost before dispactching
1325  // its data to the dense matrices.
1326  // At the same time we want to compute a mapping from the element indexing to
1327  // group indexing. The groups are the following: corner, edge, face and interior
1328  // nodes. We uses these groups to operate on the dense matrices but need to
1329  // store the nodes in their original order after groupd operations are completed.
1330  LO countInterior = 0, countFace = 0, countEdge = 0, countCorner = 0;
1331  Array<LO> collapseDir(numNodesInElement * BlkSize);
1332  for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1333  for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1334  for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1335  // Check for corner node
1336  if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1337  for (LO dof = 0; dof < BlkSize; ++dof) {
1338  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1339  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countCorner + dof;
1340  }
1341  ++countCorner;
1342 
1343  // Check for edge node
1344  } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1345  for (LO dof = 0; dof < BlkSize; ++dof) {
1346  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1347  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countEdge + dof;
1348  if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) {
1349  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1350  } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1351  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1352  } else if ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1353  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1354  }
1355  }
1356  ++countEdge;
1357 
1358  // Check for face node
1359  } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1360  for (LO dof = 0; dof < BlkSize; ++dof) {
1361  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1362  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countFace + dof;
1363  if (ke == 0 || ke == elementNodesPerDir[2] - 1) {
1364  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1365  } else if (je == 0 || je == elementNodesPerDir[1] - 1) {
1366  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1367  } else if (ie == 0 || ie == elementNodesPerDir[0] - 1) {
1368  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1369  }
1370  }
1371  ++countFace;
1372 
1373  // Otherwise it is an interior node
1374  } else {
1375  for (LO dof = 0; dof < BlkSize; ++dof) {
1376  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 3;
1377  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countInterior + dof;
1378  }
1379  ++countInterior;
1380  }
1381  }
1382  }
1383  }
1384 
1385  LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1386  numInteriorNodes = (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1387  numFaceNodes = 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) + 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[2] - 2) + 2 * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1388  numEdgeNodes = 4 * (elementNodesPerDir[0] - 2) + 4 * (elementNodesPerDir[1] - 2) + 4 * (elementNodesPerDir[2] - 2);
1389  // Diagonal blocks
1390  Teuchos::SerialDenseMatrix<LO, SC> Aii(BlkSize * numInteriorNodes, BlkSize * numInteriorNodes);
1391  Teuchos::SerialDenseMatrix<LO, SC> Aff(BlkSize * numFaceNodes, BlkSize * numFaceNodes);
1392  Teuchos::SerialDenseMatrix<LO, SC> Aee(BlkSize * numEdgeNodes, BlkSize * numEdgeNodes);
1393  // Upper triangular blocks
1394  Teuchos::SerialDenseMatrix<LO, SC> Aif(BlkSize * numInteriorNodes, BlkSize * numFaceNodes);
1395  Teuchos::SerialDenseMatrix<LO, SC> Aie(BlkSize * numInteriorNodes, BlkSize * numEdgeNodes);
1396  Teuchos::SerialDenseMatrix<LO, SC> Afe(BlkSize * numFaceNodes, BlkSize * numEdgeNodes);
1397  // Coarse nodes "right hand sides" and local prolongators
1398  Teuchos::SerialDenseMatrix<LO, SC> Aic(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1399  Teuchos::SerialDenseMatrix<LO, SC> Afc(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1400  Teuchos::SerialDenseMatrix<LO, SC> Aec(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1401  Pi.shape(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1402  Pf.shape(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1403  Pe.shape(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1404 
1405  ArrayView<const LO> rowIndices;
1406  ArrayView<const SC> rowValues;
1407  LO idof, iInd, jInd;
1408  int iType = 0, jType = 0;
1409  int orientation = -1;
1410  int collapseFlags[3] = {};
1411  Array<SC> stencil((std::pow(3, numDimensions)) * BlkSize);
1412  // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1413  // we might want to fuse the first three loops and do integer arithmetic
1414  // to have more optimization during compilation...
1415  for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1416  for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1417  for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1418  collapseFlags[0] = 0;
1419  collapseFlags[1] = 0;
1420  collapseFlags[2] = 0;
1421  if ((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1422  collapseFlags[0] += 1;
1423  }
1424  if ((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1425  collapseFlags[0] += 2;
1426  }
1427  if ((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1428  collapseFlags[1] += 1;
1429  }
1430  if ((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1431  collapseFlags[1] += 2;
1432  }
1433  if ((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1434  collapseFlags[2] += 1;
1435  }
1436  if ((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1437  collapseFlags[2] += 2;
1438  }
1439 
1440  // Based on (ie, je, ke) we detect the type of node we are dealing with
1441  GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1442  for (LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1443  // Compute the dof index for each dof at point (ie, je, ke)
1444  idof = ((elemInds[2] * coarseRate[2] + ke) * lFineNodesPerDir[1] * lFineNodesPerDir[0] + (elemInds[1] * coarseRate[1] + je) * lFineNodesPerDir[0] + elemInds[0] * coarseRate[0] + ie) * BlkSize + dof0;
1445  Aghost->getLocalRowView(idof, rowIndices, rowValues);
1446  FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1447  elementNodesPerDir, collapseFlags, stencilType, stencil);
1448  LO io, jo, ko;
1449  if (iType == 3) { // interior node, no stencil collapse needed
1450  for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1451  // Find (if, jf, kf) the indices associated with the interacting node
1452  ko = ke + (interactingNode / 9 - 1);
1453  {
1454  LO tmp = interactingNode % 9;
1455  jo = je + (tmp / 3 - 1);
1456  io = ie + (tmp % 3 - 1);
1457  int dummy;
1458  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1459  }
1460  if ((io > -1 && io < elementNodesPerDir[0]) &&
1461  (jo > -1 && jo < elementNodesPerDir[1]) &&
1462  (ko > -1 && ko < elementNodesPerDir[2])) {
1463  for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1464  if (jType == 3) {
1465  Aii(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1466  stencil[interactingNode * BlkSize + dof1];
1467  } else if (jType == 2) {
1468  Aif(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1469  stencil[interactingNode * BlkSize + dof1];
1470  } else if (jType == 1) {
1471  Aie(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1472  stencil[interactingNode * BlkSize + dof1];
1473  } else if (jType == 0) {
1474  Aic(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1475  -stencil[interactingNode * BlkSize + dof1];
1476  }
1477  }
1478  }
1479  }
1480  } else if (iType == 2) { // Face node, collapse stencil along face normal (*orientation)
1481  CollapseStencil(2, orientation, collapseFlags, stencil);
1482  for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1483  // Find (if, jf, kf) the indices associated with the interacting node
1484  ko = ke + (interactingNode / 9 - 1);
1485  {
1486  LO tmp = interactingNode % 9;
1487  jo = je + (tmp / 3 - 1);
1488  io = ie + (tmp % 3 - 1);
1489  int dummy;
1490  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1491  }
1492  if ((io > -1 && io < elementNodesPerDir[0]) &&
1493  (jo > -1 && jo < elementNodesPerDir[1]) &&
1494  (ko > -1 && ko < elementNodesPerDir[2])) {
1495  for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1496  if (jType == 2) {
1497  Aff(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1498  stencil[interactingNode * BlkSize + dof1];
1499  } else if (jType == 1) {
1500  Afe(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1501  stencil[interactingNode * BlkSize + dof1];
1502  } else if (jType == 0) {
1503  Afc(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1504  -stencil[interactingNode * BlkSize + dof1];
1505  }
1506  }
1507  }
1508  }
1509  } else if (iType == 1) { // Edge node, collapse stencil perpendicular to edge direction
1510  CollapseStencil(1, orientation, collapseFlags, stencil);
1511  for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1512  // Find (if, jf, kf) the indices associated with the interacting node
1513  ko = ke + (interactingNode / 9 - 1);
1514  {
1515  LO tmp = interactingNode % 9;
1516  jo = je + (tmp / 3 - 1);
1517  io = ie + (tmp % 3 - 1);
1518  int dummy;
1519  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1520  }
1521  if ((io > -1 && io < elementNodesPerDir[0]) &&
1522  (jo > -1 && jo < elementNodesPerDir[1]) &&
1523  (ko > -1 && ko < elementNodesPerDir[2])) {
1524  for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1525  if (jType == 1) {
1526  Aee(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1527  stencil[interactingNode * BlkSize + dof1];
1528  } else if (jType == 0) {
1529  Aec(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1530  -stencil[interactingNode * BlkSize + dof1];
1531  }
1532  } // Check if interacting node is in element or not
1533  } // Pc is the identity so no need to treat iType == 0
1534  } // Loop over interacting nodes within a row
1535  } // Check for current node type
1536  } // Loop over degrees of freedom associated to a given node
1537  } // Loop over i
1538  } // Loop over j
1539  } // Loop over k
1540 
1541  // Compute the projection operators for edge and interior nodes
1542  //
1543  // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1544  // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1545  // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1546  // [P_c] = I_c
1547  //
1548  { // Compute P_e
1549  // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1550  // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1552  problem.setMatrix(Teuchos::rcp(&Aee, false));
1553  problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1554  problem.factorWithEquilibration(true);
1555  problem.solve();
1556  problem.unequilibrateLHS();
1557  }
1558 
1559  { // Compute P_f
1560  // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1561  // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1562  Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1563  // Step two: P_f = A_{ff}^{-1}*A_{fc}
1565  problem.setMatrix(Teuchos::rcp(&Aff, false));
1566  problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1567  problem.factorWithEquilibration(true);
1568  problem.solve();
1569  problem.unequilibrateLHS();
1570  }
1571 
1572  { // Compute P_i
1573  // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1574  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1575  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1576  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1577  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1578  // Step two: P_i = A_{ii}^{-1}*A_{ic}
1580  problem.setMatrix(Teuchos::rcp(&Aii, false));
1581  problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1582  problem.factorWithEquilibration(true);
1583  problem.solve();
1584  problem.unequilibrateLHS();
1585  }
1586 
1587 } // ComputeLocalEntries()
1588 
1589 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1591  CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1592  Array<SC>& stencil) const {
1593  if (type == 2) { // Face stencil collapse
1594  // Let's hope things vectorize well inside this if statement
1595  if (orientation == 0) {
1596  for (LO i = 0; i < 9; ++i) {
1597  stencil[3 * i + 1] = stencil[3 * i] + stencil[3 * i + 1] + stencil[3 * i + 2];
1598  stencil[3 * i] = 0;
1599  stencil[3 * i + 2] = 0;
1600  }
1601  } else if (orientation == 1) {
1602  for (LO i = 0; i < 3; ++i) {
1603  stencil[9 * i + 3] = stencil[9 * i + 0] + stencil[9 * i + 3] + stencil[9 * i + 6];
1604  stencil[9 * i + 0] = 0;
1605  stencil[9 * i + 6] = 0;
1606  stencil[9 * i + 4] = stencil[9 * i + 1] + stencil[9 * i + 4] + stencil[9 * i + 7];
1607  stencil[9 * i + 1] = 0;
1608  stencil[9 * i + 7] = 0;
1609  stencil[9 * i + 5] = stencil[9 * i + 2] + stencil[9 * i + 5] + stencil[9 * i + 8];
1610  stencil[9 * i + 2] = 0;
1611  stencil[9 * i + 8] = 0;
1612  }
1613  } else if (orientation == 2) {
1614  for (LO i = 0; i < 9; ++i) {
1615  stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1616  stencil[i] = 0;
1617  stencil[i + 18] = 0;
1618  }
1619  }
1620  } else if (type == 1) { // Edge stencil collapse
1621  SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1622  if (orientation == 0) {
1623  for (LO i = 0; i < 9; ++i) {
1624  tmp1 += stencil[0 + 3 * i];
1625  stencil[0 + 3 * i] = 0;
1626  tmp2 += stencil[1 + 3 * i];
1627  stencil[1 + 3 * i] = 0;
1628  tmp3 += stencil[2 + 3 * i];
1629  stencil[2 + 3 * i] = 0;
1630  }
1631  stencil[12] = tmp1;
1632  stencil[13] = tmp2;
1633  stencil[14] = tmp3;
1634  } else if (orientation == 1) {
1635  for (LO i = 0; i < 3; ++i) {
1636  tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1637  stencil[0 + i] = 0;
1638  stencil[9 + i] = 0;
1639  stencil[18 + i] = 0;
1640  tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1641  stencil[3 + i] = 0;
1642  stencil[12 + i] = 0;
1643  stencil[21 + i] = 0;
1644  tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1645  stencil[6 + i] = 0;
1646  stencil[15 + i] = 0;
1647  stencil[24 + i] = 0;
1648  }
1649  stencil[10] = tmp1;
1650  stencil[13] = tmp2;
1651  stencil[16] = tmp3;
1652  } else if (orientation == 2) {
1653  for (LO i = 0; i < 9; ++i) {
1654  tmp1 += stencil[i];
1655  stencil[i] = 0;
1656  tmp2 += stencil[i + 9];
1657  stencil[i + 9] = 0;
1658  tmp3 += stencil[i + 18];
1659  stencil[i + 18] = 0;
1660  }
1661  stencil[4] = tmp1;
1662  stencil[13] = tmp2;
1663  stencil[22] = tmp3;
1664  }
1665  }
1666 } // CollapseStencil
1667 
1668 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1670  FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1671  const LO /* ke */, const ArrayView<const SC> rowValues, const Array<LO> /* elementNodesPerDir */,
1672  const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1673  const {
1674  if (stencilType == "reduced") {
1675  Array<int> fullStencilInds(7);
1676  fullStencilInds[0] = 4;
1677  fullStencilInds[1] = 10;
1678  fullStencilInds[2] = 12;
1679  fullStencilInds[3] = 13;
1680  fullStencilInds[4] = 14;
1681  fullStencilInds[5] = 16;
1682  fullStencilInds[6] = 22;
1683 
1684  // Create a mask array to automate mesh boundary processing
1685  Array<int> mask(7);
1686  int stencilSize = rowValues.size();
1687  if (collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1688  if (stencilSize == 1) {
1689  // The assumption is made that if only one non-zero exists in the row
1690  // then this represent a Dirichlet BC being enforced.
1691  // One might want to zero out the corresponding row in the prolongator.
1692  mask[0] = 1;
1693  mask[1] = 1;
1694  mask[2] = 1;
1695  mask[4] = 1;
1696  mask[5] = 1;
1697  mask[6] = 1;
1698  } else {
1699  // Here we are looking at Neumann like BC where only a flux is prescribed
1700  // and less non-zeros will be present in the corresponding row.
1701  if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1702  mask[0] = 1;
1703  }
1704  if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1705  mask[6] = 1;
1706  }
1707  if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1708  mask[1] = 1;
1709  }
1710  if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1711  mask[5] = 1;
1712  }
1713  if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1714  mask[2] = 1;
1715  }
1716  if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1717  mask[4] = 1;
1718  }
1719  }
1720  }
1721 
1722  int offset = 0;
1723  for (int ind = 0; ind < 7; ++ind) {
1724  if (mask[ind] == 1) {
1725  for (int dof = 0; dof < BlkSize; ++dof) {
1726  stencil[BlkSize * fullStencilInds[ind] + dof] = 0.0;
1727  }
1728  ++offset; // The offset is used to stay within rowValues bounds
1729  } else {
1730  for (int dof = 0; dof < BlkSize; ++dof) {
1731  stencil[BlkSize * fullStencilInds[ind] + dof] = rowValues[BlkSize * (ind - offset) + dof];
1732  }
1733  }
1734  }
1735  } else if (stencilType == "full") {
1736  // Create a mask array to automate mesh boundary processing
1737  Array<int> mask(27);
1738  if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1739  for (int ind = 0; ind < 9; ++ind) {
1740  mask[ind] = 1;
1741  }
1742  }
1743  if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1744  for (int ind = 0; ind < 9; ++ind) {
1745  mask[18 + ind] = 1;
1746  }
1747  }
1748  if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1749  for (int ind1 = 0; ind1 < 3; ++ind1) {
1750  for (int ind2 = 0; ind2 < 3; ++ind2) {
1751  mask[ind1 * 9 + ind2] = 1;
1752  }
1753  }
1754  }
1755  if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1756  for (int ind1 = 0; ind1 < 3; ++ind1) {
1757  for (int ind2 = 0; ind2 < 3; ++ind2) {
1758  mask[ind1 * 9 + ind2 + 6] = 1;
1759  }
1760  }
1761  }
1762  if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1763  for (int ind = 0; ind < 9; ++ind) {
1764  mask[3 * ind] = 1;
1765  }
1766  }
1767  if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1768  for (int ind = 0; ind < 9; ++ind) {
1769  mask[3 * ind + 2] = 1;
1770  }
1771  }
1772 
1773  // Now the stencil is extracted and formated
1774  int offset = 0;
1775  for (LO ind = 0; ind < 27; ++ind) {
1776  if (mask[ind] == 0) {
1777  for (int dof = 0; dof < BlkSize; ++dof) {
1778  stencil[BlkSize * ind + dof] = 0.0;
1779  }
1780  ++offset; // The offset is used to stay within rowValues bounds
1781  } else {
1782  for (int dof = 0; dof < BlkSize; ++dof) {
1783  stencil[BlkSize * ind + dof] = rowValues[BlkSize * (ind - offset) + dof];
1784  }
1785  }
1786  }
1787  } // stencilTpye
1788 } // FormatStencil()
1789 
1790 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1792  const LO ie, const LO je, const LO ke,
1793  const Array<LO> elementNodesPerDir,
1794  int* type, LO& ind, int* orientation) const {
1795  // Initialize flags with -1 to be able to catch issues easily
1796  *type = -1, ind = 0, *orientation = -1;
1797  if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1798  // Corner node
1799  *type = 0;
1800  ind = 4 * ke / (elementNodesPerDir[2] - 1) + 2 * je / (elementNodesPerDir[1] - 1) + ie / (elementNodesPerDir[0] - 1);
1801  } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1802  // Edge node
1803  *type = 1;
1804  if (ke > 0) {
1805  ind += 2 * (elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);
1806  }
1807  if (ke == elementNodesPerDir[2] - 1) {
1808  ind += 4 * (elementNodesPerDir[2] - 2);
1809  }
1810  if ((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1811  if (je == 0) { // jlo ie edge
1812  *orientation = 0;
1813  ind += ie - 1;
1814  } else if (je == elementNodesPerDir[1] - 1) { // jhi ie edge
1815  *orientation = 0;
1816  ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1817  } else { // ilo or ihi je edge
1818  *orientation = 1;
1819  ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1820  }
1821  } else { // ke edges
1822  *orientation = 2;
1823  ind += 4 * (ke - 1) + 2 * (je / (elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1824  }
1825  } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1826  // Face node
1827  *type = 2;
1828  if (ke == 0) { // current node is on "bottom" face
1829  *orientation = 2;
1830  ind = (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1831  } else {
1832  // add nodes from "bottom" face
1833  ind += (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2);
1834  // add nodes from side faces
1835  ind += 2 * (ke - 1) * (elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1836  if (ke == elementNodesPerDir[2] - 1) { // current node is on "top" face
1837  *orientation = 2;
1838  ind += (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1839  } else { // current node is on a side face
1840  if (je == 0) {
1841  *orientation = 1;
1842  ind += ie - 1;
1843  } else if (je == elementNodesPerDir[1] - 1) {
1844  *orientation = 1;
1845  ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1846  } else {
1847  *orientation = 0;
1848  ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1849  }
1850  }
1851  }
1852  } else {
1853  // Interior node
1854  *type = 3;
1855  ind = (ke - 1) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2) + (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1856  }
1857 } // GetNodeInfo()
1858 
1859 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1861  const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1862  const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1863  const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1864  const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const {
1865  typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1866  DT n = last1 - first1;
1867  DT m = n / 2;
1869  while (m > z) {
1870  DT max = n - m;
1871  for (DT j = 0; j < max; j++) {
1872  for (DT k = j; k >= 0; k -= m) {
1873  if (first1[first2[k + m]] >= first1[first2[k]])
1874  break;
1875  std::swap(first2[k + m], first2[k]);
1876  }
1877  }
1878  m = m / 2;
1879  }
1880 }
1881 
1882 } // namespace MueLu
1883 
1884 #define MUELU_BLACKBOXPFACTORY_SHORT
1885 #endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
ArrayView< T > view(size_type offset, size_type size)
int multiply(ETransp transa, ETransp transb, ScalarType alpha, const SerialDenseMatrix< OrdinalType, ScalarType > &A, const SerialDenseMatrix< OrdinalType, ScalarType > &B, ScalarType beta)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
size_type size() const
LocalOrdinal LO
One-liner description of what is happening.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static const NoFactory * get()
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
void factorWithEquilibration(bool flag)
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void resize(size_type new_size, const value_type &x=value_type())
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
iterator end()
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
static void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< LocalOrdinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals, const UnderlyingLib lib)
int setVectors(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &X, const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &B)
size_type size() const
Scalar SC
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Node NO
static RCP< Xpetra::StridedMap< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, std::vector< size_t > &stridingInfo, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalOrdinal stridedBlockId=-1, GlobalOrdinal offset=0, LocalGlobal lg=Xpetra::GloballyDistributed)
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
int shape(OrdinalType numRows, OrdinalType numCols)
std::vector< T >::iterator iterator
iterator begin()
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
int setMatrix(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &A)
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const