MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
47 #define MUELU_BLACKBOXPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 #include <iomanip>
51 
52 // #include <Teuchos_LAPACK.hpp>
56 
58 #include <Xpetra_CrsMatrixWrap.hpp>
59 #include <Xpetra_ImportFactory.hpp>
60 #include <Xpetra_Matrix.hpp>
61 #include <Xpetra_MapFactory.hpp>
62 #include <Xpetra_MultiVectorFactory.hpp>
63 #include <Xpetra_VectorFactory.hpp>
64 
65 #include <Xpetra_IO.hpp>
66 
68 
69 #include "MueLu_Monitor.hpp"
70 
71 namespace MueLu {
72 
73 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
75  RCP<ParameterList> validParamList = rcp(new ParameterList());
76 
77  // Coarsen can come in two forms, either a single char that will be interpreted as an integer
78  // which is used as the coarsening rate in every spatial dimentions,
79  // or it can be a longer string that will then be interpreted as an array of integers.
80  // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
81  // is the default setting!
82  validParamList->set<std::string>("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
83  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
84  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
85  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
86  validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
87  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
88  validParamList->set<std::string>("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
89  validParamList->set<std::string>("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
90 
91  return validParamList;
92 }
93 
94 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
96  Level& /* coarseLevel */)
97  const {
98  Input(fineLevel, "A");
99  Input(fineLevel, "Nullspace");
100  Input(fineLevel, "Coordinates");
101  // Request the global number of nodes per dimensions
102  if (fineLevel.GetLevelID() == 0) {
103  if (fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
104  fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
105  } else {
106  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
108  "gNodesPerDim was not provided by the user on level0!");
109  }
110  } else {
111  Input(fineLevel, "gNodesPerDim");
112  }
113 
114  // Request the local number of nodes per dimensions
115  if (fineLevel.GetLevelID() == 0) {
116  if (fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
117  fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
118  } else {
119  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
121  "lNodesPerDim was not provided by the user on level0!");
122  }
123  } else {
124  Input(fineLevel, "lNodesPerDim");
125  }
126 }
127 
128 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
130  Level& coarseLevel) const {
131  return BuildP(fineLevel, coarseLevel);
132 }
133 
134 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
136  Level& coarseLevel) const {
137  FactoryMonitor m(*this, "Build", coarseLevel);
138 
139  // Get parameter list
140  const ParameterList& pL = GetParameterList();
141 
142  // obtain general variables
143  RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
144  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
146  Get<RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > >(fineLevel, "Coordinates");
147  LO numDimensions = coordinates->getNumVectors();
148  LO BlkSize = A->GetFixedBlockSize();
149 
150  // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
151  Array<GO> gFineNodesPerDir(3);
152  Array<LO> lFineNodesPerDir(3);
153  // Get the number of points in each direction
154  if (fineLevel.GetLevelID() == 0) {
155  gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
156  lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
157  } else {
158  // Loading global number of nodes per diretions
159  gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
160 
161  // Loading local number of nodes per diretions
162  lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
163  }
164  for (LO i = 0; i < 3; ++i) {
165  if (gFineNodesPerDir[i] == 0) {
166  GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
167  << std::endl;
168  gFineNodesPerDir[i] = 1;
169  }
170  if (lFineNodesPerDir[i] == 0) {
171  GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
172  << std::endl;
173  lFineNodesPerDir[i] = 1;
174  }
175  }
176  GO gNumFineNodes = gFineNodesPerDir[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
177  LO lNumFineNodes = lFineNodesPerDir[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0];
178 
179  // Get the coarsening rate
180  std::string coarsenRate = pL.get<std::string>("Coarsen");
181  Array<LO> coarseRate(3);
182  {
183  Teuchos::Array<LO> crates;
184  try {
185  crates = Teuchos::fromStringToArray<LO>(coarsenRate);
186  } catch (const Teuchos::InvalidArrayStringRepresentation& e) {
187  GetOStream(Errors, -1) << " *** Coarsen must be a string convertible into an array! *** "
188  << std::endl;
189  throw e;
190  }
191  TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
193  "Coarsen must have at least as many components as the number of"
194  " spatial dimensions in the problem.");
195  for (LO i = 0; i < 3; ++i) {
196  if (i < numDimensions) {
197  if (crates.size() == 1) {
198  coarseRate[i] = crates[0];
199  } else if (i < crates.size()) {
200  coarseRate[i] = crates[i];
201  } else {
202  GetOStream(Errors, -1) << " *** Coarsen must be at least as long as the number of"
203  " spatial dimensions! *** "
204  << std::endl;
206  " *** Coarsen must be at least as long as the number of"
207  " spatial dimensions! *** \n");
208  }
209  } else {
210  coarseRate[i] = 1;
211  }
212  }
213  } // End of scope for crates
214 
215  // Get the stencil type used for discretization
216  const std::string stencilType = pL.get<std::string>("stencil type");
217  if (stencilType != "full" && stencilType != "reduced") {
218  GetOStream(Errors, -1) << " *** stencil type must be set to: full or reduced, any other value "
219  "is trated as an error! *** "
220  << std::endl;
221  throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
222  }
223 
224  // Get the strategy for PDE systems
225  const std::string blockStrategy = pL.get<std::string>("block strategy");
226  if (blockStrategy != "coupled" && blockStrategy != "uncoupled") {
227  GetOStream(Errors, -1) << " *** block strategy must be set to: coupled or uncoupled, any other "
228  "value is trated as an error! *** "
229  << std::endl;
230  throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
231  }
232 
233  GO gNumCoarseNodes = 0;
234  LO lNumCoarseNodes = 0;
235  Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
236  Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
237  Array<bool> ghostInterface(6);
238  Array<int> boundaryFlags(3);
241  for (LO dim = 0; dim < numDimensions; ++dim) {
242  fineNodes[dim] = coordinates->getData(dim)();
243  }
244 
245  // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
246  RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
247 
248  GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize, // inputs
249  gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
250  lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
251  gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
252  ghostedCoarseNodes);
253 
254  // Create the MultiVector of coarse coordinates
255  Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
256  RCP<const Map> coarseCoordsMap = MapFactory::Build(lib,
257  gNumCoarseNodes,
258  coarseNodesGIDs.view(0, lNumCoarseNodes),
259  coordinates->getMap()->getIndexBase(),
260  coordinates->getMap()->getComm());
262  for (LO dim = 0; dim < numDimensions; ++dim) {
263  coarseCoords[dim] = coarseNodes[dim]();
264  }
265  RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO> > coarseCoordinates =
266  Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType, LO, GO, NO>::Build(coarseCoordsMap, coarseCoords(),
267  numDimensions);
268 
269  // Now create a new matrix: Aghost that contains all the data
270  // locally needed to compute the local part of the prolongator.
271  // Here assuming that all the coarse nodes o and fine nodes +
272  // are local then all the data associated with the coarse
273  // nodes O and the fine nodes * needs to be imported.
274  //
275  // *--*--*--*--*--*--*--*
276  // | | | | | | | |
277  // o--+--+--o--+--+--O--*
278  // | | | | | | | |
279  // +--+--+--+--+--+--*--*
280  // | | | | | | | |
281  // +--+--+--+--+--+--*--*
282  // | | | | | | | |
283  // o--+--+--o--+--+--O--*
284  // | | | | | | | |
285  // +--+--+--+--+--+--*--*
286  // | | | | | | | |
287  // *--*--*--*--*--*--*--*
288  // | | | | | | | |
289  // O--*--*--O--*--*--O--*
290  //
291  // Creating that local matrix is easy enough using proper range
292  // and domain maps to import data from A. Note that with this
293  // approach we reorder the local entries using the domain map and
294  // can subsequently compute the prolongator without reordering.
295  // As usual we need to be careful about any coarsening rate
296  // change at the boundary!
297 
298  // The ingredients needed are an importer, a range map and a domain map
299  Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
300  nodeSteps[0] = 1;
301  nodeSteps[1] = gFineNodesPerDir[0];
302  nodeSteps[2] = gFineNodesPerDir[0] * gFineNodesPerDir[1];
303  Array<LO> glFineNodesPerDir(3);
304  GO startingGID = A->getRowMap()->getMinGlobalIndex();
305  for (LO dim = 0; dim < 3; ++dim) {
306  LO numCoarseNodes = 0;
307  if (dim < numDimensions) {
308  startingGID -= myOffset[dim] * nodeSteps[dim];
309  numCoarseNodes = lCoarseNodesPerDir[dim];
310  if (ghostInterface[2 * dim]) {
311  ++numCoarseNodes;
312  }
313  if (ghostInterface[2 * dim + 1]) {
314  ++numCoarseNodes;
315  }
316  if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
317  glFineNodesPerDir[dim] = (numCoarseNodes - 2) * coarseRate[dim] + endRate[dim] + 1;
318  } else {
319  glFineNodesPerDir[dim] = (numCoarseNodes - 1) * coarseRate[dim] + 1;
320  }
321  } else {
322  glFineNodesPerDir[dim] = 1;
323  }
324  }
325  ghostRowGIDs.resize(glFineNodesPerDir[0] * glFineNodesPerDir[1] * glFineNodesPerDir[2] * BlkSize);
326  for (LO k = 0; k < glFineNodesPerDir[2]; ++k) {
327  for (LO j = 0; j < glFineNodesPerDir[1]; ++j) {
328  for (LO i = 0; i < glFineNodesPerDir[0]; ++i) {
329  for (LO l = 0; l < BlkSize; ++l) {
330  ghostRowGIDs[(k * glFineNodesPerDir[1] * glFineNodesPerDir[0] + j * glFineNodesPerDir[0] + i) * BlkSize + l] = startingGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
331  }
332  }
333  }
334  }
335 
336  // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
337  Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
338  Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
339  GO colMinGID = 0;
340  Array<LO> colRange(numDimensions);
341  dimStride[0] = 1;
342  for (int dim = 1; dim < numDimensions; ++dim) {
343  dimStride[dim] = dimStride[dim - 1] * gFineNodesPerDir[dim - 1];
344  }
345  {
346  GO tmp = startingGID;
347  for (int dim = numDimensions; dim > 0; --dim) {
348  startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
349  tmp = tmp % dimStride[dim - 1];
350 
351  if (startingGlobalIndices[dim - 1] > 0) {
352  startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
353  }
354  if (startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]) {
355  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1];
356  } else {
357  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] - 1;
358  }
359  colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
360  colMinGID += startingColIndices[dim - 1] * dimStride[dim - 1];
361  }
362  }
363  ghostColGIDs.resize(colRange[0] * colRange[1] * colRange[2] * BlkSize);
364  for (LO k = 0; k < colRange[2]; ++k) {
365  for (LO j = 0; j < colRange[1]; ++j) {
366  for (LO i = 0; i < colRange[0]; ++i) {
367  for (LO l = 0; l < BlkSize; ++l) {
368  ghostColGIDs[(k * colRange[1] * colRange[0] + j * colRange[0] + i) * BlkSize + l] = colMinGID + (k * gFineNodesPerDir[1] * gFineNodesPerDir[0] + j * gFineNodesPerDir[0] + i) * BlkSize + l;
369  }
370  }
371  }
372  }
373 
374  RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
376  ghostRowGIDs(),
377  A->getRowMap()->getIndexBase(),
378  A->getRowMap()->getComm());
379  RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO, GO, NO>::Build(A->getRowMap()->lib(),
381  ghostColGIDs(),
382  A->getRowMap()->getIndexBase(),
383  A->getRowMap()->getComm());
384  RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO, GO, NO>::Build(A->getRowMap(),
385  ghostedRowMap);
387  ghostedRowMap,
388  ghostedRowMap);
389 
390  // Create the maps and data structures for the projection matrix
391  RCP<const Map> rowMapP = A->getDomainMap();
392 
393  RCP<const Map> domainMapP;
394 
395  RCP<const Map> colMapP;
396 
397  // At this point we need to create the column map which is a delicate operation.
398  // The entries in that map need to be ordered as follows:
399  // 1) first owned entries ordered by LID
400  // 2) second order the remaining entries by PID
401  // 3) entries with the same remote PID are ordered by GID.
402  // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
403  // is the number of remote nodes. The sorting can be limited to remote nodes
404  // as the owned ones are alreaded ordered by LID!
405 
406  LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
407  {
408  std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
409  for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
410  colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
411  if (ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
412  colMapOrdering[ind].PID = -1;
413  } else {
414  colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
415  }
416  colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
417  colMapOrdering[ind].lexiInd = ind;
418  }
419  std::sort(colMapOrdering.begin(), colMapOrdering.end(),
420  [](NodeID a, NodeID b) -> bool {
421  return ((a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)));
422  });
423 
424  colGIDs.resize(BlkSize * lNumGhostedNodes);
425  for (LO ind = 0; ind < lNumGhostedNodes; ++ind) {
426  // Save the permutation calculated to go from Lexicographic indexing to column map indexing
427  ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
428  for (LO dof = 0; dof < BlkSize; ++dof) {
429  colGIDs[BlkSize * ind + dof] = BlkSize * colMapOrdering[ind].GID + dof;
430  }
431  }
432  domainMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
433  BlkSize * gNumCoarseNodes,
434  colGIDs.view(0, BlkSize * lNumCoarseNodes),
435  rowMapP->getIndexBase(),
436  rowMapP->getComm());
437  colMapP = Xpetra::MapFactory<LO, GO, NO>::Build(rowMapP->lib(),
439  colGIDs.view(0, colGIDs.size()),
440  rowMapP->getIndexBase(),
441  rowMapP->getComm());
442  } // End of scope for colMapOrdering and colGIDs
443 
444  std::vector<size_t> strideInfo(1);
445  strideInfo[0] = BlkSize;
446  RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO, GO, NO>::Build(domainMapP,
447  strideInfo);
448 
449  GO gnnzP = 0;
450  LO lnnzP = 0;
451  // coarse points have one nnz per row
452  gnnzP += gNumCoarseNodes;
453  lnnzP += lNumCoarseNodes;
454  // add all other points multiplying by 2^numDimensions
455  gnnzP += (gNumFineNodes - gNumCoarseNodes) * std::pow(2, numDimensions);
456  lnnzP += (lNumFineNodes - lNumCoarseNodes) * std::pow(2, numDimensions);
457  // finally multiply by the number of dofs per node
458  gnnzP = gnnzP * BlkSize;
459  lnnzP = lnnzP * BlkSize;
460 
461  // Create the matrix itself using the above maps
462  RCP<Matrix> P;
463  P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
464  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
465 
466  ArrayRCP<size_t> iaP;
467  ArrayRCP<LO> jaP;
468  ArrayRCP<SC> valP;
469 
470  PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
471 
472  ArrayView<size_t> ia = iaP();
473  ArrayView<LO> ja = jaP();
474  ArrayView<SC> val = valP();
475  ia[0] = 0;
476 
477  LO numCoarseElements = 1;
478  Array<LO> lCoarseElementsPerDir(3);
479  for (LO dim = 0; dim < numDimensions; ++dim) {
480  lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
481  if (ghostInterface[2 * dim]) {
482  ++lCoarseElementsPerDir[dim];
483  }
484  if (!ghostInterface[2 * dim + 1]) {
485  --lCoarseElementsPerDir[dim];
486  }
487  numCoarseElements = numCoarseElements * lCoarseElementsPerDir[dim];
488  }
489 
490  for (LO dim = numDimensions; dim < 3; ++dim) {
491  lCoarseElementsPerDir[dim] = 1;
492  }
493 
494  // Loop over the coarse elements
495  Array<int> elementFlags(3);
496  Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
497  Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
498  const int numCoarseNodesInElement = std::pow(2, numDimensions);
499  const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
500  const int numRowsPerPoint = BlkSize;
501  for (elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
502  for (elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
503  for (elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
504  elementFlags[0] = 0;
505  elementFlags[1] = 0;
506  elementFlags[2] = 0;
507  for (int dim = 0; dim < 3; ++dim) {
508  // Detect boundary conditions on the element and set corresponding flags.
509  if (elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
510  elementFlags[dim] = boundaryFlags[dim];
511  } else if (elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
512  elementFlags[dim] += 1;
513  } else if ((elemInds[dim] == lCoarseElementsPerDir[dim] - 1) && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
514  elementFlags[dim] += 2;
515  } else {
516  elementFlags[dim] = 0;
517  }
518 
519  // Compute the number of nodes in the current element.
520  if (dim < numDimensions) {
521  if ((elemInds[dim] == lCoarseElementsPerDir[dim]) && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
522  elementNodesPerDir[dim] = endRate[dim] + 1;
523  } else {
524  elementNodesPerDir[dim] = coarseRate[dim] + 1;
525  }
526  } else {
527  elementNodesPerDir[dim] = 1;
528  }
529 
530  // Get the lowest tuple of the element using the ghosted local coordinate system
531  glElementRefTuple[dim] = elemInds[dim] * coarseRate[dim];
532  glElementRefTupleCG[dim] = elemInds[dim];
533  }
534 
535  // Now get the column map indices corresponding to the dofs associated with the current
536  // element's coarse nodes.
537  for (typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
538  glElementCoarseNodeCG[elem] = glElementRefTupleCG[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[1] * glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
539  }
540  glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
541  glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
542  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
543  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0];
544 
545  glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
546  glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
547  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
548  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
549 
550  glElementCoarseNodeCG[1] += 1;
551  glElementCoarseNodeCG[3] += 1;
552  glElementCoarseNodeCG[5] += 1;
553  glElementCoarseNodeCG[7] += 1;
554 
555  LO numNodesInElement = elementNodesPerDir[0] * elementNodesPerDir[1] * elementNodesPerDir[2];
556  // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
557  // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
558 
559  // Compute the element prolongator
561  Array<LO> dofType(numNodesInElement * BlkSize), lDofInd(numNodesInElement * BlkSize);
562  ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
563  numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
564  lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
565  blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
566  Pi, Pf, Pe, dofType, lDofInd);
567 
568  // Find ghosted LID associated with nodes in the element and eventually which of these
569  // nodes are ghosts, this information is used to fill the local prolongator.
570  Array<LO> lNodeLIDs(numNodesInElement);
571  {
572  Array<LO> lNodeTuple(3), nodeInd(3);
573  for (nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
574  for (nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
575  for (nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
576  int stencilLength = 0;
577  if ((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
578  (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
579  (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
580  stencilLength = 1;
581  } else {
582  stencilLength = std::pow(2, numDimensions);
583  }
584  LO nodeElementInd = nodeInd[2] * elementNodesPerDir[1] * elementNodesPerDir[1] + nodeInd[1] * elementNodesPerDir[0] + nodeInd[0];
585  for (int dim = 0; dim < 3; ++dim) {
586  lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
587  }
588  if (lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
589  lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
590  lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
591  lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
592  // This flags the ghosts nodes used for prolongator calculation but for which
593  // we do not own the row, hence we won't fill these values on this rank.
594  lNodeLIDs[nodeElementInd] = -1;
595  } else if ((nodeInd[0] == 0 && elemInds[0] != 0) ||
596  (nodeInd[1] == 0 && elemInds[1] != 0) ||
597  (nodeInd[2] == 0 && elemInds[2] != 0)) {
598  // This flags nodes that are owned but common to two coarse elements and that
599  // were already filled by another element, we don't want to fill them twice so
600  // we skip them
601  lNodeLIDs[nodeElementInd] = -2;
602  } else {
603  // The remaining nodes are locally owned and we need to fill the coresponding
604  // rows of the prolongator
605 
606  // First we need to find which row needs to be filled
607  lNodeLIDs[nodeElementInd] = lNodeTuple[2] * lFineNodesPerDir[1] * lFineNodesPerDir[0] + lNodeTuple[1] * lFineNodesPerDir[0] + lNodeTuple[0];
608 
609  // We also compute the row offset using arithmetic to ensure that we can loop
610  // easily over the nodes in a macro-element as well as facilitate on-node
611  // parallelization. The node serial code could be rewritten with two loops over
612  // the local part of the mesh to avoid the costly integer divisions...
613  Array<LO> refCoarsePointTuple(3);
614  for (int dim = 2; dim > -1; --dim) {
615  if (dim == 0) {
616  refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
617  if (myOffset[dim] == 0) {
618  ++refCoarsePointTuple[dim];
619  }
620  } else {
621  // Note: no need for magnitudeType here, just use double because these things are LO's
622  refCoarsePointTuple[dim] =
623  std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim]);
624  }
625  if ((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {
626  break;
627  }
628  }
629  const LO numCoarsePoints = refCoarsePointTuple[0] + refCoarsePointTuple[1] * lCoarseNodesPerDir[0] + refCoarsePointTuple[2] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0];
630  const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
631 
632  // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
633  // fill row 'n' to 'n+BlkSize'.
634  size_t rowPtr = (numFinePoints - numCoarsePoints) * numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode + numCoarsePoints * numRowsPerPoint;
635  if (dofType[nodeElementInd * BlkSize] == 0) {
636  // Check if current node is a coarse point
637  rowPtr = rowPtr - numRowsPerPoint;
638  } else {
639  rowPtr = rowPtr - numRowsPerPoint * numCoarseNodesInElement * nnzPerCoarseNode;
640  }
641 
642  for (int dof = 0; dof < BlkSize; ++dof) {
643  // Now we loop over the stencil and fill the column indices and row values
644  int cornerInd = 0;
645  switch (dofType[nodeElementInd * BlkSize + dof]) {
646  case 0: // Corner node
647  if (nodeInd[2] == elementNodesPerDir[2] - 1) {
648  cornerInd += 4;
649  }
650  if (nodeInd[1] == elementNodesPerDir[1] - 1) {
651  cornerInd += 2;
652  }
653  if (nodeInd[0] == elementNodesPerDir[0] - 1) {
654  cornerInd += 1;
655  }
656  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + dof + 1;
657  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]] * BlkSize + dof;
658  val[rowPtr + dof] = 1.0;
659  break;
660 
661  case 1: // Edge node
662  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
663  for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
664  if (blockStrategy == "coupled") {
665  for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
666  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
667  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
668  val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
669  ind1 * BlkSize + ind2);
670  }
671  } else if (blockStrategy == "uncoupled") {
672  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
673  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
674  val[lRowPtr] = Pe(lDofInd[nodeElementInd * BlkSize + dof],
675  ind1 * BlkSize + dof);
676  }
677  }
678  break;
679 
680  case 2: // Face node
681  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
682  for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
683  if (blockStrategy == "coupled") {
684  for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
685  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
686  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
687  val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
688  ind1 * BlkSize + ind2);
689  }
690  } else if (blockStrategy == "uncoupled") {
691  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
692  // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
693  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
694  val[lRowPtr] = Pf(lDofInd[nodeElementInd * BlkSize + dof],
695  ind1 * BlkSize + dof);
696  }
697  }
698  break;
699 
700  case 3: // Interior node
701  ia[lNodeLIDs[nodeElementInd] * BlkSize + dof + 1] = rowPtr + (dof + 1) * numCoarseNodesInElement * nnzPerCoarseNode;
702  for (int ind1 = 0; ind1 < stencilLength; ++ind1) {
703  if (blockStrategy == "coupled") {
704  for (int ind2 = 0; ind2 < BlkSize; ++ind2) {
705  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1 * BlkSize + ind2;
706  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + ind2;
707  val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
708  ind1 * BlkSize + ind2);
709  }
710  } else if (blockStrategy == "uncoupled") {
711  size_t lRowPtr = rowPtr + dof * numCoarseNodesInElement * nnzPerCoarseNode + ind1;
712  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]] * BlkSize + dof;
713  val[lRowPtr] = Pi(lDofInd[nodeElementInd * BlkSize + dof],
714  ind1 * BlkSize + dof);
715  }
716  }
717  break;
718  }
719  }
720  }
721  }
722  }
723  }
724  } // End of scopt for lNodeTuple and nodeInd
725  }
726  }
727  }
728 
729  // Sort all row's column indicies and entries by LID
730  Xpetra::CrsMatrixUtils<SC, LO, GO, NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
731 
732  // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
733  PCrs->setAllValues(iaP, jaP, valP);
734  PCrs->expertStaticFillComplete(domainMapP, rowMapP);
735 
736  // set StridingInformation of P
737  if (A->IsView("stridedMaps") == true) {
738  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
739  } else {
740  P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
741  }
742 
743  // store the transfer operator and node coordinates on coarse level
744  Set(coarseLevel, "P", P);
745  Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
746  Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
747  Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
748 }
749 
750 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
753  const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
754  const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
755  Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
756  Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
757  Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
758  Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
759  ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
760  RCP<NodesIDs> ghostedCoarseNodes) const {
761  // This function is extracting the geometric information from the coordinates
762  // and creates the necessary data/formatting to perform locally the calculation
763  // of the pronlongator.
764  //
765  // inputs:
766 
767  RCP<const Map> coordinatesMap = coordinates->getMap();
768  LO numDimensions = coordinates->getNumVectors();
769 
770  // Using the coarsening rate and the fine level data,
771  // compute coarse level data
772 
773  // Phase 1 //
774  // ------------------------------------------------------------------ //
775  // We first start by finding small informations on the mesh such as //
776  // the number of coarse nodes (local and global) and the number of //
777  // ghost nodes / the end rate of coarsening. //
778  // ------------------------------------------------------------------ //
779  GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
780  {
781  GO tmp;
782  gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
783  tmp = minGlobalIndex % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
784  gIndices[1] = tmp / gFineNodesPerDir[0];
785  gIndices[0] = tmp % gFineNodesPerDir[0];
786 
787  myOffset[2] = gIndices[2] % coarseRate[2];
788  myOffset[1] = gIndices[1] % coarseRate[1];
789  myOffset[0] = gIndices[0] % coarseRate[0];
790  }
791 
792  for (int dim = 0; dim < 3; ++dim) {
793  if (gIndices[dim] == 0) {
794  boundaryFlags[dim] += 1;
795  }
796  if (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
797  boundaryFlags[dim] += 2;
798  }
799  }
800 
801  // Check whether ghost nodes are needed in each direction
802  for (LO i = 0; i < numDimensions; ++i) {
803  if ((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
804  ghostInterface[2 * i] = true;
805  }
806  if (((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i]) && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
807  ghostInterface[2 * i + 1] = true;
808  }
809  }
810 
811  for (LO i = 0; i < 3; ++i) {
812  if (i < numDimensions) {
813  lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
814  if (myOffset[i] == 0) {
815  ++lCoarseNodesPerDir[i];
816  }
817  gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
818  endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
819  if (endRate[i] == 0) {
820  ++gCoarseNodesPerDir[i];
821  endRate[i] = coarseRate[i];
822  }
823  } else {
824  // Most quantities need to be set to 1 for extra dimensions
825  // this is rather logical, an x-y plane is like a single layer
826  // of nodes in the z direction...
827  gCoarseNodesPerDir[i] = 1;
828  lCoarseNodesPerDir[i] = 1;
829  endRate[i] = 1;
830  }
831  }
832 
833  gNumCoarseNodes = gCoarseNodesPerDir[0] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[2];
834  lNumCoarseNodes = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
835 
836  // For each direction, determine how many ghost points are required.
837  LO lNumGhostNodes = 0;
838  Array<GO> startGhostedCoarseNode(3);
839  {
840  const int complementaryIndices[3][2] = {{1, 2}, {0, 2}, {0, 1}};
841  LO tmp = 0;
842  for (LO i = 0; i < 3; ++i) {
843  // The first branch of this if-statement will be used if the rank contains only one layer
844  // of nodes in direction i, that layer must also coincide with the boundary of the mesh
845  // and coarseRate[i] == endRate[i]...
846  if ((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
847  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
848  } else {
849  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
850  }
851  // First we load the number of locally owned coarse nodes
852  glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
853 
854  // Check whether a face in direction i needs ghost nodes
855  if (ghostInterface[2 * i] || ghostInterface[2 * i + 1]) {
856  ++glCoarseNodesPerDir[i];
857  if (i == 0) {
858  tmp = lCoarseNodesPerDir[1] * lCoarseNodesPerDir[2];
859  }
860  if (i == 1) {
861  tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[2];
862  }
863  if (i == 2) {
864  tmp = lCoarseNodesPerDir[0] * lCoarseNodesPerDir[1];
865  }
866  }
867  // If both faces in direction i need nodes, double the number of ghost nodes
868  if (ghostInterface[2 * i] && ghostInterface[2 * i + 1]) {
869  ++glCoarseNodesPerDir[i];
870  tmp = 2 * tmp;
871  }
872  lNumGhostNodes += tmp;
873 
874  // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
875  for (LO j = 0; j < 2; ++j) {
876  for (LO k = 0; k < 2; ++k) {
877  // Check if two adjoining faces need ghost nodes and then add their common edge
878  if (ghostInterface[2 * complementaryIndices[i][0] + j] && ghostInterface[2 * complementaryIndices[i][1] + k]) {
879  lNumGhostNodes += lCoarseNodesPerDir[i];
880  // Add corners if three adjoining faces need ghost nodes,
881  // but add them only once! Hence when i == 0.
882  if (ghostInterface[2 * i] && (i == 0)) {
883  lNumGhostNodes += 1;
884  }
885  if (ghostInterface[2 * i + 1] && (i == 0)) {
886  lNumGhostNodes += 1;
887  }
888  }
889  }
890  }
891  tmp = 0;
892  }
893  } // end of scope for tmp and complementaryIndices
894 
895  const LO lNumGhostedNodes = glCoarseNodesPerDir[0] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[2];
896  ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
897  ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
898  ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
899  ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
900  ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
901 
902  // We loop over all ghosted coarse nodes by increasing global lexicographic order
903  Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
904  LO currentIndex = -1;
905  for (ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
906  for (ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
907  for (ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
908  currentIndex = ijk[2] * glCoarseNodesPerDir[1] * glCoarseNodesPerDir[0] + ijk[1] * glCoarseNodesPerDir[0] + ijk[0];
909  coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
910  coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0] * coarseRate[0];
911  if (coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
912  coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
913  }
914  coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
915  coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1] * coarseRate[1];
916  if (coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
917  coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
918  }
919  coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
920  coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2] * coarseRate[2];
921  if (coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
922  coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
923  }
924  GO myGID = 0, myCoarseGID = -1;
925  GO factor[3] = {};
926  factor[2] = gFineNodesPerDir[1] * gFineNodesPerDir[0];
927  factor[1] = gFineNodesPerDir[0];
928  factor[0] = 1;
929  for (int dim = 0; dim < 3; ++dim) {
930  if (dim < numDimensions) {
931  if (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim] < gFineNodesPerDir[dim] - 1) {
932  myGID += (gIndices[dim] - myOffset[dim] + ijk[dim] * coarseRate[dim]) * factor[dim];
933  } else {
934  myGID += (gIndices[dim] - myOffset[dim] + (ijk[dim] - 1) * coarseRate[dim] + endRate[dim]) * factor[dim];
935  }
936  }
937  }
938  myCoarseGID = coarseNodeCoarseIndices[0] + coarseNodeCoarseIndices[1] * gCoarseNodesPerDir[0] + coarseNodeCoarseIndices[2] * gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
939  ghostedCoarseNodes->GIDs[currentIndex] = myGID;
940  ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
941  }
942  }
943  }
944  coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
945  ghostedCoarseNodes->PIDs(),
946  ghostedCoarseNodes->LIDs());
947 
948  // Phase 2 //
949  // ------------------------------------------------------------------ //
950  // Build a list of GIDs to import the required ghost nodes. //
951  // The ordering of the ghosts nodes will be as natural as possible, //
952  // i.e. it should follow the GID ordering of the mesh. //
953  // ------------------------------------------------------------------ //
954 
955  // Saddly we have to more or less redo what was just done to figure out the value of
956  // lNumGhostNodes, there might be some optimization possibility here...
957  ghostGIDs.resize(lNumGhostNodes);
958  LO countGhosts = 0;
959  // Get the GID of the first point on the current partition.
960  GO startingGID = minGlobalIndex;
961  Array<GO> startingIndices(3);
962  // We still want ghost nodes even if have with a 0 offset,
963  // except when we are on a boundary
964  if (ghostInterface[4] && (myOffset[2] == 0)) {
965  if (gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
966  startingGID -= endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
967  } else {
968  startingGID -= coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
969  }
970  } else {
971  startingGID -= myOffset[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
972  }
973  if (ghostInterface[2] && (myOffset[1] == 0)) {
974  if (gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
975  startingGID -= endRate[1] * gFineNodesPerDir[0];
976  } else {
977  startingGID -= coarseRate[1] * gFineNodesPerDir[0];
978  }
979  } else {
980  startingGID -= myOffset[1] * gFineNodesPerDir[0];
981  }
982  if (ghostInterface[0] && (myOffset[0] == 0)) {
983  if (gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
984  startingGID -= endRate[0];
985  } else {
986  startingGID -= coarseRate[0];
987  }
988  } else {
989  startingGID -= myOffset[0];
990  }
991 
992  { // scope for tmp
993  GO tmp;
994  startingIndices[2] = startingGID / (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
995  tmp = startingGID % (gFineNodesPerDir[1] * gFineNodesPerDir[0]);
996  startingIndices[1] = tmp / gFineNodesPerDir[0];
997  startingIndices[0] = tmp % gFineNodesPerDir[0];
998  }
999 
1000  GO ghostOffset[3] = {0, 0, 0};
1001  LO lengthZero = lCoarseNodesPerDir[0];
1002  LO lengthOne = lCoarseNodesPerDir[1];
1003  LO lengthTwo = lCoarseNodesPerDir[2];
1004  if (ghostInterface[0]) {
1005  ++lengthZero;
1006  }
1007  if (ghostInterface[1]) {
1008  ++lengthZero;
1009  }
1010  if (ghostInterface[2]) {
1011  ++lengthOne;
1012  }
1013  if (ghostInterface[3]) {
1014  ++lengthOne;
1015  }
1016  if (ghostInterface[4]) {
1017  ++lengthTwo;
1018  }
1019  if (ghostInterface[5]) {
1020  ++lengthTwo;
1021  }
1022 
1023  // First check the bottom face as it will have the lowest GIDs
1024  if (ghostInterface[4]) {
1025  ghostOffset[2] = startingGID;
1026  for (LO j = 0; j < lengthOne; ++j) {
1027  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1028  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1029  } else {
1030  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1031  }
1032  for (LO k = 0; k < lengthZero; ++k) {
1033  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1034  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1035  } else {
1036  ghostOffset[0] = k * coarseRate[0];
1037  }
1038  // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1039  // carefully.
1040  // This if statement is repeated each time a ghost node is selected.
1041  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1042  ++countGhosts;
1043  }
1044  }
1045  }
1046 
1047  // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1048  // nodes located on these layers.
1049  for (LO i = 0; i < lengthTwo; ++i) {
1050  // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1051  // swept seperatly for efficiency.
1052  if (!((i == lengthTwo - 1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4])) {
1053  // Set the ghostOffset in direction 2 taking into account a possible endRate different
1054  // from the regular coarseRate.
1055  if ((i == lengthTwo - 1) && !ghostInterface[5]) {
1056  ghostOffset[2] = startingGID + ((i - 1) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1057  } else {
1058  ghostOffset[2] = startingGID + i * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1059  }
1060  for (LO j = 0; j < lengthOne; ++j) {
1061  if ((j == 0) && ghostInterface[2]) {
1062  for (LO k = 0; k < lengthZero; ++k) {
1063  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1064  if (k == 0) {
1065  ghostOffset[0] = endRate[0];
1066  } else {
1067  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1068  }
1069  } else {
1070  ghostOffset[0] = k * coarseRate[0];
1071  }
1072  // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1073  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1074  ++countGhosts;
1075  }
1076  } else if ((j == lengthOne - 1) && ghostInterface[3]) {
1077  // Set the ghostOffset in direction 1 taking into account a possible endRate different
1078  // from the regular coarseRate.
1079  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1080  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1081  } else {
1082  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1083  }
1084  for (LO k = 0; k < lengthZero; ++k) {
1085  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1086  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1087  } else {
1088  ghostOffset[0] = k * coarseRate[0];
1089  }
1090  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1091  ++countGhosts;
1092  }
1093  } else {
1094  // Set ghostOffset[1] for side faces sweep
1095  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1096  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1097  } else {
1098  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1099  }
1100 
1101  // Set ghostOffset[0], ghostGIDs and countGhosts
1102  if (ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1103  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1104  ++countGhosts;
1105  }
1106  if (ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1107  if ((startingIndices[0] + (lengthZero - 1) * coarseRate[0]) > gFineNodesPerDir[0] - 1) {
1108  if (lengthZero > 2) {
1109  ghostOffset[0] = (lengthZero - 2) * coarseRate[0] + endRate[0];
1110  } else {
1111  ghostOffset[0] = endRate[0];
1112  }
1113  } else {
1114  ghostOffset[0] = (lengthZero - 1) * coarseRate[0];
1115  }
1116  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1117  ++countGhosts;
1118  }
1119  }
1120  }
1121  }
1122  }
1123 
1124  // Finally check the top face
1125  if (ghostInterface[5]) {
1126  if (startingIndices[2] + (lengthTwo - 1) * coarseRate[2] + 1 > gFineNodesPerDir[2]) {
1127  ghostOffset[2] = startingGID + ((lengthTwo - 2) * coarseRate[2] + endRate[2]) * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1128  } else {
1129  ghostOffset[2] = startingGID + (lengthTwo - 1) * coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1130  }
1131  for (LO j = 0; j < lengthOne; ++j) {
1132  if ((j == lengthOne - 1) && (startingIndices[1] + j * coarseRate[1] + 1 > gFineNodesPerDir[1])) {
1133  ghostOffset[1] = ((j - 1) * coarseRate[1] + endRate[1]) * gFineNodesPerDir[0];
1134  } else {
1135  ghostOffset[1] = j * coarseRate[1] * gFineNodesPerDir[0];
1136  }
1137  for (LO k = 0; k < lengthZero; ++k) {
1138  if ((k == lengthZero - 1) && (startingIndices[0] + k * coarseRate[0] + 1 > gFineNodesPerDir[0])) {
1139  ghostOffset[0] = (k - 1) * coarseRate[0] + endRate[0];
1140  } else {
1141  ghostOffset[0] = k * coarseRate[0];
1142  }
1143  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1144  ++countGhosts;
1145  }
1146  }
1147  }
1148 
1149  // Phase 3 //
1150  // ------------------------------------------------------------------ //
1151  // Final phase of this function, lists are being built to create the //
1152  // column and domain maps of the projection as well as the map and //
1153  // arrays for the coarse coordinates multivector. //
1154  // ------------------------------------------------------------------ //
1155 
1156  Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1157  LO currentNode, offset2, offset1, offset0;
1158  // Find the GIDs of the coarse nodes on the partition.
1159  for (LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1160  if (myOffset[2] == 0) {
1161  offset2 = startingIndices[2] + myOffset[2];
1162  } else {
1163  if (startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1164  offset2 = startingIndices[2] + endRate[2];
1165  } else {
1166  offset2 = startingIndices[2] + coarseRate[2];
1167  }
1168  }
1169  if (offset2 + ind2 * coarseRate[2] > gFineNodesPerDir[2] - 1) {
1170  offset2 += (ind2 - 1) * coarseRate[2] + endRate[2];
1171  } else {
1172  offset2 += ind2 * coarseRate[2];
1173  }
1174  offset2 = offset2 * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1175 
1176  for (LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1177  if (myOffset[1] == 0) {
1178  offset1 = startingIndices[1] + myOffset[1];
1179  } else {
1180  if (startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1181  offset1 = startingIndices[1] + endRate[1];
1182  } else {
1183  offset1 = startingIndices[1] + coarseRate[1];
1184  }
1185  }
1186  if (offset1 + ind1 * coarseRate[1] > gFineNodesPerDir[1] - 1) {
1187  offset1 += (ind1 - 1) * coarseRate[1] + endRate[1];
1188  } else {
1189  offset1 += ind1 * coarseRate[1];
1190  }
1191  offset1 = offset1 * gFineNodesPerDir[0];
1192  for (LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1193  offset0 = startingIndices[0];
1194  if (myOffset[0] == 0) {
1195  offset0 += ind0 * coarseRate[0];
1196  } else {
1197  offset0 += (ind0 + 1) * coarseRate[0];
1198  }
1199  if (offset0 > gFineNodesPerDir[0] - 1) {
1200  offset0 += endRate[0] - coarseRate[0];
1201  }
1202 
1203  currentNode = ind2 * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + ind1 * lCoarseNodesPerDir[0] + ind0;
1204  gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1205  }
1206  }
1207  }
1208 
1209  // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1210  // and the corresponding dofs that will need to be added to colMapP.
1211  colGIDs.resize(BlkSize * (lNumCoarseNodes + lNumGhostNodes));
1212  coarseNodesGIDs.resize(lNumCoarseNodes);
1213  for (LO i = 0; i < numDimensions; ++i) {
1214  coarseNodes[i].resize(lNumCoarseNodes);
1215  }
1216  GO fineNodesPerCoarseSlab = coarseRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1217  GO fineNodesEndCoarseSlab = endRate[2] * gFineNodesPerDir[1] * gFineNodesPerDir[0];
1218  GO fineNodesPerCoarsePlane = coarseRate[1] * gFineNodesPerDir[0];
1219  GO fineNodesEndCoarsePlane = endRate[1] * gFineNodesPerDir[0];
1220  GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1] * gCoarseNodesPerDir[0];
1221  GO gCoarseNodeOnCoarseGridGID;
1222  LO gInd[3], lCol;
1223  Array<int> ghostPIDs(lNumGhostNodes);
1224  Array<LO> ghostLIDs(lNumGhostNodes);
1225  Array<LO> ghostPermut(lNumGhostNodes);
1226  for (LO k = 0; k < lNumGhostNodes; ++k) {
1227  ghostPermut[k] = k;
1228  }
1229  coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1230  sh_sort_permute(ghostPIDs.begin(), ghostPIDs.end(), ghostPermut.begin(), ghostPermut.end());
1231 
1232  { // scope for tmpInds, tmpVars and tmp.
1233  GO tmpInds[3], tmpVars[2];
1234  LO tmp;
1235  // Loop over the coarse nodes of the partition and add them to colGIDs
1236  // that will be used to construct the column and domain maps of P as well
1237  // as to construct the coarse coordinates map.
1238  // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1239  // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1240  LO col = 0;
1241  LO firstCoarseNodeInds[3], currentCoarseNode;
1242  for (LO dim = 0; dim < 3; ++dim) {
1243  if (myOffset[dim] == 0) {
1244  firstCoarseNodeInds[dim] = 0;
1245  } else {
1246  firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1247  }
1248  }
1250  for (LO dim = 0; dim < numDimensions; ++dim) {
1251  fineNodes[dim] = coordinates->getData(dim);
1252  }
1253  for (LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1254  for (LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1255  for (LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1256  col = k * lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0] + j * lCoarseNodesPerDir[0] + i;
1257 
1258  // Check for endRate
1259  currentCoarseNode = 0;
1260  if (firstCoarseNodeInds[0] + i * coarseRate[0] > lFineNodesPerDir[0] - 1) {
1261  currentCoarseNode += firstCoarseNodeInds[0] + (i - 1) * coarseRate[0] + endRate[0];
1262  } else {
1263  currentCoarseNode += firstCoarseNodeInds[0] + i * coarseRate[0];
1264  }
1265  if (firstCoarseNodeInds[1] + j * coarseRate[1] > lFineNodesPerDir[1] - 1) {
1266  currentCoarseNode += (firstCoarseNodeInds[1] + (j - 1) * coarseRate[1] + endRate[1]) * lFineNodesPerDir[0];
1267  } else {
1268  currentCoarseNode += (firstCoarseNodeInds[1] + j * coarseRate[1]) * lFineNodesPerDir[0];
1269  }
1270  if (firstCoarseNodeInds[2] + k * coarseRate[2] > lFineNodesPerDir[2] - 1) {
1271  currentCoarseNode += (firstCoarseNodeInds[2] + (k - 1) * coarseRate[2] + endRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1272  } else {
1273  currentCoarseNode += (firstCoarseNodeInds[2] + k * coarseRate[2]) * lFineNodesPerDir[1] * lFineNodesPerDir[0];
1274  }
1275  // Load coordinates
1276  for (LO dim = 0; dim < numDimensions; ++dim) {
1277  coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1278  }
1279 
1280  if ((endRate[2] != coarseRate[2]) && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1281  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1282  tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1283  } else {
1284  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1285  tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1286  }
1287  if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1288  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1289  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1290  } else {
1291  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1292  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1293  }
1294  if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1295  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1296  } else {
1297  tmpInds[0] = tmpVars[1] / coarseRate[0];
1298  }
1299  gInd[2] = col / (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1300  tmp = col % (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]);
1301  gInd[1] = tmp / lCoarseNodesPerDir[0];
1302  gInd[0] = tmp % lCoarseNodesPerDir[0];
1303  lCol = gInd[2] * (lCoarseNodesPerDir[1] * lCoarseNodesPerDir[0]) + gInd[1] * lCoarseNodesPerDir[0] + gInd[0];
1304  gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1305  coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1306  for (LO dof = 0; dof < BlkSize; ++dof) {
1307  colGIDs[BlkSize * lCol + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1308  }
1309  }
1310  }
1311  }
1312  // Now loop over the ghost nodes of the partition to add them to colGIDs
1313  // since they will need to be included in the column map of P
1314  for (col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1315  if ((endRate[2] != coarseRate[2]) && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2) * fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1316  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1317  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] - (tmpInds[2] - 1) * fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1318  } else {
1319  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1320  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1321  }
1322  if ((endRate[1] != coarseRate[1]) && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2) * fineNodesPerCoarsePlane + fineNodesEndCoarsePlane - 1)) {
1323  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1324  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1) * fineNodesPerCoarsePlane - fineNodesEndCoarsePlane;
1325  } else {
1326  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1327  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1328  }
1329  if (tmpVars[1] == gFineNodesPerDir[0] - 1) {
1330  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1331  } else {
1332  tmpInds[0] = tmpVars[1] / coarseRate[0];
1333  }
1334  gCoarseNodeOnCoarseGridGID = tmpInds[2] * coarseNodesPerCoarseLayer + tmpInds[1] * gCoarseNodesPerDir[0] + tmpInds[0];
1335  for (LO dof = 0; dof < BlkSize; ++dof) {
1336  colGIDs[BlkSize * col + dof] = BlkSize * gCoarseNodeOnCoarseGridGID + dof;
1337  }
1338  }
1339  } // End of scope for tmpInds, tmpVars and tmp
1340 
1341 } // GetGeometricData()
1342 
1343 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1345  ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1346  const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1347  const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1348  const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1349  const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1350  const Array<bool> ghostInterface, const Array<int> elementFlags,
1351  const std::string stencilType, const std::string /* blockStrategy */,
1352  const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1353  const Array<GO> /* colGIDs */,
1356  Array<LO>& lDofInd) const {
1357  // First extract data from Aghost and move it to the corresponding dense matrix
1358  // This step requires to compute the number of nodes (resp dofs) in the current
1359  // coarse element, then allocate storage for local dense matrices, finally loop
1360  // over the dofs and extract the corresponding row in Aghost before dispactching
1361  // its data to the dense matrices.
1362  // At the same time we want to compute a mapping from the element indexing to
1363  // group indexing. The groups are the following: corner, edge, face and interior
1364  // nodes. We uses these groups to operate on the dense matrices but need to
1365  // store the nodes in their original order after groupd operations are completed.
1366  LO countInterior = 0, countFace = 0, countEdge = 0, countCorner = 0;
1367  Array<LO> collapseDir(numNodesInElement * BlkSize);
1368  for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1369  for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1370  for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1371  // Check for corner node
1372  if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1373  for (LO dof = 0; dof < BlkSize; ++dof) {
1374  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1375  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countCorner + dof;
1376  }
1377  ++countCorner;
1378 
1379  // Check for edge node
1380  } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1381  for (LO dof = 0; dof < BlkSize; ++dof) {
1382  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1383  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countEdge + dof;
1384  if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) {
1385  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1386  } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1387  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1388  } else if ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1389  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1390  }
1391  }
1392  ++countEdge;
1393 
1394  // Check for face node
1395  } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1396  for (LO dof = 0; dof < BlkSize; ++dof) {
1397  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1398  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countFace + dof;
1399  if (ke == 0 || ke == elementNodesPerDir[2] - 1) {
1400  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 2;
1401  } else if (je == 0 || je == elementNodesPerDir[1] - 1) {
1402  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 1;
1403  } else if (ie == 0 || ie == elementNodesPerDir[0] - 1) {
1404  collapseDir[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 0;
1405  }
1406  }
1407  ++countFace;
1408 
1409  // Otherwise it is an interior node
1410  } else {
1411  for (LO dof = 0; dof < BlkSize; ++dof) {
1412  dofType[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = 3;
1413  lDofInd[BlkSize * (ke * elementNodesPerDir[1] * elementNodesPerDir[0] + je * elementNodesPerDir[0] + ie) + dof] = BlkSize * countInterior + dof;
1414  }
1415  ++countInterior;
1416  }
1417  }
1418  }
1419  }
1420 
1421  LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1422  numInteriorNodes = (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1423  numFaceNodes = 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[1] - 2) + 2 * (elementNodesPerDir[0] - 2) * (elementNodesPerDir[2] - 2) + 2 * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[2] - 2);
1424  numEdgeNodes = 4 * (elementNodesPerDir[0] - 2) + 4 * (elementNodesPerDir[1] - 2) + 4 * (elementNodesPerDir[2] - 2);
1425  // Diagonal blocks
1426  Teuchos::SerialDenseMatrix<LO, SC> Aii(BlkSize * numInteriorNodes, BlkSize * numInteriorNodes);
1427  Teuchos::SerialDenseMatrix<LO, SC> Aff(BlkSize * numFaceNodes, BlkSize * numFaceNodes);
1428  Teuchos::SerialDenseMatrix<LO, SC> Aee(BlkSize * numEdgeNodes, BlkSize * numEdgeNodes);
1429  // Upper triangular blocks
1430  Teuchos::SerialDenseMatrix<LO, SC> Aif(BlkSize * numInteriorNodes, BlkSize * numFaceNodes);
1431  Teuchos::SerialDenseMatrix<LO, SC> Aie(BlkSize * numInteriorNodes, BlkSize * numEdgeNodes);
1432  Teuchos::SerialDenseMatrix<LO, SC> Afe(BlkSize * numFaceNodes, BlkSize * numEdgeNodes);
1433  // Coarse nodes "right hand sides" and local prolongators
1434  Teuchos::SerialDenseMatrix<LO, SC> Aic(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1435  Teuchos::SerialDenseMatrix<LO, SC> Afc(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1436  Teuchos::SerialDenseMatrix<LO, SC> Aec(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1437  Pi.shape(BlkSize * numInteriorNodes, BlkSize * numCornerNodes);
1438  Pf.shape(BlkSize * numFaceNodes, BlkSize * numCornerNodes);
1439  Pe.shape(BlkSize * numEdgeNodes, BlkSize * numCornerNodes);
1440 
1441  ArrayView<const LO> rowIndices;
1442  ArrayView<const SC> rowValues;
1443  LO idof, iInd, jInd;
1444  int iType = 0, jType = 0;
1445  int orientation = -1;
1446  int collapseFlags[3] = {};
1447  Array<SC> stencil((std::pow(3, numDimensions)) * BlkSize);
1448  // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1449  // we might want to fuse the first three loops and do integer arithmetic
1450  // to have more optimization during compilation...
1451  for (LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1452  for (LO je = 0; je < elementNodesPerDir[1]; ++je) {
1453  for (LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1454  collapseFlags[0] = 0;
1455  collapseFlags[1] = 0;
1456  collapseFlags[2] = 0;
1457  if ((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1458  collapseFlags[0] += 1;
1459  }
1460  if ((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1461  collapseFlags[0] += 2;
1462  }
1463  if ((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1464  collapseFlags[1] += 1;
1465  }
1466  if ((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1467  collapseFlags[1] += 2;
1468  }
1469  if ((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1470  collapseFlags[2] += 1;
1471  }
1472  if ((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1473  collapseFlags[2] += 2;
1474  }
1475 
1476  // Based on (ie, je, ke) we detect the type of node we are dealing with
1477  GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1478  for (LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1479  // Compute the dof index for each dof at point (ie, je, ke)
1480  idof = ((elemInds[2] * coarseRate[2] + ke) * lFineNodesPerDir[1] * lFineNodesPerDir[0] + (elemInds[1] * coarseRate[1] + je) * lFineNodesPerDir[0] + elemInds[0] * coarseRate[0] + ie) * BlkSize + dof0;
1481  Aghost->getLocalRowView(idof, rowIndices, rowValues);
1482  FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1483  elementNodesPerDir, collapseFlags, stencilType, stencil);
1484  LO io, jo, ko;
1485  if (iType == 3) { // interior node, no stencil collapse needed
1486  for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1487  // Find (if, jf, kf) the indices associated with the interacting node
1488  ko = ke + (interactingNode / 9 - 1);
1489  {
1490  LO tmp = interactingNode % 9;
1491  jo = je + (tmp / 3 - 1);
1492  io = ie + (tmp % 3 - 1);
1493  int dummy;
1494  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1495  }
1496  if ((io > -1 && io < elementNodesPerDir[0]) &&
1497  (jo > -1 && jo < elementNodesPerDir[1]) &&
1498  (ko > -1 && ko < elementNodesPerDir[2])) {
1499  for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1500  if (jType == 3) {
1501  Aii(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1502  stencil[interactingNode * BlkSize + dof1];
1503  } else if (jType == 2) {
1504  Aif(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1505  stencil[interactingNode * BlkSize + dof1];
1506  } else if (jType == 1) {
1507  Aie(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1508  stencil[interactingNode * BlkSize + dof1];
1509  } else if (jType == 0) {
1510  Aic(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1511  -stencil[interactingNode * BlkSize + dof1];
1512  }
1513  }
1514  }
1515  }
1516  } else if (iType == 2) { // Face node, collapse stencil along face normal (*orientation)
1517  CollapseStencil(2, orientation, collapseFlags, stencil);
1518  for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1519  // Find (if, jf, kf) the indices associated with the interacting node
1520  ko = ke + (interactingNode / 9 - 1);
1521  {
1522  LO tmp = interactingNode % 9;
1523  jo = je + (tmp / 3 - 1);
1524  io = ie + (tmp % 3 - 1);
1525  int dummy;
1526  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1527  }
1528  if ((io > -1 && io < elementNodesPerDir[0]) &&
1529  (jo > -1 && jo < elementNodesPerDir[1]) &&
1530  (ko > -1 && ko < elementNodesPerDir[2])) {
1531  for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1532  if (jType == 2) {
1533  Aff(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1534  stencil[interactingNode * BlkSize + dof1];
1535  } else if (jType == 1) {
1536  Afe(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1537  stencil[interactingNode * BlkSize + dof1];
1538  } else if (jType == 0) {
1539  Afc(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1540  -stencil[interactingNode * BlkSize + dof1];
1541  }
1542  }
1543  }
1544  }
1545  } else if (iType == 1) { // Edge node, collapse stencil perpendicular to edge direction
1546  CollapseStencil(1, orientation, collapseFlags, stencil);
1547  for (LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1548  // Find (if, jf, kf) the indices associated with the interacting node
1549  ko = ke + (interactingNode / 9 - 1);
1550  {
1551  LO tmp = interactingNode % 9;
1552  jo = je + (tmp / 3 - 1);
1553  io = ie + (tmp % 3 - 1);
1554  int dummy;
1555  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1556  }
1557  if ((io > -1 && io < elementNodesPerDir[0]) &&
1558  (jo > -1 && jo < elementNodesPerDir[1]) &&
1559  (ko > -1 && ko < elementNodesPerDir[2])) {
1560  for (LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1561  if (jType == 1) {
1562  Aee(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1563  stencil[interactingNode * BlkSize + dof1];
1564  } else if (jType == 0) {
1565  Aec(iInd * BlkSize + dof0, jInd * BlkSize + dof1) =
1566  -stencil[interactingNode * BlkSize + dof1];
1567  }
1568  } // Check if interacting node is in element or not
1569  } // Pc is the identity so no need to treat iType == 0
1570  } // Loop over interacting nodes within a row
1571  } // Check for current node type
1572  } // Loop over degrees of freedom associated to a given node
1573  } // Loop over i
1574  } // Loop over j
1575  } // Loop over k
1576 
1577  // Compute the projection operators for edge and interior nodes
1578  //
1579  // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1580  // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1581  // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1582  // [P_c] = I_c
1583  //
1584  { // Compute P_e
1585  // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1586  // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1588  problem.setMatrix(Teuchos::rcp(&Aee, false));
1589  problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1590  problem.factorWithEquilibration(true);
1591  problem.solve();
1592  problem.unequilibrateLHS();
1593  }
1594 
1595  { // Compute P_f
1596  // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1597  // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1598  Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1599  // Step two: P_f = A_{ff}^{-1}*A_{fc}
1601  problem.setMatrix(Teuchos::rcp(&Aff, false));
1602  problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1603  problem.factorWithEquilibration(true);
1604  problem.solve();
1605  problem.unequilibrateLHS();
1606  }
1607 
1608  { // Compute P_i
1609  // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1610  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1611  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1612  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1613  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1614  // Step two: P_i = A_{ii}^{-1}*A_{ic}
1616  problem.setMatrix(Teuchos::rcp(&Aii, false));
1617  problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1618  problem.factorWithEquilibration(true);
1619  problem.solve();
1620  problem.unequilibrateLHS();
1621  }
1622 
1623 } // ComputeLocalEntries()
1624 
1625 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1627  CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1628  Array<SC>& stencil) const {
1629  if (type == 2) { // Face stencil collapse
1630  // Let's hope things vectorize well inside this if statement
1631  if (orientation == 0) {
1632  for (LO i = 0; i < 9; ++i) {
1633  stencil[3 * i + 1] = stencil[3 * i] + stencil[3 * i + 1] + stencil[3 * i + 2];
1634  stencil[3 * i] = 0;
1635  stencil[3 * i + 2] = 0;
1636  }
1637  } else if (orientation == 1) {
1638  for (LO i = 0; i < 3; ++i) {
1639  stencil[9 * i + 3] = stencil[9 * i + 0] + stencil[9 * i + 3] + stencil[9 * i + 6];
1640  stencil[9 * i + 0] = 0;
1641  stencil[9 * i + 6] = 0;
1642  stencil[9 * i + 4] = stencil[9 * i + 1] + stencil[9 * i + 4] + stencil[9 * i + 7];
1643  stencil[9 * i + 1] = 0;
1644  stencil[9 * i + 7] = 0;
1645  stencil[9 * i + 5] = stencil[9 * i + 2] + stencil[9 * i + 5] + stencil[9 * i + 8];
1646  stencil[9 * i + 2] = 0;
1647  stencil[9 * i + 8] = 0;
1648  }
1649  } else if (orientation == 2) {
1650  for (LO i = 0; i < 9; ++i) {
1651  stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1652  stencil[i] = 0;
1653  stencil[i + 18] = 0;
1654  }
1655  }
1656  } else if (type == 1) { // Edge stencil collapse
1657  SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1658  if (orientation == 0) {
1659  for (LO i = 0; i < 9; ++i) {
1660  tmp1 += stencil[0 + 3 * i];
1661  stencil[0 + 3 * i] = 0;
1662  tmp2 += stencil[1 + 3 * i];
1663  stencil[1 + 3 * i] = 0;
1664  tmp3 += stencil[2 + 3 * i];
1665  stencil[2 + 3 * i] = 0;
1666  }
1667  stencil[12] = tmp1;
1668  stencil[13] = tmp2;
1669  stencil[14] = tmp3;
1670  } else if (orientation == 1) {
1671  for (LO i = 0; i < 3; ++i) {
1672  tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1673  stencil[0 + i] = 0;
1674  stencil[9 + i] = 0;
1675  stencil[18 + i] = 0;
1676  tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1677  stencil[3 + i] = 0;
1678  stencil[12 + i] = 0;
1679  stencil[21 + i] = 0;
1680  tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1681  stencil[6 + i] = 0;
1682  stencil[15 + i] = 0;
1683  stencil[24 + i] = 0;
1684  }
1685  stencil[10] = tmp1;
1686  stencil[13] = tmp2;
1687  stencil[16] = tmp3;
1688  } else if (orientation == 2) {
1689  for (LO i = 0; i < 9; ++i) {
1690  tmp1 += stencil[i];
1691  stencil[i] = 0;
1692  tmp2 += stencil[i + 9];
1693  stencil[i + 9] = 0;
1694  tmp3 += stencil[i + 18];
1695  stencil[i + 18] = 0;
1696  }
1697  stencil[4] = tmp1;
1698  stencil[13] = tmp2;
1699  stencil[22] = tmp3;
1700  }
1701  }
1702 } // CollapseStencil
1703 
1704 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1706  FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1707  const LO /* ke */, const ArrayView<const SC> rowValues, const Array<LO> /* elementNodesPerDir */,
1708  const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1709  const {
1710  if (stencilType == "reduced") {
1711  Array<int> fullStencilInds(7);
1712  fullStencilInds[0] = 4;
1713  fullStencilInds[1] = 10;
1714  fullStencilInds[2] = 12;
1715  fullStencilInds[3] = 13;
1716  fullStencilInds[4] = 14;
1717  fullStencilInds[5] = 16;
1718  fullStencilInds[6] = 22;
1719 
1720  // Create a mask array to automate mesh boundary processing
1721  Array<int> mask(7);
1722  int stencilSize = rowValues.size();
1723  if (collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1724  if (stencilSize == 1) {
1725  // The assumption is made that if only one non-zero exists in the row
1726  // then this represent a Dirichlet BC being enforced.
1727  // One might want to zero out the corresponding row in the prolongator.
1728  mask[0] = 1;
1729  mask[1] = 1;
1730  mask[2] = 1;
1731  mask[4] = 1;
1732  mask[5] = 1;
1733  mask[6] = 1;
1734  } else {
1735  // Here we are looking at Neumann like BC where only a flux is prescribed
1736  // and less non-zeros will be present in the corresponding row.
1737  if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1738  mask[0] = 1;
1739  }
1740  if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1741  mask[6] = 1;
1742  }
1743  if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1744  mask[1] = 1;
1745  }
1746  if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1747  mask[5] = 1;
1748  }
1749  if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1750  mask[2] = 1;
1751  }
1752  if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1753  mask[4] = 1;
1754  }
1755  }
1756  }
1757 
1758  int offset = 0;
1759  for (int ind = 0; ind < 7; ++ind) {
1760  if (mask[ind] == 1) {
1761  for (int dof = 0; dof < BlkSize; ++dof) {
1762  stencil[BlkSize * fullStencilInds[ind] + dof] = 0.0;
1763  }
1764  ++offset; // The offset is used to stay within rowValues bounds
1765  } else {
1766  for (int dof = 0; dof < BlkSize; ++dof) {
1767  stencil[BlkSize * fullStencilInds[ind] + dof] = rowValues[BlkSize * (ind - offset) + dof];
1768  }
1769  }
1770  }
1771  } else if (stencilType == "full") {
1772  // Create a mask array to automate mesh boundary processing
1773  Array<int> mask(27);
1774  if (collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1775  for (int ind = 0; ind < 9; ++ind) {
1776  mask[ind] = 1;
1777  }
1778  }
1779  if (collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1780  for (int ind = 0; ind < 9; ++ind) {
1781  mask[18 + ind] = 1;
1782  }
1783  }
1784  if (collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1785  for (int ind1 = 0; ind1 < 3; ++ind1) {
1786  for (int ind2 = 0; ind2 < 3; ++ind2) {
1787  mask[ind1 * 9 + ind2] = 1;
1788  }
1789  }
1790  }
1791  if (collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1792  for (int ind1 = 0; ind1 < 3; ++ind1) {
1793  for (int ind2 = 0; ind2 < 3; ++ind2) {
1794  mask[ind1 * 9 + ind2 + 6] = 1;
1795  }
1796  }
1797  }
1798  if (collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1799  for (int ind = 0; ind < 9; ++ind) {
1800  mask[3 * ind] = 1;
1801  }
1802  }
1803  if (collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1804  for (int ind = 0; ind < 9; ++ind) {
1805  mask[3 * ind + 2] = 1;
1806  }
1807  }
1808 
1809  // Now the stencil is extracted and formated
1810  int offset = 0;
1811  for (LO ind = 0; ind < 27; ++ind) {
1812  if (mask[ind] == 0) {
1813  for (int dof = 0; dof < BlkSize; ++dof) {
1814  stencil[BlkSize * ind + dof] = 0.0;
1815  }
1816  ++offset; // The offset is used to stay within rowValues bounds
1817  } else {
1818  for (int dof = 0; dof < BlkSize; ++dof) {
1819  stencil[BlkSize * ind + dof] = rowValues[BlkSize * (ind - offset) + dof];
1820  }
1821  }
1822  }
1823  } // stencilTpye
1824 } // FormatStencil()
1825 
1826 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1828  const LO ie, const LO je, const LO ke,
1829  const Array<LO> elementNodesPerDir,
1830  int* type, LO& ind, int* orientation) const {
1831  // Initialize flags with -1 to be able to catch issues easily
1832  *type = -1, ind = 0, *orientation = -1;
1833  if ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1834  // Corner node
1835  *type = 0;
1836  ind = 4 * ke / (elementNodesPerDir[2] - 1) + 2 * je / (elementNodesPerDir[1] - 1) + ie / (elementNodesPerDir[0] - 1);
1837  } else if (((ke == 0 || ke == elementNodesPerDir[2] - 1) && (je == 0 || je == elementNodesPerDir[1] - 1)) || ((ke == 0 || ke == elementNodesPerDir[2] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1)) || ((je == 0 || je == elementNodesPerDir[1] - 1) && (ie == 0 || ie == elementNodesPerDir[0] - 1))) {
1838  // Edge node
1839  *type = 1;
1840  if (ke > 0) {
1841  ind += 2 * (elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);
1842  }
1843  if (ke == elementNodesPerDir[2] - 1) {
1844  ind += 4 * (elementNodesPerDir[2] - 2);
1845  }
1846  if ((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1847  if (je == 0) { // jlo ie edge
1848  *orientation = 0;
1849  ind += ie - 1;
1850  } else if (je == elementNodesPerDir[1] - 1) { // jhi ie edge
1851  *orientation = 0;
1852  ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1853  } else { // ilo or ihi je edge
1854  *orientation = 1;
1855  ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1856  }
1857  } else { // ke edges
1858  *orientation = 2;
1859  ind += 4 * (ke - 1) + 2 * (je / (elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1860  }
1861  } else if ((ke == 0 || ke == elementNodesPerDir[2] - 1) || (je == 0 || je == elementNodesPerDir[1] - 1) || (ie == 0 || ie == elementNodesPerDir[0] - 1)) {
1862  // Face node
1863  *type = 2;
1864  if (ke == 0) { // current node is on "bottom" face
1865  *orientation = 2;
1866  ind = (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1867  } else {
1868  // add nodes from "bottom" face
1869  ind += (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2);
1870  // add nodes from side faces
1871  ind += 2 * (ke - 1) * (elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1872  if (ke == elementNodesPerDir[2] - 1) { // current node is on "top" face
1873  *orientation = 2;
1874  ind += (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1875  } else { // current node is on a side face
1876  if (je == 0) {
1877  *orientation = 1;
1878  ind += ie - 1;
1879  } else if (je == elementNodesPerDir[1] - 1) {
1880  *orientation = 1;
1881  ind += 2 * (elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1882  } else {
1883  *orientation = 0;
1884  ind += elementNodesPerDir[0] - 2 + 2 * (je - 1) + ie / (elementNodesPerDir[0] - 1);
1885  }
1886  }
1887  }
1888  } else {
1889  // Interior node
1890  *type = 3;
1891  ind = (ke - 1) * (elementNodesPerDir[1] - 2) * (elementNodesPerDir[0] - 2) + (je - 1) * (elementNodesPerDir[0] - 2) + ie - 1;
1892  }
1893 } // GetNodeInfo()
1894 
1895 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1897  const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1898  const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1899  const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1900  const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const {
1901  typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1902  DT n = last1 - first1;
1903  DT m = n / 2;
1905  while (m > z) {
1906  DT max = n - m;
1907  for (DT j = 0; j < max; j++) {
1908  for (DT k = j; k >= 0; k -= m) {
1909  if (first1[first2[k + m]] >= first1[first2[k]])
1910  break;
1911  std::swap(first2[k + m], first2[k]);
1912  }
1913  }
1914  m = m / 2;
1915  }
1916 }
1917 
1918 } // namespace MueLu
1919 
1920 #define MUELU_BLACKBOXPFACTORY_SHORT
1921 #endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
ArrayView< T > view(size_type offset, size_type size)
int multiply(ETransp transa, ETransp transb, ScalarType alpha, const SerialDenseMatrix< OrdinalType, ScalarType > &A, const SerialDenseMatrix< OrdinalType, ScalarType > &B, ScalarType beta)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
size_type size() const
LocalOrdinal LO
One-liner description of what is happening.
static const NoFactory * get()
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
static Teuchos::RCP< Map< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalGlobal lg=Xpetra::GloballyDistributed)
void factorWithEquilibration(bool flag)
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void resize(size_type new_size, const value_type &x=value_type())
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
iterator end()
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
static void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< LocalOrdinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals, const UnderlyingLib lib)
int setVectors(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &X, const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &B)
size_type size() const
Scalar SC
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Node NO
static RCP< Xpetra::StridedMap< LocalOrdinal, GlobalOrdinal, Node > > Build(UnderlyingLib lib, global_size_t numGlobalElements, GlobalOrdinal indexBase, std::vector< size_t > &stridingInfo, const Teuchos::RCP< const Teuchos::Comm< int >> &comm, LocalOrdinal stridedBlockId=-1, GlobalOrdinal offset=0, LocalGlobal lg=Xpetra::GloballyDistributed)
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
static RCP< Matrix > Build(const RCP< const Map > &rowMap, size_t maxNumEntriesPerRow, Xpetra::ProfileType pftype=Xpetra::DynamicProfile)
Exception throws to report errors in the internal logical of the program.
int shape(OrdinalType numRows, OrdinalType numCols)
std::vector< T >::iterator iterator
iterator begin()
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
static RCP< Import< LocalOrdinal, GlobalOrdinal, Node > > Build(const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &source, const RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &target, const Teuchos::RCP< Teuchos::ParameterList > &plist=Teuchos::null)
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need&#39;s value has been saved.
int setMatrix(const RCP< SerialDenseMatrix< OrdinalType, ScalarType > > &A)
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const