MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
12 
13 #include "Kokkos_UnorderedMap.hpp"
14 
16 
17 #include <Xpetra_Matrix.hpp>
19 
20 #include "MueLu_Types.hpp"
21 
22 namespace MueLu {
23 
24 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
27  RCP<ParameterList> validParamList = rcp(new ParameterList());
28 
29  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
30  "Generating factory of the matrix A");
31  validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
32  "Number of spatial dimensions in the problem.");
33  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
34  "Number of local nodes per spatial dimension on the fine grid.");
35  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
36  "Fine level nullspace used to construct the coarse level nullspace.");
37  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
38  "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
39  validParamList->set<bool>("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
40 
41  return validParamList;
42 } // GetValidParameterList()
43 
44 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
46  DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
47  Input(fineLevel, "A");
48  Input(fineLevel, "numDimensions");
49  Input(fineLevel, "lNodesPerDim");
50  Input(fineLevel, "Nullspace");
51  Input(fineLevel, "Coordinates");
52 
53 } // DeclareInput()
54 
55 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
57  Build(Level& fineLevel, Level& coarseLevel) const {
58  // Set debug outputs based on environment variable
60  if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
61  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
62  out->setShowAllFrontMatter(false).setShowProcRank(true);
63  } else {
64  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
65  }
66 
67  *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
68 
69  // First get the inputs from the fineLevel
70  const int numDimensions = Get<int>(fineLevel, "numDimensions");
71  Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
72  {
73  Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
74  for (int dim = 0; dim < numDimensions; ++dim) {
75  lFineNodesPerDim[dim] = lNodesPerDim[dim];
76  }
77  }
78  *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
79  << std::endl;
80 
81  // Let us check that the inputs verify our assumptions
82  if (numDimensions < 1 || numDimensions > 3) {
83  throw std::runtime_error("numDimensions must be 1, 2 or 3!");
84  }
85  for (int dim = 0; dim < numDimensions; ++dim) {
86  if (lFineNodesPerDim[dim] % 3 != 1) {
87  throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
88  }
89  }
90  Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
91 
92  const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
93 
94  RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
95  fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
96  if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
97  throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
98  }
99 
100  // Let us create R and pass it down to the
101  // appropriate specialization and see what we
102  // get back!
103  RCP<Matrix> R;
104 
105  if (numDimensions == 1) {
106  throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
107  } else if (numDimensions == 2) {
108  throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
109  } else if (numDimensions == 3) {
110  Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
111  R, coarseCoordinates, lCoarseNodesPerDim);
112  }
113 
114  const Teuchos::ParameterList& pL = GetParameterList();
115 
116  // Reuse pattern if available (multiple solve)
117  RCP<ParameterList> Tparams;
118  if (pL.isSublist("matrixmatrix: kernel params"))
119  Tparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
120  else
121  Tparams = rcp(new ParameterList);
122 
123  // R->describe(*out, Teuchos::VERB_EXTREME);
124  *out << "Compute P=R^t" << std::endl;
125  // By default, we don't need global constants for transpose
126  Tparams->set("compute global constants: temporaries", Tparams->get("compute global constants: temporaries", false));
127  Tparams->set("compute global constants", Tparams->get("compute global constants", false));
128  std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
129  RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
130 
131  *out << "Compute coarse nullspace" << std::endl;
132  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
133  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
134  fineNullspace->getNumVectors());
135  R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
137 
138  *out << "Set data on coarse level" << std::endl;
139  Set(coarseLevel, "numDimensions", numDimensions);
140  Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
141  Set(coarseLevel, "Nullspace", coarseNullspace);
142  Set(coarseLevel, "Coordinates", coarseCoordinates);
143  if (pL.get<bool>("keep coarse coords")) {
144  coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
145  }
146 
147  R->SetFixedBlockSize(A->GetFixedBlockSize());
148  P->SetFixedBlockSize(A->GetFixedBlockSize());
149 
150  Set(coarseLevel, "R", R);
151  Set(coarseLevel, "P", P);
152 
153 } // Build()
154 
155 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
157  Build3D(const int numDimensions,
158  Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
159  const RCP<Matrix>& A,
161  RCP<Matrix>& R,
163  Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
164  using local_matrix_type = typename CrsMatrix::local_matrix_type;
165  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
166  using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
167  using entries_type = typename local_matrix_type::index_type::non_const_type;
168  using values_type = typename local_matrix_type::values_type::non_const_type;
169  using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
170 
171  // Set debug outputs based on environment variable
173  if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
174  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
175  out->setShowAllFrontMatter(false).setShowProcRank(true);
176  } else {
177  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
178  }
179 
180  // Now compute number of coarse grid points
181  for (int dim = 0; dim < numDimensions; ++dim) {
182  lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
183  }
184  *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
185 
186  // Grab the block size here and multiply all existing offsets by it
187  const LO blkSize = A->GetFixedBlockSize();
188  *out << "blkSize " << blkSize << std::endl;
189 
190  // Based on lCoarseNodesPerDim and lFineNodesPerDim
191  // we can compute numRows, numCols and NNZ for R
192  const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
193  const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
194 
195  // Create the coarse coordinates multivector
196  // so we can fill it on the fly while computing
197  // the restriction operator
198  RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
200  numRows,
201  A->getRowMap()->getIndexBase(),
202  A->getRowMap()->getComm());
203 
204  RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
206  lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
207  A->getRowMap()->getIndexBase(),
208  A->getRowMap()->getComm());
209 
210  coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
211  numDimensions);
212 
213  // Get device views of coordinates
214  auto fineCoordsView = fineCoordinates->getDeviceLocalView(Xpetra::Access::ReadOnly);
215  auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
216 
217  Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
218  Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
219  for (int dim = 0; dim < numDimensions; ++dim) {
220  fineCoordData[dim] = fineCoordinates->getData(dim);
221  coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
222  }
223 
224  // Let us set some parameter that will be useful
225  // while constructing R
226 
227  // Length of interpolation stencils based on geometry
228  const LO cornerStencilLength = 27;
229  const LO edgeStencilLength = 45;
230  const LO faceStencilLength = 75;
231  const LO interiorStencilLength = 125;
232 
233  // Number of corner, edge, face and interior nodes
234  const LO numCorners = 8;
235  const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
236  const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
237  const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
238 
239  const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
240 
241  // Having the number of rows and columns we can genrate
242  // the appropriate maps for our operator.
243 
244  *out << "R statistics:" << std::endl
245  << " -numRows= " << numRows << std::endl
246  << " -numCols= " << numCols << std::endl
247  << " -nnz= " << nnz << std::endl;
248 
249  row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
250  typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
251 
252  entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
253  typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
254 
255  values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
256  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
257 
258  // Compute the basic interpolation
259  // coefficients for 1D rate of 3
260  // coarsening.
261  Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
262  row_map_h(0) = 0;
263 
264  // Define some offsets that
265  // will be needed often later on
266  const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
267  const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
268  const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
269 
270  const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
271  const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
272 
273  // Let us take care of the corners
274  // first since we always have
275  // corners to deal with!
276  {
277  // Corner 1
278  LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
279  for (LO l = 0; l < blkSize; ++l) {
280  for (LO k = 0; k < 3; ++k) {
281  for (LO j = 0; j < 3; ++j) {
282  for (LO i = 0; i < 3; ++i) {
283  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
284  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
285  }
286  }
287  }
288  }
289  for (LO l = 0; l < blkSize; ++l) {
290  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
291  }
292  for (int dim = 0; dim < numDimensions; ++dim) {
293  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
294  }
295 
296  // Corner 5
297  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
298  rowIdx = coordRowIdx * blkSize;
299  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
300  columnOffset = coordColumnOffset * blkSize;
301  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
302  for (LO l = 0; l < blkSize; ++l) {
303  for (LO k = 0; k < 3; ++k) {
304  for (LO j = 0; j < 3; ++j) {
305  for (LO i = 0; i < 3; ++i) {
306  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
307  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
308  }
309  }
310  }
311  }
312  for (LO l = 0; l < blkSize; ++l) {
313  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
314  }
315  for (int dim = 0; dim < numDimensions; ++dim) {
316  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
317  }
318 
319  // Corner 2
320  coordRowIdx = (lCoarseNodesPerDim[0] - 1);
321  rowIdx = coordRowIdx * blkSize;
322  coordColumnOffset = (lFineNodesPerDim[0] - 1);
323  columnOffset = coordColumnOffset * blkSize;
324  entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
325  for (LO l = 0; l < blkSize; ++l) {
326  for (LO k = 0; k < 3; ++k) {
327  for (LO j = 0; j < 3; ++j) {
328  for (LO i = 0; i < 3; ++i) {
329  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
330  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
331  }
332  }
333  }
334  }
335  for (LO l = 0; l < blkSize; ++l) {
336  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
337  }
338  for (int dim = 0; dim < numDimensions; ++dim) {
339  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
340  }
341 
342  // Corner 6
343  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
344  rowIdx = coordRowIdx * blkSize;
345  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
346  columnOffset = coordColumnOffset * blkSize;
347  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
348  for (LO l = 0; l < blkSize; ++l) {
349  for (LO k = 0; k < 3; ++k) {
350  for (LO j = 0; j < 3; ++j) {
351  for (LO i = 0; i < 3; ++i) {
352  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
353  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
354  }
355  }
356  }
357  }
358  for (LO l = 0; l < blkSize; ++l) {
359  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
360  }
361  for (int dim = 0; dim < numDimensions; ++dim) {
362  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
363  }
364 
365  // Corner 3
366  coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
367  rowIdx = coordRowIdx * blkSize;
368  coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
369  columnOffset = coordColumnOffset * blkSize;
370  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
371  for (LO l = 0; l < blkSize; ++l) {
372  for (LO k = 0; k < 3; ++k) {
373  for (LO j = 0; j < 3; ++j) {
374  for (LO i = 0; i < 3; ++i) {
375  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
376  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
377  }
378  }
379  }
380  }
381  for (LO l = 0; l < blkSize; ++l) {
382  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
383  }
384  for (int dim = 0; dim < numDimensions; ++dim) {
385  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
386  }
387 
388  // Corner 7
389  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
390  rowIdx = coordRowIdx * blkSize;
391  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
392  columnOffset = coordColumnOffset * blkSize;
393  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
394  for (LO l = 0; l < blkSize; ++l) {
395  for (LO k = 0; k < 3; ++k) {
396  for (LO j = 0; j < 3; ++j) {
397  for (LO i = 0; i < 3; ++i) {
398  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
399  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
400  }
401  }
402  }
403  }
404  for (LO l = 0; l < blkSize; ++l) {
405  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
406  }
407  for (int dim = 0; dim < numDimensions; ++dim) {
408  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
409  }
410 
411  // Corner 4
412  coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
413  rowIdx = coordRowIdx * blkSize;
414  coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
415  columnOffset = coordColumnOffset * blkSize;
416  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
417  cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
418  blkSize;
419  for (LO l = 0; l < blkSize; ++l) {
420  for (LO k = 0; k < 3; ++k) {
421  for (LO j = 0; j < 3; ++j) {
422  for (LO i = 0; i < 3; ++i) {
423  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
424  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
425  }
426  }
427  }
428  }
429  for (LO l = 0; l < blkSize; ++l) {
430  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
431  }
432  for (int dim = 0; dim < numDimensions; ++dim) {
433  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
434  }
435 
436  // Corner 8
437  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
438  rowIdx = coordRowIdx * blkSize;
439  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
440  columnOffset = coordColumnOffset * blkSize;
441  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
442  for (LO l = 0; l < blkSize; ++l) {
443  for (LO k = 0; k < 3; ++k) {
444  for (LO j = 0; j < 3; ++j) {
445  for (LO i = 0; i < 3; ++i) {
446  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
447  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
448  }
449  }
450  }
451  }
452  for (LO l = 0; l < blkSize; ++l) {
453  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
454  }
455  for (int dim = 0; dim < numDimensions; ++dim) {
456  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
457  }
458  } // Corners are done!
459 
460  // Edges along 0 direction
461  if (lCoarseNodesPerDim[0] - 2 > 0) {
462  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
463  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
464  // Edge 0
465  coordRowIdx = (edgeIdx + 1);
466  rowIdx = coordRowIdx * blkSize;
467  coordColumnOffset = (edgeIdx + 1) * 3;
468  columnOffset = coordColumnOffset * blkSize;
469  entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
470  for (LO l = 0; l < blkSize; ++l) {
471  for (LO k = 0; k < 3; ++k) {
472  for (LO j = 0; j < 3; ++j) {
473  for (LO i = 0; i < 5; ++i) {
474  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
475  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
476  }
477  }
478  }
479  }
480  for (LO l = 0; l < blkSize; ++l) {
481  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
482  }
483  for (int dim = 0; dim < numDimensions; ++dim) {
484  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
485  }
486 
487  // Edge 1
488  coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
489  rowIdx = coordRowIdx * blkSize;
490  coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
491  columnOffset = coordColumnOffset * blkSize;
492  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
493  for (LO l = 0; l < blkSize; ++l) {
494  for (LO k = 0; k < 3; ++k) {
495  for (LO j = 0; j < 3; ++j) {
496  for (LO i = 0; i < 5; ++i) {
497  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
498  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
499  }
500  }
501  }
502  }
503  for (LO l = 0; l < blkSize; ++l) {
504  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
505  }
506  for (int dim = 0; dim < numDimensions; ++dim) {
507  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
508  }
509 
510  // Edge 2
511  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
512  rowIdx = coordRowIdx * blkSize;
513  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
514  columnOffset = coordColumnOffset * blkSize;
515  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
516  for (LO l = 0; l < blkSize; ++l) {
517  for (LO k = 0; k < 3; ++k) {
518  for (LO j = 0; j < 3; ++j) {
519  for (LO i = 0; i < 5; ++i) {
520  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
521  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
522  }
523  }
524  }
525  }
526  for (LO l = 0; l < blkSize; ++l) {
527  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
528  }
529  for (int dim = 0; dim < numDimensions; ++dim) {
530  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
531  }
532 
533  // Edge 3
534  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
535  rowIdx = coordRowIdx * blkSize;
536  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
537  columnOffset = coordColumnOffset * blkSize;
538  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
539  for (LO l = 0; l < blkSize; ++l) {
540  for (LO k = 0; k < 3; ++k) {
541  for (LO j = 0; j < 3; ++j) {
542  for (LO i = 0; i < 5; ++i) {
543  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
544  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
545  }
546  }
547  }
548  }
549  for (LO l = 0; l < blkSize; ++l) {
550  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
551  }
552  for (int dim = 0; dim < numDimensions; ++dim) {
553  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
554  }
555  }
556  }
557 
558  // Edges along 1 direction
559  if (lCoarseNodesPerDim[1] - 2 > 0) {
560  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
561  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
562  // Edge 0
563  coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
564  rowIdx = coordRowIdx * blkSize;
565  coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
566  columnOffset = coordColumnOffset * blkSize;
567  entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
568  for (LO l = 0; l < blkSize; ++l) {
569  for (LO k = 0; k < 3; ++k) {
570  for (LO j = 0; j < 5; ++j) {
571  for (LO i = 0; i < 3; ++i) {
572  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
573  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
574  }
575  }
576  }
577  }
578  for (LO l = 0; l < blkSize; ++l) {
579  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
580  }
581  for (int dim = 0; dim < numDimensions; ++dim) {
582  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
583  }
584 
585  // Edge 1
586  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
587  rowIdx = coordRowIdx * blkSize;
588  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
589  columnOffset = coordColumnOffset * blkSize;
590  entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
591  for (LO l = 0; l < blkSize; ++l) {
592  for (LO k = 0; k < 3; ++k) {
593  for (LO j = 0; j < 5; ++j) {
594  for (LO i = 0; i < 3; ++i) {
595  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
596  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
597  }
598  }
599  }
600  }
601  for (LO l = 0; l < blkSize; ++l) {
602  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
603  }
604  for (int dim = 0; dim < numDimensions; ++dim) {
605  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
606  }
607 
608  // Edge 2
609  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
610  rowIdx = coordRowIdx * blkSize;
611  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
612  columnOffset = coordColumnOffset * blkSize;
613  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
614  for (LO l = 0; l < blkSize; ++l) {
615  for (LO k = 0; k < 3; ++k) {
616  for (LO j = 0; j < 5; ++j) {
617  for (LO i = 0; i < 3; ++i) {
618  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
619  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
620  }
621  }
622  }
623  }
624  for (LO l = 0; l < blkSize; ++l) {
625  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
626  }
627  for (int dim = 0; dim < numDimensions; ++dim) {
628  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
629  }
630 
631  // Edge 3
632  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
633  rowIdx = coordRowIdx * blkSize;
634  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
635  columnOffset = coordColumnOffset * blkSize;
636  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
637  for (LO l = 0; l < blkSize; ++l) {
638  for (LO k = 0; k < 3; ++k) {
639  for (LO j = 0; j < 5; ++j) {
640  for (LO i = 0; i < 3; ++i) {
641  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
642  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
643  }
644  }
645  }
646  }
647  for (LO l = 0; l < blkSize; ++l) {
648  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
649  }
650  for (int dim = 0; dim < numDimensions; ++dim) {
651  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
652  }
653  }
654  }
655 
656  // Edges along 2 direction
657  if (lCoarseNodesPerDim[2] - 2 > 0) {
658  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
659  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
660  // Edge 0
661  coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
662  rowIdx = coordRowIdx * blkSize;
663  coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
664  columnOffset = coordColumnOffset * blkSize;
665  entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
666  for (LO l = 0; l < blkSize; ++l) {
667  for (LO k = 0; k < 5; ++k) {
668  for (LO j = 0; j < 3; ++j) {
669  for (LO i = 0; i < 3; ++i) {
670  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
671  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
672  }
673  }
674  }
675  }
676  for (LO l = 0; l < blkSize; ++l) {
677  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
678  }
679  for (int dim = 0; dim < numDimensions; ++dim) {
680  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
681  }
682 
683  // Edge 1
684  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
685  rowIdx = coordRowIdx * blkSize;
686  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
687  columnOffset = coordColumnOffset * blkSize;
688  entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
689  for (LO l = 0; l < blkSize; ++l) {
690  for (LO k = 0; k < 5; ++k) {
691  for (LO j = 0; j < 3; ++j) {
692  for (LO i = 0; i < 3; ++i) {
693  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
694  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
695  }
696  }
697  }
698  }
699  for (LO l = 0; l < blkSize; ++l) {
700  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
701  }
702  for (int dim = 0; dim < numDimensions; ++dim) {
703  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
704  }
705 
706  // Edge 2
707  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
708  rowIdx = coordRowIdx * blkSize;
709  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
710  columnOffset = coordColumnOffset * blkSize;
711  entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
712  for (LO l = 0; l < blkSize; ++l) {
713  for (LO k = 0; k < 5; ++k) {
714  for (LO j = 0; j < 3; ++j) {
715  for (LO i = 0; i < 3; ++i) {
716  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
717  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
718  }
719  }
720  }
721  }
722  for (LO l = 0; l < blkSize; ++l) {
723  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
724  }
725  for (int dim = 0; dim < numDimensions; ++dim) {
726  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
727  }
728 
729  // Edge 3
730  coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
731  rowIdx = coordRowIdx * blkSize;
732  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
733  columnOffset = coordColumnOffset * blkSize;
734  entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
735  for (LO l = 0; l < blkSize; ++l) {
736  for (LO k = 0; k < 5; ++k) {
737  for (LO j = 0; j < 3; ++j) {
738  for (LO i = 0; i < 3; ++i) {
739  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
740  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
741  }
742  }
743  }
744  }
745  for (LO l = 0; l < blkSize; ++l) {
746  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
747  }
748  for (int dim = 0; dim < numDimensions; ++dim) {
749  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
750  }
751  }
752  }
753 
754  // TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
755  Kokkos::deep_copy(row_map, row_map_h);
756  Kokkos::deep_copy(entries, entries_h);
757  Kokkos::deep_copy(values, values_h);
758 
759  // Create views on device for nodes per dim
760  LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
761  LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
762 
763  typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
764  typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
765 
766  for (int dim = 0; dim < numDimensions; ++dim) {
767  lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
768  lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
769  }
770 
771  Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
772  Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
773 
774  // Faces in 0-1 plane
775  if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
776  Kokkos::parallel_for(
777  "Faces in 0-1 plane region R",
778  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
779  KOKKOS_LAMBDA(const LO faceIdx) {
780  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
781  LO gridIdx[3] = {0, 0, 0};
782  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
783  // Last step in the loop
784  // update the grid indices
785  // for next grid point
786  for (LO i = 0; i < faceIdx; i++) {
787  ++gridIdx[0];
788  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
789  gridIdx[0] = 0;
790  ++gridIdx[1];
791  }
792  }
793 
794  // Face 0
795  coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
796  rowIdx = coordRowIdx * blkSize;
797  coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
798  columnOffset = coordColumnOffset * blkSize;
799  entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
800  for (LO l = 0; l < blkSize; ++l) {
801  for (LO k = 0; k < 3; ++k) {
802  for (LO j = 0; j < 5; ++j) {
803  for (LO i = 0; i < 5; ++i) {
804  entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
805  values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
806  }
807  }
808  }
809  }
810  for (LO l = 0; l < blkSize; ++l) {
811  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
812  }
813  for (int dim = 0; dim < numDimensions; ++dim) {
814  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
815  }
816 
817  // Face 1
818  coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
819  rowIdx = coordRowIdx * blkSize;
820  coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
821  columnOffset = coordColumnOffset * blkSize;
822  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
823  for (LO l = 0; l < blkSize; ++l) {
824  for (LO k = 0; k < 3; ++k) {
825  for (LO j = 0; j < 5; ++j) {
826  for (LO i = 0; i < 5; ++i) {
827  entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
828  values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
829  }
830  }
831  }
832  }
833  for (LO l = 0; l < blkSize; ++l) {
834  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
835  }
836  for (int dim = 0; dim < numDimensions; ++dim) {
837  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
838  }
839  }); // parallel_for faces in 0-1 plane
840  }
841 
842  // Faces in 0-2 plane
843  if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
844  Kokkos::parallel_for(
845  "Faces in 0-2 plane region R",
846  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
847  KOKKOS_LAMBDA(const LO faceIdx) {
848  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
849  LO gridIdx[3] = {0, 0, 0};
850  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
851  // Last step in the loop
852  // update the grid indices
853  // for next grid point
854  for (LO i = 0; i < faceIdx; i++) {
855  ++gridIdx[0];
856  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
857  gridIdx[0] = 0;
858  ++gridIdx[2];
859  }
860  }
861 
862  // Face 0
863  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
864  rowIdx = coordRowIdx * blkSize;
865  coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
866  columnOffset = coordColumnOffset * blkSize;
867  entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
868  for (LO l = 0; l < blkSize; ++l) {
869  for (LO k = 0; k < 5; ++k) {
870  for (LO j = 0; j < 3; ++j) {
871  for (LO i = 0; i < 5; ++i) {
872  entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
873  values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
874  }
875  }
876  }
877  }
878  for (LO l = 0; l < blkSize; ++l) {
879  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
880  }
881  for (int dim = 0; dim < numDimensions; ++dim) {
882  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
883  }
884 
885  // Face 1
886  coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
887  rowIdx = coordRowIdx * blkSize;
888  coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
889  columnOffset = coordColumnOffset * blkSize;
890  entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
891  for (LO l = 0; l < blkSize; ++l) {
892  for (LO k = 0; k < 5; ++k) {
893  for (LO j = 0; j < 3; ++j) {
894  for (LO i = 0; i < 5; ++i) {
895  entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
896  values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
897  }
898  }
899  }
900  }
901  for (LO l = 0; l < blkSize; ++l) {
902  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
903  }
904  for (int dim = 0; dim < numDimensions; ++dim) {
905  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
906  }
907  }); // parallel_for faces in 0-2 plane
908  }
909 
910  // Faces in 1-2 plane
911  if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
912  Kokkos::parallel_for(
913  "Faces in 1-2 plane region R",
914  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
915  KOKKOS_LAMBDA(const LO faceIdx) {
916  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
917  LO gridIdx[3] = {0, 0, 0};
918  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
919  // Last step in the loop
920  // update the grid indices
921  // for next grid point
922  for (LO i = 0; i < faceIdx; i++) {
923  ++gridIdx[1];
924  if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
925  gridIdx[1] = 0;
926  ++gridIdx[2];
927  }
928  }
929 
930  // Face 0
931  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
932  rowIdx = coordRowIdx * blkSize;
933  coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
934  columnOffset = coordColumnOffset * blkSize;
935  entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
936  for (LO l = 0; l < blkSize; ++l) {
937  for (LO k = 0; k < 5; ++k) {
938  for (LO j = 0; j < 5; ++j) {
939  for (LO i = 0; i < 3; ++i) {
940  entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
941  values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
942  }
943  }
944  }
945  }
946  for (LO l = 0; l < blkSize; ++l) {
947  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
948  }
949  for (int dim = 0; dim < numDimensions; ++dim) {
950  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
951  }
952 
953  // Face 1
954  coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
955  rowIdx = coordRowIdx * blkSize;
956  coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
957  columnOffset = coordColumnOffset * blkSize;
958  entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
959  for (LO l = 0; l < blkSize; ++l) {
960  for (LO k = 0; k < 5; ++k) {
961  for (LO j = 0; j < 5; ++j) {
962  for (LO i = 0; i < 3; ++i) {
963  entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
964  values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
965  }
966  }
967  }
968  }
969  for (LO l = 0; l < blkSize; ++l) {
970  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
971  }
972  for (int dim = 0; dim < numDimensions; ++dim) {
973  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
974  }
975  }); // parallel_for faces in 1-2 plane
976  }
977 
978  if (numInteriors > 0) {
979  // Allocate and compute arrays
980  // containing column offsets
981  // and values associated with
982  // interior points
983  LO countRowEntries = 0;
984  Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
985  auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
986 
987  for (LO k = -2; k < 3; ++k) {
988  for (LO j = -2; j < 3; ++j) {
989  for (LO i = -2; i < 3; ++i) {
990  coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
991  ++countRowEntries;
992  }
993  }
994  }
995  Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
996 
997  LO countValues = 0;
998  Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues", 125);
999  auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1000  for (LO k = 0; k < 5; ++k) {
1001  for (LO j = 0; j < 5; ++j) {
1002  for (LO i = 0; i < 5; ++i) {
1003  interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1004  ++countValues;
1005  }
1006  }
1007  }
1008  Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1009 
1010  Kokkos::parallel_for(
1011  "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1012  KOKKOS_LAMBDA(const LO interiorIdx) {
1013  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1014  LO gridIdx[3];
1015  gridIdx[0] = 0;
1016  gridIdx[1] = 0;
1017  gridIdx[2] = 0;
1018  // First step in the loop
1019  // update the grid indices
1020  // for the grid point
1021  for (LO i = 0; i < interiorIdx; i++) {
1022  ++gridIdx[0];
1023  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1024  gridIdx[0] = 0;
1025  ++gridIdx[1];
1026  if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1027  gridIdx[1] = 0;
1028  ++gridIdx[2];
1029  }
1030  }
1031  }
1032 
1033  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1034  rowIdx = coordRowIdx * blkSize;
1035  coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1036  columnOffset = coordColumnOffset * blkSize;
1037  entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1038  for (LO l = 0; l < blkSize; ++l) {
1039  row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1040  }
1041  // Fill the column indices
1042  // and values in the approproate
1043  // views.
1044  for (LO l = 0; l < blkSize; ++l) {
1045  for (LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1046  entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1047  values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1048  }
1049  }
1050  for (int dim = 0; dim < numDimensions; ++dim) {
1051  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1052  }
1053  }); // Kokkos::parallel_for interior idx
1054  //
1055  }
1056 
1057  local_graph_type localGraph(entries, row_map);
1058  local_matrix_type localR("R", numCols, values, localGraph);
1059 
1060  R = MatrixFactory::Build(localR, // the local data
1061  rowMap, // rowMap
1062  A->getRowMap(), // colMap
1063  A->getRowMap(), // domainMap == colMap
1064  rowMap, // rangeMap == rowMap
1065  Teuchos::null); // params for optimized construction
1066 
1067 } // Build3D()
1068 
1069 } // namespace MueLu
1070 
1071 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1072 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
LocalOrdinal LO
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
MueLu::DefaultNode Node
static const NoFactory * get()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
bool isSublist(const std::string &name) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
RCP< const ParameterList > GetValidParameterList() const
Input.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
std::string toString(const T &t)