MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
12 
13 #include "Kokkos_UnorderedMap.hpp"
14 
16 
17 #include <Xpetra_Matrix.hpp>
19 
20 #include "MueLu_Types.hpp"
21 
22 namespace MueLu {
23 
24 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
27  RCP<ParameterList> validParamList = rcp(new ParameterList());
28 
29  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
30  "Generating factory of the matrix A");
31  validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
32  "Number of spatial dimensions in the problem.");
33  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
34  "Number of local nodes per spatial dimension on the fine grid.");
35  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
36  "Fine level nullspace used to construct the coarse level nullspace.");
37  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
38  "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
39  validParamList->set<bool>("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
40 
41  return validParamList;
42 } // GetValidParameterList()
43 
44 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
46  DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
47  Input(fineLevel, "A");
48  Input(fineLevel, "numDimensions");
49  Input(fineLevel, "lNodesPerDim");
50  Input(fineLevel, "Nullspace");
51  Input(fineLevel, "Coordinates");
52 
53 } // DeclareInput()
54 
55 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
57  Build(Level& fineLevel, Level& coarseLevel) const {
58  // Set debug outputs based on environment variable
60  if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
61  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
62  out->setShowAllFrontMatter(false).setShowProcRank(true);
63  } else {
64  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
65  }
66 
67  *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
68 
69  // First get the inputs from the fineLevel
70  const int numDimensions = Get<int>(fineLevel, "numDimensions");
71  Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
72  {
73  Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
74  for (int dim = 0; dim < numDimensions; ++dim) {
75  lFineNodesPerDim[dim] = lNodesPerDim[dim];
76  }
77  }
78  *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
79  << std::endl;
80 
81  // Let us check that the inputs verify our assumptions
82  if (numDimensions < 1 || numDimensions > 3) {
83  throw std::runtime_error("numDimensions must be 1, 2 or 3!");
84  }
85  for (int dim = 0; dim < numDimensions; ++dim) {
86  if (lFineNodesPerDim[dim] % 3 != 1) {
87  throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
88  }
89  }
90  Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
91 
92  const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
93 
94  RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
95  fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
96  if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
97  throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
98  }
99 
100  // Let us create R and pass it down to the
101  // appropriate specialization and see what we
102  // get back!
103  RCP<Matrix> R;
104 
105  if (numDimensions == 1) {
106  throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
107  } else if (numDimensions == 2) {
108  throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
109  } else if (numDimensions == 3) {
110  Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
111  R, coarseCoordinates, lCoarseNodesPerDim);
112  }
113 
114  const Teuchos::ParameterList& pL = GetParameterList();
115 
116  // Reuse pattern if available (multiple solve)
117  RCP<ParameterList> Tparams;
118  if (pL.isSublist("matrixmatrix: kernel params"))
119  Tparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
120  else
121  Tparams = rcp(new ParameterList);
122 
123  // R->describe(*out, Teuchos::VERB_EXTREME);
124  *out << "Compute P=R^t" << std::endl;
125  // By default, we don't need global constants for transpose
126  Tparams->set("compute global constants: temporaries", Tparams->get("compute global constants: temporaries", false));
127  Tparams->set("compute global constants", Tparams->get("compute global constants", false));
128  std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
129  RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
130 
131  *out << "Compute coarse nullspace" << std::endl;
132  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
133  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
134  fineNullspace->getNumVectors());
135  R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
137 
138  *out << "Set data on coarse level" << std::endl;
139  Set(coarseLevel, "numDimensions", numDimensions);
140  Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
141  Set(coarseLevel, "Nullspace", coarseNullspace);
142  Set(coarseLevel, "Coordinates", coarseCoordinates);
143  if (pL.get<bool>("keep coarse coords")) {
144  coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
145  }
146 
147  R->SetFixedBlockSize(A->GetFixedBlockSize());
148  P->SetFixedBlockSize(A->GetFixedBlockSize());
149 
150  Set(coarseLevel, "R", R);
151  Set(coarseLevel, "P", P);
152 
153 } // Build()
154 
155 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
157  Build3D(const int numDimensions,
158  Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
159  const RCP<Matrix>& A,
161  RCP<Matrix>& R,
163  Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
164  using local_matrix_type = typename CrsMatrix::local_matrix_type;
165  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
166  using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
167  using entries_type = typename local_matrix_type::index_type::non_const_type;
168  using values_type = typename local_matrix_type::values_type::non_const_type;
169 #if KOKKOS_VERSION >= 40799
170  using impl_scalar_type = typename KokkosKernels::ArithTraits<Scalar>::val_type;
171 #else
172  using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
173 #endif
174 
175  // Set debug outputs based on environment variable
177  if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
178  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
179  out->setShowAllFrontMatter(false).setShowProcRank(true);
180  } else {
181  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
182  }
183 
184  // Now compute number of coarse grid points
185  for (int dim = 0; dim < numDimensions; ++dim) {
186  lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
187  }
188  *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
189 
190  // Grab the block size here and multiply all existing offsets by it
191  const LO blkSize = A->GetFixedBlockSize();
192  *out << "blkSize " << blkSize << std::endl;
193 
194  // Based on lCoarseNodesPerDim and lFineNodesPerDim
195  // we can compute numRows, numCols and NNZ for R
196  const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
197  const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
198 
199  // Create the coarse coordinates multivector
200  // so we can fill it on the fly while computing
201  // the restriction operator
202  RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
204  numRows,
205  A->getRowMap()->getIndexBase(),
206  A->getRowMap()->getComm());
207 
208  RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
210  lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
211  A->getRowMap()->getIndexBase(),
212  A->getRowMap()->getComm());
213 
214  coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
215  numDimensions);
216 
217  // Get device views of coordinates
218  auto fineCoordsView = fineCoordinates->getLocalViewDevice(Xpetra::Access::ReadOnly);
219  auto coarseCoordsView = coarseCoordinates->getLocalViewDevice(Xpetra::Access::OverwriteAll);
220 
221  Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
222  Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
223  for (int dim = 0; dim < numDimensions; ++dim) {
224  fineCoordData[dim] = fineCoordinates->getData(dim);
225  coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
226  }
227 
228  // Let us set some parameter that will be useful
229  // while constructing R
230 
231  // Length of interpolation stencils based on geometry
232  const LO cornerStencilLength = 27;
233  const LO edgeStencilLength = 45;
234  const LO faceStencilLength = 75;
235  const LO interiorStencilLength = 125;
236 
237  // Number of corner, edge, face and interior nodes
238  const LO numCorners = 8;
239  const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
240  const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
241  const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
242 
243  const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
244 
245  // Having the number of rows and columns we can genrate
246  // the appropriate maps for our operator.
247 
248  *out << "R statistics:" << std::endl
249  << " -numRows= " << numRows << std::endl
250  << " -numCols= " << numCols << std::endl
251  << " -nnz= " << nnz << std::endl;
252 
253  row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
254  typename row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(row_map);
255 
256  entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
257  typename entries_type::host_mirror_type entries_h = Kokkos::create_mirror_view(entries);
258 
259  values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
260  typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
261 
262  // Compute the basic interpolation
263  // coefficients for 1D rate of 3
264  // coarsening.
265  Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
266  row_map_h(0) = 0;
267 
268  // Define some offsets that
269  // will be needed often later on
270  const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
271  const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
272  const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
273 
274  const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
275  const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
276 
277  // Let us take care of the corners
278  // first since we always have
279  // corners to deal with!
280  {
281  // Corner 1
282  LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
283  for (LO l = 0; l < blkSize; ++l) {
284  for (LO k = 0; k < 3; ++k) {
285  for (LO j = 0; j < 3; ++j) {
286  for (LO i = 0; i < 3; ++i) {
287  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
288  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
289  }
290  }
291  }
292  }
293  for (LO l = 0; l < blkSize; ++l) {
294  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
295  }
296  for (int dim = 0; dim < numDimensions; ++dim) {
297  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
298  }
299 
300  // Corner 5
301  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
302  rowIdx = coordRowIdx * blkSize;
303  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
304  columnOffset = coordColumnOffset * blkSize;
305  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
306  for (LO l = 0; l < blkSize; ++l) {
307  for (LO k = 0; k < 3; ++k) {
308  for (LO j = 0; j < 3; ++j) {
309  for (LO i = 0; i < 3; ++i) {
310  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
311  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
312  }
313  }
314  }
315  }
316  for (LO l = 0; l < blkSize; ++l) {
317  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
318  }
319  for (int dim = 0; dim < numDimensions; ++dim) {
320  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
321  }
322 
323  // Corner 2
324  coordRowIdx = (lCoarseNodesPerDim[0] - 1);
325  rowIdx = coordRowIdx * blkSize;
326  coordColumnOffset = (lFineNodesPerDim[0] - 1);
327  columnOffset = coordColumnOffset * blkSize;
328  entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
329  for (LO l = 0; l < blkSize; ++l) {
330  for (LO k = 0; k < 3; ++k) {
331  for (LO j = 0; j < 3; ++j) {
332  for (LO i = 0; i < 3; ++i) {
333  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
334  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
335  }
336  }
337  }
338  }
339  for (LO l = 0; l < blkSize; ++l) {
340  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
341  }
342  for (int dim = 0; dim < numDimensions; ++dim) {
343  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
344  }
345 
346  // Corner 6
347  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
348  rowIdx = coordRowIdx * blkSize;
349  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
350  columnOffset = coordColumnOffset * blkSize;
351  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
352  for (LO l = 0; l < blkSize; ++l) {
353  for (LO k = 0; k < 3; ++k) {
354  for (LO j = 0; j < 3; ++j) {
355  for (LO i = 0; i < 3; ++i) {
356  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
357  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
358  }
359  }
360  }
361  }
362  for (LO l = 0; l < blkSize; ++l) {
363  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
364  }
365  for (int dim = 0; dim < numDimensions; ++dim) {
366  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
367  }
368 
369  // Corner 3
370  coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
371  rowIdx = coordRowIdx * blkSize;
372  coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
373  columnOffset = coordColumnOffset * blkSize;
374  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
375  for (LO l = 0; l < blkSize; ++l) {
376  for (LO k = 0; k < 3; ++k) {
377  for (LO j = 0; j < 3; ++j) {
378  for (LO i = 0; i < 3; ++i) {
379  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
380  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
381  }
382  }
383  }
384  }
385  for (LO l = 0; l < blkSize; ++l) {
386  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
387  }
388  for (int dim = 0; dim < numDimensions; ++dim) {
389  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
390  }
391 
392  // Corner 7
393  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
394  rowIdx = coordRowIdx * blkSize;
395  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
396  columnOffset = coordColumnOffset * blkSize;
397  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
398  for (LO l = 0; l < blkSize; ++l) {
399  for (LO k = 0; k < 3; ++k) {
400  for (LO j = 0; j < 3; ++j) {
401  for (LO i = 0; i < 3; ++i) {
402  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
403  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
404  }
405  }
406  }
407  }
408  for (LO l = 0; l < blkSize; ++l) {
409  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
410  }
411  for (int dim = 0; dim < numDimensions; ++dim) {
412  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
413  }
414 
415  // Corner 4
416  coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
417  rowIdx = coordRowIdx * blkSize;
418  coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
419  columnOffset = coordColumnOffset * blkSize;
420  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
421  cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
422  blkSize;
423  for (LO l = 0; l < blkSize; ++l) {
424  for (LO k = 0; k < 3; ++k) {
425  for (LO j = 0; j < 3; ++j) {
426  for (LO i = 0; i < 3; ++i) {
427  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
428  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
429  }
430  }
431  }
432  }
433  for (LO l = 0; l < blkSize; ++l) {
434  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
435  }
436  for (int dim = 0; dim < numDimensions; ++dim) {
437  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
438  }
439 
440  // Corner 8
441  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
442  rowIdx = coordRowIdx * blkSize;
443  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
444  columnOffset = coordColumnOffset * blkSize;
445  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
446  for (LO l = 0; l < blkSize; ++l) {
447  for (LO k = 0; k < 3; ++k) {
448  for (LO j = 0; j < 3; ++j) {
449  for (LO i = 0; i < 3; ++i) {
450  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
451  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
452  }
453  }
454  }
455  }
456  for (LO l = 0; l < blkSize; ++l) {
457  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
458  }
459  for (int dim = 0; dim < numDimensions; ++dim) {
460  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
461  }
462  } // Corners are done!
463 
464  // Edges along 0 direction
465  if (lCoarseNodesPerDim[0] - 2 > 0) {
466  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
467  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
468  // Edge 0
469  coordRowIdx = (edgeIdx + 1);
470  rowIdx = coordRowIdx * blkSize;
471  coordColumnOffset = (edgeIdx + 1) * 3;
472  columnOffset = coordColumnOffset * blkSize;
473  entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
474  for (LO l = 0; l < blkSize; ++l) {
475  for (LO k = 0; k < 3; ++k) {
476  for (LO j = 0; j < 3; ++j) {
477  for (LO i = 0; i < 5; ++i) {
478  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
479  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
480  }
481  }
482  }
483  }
484  for (LO l = 0; l < blkSize; ++l) {
485  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
486  }
487  for (int dim = 0; dim < numDimensions; ++dim) {
488  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
489  }
490 
491  // Edge 1
492  coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
493  rowIdx = coordRowIdx * blkSize;
494  coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
495  columnOffset = coordColumnOffset * blkSize;
496  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
497  for (LO l = 0; l < blkSize; ++l) {
498  for (LO k = 0; k < 3; ++k) {
499  for (LO j = 0; j < 3; ++j) {
500  for (LO i = 0; i < 5; ++i) {
501  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
502  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
503  }
504  }
505  }
506  }
507  for (LO l = 0; l < blkSize; ++l) {
508  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
509  }
510  for (int dim = 0; dim < numDimensions; ++dim) {
511  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
512  }
513 
514  // Edge 2
515  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
516  rowIdx = coordRowIdx * blkSize;
517  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
518  columnOffset = coordColumnOffset * blkSize;
519  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
520  for (LO l = 0; l < blkSize; ++l) {
521  for (LO k = 0; k < 3; ++k) {
522  for (LO j = 0; j < 3; ++j) {
523  for (LO i = 0; i < 5; ++i) {
524  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
525  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
526  }
527  }
528  }
529  }
530  for (LO l = 0; l < blkSize; ++l) {
531  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
532  }
533  for (int dim = 0; dim < numDimensions; ++dim) {
534  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
535  }
536 
537  // Edge 3
538  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
539  rowIdx = coordRowIdx * blkSize;
540  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
541  columnOffset = coordColumnOffset * blkSize;
542  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
543  for (LO l = 0; l < blkSize; ++l) {
544  for (LO k = 0; k < 3; ++k) {
545  for (LO j = 0; j < 3; ++j) {
546  for (LO i = 0; i < 5; ++i) {
547  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
548  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
549  }
550  }
551  }
552  }
553  for (LO l = 0; l < blkSize; ++l) {
554  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
555  }
556  for (int dim = 0; dim < numDimensions; ++dim) {
557  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
558  }
559  }
560  }
561 
562  // Edges along 1 direction
563  if (lCoarseNodesPerDim[1] - 2 > 0) {
564  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
565  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
566  // Edge 0
567  coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
568  rowIdx = coordRowIdx * blkSize;
569  coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
570  columnOffset = coordColumnOffset * blkSize;
571  entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
572  for (LO l = 0; l < blkSize; ++l) {
573  for (LO k = 0; k < 3; ++k) {
574  for (LO j = 0; j < 5; ++j) {
575  for (LO i = 0; i < 3; ++i) {
576  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
577  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
578  }
579  }
580  }
581  }
582  for (LO l = 0; l < blkSize; ++l) {
583  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
584  }
585  for (int dim = 0; dim < numDimensions; ++dim) {
586  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
587  }
588 
589  // Edge 1
590  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
591  rowIdx = coordRowIdx * blkSize;
592  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
593  columnOffset = coordColumnOffset * blkSize;
594  entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
595  for (LO l = 0; l < blkSize; ++l) {
596  for (LO k = 0; k < 3; ++k) {
597  for (LO j = 0; j < 5; ++j) {
598  for (LO i = 0; i < 3; ++i) {
599  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
600  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
601  }
602  }
603  }
604  }
605  for (LO l = 0; l < blkSize; ++l) {
606  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
607  }
608  for (int dim = 0; dim < numDimensions; ++dim) {
609  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
610  }
611 
612  // Edge 2
613  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
614  rowIdx = coordRowIdx * blkSize;
615  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
616  columnOffset = coordColumnOffset * blkSize;
617  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
618  for (LO l = 0; l < blkSize; ++l) {
619  for (LO k = 0; k < 3; ++k) {
620  for (LO j = 0; j < 5; ++j) {
621  for (LO i = 0; i < 3; ++i) {
622  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
623  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
624  }
625  }
626  }
627  }
628  for (LO l = 0; l < blkSize; ++l) {
629  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
630  }
631  for (int dim = 0; dim < numDimensions; ++dim) {
632  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
633  }
634 
635  // Edge 3
636  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
637  rowIdx = coordRowIdx * blkSize;
638  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
639  columnOffset = coordColumnOffset * blkSize;
640  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
641  for (LO l = 0; l < blkSize; ++l) {
642  for (LO k = 0; k < 3; ++k) {
643  for (LO j = 0; j < 5; ++j) {
644  for (LO i = 0; i < 3; ++i) {
645  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
646  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
647  }
648  }
649  }
650  }
651  for (LO l = 0; l < blkSize; ++l) {
652  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
653  }
654  for (int dim = 0; dim < numDimensions; ++dim) {
655  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
656  }
657  }
658  }
659 
660  // Edges along 2 direction
661  if (lCoarseNodesPerDim[2] - 2 > 0) {
662  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
663  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
664  // Edge 0
665  coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
666  rowIdx = coordRowIdx * blkSize;
667  coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
668  columnOffset = coordColumnOffset * blkSize;
669  entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
670  for (LO l = 0; l < blkSize; ++l) {
671  for (LO k = 0; k < 5; ++k) {
672  for (LO j = 0; j < 3; ++j) {
673  for (LO i = 0; i < 3; ++i) {
674  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
675  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
676  }
677  }
678  }
679  }
680  for (LO l = 0; l < blkSize; ++l) {
681  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
682  }
683  for (int dim = 0; dim < numDimensions; ++dim) {
684  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
685  }
686 
687  // Edge 1
688  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
689  rowIdx = coordRowIdx * blkSize;
690  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
691  columnOffset = coordColumnOffset * blkSize;
692  entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
693  for (LO l = 0; l < blkSize; ++l) {
694  for (LO k = 0; k < 5; ++k) {
695  for (LO j = 0; j < 3; ++j) {
696  for (LO i = 0; i < 3; ++i) {
697  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
698  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
699  }
700  }
701  }
702  }
703  for (LO l = 0; l < blkSize; ++l) {
704  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
705  }
706  for (int dim = 0; dim < numDimensions; ++dim) {
707  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
708  }
709 
710  // Edge 2
711  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
712  rowIdx = coordRowIdx * blkSize;
713  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
714  columnOffset = coordColumnOffset * blkSize;
715  entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
716  for (LO l = 0; l < blkSize; ++l) {
717  for (LO k = 0; k < 5; ++k) {
718  for (LO j = 0; j < 3; ++j) {
719  for (LO i = 0; i < 3; ++i) {
720  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
721  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
722  }
723  }
724  }
725  }
726  for (LO l = 0; l < blkSize; ++l) {
727  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
728  }
729  for (int dim = 0; dim < numDimensions; ++dim) {
730  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
731  }
732 
733  // Edge 3
734  coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
735  rowIdx = coordRowIdx * blkSize;
736  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
737  columnOffset = coordColumnOffset * blkSize;
738  entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
739  for (LO l = 0; l < blkSize; ++l) {
740  for (LO k = 0; k < 5; ++k) {
741  for (LO j = 0; j < 3; ++j) {
742  for (LO i = 0; i < 3; ++i) {
743  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
744  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
745  }
746  }
747  }
748  }
749  for (LO l = 0; l < blkSize; ++l) {
750  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
751  }
752  for (int dim = 0; dim < numDimensions; ++dim) {
753  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
754  }
755  }
756  }
757 
758  // TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
759  Kokkos::deep_copy(row_map, row_map_h);
760  Kokkos::deep_copy(entries, entries_h);
761  Kokkos::deep_copy(values, values_h);
762 
763  // Create views on device for nodes per dim
764  LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
765  LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
766 
767  typename Kokkos::View<LO[3], device_type>::host_mirror_type lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
768  typename Kokkos::View<LO[3], device_type>::host_mirror_type lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
769 
770  for (int dim = 0; dim < numDimensions; ++dim) {
771  lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
772  lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
773  }
774 
775  Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
776  Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
777 
778  // Faces in 0-1 plane
779  if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
780  Kokkos::parallel_for(
781  "Faces in 0-1 plane region R",
782  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
783  KOKKOS_LAMBDA(const LO faceIdx) {
784  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
785  LO gridIdx[3] = {0, 0, 0};
786  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
787  // Last step in the loop
788  // update the grid indices
789  // for next grid point
790  for (LO i = 0; i < faceIdx; i++) {
791  ++gridIdx[0];
792  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
793  gridIdx[0] = 0;
794  ++gridIdx[1];
795  }
796  }
797 
798  // Face 0
799  coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
800  rowIdx = coordRowIdx * blkSize;
801  coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
802  columnOffset = coordColumnOffset * blkSize;
803  entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
804  for (LO l = 0; l < blkSize; ++l) {
805  for (LO k = 0; k < 3; ++k) {
806  for (LO j = 0; j < 5; ++j) {
807  for (LO i = 0; i < 5; ++i) {
808  entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
809  values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
810  }
811  }
812  }
813  }
814  for (LO l = 0; l < blkSize; ++l) {
815  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
816  }
817  for (int dim = 0; dim < numDimensions; ++dim) {
818  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
819  }
820 
821  // Face 1
822  coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
823  rowIdx = coordRowIdx * blkSize;
824  coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
825  columnOffset = coordColumnOffset * blkSize;
826  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
827  for (LO l = 0; l < blkSize; ++l) {
828  for (LO k = 0; k < 3; ++k) {
829  for (LO j = 0; j < 5; ++j) {
830  for (LO i = 0; i < 5; ++i) {
831  entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
832  values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
833  }
834  }
835  }
836  }
837  for (LO l = 0; l < blkSize; ++l) {
838  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
839  }
840  for (int dim = 0; dim < numDimensions; ++dim) {
841  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
842  }
843  }); // parallel_for faces in 0-1 plane
844  }
845 
846  // Faces in 0-2 plane
847  if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
848  Kokkos::parallel_for(
849  "Faces in 0-2 plane region R",
850  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
851  KOKKOS_LAMBDA(const LO faceIdx) {
852  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
853  LO gridIdx[3] = {0, 0, 0};
854  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
855  // Last step in the loop
856  // update the grid indices
857  // for next grid point
858  for (LO i = 0; i < faceIdx; i++) {
859  ++gridIdx[0];
860  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
861  gridIdx[0] = 0;
862  ++gridIdx[2];
863  }
864  }
865 
866  // Face 0
867  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
868  rowIdx = coordRowIdx * blkSize;
869  coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
870  columnOffset = coordColumnOffset * blkSize;
871  entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
872  for (LO l = 0; l < blkSize; ++l) {
873  for (LO k = 0; k < 5; ++k) {
874  for (LO j = 0; j < 3; ++j) {
875  for (LO i = 0; i < 5; ++i) {
876  entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
877  values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
878  }
879  }
880  }
881  }
882  for (LO l = 0; l < blkSize; ++l) {
883  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
884  }
885  for (int dim = 0; dim < numDimensions; ++dim) {
886  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
887  }
888 
889  // Face 1
890  coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
891  rowIdx = coordRowIdx * blkSize;
892  coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
893  columnOffset = coordColumnOffset * blkSize;
894  entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
895  for (LO l = 0; l < blkSize; ++l) {
896  for (LO k = 0; k < 5; ++k) {
897  for (LO j = 0; j < 3; ++j) {
898  for (LO i = 0; i < 5; ++i) {
899  entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
900  values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
901  }
902  }
903  }
904  }
905  for (LO l = 0; l < blkSize; ++l) {
906  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
907  }
908  for (int dim = 0; dim < numDimensions; ++dim) {
909  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
910  }
911  }); // parallel_for faces in 0-2 plane
912  }
913 
914  // Faces in 1-2 plane
915  if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
916  Kokkos::parallel_for(
917  "Faces in 1-2 plane region R",
918  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
919  KOKKOS_LAMBDA(const LO faceIdx) {
920  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
921  LO gridIdx[3] = {0, 0, 0};
922  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
923  // Last step in the loop
924  // update the grid indices
925  // for next grid point
926  for (LO i = 0; i < faceIdx; i++) {
927  ++gridIdx[1];
928  if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
929  gridIdx[1] = 0;
930  ++gridIdx[2];
931  }
932  }
933 
934  // Face 0
935  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
936  rowIdx = coordRowIdx * blkSize;
937  coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
938  columnOffset = coordColumnOffset * blkSize;
939  entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
940  for (LO l = 0; l < blkSize; ++l) {
941  for (LO k = 0; k < 5; ++k) {
942  for (LO j = 0; j < 5; ++j) {
943  for (LO i = 0; i < 3; ++i) {
944  entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
945  values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
946  }
947  }
948  }
949  }
950  for (LO l = 0; l < blkSize; ++l) {
951  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
952  }
953  for (int dim = 0; dim < numDimensions; ++dim) {
954  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
955  }
956 
957  // Face 1
958  coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
959  rowIdx = coordRowIdx * blkSize;
960  coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
961  columnOffset = coordColumnOffset * blkSize;
962  entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
963  for (LO l = 0; l < blkSize; ++l) {
964  for (LO k = 0; k < 5; ++k) {
965  for (LO j = 0; j < 5; ++j) {
966  for (LO i = 0; i < 3; ++i) {
967  entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
968  values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
969  }
970  }
971  }
972  }
973  for (LO l = 0; l < blkSize; ++l) {
974  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
975  }
976  for (int dim = 0; dim < numDimensions; ++dim) {
977  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
978  }
979  }); // parallel_for faces in 1-2 plane
980  }
981 
982  if (numInteriors > 0) {
983  // Allocate and compute arrays
984  // containing column offsets
985  // and values associated with
986  // interior points
987  LO countRowEntries = 0;
988  Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
989  auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
990 
991  for (LO k = -2; k < 3; ++k) {
992  for (LO j = -2; j < 3; ++j) {
993  for (LO i = -2; i < 3; ++i) {
994  coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
995  ++countRowEntries;
996  }
997  }
998  }
999  Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1000 
1001  LO countValues = 0;
1002  Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues", 125);
1003  auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1004  for (LO k = 0; k < 5; ++k) {
1005  for (LO j = 0; j < 5; ++j) {
1006  for (LO i = 0; i < 5; ++i) {
1007  interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1008  ++countValues;
1009  }
1010  }
1011  }
1012  Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1013 
1014  Kokkos::parallel_for(
1015  "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1016  KOKKOS_LAMBDA(const LO interiorIdx) {
1017  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1018  LO gridIdx[3];
1019  gridIdx[0] = 0;
1020  gridIdx[1] = 0;
1021  gridIdx[2] = 0;
1022  // First step in the loop
1023  // update the grid indices
1024  // for the grid point
1025  for (LO i = 0; i < interiorIdx; i++) {
1026  ++gridIdx[0];
1027  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1028  gridIdx[0] = 0;
1029  ++gridIdx[1];
1030  if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1031  gridIdx[1] = 0;
1032  ++gridIdx[2];
1033  }
1034  }
1035  }
1036 
1037  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1038  rowIdx = coordRowIdx * blkSize;
1039  coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1040  columnOffset = coordColumnOffset * blkSize;
1041  entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1042  for (LO l = 0; l < blkSize; ++l) {
1043  row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1044  }
1045  // Fill the column indices
1046  // and values in the approproate
1047  // views.
1048  for (LO l = 0; l < blkSize; ++l) {
1049  for (LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1050  entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1051  values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1052  }
1053  }
1054  for (int dim = 0; dim < numDimensions; ++dim) {
1055  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1056  }
1057  }); // Kokkos::parallel_for interior idx
1058  //
1059  }
1060 
1061  local_graph_type localGraph(entries, row_map);
1062  local_matrix_type localR("R", numCols, values, localGraph);
1063 
1064  R = MatrixFactory::Build(localR, // the local data
1065  rowMap, // rowMap
1066  A->getRowMap(), // colMap
1067  A->getRowMap(), // domainMap == colMap
1068  rowMap, // rangeMap == rowMap
1069  Teuchos::null); // params for optimized construction
1070 
1071 } // Build3D()
1072 
1073 } // namespace MueLu
1074 
1075 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1076 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
LocalOrdinal LO
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
MueLu::DefaultNode Node
static const NoFactory * get()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
bool isSublist(const std::string &name) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
RCP< const ParameterList > GetValidParameterList() const
Input.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
std::string toString(const T &t)