MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_RegionRFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
48 
49 #include "Kokkos_UnorderedMap.hpp"
50 
52 
53 #include <Xpetra_Matrix.hpp>
55 
56 #include "MueLu_Types.hpp"
57 
58 namespace MueLu {
59 
60 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
63  RCP<ParameterList> validParamList = rcp(new ParameterList());
64 
65  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null,
66  "Generating factory of the matrix A");
67  validParamList->set<RCP<const FactoryBase> >("numDimensions", Teuchos::null,
68  "Number of spatial dimensions in the problem.");
69  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null,
70  "Number of local nodes per spatial dimension on the fine grid.");
71  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null,
72  "Fine level nullspace used to construct the coarse level nullspace.");
73  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null,
74  "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
75  validParamList->set<bool>("keep coarse coords", false, "Flag to keep coordinates for special coarse grid solve");
76 
77  return validParamList;
78 } // GetValidParameterList()
79 
80 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
82  DeclareInput(Level& fineLevel, Level& /* coarseLevel */) const {
83  Input(fineLevel, "A");
84  Input(fineLevel, "numDimensions");
85  Input(fineLevel, "lNodesPerDim");
86  Input(fineLevel, "Nullspace");
87  Input(fineLevel, "Coordinates");
88 
89 } // DeclareInput()
90 
91 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
93  Build(Level& fineLevel, Level& coarseLevel) const {
94  // Set debug outputs based on environment variable
96  if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
97  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
98  out->setShowAllFrontMatter(false).setShowProcRank(true);
99  } else {
100  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
101  }
102 
103  *out << "Starting RegionRFactory_kokkos::Build." << std::endl;
104 
105  // First get the inputs from the fineLevel
106  const int numDimensions = Get<int>(fineLevel, "numDimensions");
107  Array<LO> lFineNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
108  {
109  Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel, "lNodesPerDim");
110  for (int dim = 0; dim < numDimensions; ++dim) {
111  lFineNodesPerDim[dim] = lNodesPerDim[dim];
112  }
113  }
114  *out << "numDimensions " << numDimensions << " and lFineNodesPerDim: " << lFineNodesPerDim
115  << std::endl;
116 
117  // Let us check that the inputs verify our assumptions
118  if (numDimensions < 1 || numDimensions > 3) {
119  throw std::runtime_error("numDimensions must be 1, 2 or 3!");
120  }
121  for (int dim = 0; dim < numDimensions; ++dim) {
122  if (lFineNodesPerDim[dim] % 3 != 1) {
123  throw std::runtime_error("The number of fine node in each direction need to be 3n+1");
124  }
125  }
126  Array<LO> lCoarseNodesPerDim(3, Teuchos::OrdinalTraits<LO>::one());
127 
128  const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel, "A");
129 
130  RCP<realvaluedmultivector_type> fineCoordinates, coarseCoordinates;
131  fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel, "Coordinates");
132  if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
133  throw std::runtime_error("The number of vectors in the coordinates is not equal to numDimensions!");
134  }
135 
136  // Let us create R and pass it down to the
137  // appropriate specialization and see what we
138  // get back!
139  RCP<Matrix> R;
140 
141  if (numDimensions == 1) {
142  throw std::runtime_error("RegionRFactory_kokkos no implemented for 1D case yet.");
143  } else if (numDimensions == 2) {
144  throw std::runtime_error("RegionRFactory_kokkos no implemented for 2D case yet.");
145  } else if (numDimensions == 3) {
146  Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
147  R, coarseCoordinates, lCoarseNodesPerDim);
148  }
149 
150  const Teuchos::ParameterList& pL = GetParameterList();
151 
152  // Reuse pattern if available (multiple solve)
153  RCP<ParameterList> Tparams;
154  if (pL.isSublist("matrixmatrix: kernel params"))
155  Tparams = rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
156  else
157  Tparams = rcp(new ParameterList);
158 
159  // R->describe(*out, Teuchos::VERB_EXTREME);
160  *out << "Compute P=R^t" << std::endl;
161  // By default, we don't need global constants for transpose
162  Tparams->set("compute global constants: temporaries", Tparams->get("compute global constants: temporaries", false));
163  Tparams->set("compute global constants", Tparams->get("compute global constants", false));
164  std::string label = "MueLu::RegionR-transR" + Teuchos::toString(coarseLevel.GetLevelID());
165  RCP<Matrix> P = Utilities::Transpose(*R, true, label, Tparams);
166 
167  *out << "Compute coarse nullspace" << std::endl;
168  RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel, "Nullspace");
169  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
170  fineNullspace->getNumVectors());
171  R->apply(*fineNullspace, *coarseNullspace, Teuchos::NO_TRANS, Teuchos::ScalarTraits<SC>::one(),
173 
174  *out << "Set data on coarse level" << std::endl;
175  Set(coarseLevel, "numDimensions", numDimensions);
176  Set(coarseLevel, "lNodesPerDim", lCoarseNodesPerDim);
177  Set(coarseLevel, "Nullspace", coarseNullspace);
178  Set(coarseLevel, "Coordinates", coarseCoordinates);
179  if (pL.get<bool>("keep coarse coords")) {
180  coarseLevel.Set<RCP<realvaluedmultivector_type> >("Coordinates2", coarseCoordinates, NoFactory::get());
181  }
182 
183  R->SetFixedBlockSize(A->GetFixedBlockSize());
184  P->SetFixedBlockSize(A->GetFixedBlockSize());
185 
186  Set(coarseLevel, "R", R);
187  Set(coarseLevel, "P", P);
188 
189 } // Build()
190 
191 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
193  Build3D(const int numDimensions,
194  Teuchos::Array<LocalOrdinal>& lFineNodesPerDim,
195  const RCP<Matrix>& A,
197  RCP<Matrix>& R,
199  Teuchos::Array<LocalOrdinal>& lCoarseNodesPerDim) const {
200  using local_matrix_type = typename CrsMatrix::local_matrix_type;
201  using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
202  using row_map_type = typename local_matrix_type::row_map_type::non_const_type;
203  using entries_type = typename local_matrix_type::index_type::non_const_type;
204  using values_type = typename local_matrix_type::values_type::non_const_type;
205  using impl_scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;
206 
207  // Set debug outputs based on environment variable
209  if (const char* dbg = std::getenv("MUELU_REGIONRFACTORY_DEBUG")) {
210  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
211  out->setShowAllFrontMatter(false).setShowProcRank(true);
212  } else {
213  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
214  }
215 
216  // Now compute number of coarse grid points
217  for (int dim = 0; dim < numDimensions; ++dim) {
218  lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
219  }
220  *out << "lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
221 
222  // Grab the block size here and multiply all existing offsets by it
223  const LO blkSize = A->GetFixedBlockSize();
224  *out << "blkSize " << blkSize << std::endl;
225 
226  // Based on lCoarseNodesPerDim and lFineNodesPerDim
227  // we can compute numRows, numCols and NNZ for R
228  const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
229  const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
230 
231  // Create the coarse coordinates multivector
232  // so we can fill it on the fly while computing
233  // the restriction operator
234  RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
236  numRows,
237  A->getRowMap()->getIndexBase(),
238  A->getRowMap()->getComm());
239 
240  RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
242  lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
243  A->getRowMap()->getIndexBase(),
244  A->getRowMap()->getComm());
245 
246  coarseCoordinates = Xpetra::MultiVectorFactory<real_type, LO, GO, NO>::Build(coordRowMap,
247  numDimensions);
248 
249  // Get device views of coordinates
250  auto fineCoordsView = fineCoordinates->getDeviceLocalView(Xpetra::Access::ReadOnly);
251  auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
252 
253  Array<ArrayRCP<const real_type> > fineCoordData(numDimensions);
254  Array<ArrayRCP<real_type> > coarseCoordData(numDimensions);
255  for (int dim = 0; dim < numDimensions; ++dim) {
256  fineCoordData[dim] = fineCoordinates->getData(dim);
257  coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
258  }
259 
260  // Let us set some parameter that will be useful
261  // while constructing R
262 
263  // Length of interpolation stencils based on geometry
264  const LO cornerStencilLength = 27;
265  const LO edgeStencilLength = 45;
266  const LO faceStencilLength = 75;
267  const LO interiorStencilLength = 125;
268 
269  // Number of corner, edge, face and interior nodes
270  const LO numCorners = 8;
271  const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
272  const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
273  const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
274 
275  const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
276 
277  // Having the number of rows and columns we can genrate
278  // the appropriate maps for our operator.
279 
280  *out << "R statistics:" << std::endl
281  << " -numRows= " << numRows << std::endl
282  << " -numCols= " << numCols << std::endl
283  << " -nnz= " << nnz << std::endl;
284 
285  row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing("row_map"), numRows + 1);
286  typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
287 
288  entries_type entries(Kokkos::ViewAllocateWithoutInitializing("entries"), nnz);
289  typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
290 
291  values_type values(Kokkos::ViewAllocateWithoutInitializing("values"), nnz);
292  typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
293 
294  // Compute the basic interpolation
295  // coefficients for 1D rate of 3
296  // coarsening.
297  Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
298  row_map_h(0) = 0;
299 
300  // Define some offsets that
301  // will be needed often later on
302  const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
303  const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
304  const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
305 
306  const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
307  const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
308 
309  // Let us take care of the corners
310  // first since we always have
311  // corners to deal with!
312  {
313  // Corner 1
314  LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
315  for (LO l = 0; l < blkSize; ++l) {
316  for (LO k = 0; k < 3; ++k) {
317  for (LO j = 0; j < 3; ++j) {
318  for (LO i = 0; i < 3; ++i) {
319  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
320  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
321  }
322  }
323  }
324  }
325  for (LO l = 0; l < blkSize; ++l) {
326  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
327  }
328  for (int dim = 0; dim < numDimensions; ++dim) {
329  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
330  }
331 
332  // Corner 5
333  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
334  rowIdx = coordRowIdx * blkSize;
335  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
336  columnOffset = coordColumnOffset * blkSize;
337  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
338  for (LO l = 0; l < blkSize; ++l) {
339  for (LO k = 0; k < 3; ++k) {
340  for (LO j = 0; j < 3; ++j) {
341  for (LO i = 0; i < 3; ++i) {
342  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
343  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
344  }
345  }
346  }
347  }
348  for (LO l = 0; l < blkSize; ++l) {
349  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
350  }
351  for (int dim = 0; dim < numDimensions; ++dim) {
352  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
353  }
354 
355  // Corner 2
356  coordRowIdx = (lCoarseNodesPerDim[0] - 1);
357  rowIdx = coordRowIdx * blkSize;
358  coordColumnOffset = (lFineNodesPerDim[0] - 1);
359  columnOffset = coordColumnOffset * blkSize;
360  entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
361  for (LO l = 0; l < blkSize; ++l) {
362  for (LO k = 0; k < 3; ++k) {
363  for (LO j = 0; j < 3; ++j) {
364  for (LO i = 0; i < 3; ++i) {
365  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
366  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
367  }
368  }
369  }
370  }
371  for (LO l = 0; l < blkSize; ++l) {
372  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
373  }
374  for (int dim = 0; dim < numDimensions; ++dim) {
375  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
376  }
377 
378  // Corner 6
379  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
380  rowIdx = coordRowIdx * blkSize;
381  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
382  columnOffset = coordColumnOffset * blkSize;
383  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
384  for (LO l = 0; l < blkSize; ++l) {
385  for (LO k = 0; k < 3; ++k) {
386  for (LO j = 0; j < 3; ++j) {
387  for (LO i = 0; i < 3; ++i) {
388  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
389  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
390  }
391  }
392  }
393  }
394  for (LO l = 0; l < blkSize; ++l) {
395  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
396  }
397  for (int dim = 0; dim < numDimensions; ++dim) {
398  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
399  }
400 
401  // Corner 3
402  coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
403  rowIdx = coordRowIdx * blkSize;
404  coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
405  columnOffset = coordColumnOffset * blkSize;
406  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
407  for (LO l = 0; l < blkSize; ++l) {
408  for (LO k = 0; k < 3; ++k) {
409  for (LO j = 0; j < 3; ++j) {
410  for (LO i = 0; i < 3; ++i) {
411  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
412  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
413  }
414  }
415  }
416  }
417  for (LO l = 0; l < blkSize; ++l) {
418  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
419  }
420  for (int dim = 0; dim < numDimensions; ++dim) {
421  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
422  }
423 
424  // Corner 7
425  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
426  rowIdx = coordRowIdx * blkSize;
427  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
428  columnOffset = coordColumnOffset * blkSize;
429  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
430  for (LO l = 0; l < blkSize; ++l) {
431  for (LO k = 0; k < 3; ++k) {
432  for (LO j = 0; j < 3; ++j) {
433  for (LO i = 0; i < 3; ++i) {
434  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
435  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
436  }
437  }
438  }
439  }
440  for (LO l = 0; l < blkSize; ++l) {
441  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
442  }
443  for (int dim = 0; dim < numDimensions; ++dim) {
444  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
445  }
446 
447  // Corner 4
448  coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
449  rowIdx = coordRowIdx * blkSize;
450  coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
451  columnOffset = coordColumnOffset * blkSize;
452  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
453  cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
454  blkSize;
455  for (LO l = 0; l < blkSize; ++l) {
456  for (LO k = 0; k < 3; ++k) {
457  for (LO j = 0; j < 3; ++j) {
458  for (LO i = 0; i < 3; ++i) {
459  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
460  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
461  }
462  }
463  }
464  }
465  for (LO l = 0; l < blkSize; ++l) {
466  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
467  }
468  for (int dim = 0; dim < numDimensions; ++dim) {
469  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
470  }
471 
472  // Corner 8
473  coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
474  rowIdx = coordRowIdx * blkSize;
475  coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
476  columnOffset = coordColumnOffset * blkSize;
477  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
478  for (LO l = 0; l < blkSize; ++l) {
479  for (LO k = 0; k < 3; ++k) {
480  for (LO j = 0; j < 3; ++j) {
481  for (LO i = 0; i < 3; ++i) {
482  entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
483  values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
484  }
485  }
486  }
487  }
488  for (LO l = 0; l < blkSize; ++l) {
489  row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
490  }
491  for (int dim = 0; dim < numDimensions; ++dim) {
492  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
493  }
494  } // Corners are done!
495 
496  // Edges along 0 direction
497  if (lCoarseNodesPerDim[0] - 2 > 0) {
498  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
499  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
500  // Edge 0
501  coordRowIdx = (edgeIdx + 1);
502  rowIdx = coordRowIdx * blkSize;
503  coordColumnOffset = (edgeIdx + 1) * 3;
504  columnOffset = coordColumnOffset * blkSize;
505  entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
506  for (LO l = 0; l < blkSize; ++l) {
507  for (LO k = 0; k < 3; ++k) {
508  for (LO j = 0; j < 3; ++j) {
509  for (LO i = 0; i < 5; ++i) {
510  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
511  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
512  }
513  }
514  }
515  }
516  for (LO l = 0; l < blkSize; ++l) {
517  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
518  }
519  for (int dim = 0; dim < numDimensions; ++dim) {
520  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
521  }
522 
523  // Edge 1
524  coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
525  rowIdx = coordRowIdx * blkSize;
526  coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
527  columnOffset = coordColumnOffset * blkSize;
528  entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
529  for (LO l = 0; l < blkSize; ++l) {
530  for (LO k = 0; k < 3; ++k) {
531  for (LO j = 0; j < 3; ++j) {
532  for (LO i = 0; i < 5; ++i) {
533  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
534  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
535  }
536  }
537  }
538  }
539  for (LO l = 0; l < blkSize; ++l) {
540  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
541  }
542  for (int dim = 0; dim < numDimensions; ++dim) {
543  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
544  }
545 
546  // Edge 2
547  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
548  rowIdx = coordRowIdx * blkSize;
549  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
550  columnOffset = coordColumnOffset * blkSize;
551  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
552  for (LO l = 0; l < blkSize; ++l) {
553  for (LO k = 0; k < 3; ++k) {
554  for (LO j = 0; j < 3; ++j) {
555  for (LO i = 0; i < 5; ++i) {
556  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
557  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
558  }
559  }
560  }
561  }
562  for (LO l = 0; l < blkSize; ++l) {
563  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
564  }
565  for (int dim = 0; dim < numDimensions; ++dim) {
566  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
567  }
568 
569  // Edge 3
570  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
571  rowIdx = coordRowIdx * blkSize;
572  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
573  columnOffset = coordColumnOffset * blkSize;
574  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
575  for (LO l = 0; l < blkSize; ++l) {
576  for (LO k = 0; k < 3; ++k) {
577  for (LO j = 0; j < 3; ++j) {
578  for (LO i = 0; i < 5; ++i) {
579  entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
580  values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
581  }
582  }
583  }
584  }
585  for (LO l = 0; l < blkSize; ++l) {
586  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
587  }
588  for (int dim = 0; dim < numDimensions; ++dim) {
589  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
590  }
591  }
592  }
593 
594  // Edges along 1 direction
595  if (lCoarseNodesPerDim[1] - 2 > 0) {
596  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
597  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
598  // Edge 0
599  coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
600  rowIdx = coordRowIdx * blkSize;
601  coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
602  columnOffset = coordColumnOffset * blkSize;
603  entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
604  for (LO l = 0; l < blkSize; ++l) {
605  for (LO k = 0; k < 3; ++k) {
606  for (LO j = 0; j < 5; ++j) {
607  for (LO i = 0; i < 3; ++i) {
608  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
609  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
610  }
611  }
612  }
613  }
614  for (LO l = 0; l < blkSize; ++l) {
615  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
616  }
617  for (int dim = 0; dim < numDimensions; ++dim) {
618  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
619  }
620 
621  // Edge 1
622  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
623  rowIdx = coordRowIdx * blkSize;
624  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
625  columnOffset = coordColumnOffset * blkSize;
626  entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
627  for (LO l = 0; l < blkSize; ++l) {
628  for (LO k = 0; k < 3; ++k) {
629  for (LO j = 0; j < 5; ++j) {
630  for (LO i = 0; i < 3; ++i) {
631  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
632  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
633  }
634  }
635  }
636  }
637  for (LO l = 0; l < blkSize; ++l) {
638  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
639  }
640  for (int dim = 0; dim < numDimensions; ++dim) {
641  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
642  }
643 
644  // Edge 2
645  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
646  rowIdx = coordRowIdx * blkSize;
647  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
648  columnOffset = coordColumnOffset * blkSize;
649  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
650  for (LO l = 0; l < blkSize; ++l) {
651  for (LO k = 0; k < 3; ++k) {
652  for (LO j = 0; j < 5; ++j) {
653  for (LO i = 0; i < 3; ++i) {
654  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
655  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
656  }
657  }
658  }
659  }
660  for (LO l = 0; l < blkSize; ++l) {
661  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
662  }
663  for (int dim = 0; dim < numDimensions; ++dim) {
664  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
665  }
666 
667  // Edge 3
668  coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
669  rowIdx = coordRowIdx * blkSize;
670  coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
671  columnOffset = coordColumnOffset * blkSize;
672  entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
673  for (LO l = 0; l < blkSize; ++l) {
674  for (LO k = 0; k < 3; ++k) {
675  for (LO j = 0; j < 5; ++j) {
676  for (LO i = 0; i < 3; ++i) {
677  entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
678  values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
679  }
680  }
681  }
682  }
683  for (LO l = 0; l < blkSize; ++l) {
684  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
685  }
686  for (int dim = 0; dim < numDimensions; ++dim) {
687  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
688  }
689  }
690  }
691 
692  // Edges along 2 direction
693  if (lCoarseNodesPerDim[2] - 2 > 0) {
694  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
695  for (LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
696  // Edge 0
697  coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
698  rowIdx = coordRowIdx * blkSize;
699  coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
700  columnOffset = coordColumnOffset * blkSize;
701  entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
702  for (LO l = 0; l < blkSize; ++l) {
703  for (LO k = 0; k < 5; ++k) {
704  for (LO j = 0; j < 3; ++j) {
705  for (LO i = 0; i < 3; ++i) {
706  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
707  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
708  }
709  }
710  }
711  }
712  for (LO l = 0; l < blkSize; ++l) {
713  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
714  }
715  for (int dim = 0; dim < numDimensions; ++dim) {
716  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
717  }
718 
719  // Edge 1
720  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
721  rowIdx = coordRowIdx * blkSize;
722  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
723  columnOffset = coordColumnOffset * blkSize;
724  entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
725  for (LO l = 0; l < blkSize; ++l) {
726  for (LO k = 0; k < 5; ++k) {
727  for (LO j = 0; j < 3; ++j) {
728  for (LO i = 0; i < 3; ++i) {
729  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
730  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
731  }
732  }
733  }
734  }
735  for (LO l = 0; l < blkSize; ++l) {
736  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
737  }
738  for (int dim = 0; dim < numDimensions; ++dim) {
739  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
740  }
741 
742  // Edge 2
743  coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
744  rowIdx = coordRowIdx * blkSize;
745  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
746  columnOffset = coordColumnOffset * blkSize;
747  entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
748  for (LO l = 0; l < blkSize; ++l) {
749  for (LO k = 0; k < 5; ++k) {
750  for (LO j = 0; j < 3; ++j) {
751  for (LO i = 0; i < 3; ++i) {
752  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
753  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
754  }
755  }
756  }
757  }
758  for (LO l = 0; l < blkSize; ++l) {
759  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
760  }
761  for (int dim = 0; dim < numDimensions; ++dim) {
762  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
763  }
764 
765  // Edge 3
766  coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
767  rowIdx = coordRowIdx * blkSize;
768  coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
769  columnOffset = coordColumnOffset * blkSize;
770  entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
771  for (LO l = 0; l < blkSize; ++l) {
772  for (LO k = 0; k < 5; ++k) {
773  for (LO j = 0; j < 3; ++j) {
774  for (LO i = 0; i < 3; ++i) {
775  entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
776  values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
777  }
778  }
779  }
780  }
781  for (LO l = 0; l < blkSize; ++l) {
782  row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
783  }
784  for (int dim = 0; dim < numDimensions; ++dim) {
785  coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
786  }
787  }
788  }
789 
790  // TODO: KOKKOS parallel_for used from here. Not sure if it should be used for edges.
791  Kokkos::deep_copy(row_map, row_map_h);
792  Kokkos::deep_copy(entries, entries_h);
793  Kokkos::deep_copy(values, values_h);
794 
795  // Create views on device for nodes per dim
796  LOTupleView lFineNodesPerDim_d("lFineNodesPerDim");
797  LOTupleView lCoarseNodesPerDim_d("lCoarseNodesPerDim");
798 
799  typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
800  typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
801 
802  for (int dim = 0; dim < numDimensions; ++dim) {
803  lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
804  lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
805  }
806 
807  Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
808  Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
809 
810  // Faces in 0-1 plane
811  if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
812  Kokkos::parallel_for(
813  "Faces in 0-1 plane region R",
814  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
815  KOKKOS_LAMBDA(const LO faceIdx) {
816  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
817  LO gridIdx[3] = {0, 0, 0};
818  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
819  // Last step in the loop
820  // update the grid indices
821  // for next grid point
822  for (LO i = 0; i < faceIdx; i++) {
823  ++gridIdx[0];
824  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
825  gridIdx[0] = 0;
826  ++gridIdx[1];
827  }
828  }
829 
830  // Face 0
831  coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
832  rowIdx = coordRowIdx * blkSize;
833  coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
834  columnOffset = coordColumnOffset * blkSize;
835  entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
836  for (LO l = 0; l < blkSize; ++l) {
837  for (LO k = 0; k < 3; ++k) {
838  for (LO j = 0; j < 5; ++j) {
839  for (LO i = 0; i < 5; ++i) {
840  entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
841  values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
842  }
843  }
844  }
845  }
846  for (LO l = 0; l < blkSize; ++l) {
847  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
848  }
849  for (int dim = 0; dim < numDimensions; ++dim) {
850  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
851  }
852 
853  // Face 1
854  coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
855  rowIdx = coordRowIdx * blkSize;
856  coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
857  columnOffset = coordColumnOffset * blkSize;
858  entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
859  for (LO l = 0; l < blkSize; ++l) {
860  for (LO k = 0; k < 3; ++k) {
861  for (LO j = 0; j < 5; ++j) {
862  for (LO i = 0; i < 5; ++i) {
863  entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
864  values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
865  }
866  }
867  }
868  }
869  for (LO l = 0; l < blkSize; ++l) {
870  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
871  }
872  for (int dim = 0; dim < numDimensions; ++dim) {
873  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
874  }
875  }); // parallel_for faces in 0-1 plane
876  }
877 
878  // Faces in 0-2 plane
879  if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
880  Kokkos::parallel_for(
881  "Faces in 0-2 plane region R",
882  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
883  KOKKOS_LAMBDA(const LO faceIdx) {
884  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
885  LO gridIdx[3] = {0, 0, 0};
886  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
887  // Last step in the loop
888  // update the grid indices
889  // for next grid point
890  for (LO i = 0; i < faceIdx; i++) {
891  ++gridIdx[0];
892  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
893  gridIdx[0] = 0;
894  ++gridIdx[2];
895  }
896  }
897 
898  // Face 0
899  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
900  rowIdx = coordRowIdx * blkSize;
901  coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
902  columnOffset = coordColumnOffset * blkSize;
903  entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
904  for (LO l = 0; l < blkSize; ++l) {
905  for (LO k = 0; k < 5; ++k) {
906  for (LO j = 0; j < 3; ++j) {
907  for (LO i = 0; i < 5; ++i) {
908  entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
909  values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
910  }
911  }
912  }
913  }
914  for (LO l = 0; l < blkSize; ++l) {
915  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
916  }
917  for (int dim = 0; dim < numDimensions; ++dim) {
918  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
919  }
920 
921  // Face 1
922  coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
923  rowIdx = coordRowIdx * blkSize;
924  coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
925  columnOffset = coordColumnOffset * blkSize;
926  entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
927  for (LO l = 0; l < blkSize; ++l) {
928  for (LO k = 0; k < 5; ++k) {
929  for (LO j = 0; j < 3; ++j) {
930  for (LO i = 0; i < 5; ++i) {
931  entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
932  values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
933  }
934  }
935  }
936  }
937  for (LO l = 0; l < blkSize; ++l) {
938  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
939  }
940  for (int dim = 0; dim < numDimensions; ++dim) {
941  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
942  }
943  }); // parallel_for faces in 0-2 plane
944  }
945 
946  // Faces in 1-2 plane
947  if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
948  Kokkos::parallel_for(
949  "Faces in 1-2 plane region R",
950  Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
951  KOKKOS_LAMBDA(const LO faceIdx) {
952  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
953  LO gridIdx[3] = {0, 0, 0};
954  impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
955  // Last step in the loop
956  // update the grid indices
957  // for next grid point
958  for (LO i = 0; i < faceIdx; i++) {
959  ++gridIdx[1];
960  if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
961  gridIdx[1] = 0;
962  ++gridIdx[2];
963  }
964  }
965 
966  // Face 0
967  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
968  rowIdx = coordRowIdx * blkSize;
969  coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
970  columnOffset = coordColumnOffset * blkSize;
971  entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
972  for (LO l = 0; l < blkSize; ++l) {
973  for (LO k = 0; k < 5; ++k) {
974  for (LO j = 0; j < 5; ++j) {
975  for (LO i = 0; i < 3; ++i) {
976  entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
977  values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
978  }
979  }
980  }
981  }
982  for (LO l = 0; l < blkSize; ++l) {
983  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
984  }
985  for (int dim = 0; dim < numDimensions; ++dim) {
986  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
987  }
988 
989  // Face 1
990  coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
991  rowIdx = coordRowIdx * blkSize;
992  coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
993  columnOffset = coordColumnOffset * blkSize;
994  entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
995  for (LO l = 0; l < blkSize; ++l) {
996  for (LO k = 0; k < 5; ++k) {
997  for (LO j = 0; j < 5; ++j) {
998  for (LO i = 0; i < 3; ++i) {
999  entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
1000  values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
1001  }
1002  }
1003  }
1004  }
1005  for (LO l = 0; l < blkSize; ++l) {
1006  row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
1007  }
1008  for (int dim = 0; dim < numDimensions; ++dim) {
1009  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1010  }
1011  }); // parallel_for faces in 1-2 plane
1012  }
1013 
1014  if (numInteriors > 0) {
1015  // Allocate and compute arrays
1016  // containing column offsets
1017  // and values associated with
1018  // interior points
1019  LO countRowEntries = 0;
1020  Kokkos::View<LO[125]> coordColumnOffsets_d("coordColOffset");
1021  auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
1022 
1023  for (LO k = -2; k < 3; ++k) {
1024  for (LO j = -2; j < 3; ++j) {
1025  for (LO i = -2; i < 3; ++i) {
1026  coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
1027  ++countRowEntries;
1028  }
1029  }
1030  }
1031  Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1032 
1033  LO countValues = 0;
1034  Kokkos::View<impl_scalar_type*> interiorValues_d("interiorValues", 125);
1035  auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1036  for (LO k = 0; k < 5; ++k) {
1037  for (LO j = 0; j < 5; ++j) {
1038  for (LO i = 0; i < 5; ++i) {
1039  interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1040  ++countValues;
1041  }
1042  }
1043  }
1044  Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1045 
1046  Kokkos::parallel_for(
1047  "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1048  KOKKOS_LAMBDA(const LO interiorIdx) {
1049  LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1050  LO gridIdx[3];
1051  gridIdx[0] = 0;
1052  gridIdx[1] = 0;
1053  gridIdx[2] = 0;
1054  // First step in the loop
1055  // update the grid indices
1056  // for the grid point
1057  for (LO i = 0; i < interiorIdx; i++) {
1058  ++gridIdx[0];
1059  if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1060  gridIdx[0] = 0;
1061  ++gridIdx[1];
1062  if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1063  gridIdx[1] = 0;
1064  ++gridIdx[2];
1065  }
1066  }
1067  }
1068 
1069  coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1070  rowIdx = coordRowIdx * blkSize;
1071  coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1072  columnOffset = coordColumnOffset * blkSize;
1073  entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1074  for (LO l = 0; l < blkSize; ++l) {
1075  row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1076  }
1077  // Fill the column indices
1078  // and values in the approproate
1079  // views.
1080  for (LO l = 0; l < blkSize; ++l) {
1081  for (LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1082  entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1083  values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1084  }
1085  }
1086  for (int dim = 0; dim < numDimensions; ++dim) {
1087  coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1088  }
1089  }); // Kokkos::parallel_for interior idx
1090  //
1091  }
1092 
1093  local_graph_type localGraph(entries, row_map);
1094  local_matrix_type localR("R", numCols, values, localGraph);
1095 
1096  R = MatrixFactory::Build(localR, // the local data
1097  rowMap, // rowMap
1098  A->getRowMap(), // colMap
1099  A->getRowMap(), // domainMap == colMap
1100  rowMap, // rangeMap == rowMap
1101  Teuchos::null); // params for optimized construction
1102 
1103 } // Build3D()
1104 
1105 } // namespace MueLu
1106 
1107 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1108 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
LocalOrdinal LO
MueLu::DefaultNode Node
static const NoFactory * get()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool isSublist(const std::string &name) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
RCP< const ParameterList > GetValidParameterList() const
Input.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
std::string toString(const T &t)