10 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
13 #include "Kokkos_UnorderedMap.hpp"
24 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
30 "Generating factory of the matrix A");
32 "Number of spatial dimensions in the problem.");
34 "Number of local nodes per spatial dimension on the fine grid.");
36 "Fine level nullspace used to construct the coarse level nullspace.");
38 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
39 validParamList->
set<
bool>(
"keep coarse coords",
false,
"Flag to keep coordinates for special coarse grid solve");
41 return validParamList;
44 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
47 Input(fineLevel,
"A");
48 Input(fineLevel,
"numDimensions");
49 Input(fineLevel,
"lNodesPerDim");
50 Input(fineLevel,
"Nullspace");
51 Input(fineLevel,
"Coordinates");
55 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
60 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
61 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
67 *out <<
"Starting RegionRFactory_kokkos::Build." << std::endl;
70 const int numDimensions = Get<int>(fineLevel,
"numDimensions");
73 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel,
"lNodesPerDim");
74 for (
int dim = 0; dim < numDimensions; ++dim) {
75 lFineNodesPerDim[dim] = lNodesPerDim[dim];
78 *out <<
"numDimensions " << numDimensions <<
" and lFineNodesPerDim: " << lFineNodesPerDim
82 if (numDimensions < 1 || numDimensions > 3) {
83 throw std::runtime_error(
"numDimensions must be 1, 2 or 3!");
85 for (
int dim = 0; dim < numDimensions; ++dim) {
86 if (lFineNodesPerDim[dim] % 3 != 1) {
87 throw std::runtime_error(
"The number of fine node in each direction need to be 3n+1");
92 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel,
"A");
95 fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel,
"Coordinates");
96 if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
97 throw std::runtime_error(
"The number of vectors in the coordinates is not equal to numDimensions!");
105 if (numDimensions == 1) {
106 throw std::runtime_error(
"RegionRFactory_kokkos no implemented for 1D case yet.");
107 }
else if (numDimensions == 2) {
108 throw std::runtime_error(
"RegionRFactory_kokkos no implemented for 2D case yet.");
109 }
else if (numDimensions == 3) {
110 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
111 R, coarseCoordinates, lCoarseNodesPerDim);
118 if (pL.
isSublist(
"matrixmatrix: kernel params"))
124 *out <<
"Compute P=R^t" << std::endl;
126 Tparams->
set(
"compute global constants: temporaries", Tparams->
get(
"compute global constants: temporaries",
false));
127 Tparams->
set(
"compute global constants", Tparams->
get(
"compute global constants",
false));
131 *out <<
"Compute coarse nullspace" << std::endl;
132 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel,
"Nullspace");
133 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
134 fineNullspace->getNumVectors());
138 *out <<
"Set data on coarse level" << std::endl;
139 Set(coarseLevel,
"numDimensions", numDimensions);
140 Set(coarseLevel,
"lNodesPerDim", lCoarseNodesPerDim);
141 Set(coarseLevel,
"Nullspace", coarseNullspace);
142 Set(coarseLevel,
"Coordinates", coarseCoordinates);
143 if (pL.
get<
bool>(
"keep coarse coords")) {
147 R->SetFixedBlockSize(A->GetFixedBlockSize());
148 P->SetFixedBlockSize(A->GetFixedBlockSize());
150 Set(coarseLevel,
"R", R);
151 Set(coarseLevel,
"P", P);
155 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
164 using local_matrix_type =
typename CrsMatrix::local_matrix_type;
165 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
166 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
167 using entries_type =
typename local_matrix_type::index_type::non_const_type;
168 using values_type =
typename local_matrix_type::values_type::non_const_type;
169 using impl_scalar_type =
typename Kokkos::ArithTraits<Scalar>::val_type;
173 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
174 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
181 for (
int dim = 0; dim < numDimensions; ++dim) {
182 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
184 *out <<
"lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
187 const LO blkSize = A->GetFixedBlockSize();
188 *out <<
"blkSize " << blkSize << std::endl;
192 const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
193 const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
198 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
201 A->getRowMap()->getIndexBase(),
202 A->getRowMap()->getComm());
204 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
206 lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
207 A->getRowMap()->getIndexBase(),
208 A->getRowMap()->getComm());
214 auto fineCoordsView = fineCoordinates->getDeviceLocalView(Xpetra::Access::ReadOnly);
215 auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
219 for (
int dim = 0; dim < numDimensions; ++dim) {
220 fineCoordData[dim] = fineCoordinates->getData(dim);
221 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
228 const LO cornerStencilLength = 27;
229 const LO edgeStencilLength = 45;
230 const LO faceStencilLength = 75;
231 const LO interiorStencilLength = 125;
234 const LO numCorners = 8;
235 const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
236 const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
237 const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
239 const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
244 *out <<
"R statistics:" << std::endl
245 <<
" -numRows= " << numRows << std::endl
246 <<
" -numCols= " << numCols << std::endl
247 <<
" -nnz= " << nnz << std::endl;
249 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing(
"row_map"), numRows + 1);
250 typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
252 entries_type entries(Kokkos::ViewAllocateWithoutInitializing(
"entries"), nnz);
253 typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
255 values_type values(Kokkos::ViewAllocateWithoutInitializing(
"values"), nnz);
256 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
261 Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
266 const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
267 const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
268 const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
270 const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
271 const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
278 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
279 for (
LO l = 0; l < blkSize; ++l) {
280 for (
LO k = 0; k < 3; ++k) {
281 for (
LO j = 0; j < 3; ++j) {
282 for (
LO i = 0; i < 3; ++i) {
283 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
284 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
289 for (
LO l = 0; l < blkSize; ++l) {
290 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
292 for (
int dim = 0; dim < numDimensions; ++dim) {
293 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
297 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
298 rowIdx = coordRowIdx * blkSize;
299 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
300 columnOffset = coordColumnOffset * blkSize;
301 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
302 for (
LO l = 0; l < blkSize; ++l) {
303 for (
LO k = 0; k < 3; ++k) {
304 for (
LO j = 0; j < 3; ++j) {
305 for (
LO i = 0; i < 3; ++i) {
306 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
307 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
312 for (
LO l = 0; l < blkSize; ++l) {
313 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
315 for (
int dim = 0; dim < numDimensions; ++dim) {
316 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
320 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
321 rowIdx = coordRowIdx * blkSize;
322 coordColumnOffset = (lFineNodesPerDim[0] - 1);
323 columnOffset = coordColumnOffset * blkSize;
324 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
325 for (
LO l = 0; l < blkSize; ++l) {
326 for (
LO k = 0; k < 3; ++k) {
327 for (
LO j = 0; j < 3; ++j) {
328 for (
LO i = 0; i < 3; ++i) {
329 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
330 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
335 for (
LO l = 0; l < blkSize; ++l) {
336 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
338 for (
int dim = 0; dim < numDimensions; ++dim) {
339 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
343 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
344 rowIdx = coordRowIdx * blkSize;
345 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
346 columnOffset = coordColumnOffset * blkSize;
347 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
348 for (
LO l = 0; l < blkSize; ++l) {
349 for (
LO k = 0; k < 3; ++k) {
350 for (
LO j = 0; j < 3; ++j) {
351 for (
LO i = 0; i < 3; ++i) {
352 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
353 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
358 for (
LO l = 0; l < blkSize; ++l) {
359 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
361 for (
int dim = 0; dim < numDimensions; ++dim) {
362 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
366 coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
367 rowIdx = coordRowIdx * blkSize;
368 coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
369 columnOffset = coordColumnOffset * blkSize;
370 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
371 for (
LO l = 0; l < blkSize; ++l) {
372 for (
LO k = 0; k < 3; ++k) {
373 for (
LO j = 0; j < 3; ++j) {
374 for (
LO i = 0; i < 3; ++i) {
375 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
376 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
381 for (
LO l = 0; l < blkSize; ++l) {
382 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
384 for (
int dim = 0; dim < numDimensions; ++dim) {
385 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
389 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
390 rowIdx = coordRowIdx * blkSize;
391 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
392 columnOffset = coordColumnOffset * blkSize;
393 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
394 for (
LO l = 0; l < blkSize; ++l) {
395 for (
LO k = 0; k < 3; ++k) {
396 for (
LO j = 0; j < 3; ++j) {
397 for (
LO i = 0; i < 3; ++i) {
398 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
399 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
404 for (
LO l = 0; l < blkSize; ++l) {
405 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
407 for (
int dim = 0; dim < numDimensions; ++dim) {
408 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
412 coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
413 rowIdx = coordRowIdx * blkSize;
414 coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
415 columnOffset = coordColumnOffset * blkSize;
416 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
417 cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
419 for (
LO l = 0; l < blkSize; ++l) {
420 for (
LO k = 0; k < 3; ++k) {
421 for (
LO j = 0; j < 3; ++j) {
422 for (
LO i = 0; i < 3; ++i) {
423 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
424 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
429 for (
LO l = 0; l < blkSize; ++l) {
430 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
432 for (
int dim = 0; dim < numDimensions; ++dim) {
433 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
437 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
438 rowIdx = coordRowIdx * blkSize;
439 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
440 columnOffset = coordColumnOffset * blkSize;
441 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
442 for (
LO l = 0; l < blkSize; ++l) {
443 for (
LO k = 0; k < 3; ++k) {
444 for (
LO j = 0; j < 3; ++j) {
445 for (
LO i = 0; i < 3; ++i) {
446 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
447 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
452 for (
LO l = 0; l < blkSize; ++l) {
453 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
455 for (
int dim = 0; dim < numDimensions; ++dim) {
456 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
461 if (lCoarseNodesPerDim[0] - 2 > 0) {
462 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
463 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
465 coordRowIdx = (edgeIdx + 1);
466 rowIdx = coordRowIdx * blkSize;
467 coordColumnOffset = (edgeIdx + 1) * 3;
468 columnOffset = coordColumnOffset * blkSize;
469 entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
470 for (
LO l = 0; l < blkSize; ++l) {
471 for (
LO k = 0; k < 3; ++k) {
472 for (
LO j = 0; j < 3; ++j) {
473 for (
LO i = 0; i < 5; ++i) {
474 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
475 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
480 for (
LO l = 0; l < blkSize; ++l) {
481 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
483 for (
int dim = 0; dim < numDimensions; ++dim) {
484 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
488 coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
489 rowIdx = coordRowIdx * blkSize;
490 coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
491 columnOffset = coordColumnOffset * blkSize;
492 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
493 for (
LO l = 0; l < blkSize; ++l) {
494 for (
LO k = 0; k < 3; ++k) {
495 for (
LO j = 0; j < 3; ++j) {
496 for (
LO i = 0; i < 5; ++i) {
497 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
498 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
503 for (
LO l = 0; l < blkSize; ++l) {
504 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
506 for (
int dim = 0; dim < numDimensions; ++dim) {
507 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
511 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
512 rowIdx = coordRowIdx * blkSize;
513 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
514 columnOffset = coordColumnOffset * blkSize;
515 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
516 for (
LO l = 0; l < blkSize; ++l) {
517 for (
LO k = 0; k < 3; ++k) {
518 for (
LO j = 0; j < 3; ++j) {
519 for (
LO i = 0; i < 5; ++i) {
520 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
521 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
526 for (
LO l = 0; l < blkSize; ++l) {
527 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
529 for (
int dim = 0; dim < numDimensions; ++dim) {
530 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
534 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
535 rowIdx = coordRowIdx * blkSize;
536 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
537 columnOffset = coordColumnOffset * blkSize;
538 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
539 for (
LO l = 0; l < blkSize; ++l) {
540 for (
LO k = 0; k < 3; ++k) {
541 for (
LO j = 0; j < 3; ++j) {
542 for (
LO i = 0; i < 5; ++i) {
543 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
544 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
549 for (
LO l = 0; l < blkSize; ++l) {
550 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
552 for (
int dim = 0; dim < numDimensions; ++dim) {
553 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
559 if (lCoarseNodesPerDim[1] - 2 > 0) {
560 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
561 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
563 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
564 rowIdx = coordRowIdx * blkSize;
565 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
566 columnOffset = coordColumnOffset * blkSize;
567 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
568 for (
LO l = 0; l < blkSize; ++l) {
569 for (
LO k = 0; k < 3; ++k) {
570 for (
LO j = 0; j < 5; ++j) {
571 for (
LO i = 0; i < 3; ++i) {
572 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
573 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
578 for (
LO l = 0; l < blkSize; ++l) {
579 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
581 for (
int dim = 0; dim < numDimensions; ++dim) {
582 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
586 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
587 rowIdx = coordRowIdx * blkSize;
588 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
589 columnOffset = coordColumnOffset * blkSize;
590 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
591 for (
LO l = 0; l < blkSize; ++l) {
592 for (
LO k = 0; k < 3; ++k) {
593 for (
LO j = 0; j < 5; ++j) {
594 for (
LO i = 0; i < 3; ++i) {
595 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
596 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
601 for (
LO l = 0; l < blkSize; ++l) {
602 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
604 for (
int dim = 0; dim < numDimensions; ++dim) {
605 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
609 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
610 rowIdx = coordRowIdx * blkSize;
611 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
612 columnOffset = coordColumnOffset * blkSize;
613 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
614 for (
LO l = 0; l < blkSize; ++l) {
615 for (
LO k = 0; k < 3; ++k) {
616 for (
LO j = 0; j < 5; ++j) {
617 for (
LO i = 0; i < 3; ++i) {
618 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
619 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
624 for (
LO l = 0; l < blkSize; ++l) {
625 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
627 for (
int dim = 0; dim < numDimensions; ++dim) {
628 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
632 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
633 rowIdx = coordRowIdx * blkSize;
634 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
635 columnOffset = coordColumnOffset * blkSize;
636 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
637 for (
LO l = 0; l < blkSize; ++l) {
638 for (
LO k = 0; k < 3; ++k) {
639 for (
LO j = 0; j < 5; ++j) {
640 for (
LO i = 0; i < 3; ++i) {
641 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
642 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
647 for (
LO l = 0; l < blkSize; ++l) {
648 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
650 for (
int dim = 0; dim < numDimensions; ++dim) {
651 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
657 if (lCoarseNodesPerDim[2] - 2 > 0) {
658 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
659 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
661 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
662 rowIdx = coordRowIdx * blkSize;
663 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
664 columnOffset = coordColumnOffset * blkSize;
665 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
666 for (
LO l = 0; l < blkSize; ++l) {
667 for (
LO k = 0; k < 5; ++k) {
668 for (
LO j = 0; j < 3; ++j) {
669 for (
LO i = 0; i < 3; ++i) {
670 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
671 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
676 for (
LO l = 0; l < blkSize; ++l) {
677 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
679 for (
int dim = 0; dim < numDimensions; ++dim) {
680 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
684 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
685 rowIdx = coordRowIdx * blkSize;
686 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
687 columnOffset = coordColumnOffset * blkSize;
688 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
689 for (
LO l = 0; l < blkSize; ++l) {
690 for (
LO k = 0; k < 5; ++k) {
691 for (
LO j = 0; j < 3; ++j) {
692 for (
LO i = 0; i < 3; ++i) {
693 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
694 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
699 for (
LO l = 0; l < blkSize; ++l) {
700 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
702 for (
int dim = 0; dim < numDimensions; ++dim) {
703 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
707 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
708 rowIdx = coordRowIdx * blkSize;
709 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
710 columnOffset = coordColumnOffset * blkSize;
711 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
712 for (
LO l = 0; l < blkSize; ++l) {
713 for (
LO k = 0; k < 5; ++k) {
714 for (
LO j = 0; j < 3; ++j) {
715 for (
LO i = 0; i < 3; ++i) {
716 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
717 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
722 for (
LO l = 0; l < blkSize; ++l) {
723 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
725 for (
int dim = 0; dim < numDimensions; ++dim) {
726 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
730 coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
731 rowIdx = coordRowIdx * blkSize;
732 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
733 columnOffset = coordColumnOffset * blkSize;
734 entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
735 for (
LO l = 0; l < blkSize; ++l) {
736 for (
LO k = 0; k < 5; ++k) {
737 for (
LO j = 0; j < 3; ++j) {
738 for (
LO i = 0; i < 3; ++i) {
739 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
740 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
745 for (
LO l = 0; l < blkSize; ++l) {
746 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
748 for (
int dim = 0; dim < numDimensions; ++dim) {
749 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
755 Kokkos::deep_copy(row_map, row_map_h);
756 Kokkos::deep_copy(entries, entries_h);
757 Kokkos::deep_copy(values, values_h);
760 LOTupleView lFineNodesPerDim_d(
"lFineNodesPerDim");
761 LOTupleView lCoarseNodesPerDim_d(
"lCoarseNodesPerDim");
763 typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
764 typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
766 for (
int dim = 0; dim < numDimensions; ++dim) {
767 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
768 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
771 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
772 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
775 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
776 Kokkos::parallel_for(
777 "Faces in 0-1 plane region R",
778 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
779 KOKKOS_LAMBDA(
const LO faceIdx) {
780 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
781 LO gridIdx[3] = {0, 0, 0};
782 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
786 for (
LO i = 0; i < faceIdx; i++) {
788 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
795 coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
796 rowIdx = coordRowIdx * blkSize;
797 coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
798 columnOffset = coordColumnOffset * blkSize;
799 entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
800 for (
LO l = 0; l < blkSize; ++l) {
801 for (
LO k = 0; k < 3; ++k) {
802 for (
LO j = 0; j < 5; ++j) {
803 for (
LO i = 0; i < 5; ++i) {
804 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
805 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
810 for (
LO l = 0; l < blkSize; ++l) {
811 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
813 for (
int dim = 0; dim < numDimensions; ++dim) {
814 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
818 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
819 rowIdx = coordRowIdx * blkSize;
820 coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
821 columnOffset = coordColumnOffset * blkSize;
822 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
823 for (
LO l = 0; l < blkSize; ++l) {
824 for (
LO k = 0; k < 3; ++k) {
825 for (
LO j = 0; j < 5; ++j) {
826 for (
LO i = 0; i < 5; ++i) {
827 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
828 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
833 for (
LO l = 0; l < blkSize; ++l) {
834 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
836 for (
int dim = 0; dim < numDimensions; ++dim) {
837 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
843 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
844 Kokkos::parallel_for(
845 "Faces in 0-2 plane region R",
846 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
847 KOKKOS_LAMBDA(
const LO faceIdx) {
848 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
849 LO gridIdx[3] = {0, 0, 0};
850 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
854 for (
LO i = 0; i < faceIdx; i++) {
856 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
863 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
864 rowIdx = coordRowIdx * blkSize;
865 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
866 columnOffset = coordColumnOffset * blkSize;
867 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
868 for (
LO l = 0; l < blkSize; ++l) {
869 for (
LO k = 0; k < 5; ++k) {
870 for (
LO j = 0; j < 3; ++j) {
871 for (
LO i = 0; i < 5; ++i) {
872 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
873 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
878 for (
LO l = 0; l < blkSize; ++l) {
879 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
881 for (
int dim = 0; dim < numDimensions; ++dim) {
882 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
886 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
887 rowIdx = coordRowIdx * blkSize;
888 coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
889 columnOffset = coordColumnOffset * blkSize;
890 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
891 for (
LO l = 0; l < blkSize; ++l) {
892 for (
LO k = 0; k < 5; ++k) {
893 for (
LO j = 0; j < 3; ++j) {
894 for (
LO i = 0; i < 5; ++i) {
895 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
896 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
901 for (
LO l = 0; l < blkSize; ++l) {
902 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
904 for (
int dim = 0; dim < numDimensions; ++dim) {
905 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
911 if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
912 Kokkos::parallel_for(
913 "Faces in 1-2 plane region R",
914 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
915 KOKKOS_LAMBDA(
const LO faceIdx) {
916 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
917 LO gridIdx[3] = {0, 0, 0};
918 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
922 for (
LO i = 0; i < faceIdx; i++) {
924 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
931 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
932 rowIdx = coordRowIdx * blkSize;
933 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
934 columnOffset = coordColumnOffset * blkSize;
935 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
936 for (
LO l = 0; l < blkSize; ++l) {
937 for (
LO k = 0; k < 5; ++k) {
938 for (
LO j = 0; j < 5; ++j) {
939 for (
LO i = 0; i < 3; ++i) {
940 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
941 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
946 for (
LO l = 0; l < blkSize; ++l) {
947 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
949 for (
int dim = 0; dim < numDimensions; ++dim) {
950 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
954 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
955 rowIdx = coordRowIdx * blkSize;
956 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
957 columnOffset = coordColumnOffset * blkSize;
958 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
959 for (
LO l = 0; l < blkSize; ++l) {
960 for (
LO k = 0; k < 5; ++k) {
961 for (
LO j = 0; j < 5; ++j) {
962 for (
LO i = 0; i < 3; ++i) {
963 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
964 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
969 for (
LO l = 0; l < blkSize; ++l) {
970 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
972 for (
int dim = 0; dim < numDimensions; ++dim) {
973 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
978 if (numInteriors > 0) {
983 LO countRowEntries = 0;
984 Kokkos::View<LO[125]> coordColumnOffsets_d(
"coordColOffset");
985 auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
987 for (
LO k = -2; k < 3; ++k) {
988 for (
LO j = -2; j < 3; ++j) {
989 for (
LO i = -2; i < 3; ++i) {
990 coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
995 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
998 Kokkos::View<impl_scalar_type*> interiorValues_d(
"interiorValues", 125);
999 auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1000 for (
LO k = 0; k < 5; ++k) {
1001 for (
LO j = 0; j < 5; ++j) {
1002 for (
LO i = 0; i < 5; ++i) {
1003 interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1008 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1010 Kokkos::parallel_for(
1011 "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1012 KOKKOS_LAMBDA(
const LO interiorIdx) {
1013 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1021 for (
LO i = 0; i < interiorIdx; i++) {
1023 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1026 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1033 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1034 rowIdx = coordRowIdx * blkSize;
1035 coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1036 columnOffset = coordColumnOffset * blkSize;
1037 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1038 for (
LO l = 0; l < blkSize; ++l) {
1039 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1044 for (
LO l = 0; l < blkSize; ++l) {
1045 for (
LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1046 entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1047 values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1050 for (
int dim = 0; dim < numDimensions; ++dim) {
1051 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1057 local_graph_type localGraph(entries, row_map);
1058 local_matrix_type localR(
"R", numCols, values, localGraph);
1060 R = MatrixFactory::Build(localR,
1071 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1072 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static const NoFactory * get()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
bool isSublist(const std::string &name) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
RCP< const ParameterList > GetValidParameterList() const
Input.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
int GetLevelID() const
Return level number.
std::string toString(const T &t)