10 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
11 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
13 #include "Kokkos_UnorderedMap.hpp"
24 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
30 "Generating factory of the matrix A");
32 "Number of spatial dimensions in the problem.");
34 "Number of local nodes per spatial dimension on the fine grid.");
36 "Fine level nullspace used to construct the coarse level nullspace.");
38 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
39 validParamList->
set<
bool>(
"keep coarse coords",
false,
"Flag to keep coordinates for special coarse grid solve");
41 return validParamList;
44 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
47 Input(fineLevel,
"A");
48 Input(fineLevel,
"numDimensions");
49 Input(fineLevel,
"lNodesPerDim");
50 Input(fineLevel,
"Nullspace");
51 Input(fineLevel,
"Coordinates");
55 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
60 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
61 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
67 *out <<
"Starting RegionRFactory_kokkos::Build." << std::endl;
70 const int numDimensions = Get<int>(fineLevel,
"numDimensions");
73 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel,
"lNodesPerDim");
74 for (
int dim = 0; dim < numDimensions; ++dim) {
75 lFineNodesPerDim[dim] = lNodesPerDim[dim];
78 *out <<
"numDimensions " << numDimensions <<
" and lFineNodesPerDim: " << lFineNodesPerDim
82 if (numDimensions < 1 || numDimensions > 3) {
83 throw std::runtime_error(
"numDimensions must be 1, 2 or 3!");
85 for (
int dim = 0; dim < numDimensions; ++dim) {
86 if (lFineNodesPerDim[dim] % 3 != 1) {
87 throw std::runtime_error(
"The number of fine node in each direction need to be 3n+1");
92 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel,
"A");
95 fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel,
"Coordinates");
96 if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
97 throw std::runtime_error(
"The number of vectors in the coordinates is not equal to numDimensions!");
105 if (numDimensions == 1) {
106 throw std::runtime_error(
"RegionRFactory_kokkos no implemented for 1D case yet.");
107 }
else if (numDimensions == 2) {
108 throw std::runtime_error(
"RegionRFactory_kokkos no implemented for 2D case yet.");
109 }
else if (numDimensions == 3) {
110 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
111 R, coarseCoordinates, lCoarseNodesPerDim);
118 if (pL.
isSublist(
"matrixmatrix: kernel params"))
124 *out <<
"Compute P=R^t" << std::endl;
126 Tparams->
set(
"compute global constants: temporaries", Tparams->
get(
"compute global constants: temporaries",
false));
127 Tparams->
set(
"compute global constants", Tparams->
get(
"compute global constants",
false));
131 *out <<
"Compute coarse nullspace" << std::endl;
132 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel,
"Nullspace");
133 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
134 fineNullspace->getNumVectors());
138 *out <<
"Set data on coarse level" << std::endl;
139 Set(coarseLevel,
"numDimensions", numDimensions);
140 Set(coarseLevel,
"lNodesPerDim", lCoarseNodesPerDim);
141 Set(coarseLevel,
"Nullspace", coarseNullspace);
142 Set(coarseLevel,
"Coordinates", coarseCoordinates);
143 if (pL.
get<
bool>(
"keep coarse coords")) {
147 R->SetFixedBlockSize(A->GetFixedBlockSize());
148 P->SetFixedBlockSize(A->GetFixedBlockSize());
150 Set(coarseLevel,
"R", R);
151 Set(coarseLevel,
"P", P);
155 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
164 using local_matrix_type =
typename CrsMatrix::local_matrix_type;
165 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
166 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
167 using entries_type =
typename local_matrix_type::index_type::non_const_type;
168 using values_type =
typename local_matrix_type::values_type::non_const_type;
169 #if KOKKOS_VERSION >= 40799
170 using impl_scalar_type =
typename KokkosKernels::ArithTraits<Scalar>::val_type;
172 using impl_scalar_type =
typename Kokkos::ArithTraits<Scalar>::val_type;
177 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
178 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
185 for (
int dim = 0; dim < numDimensions; ++dim) {
186 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
188 *out <<
"lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
191 const LO blkSize = A->GetFixedBlockSize();
192 *out <<
"blkSize " << blkSize << std::endl;
196 const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
197 const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
202 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
205 A->getRowMap()->getIndexBase(),
206 A->getRowMap()->getComm());
208 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
210 lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
211 A->getRowMap()->getIndexBase(),
212 A->getRowMap()->getComm());
218 auto fineCoordsView = fineCoordinates->getLocalViewDevice(Xpetra::Access::ReadOnly);
219 auto coarseCoordsView = coarseCoordinates->getLocalViewDevice(Xpetra::Access::OverwriteAll);
223 for (
int dim = 0; dim < numDimensions; ++dim) {
224 fineCoordData[dim] = fineCoordinates->getData(dim);
225 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
232 const LO cornerStencilLength = 27;
233 const LO edgeStencilLength = 45;
234 const LO faceStencilLength = 75;
235 const LO interiorStencilLength = 125;
238 const LO numCorners = 8;
239 const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
240 const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
241 const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
243 const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
248 *out <<
"R statistics:" << std::endl
249 <<
" -numRows= " << numRows << std::endl
250 <<
" -numCols= " << numCols << std::endl
251 <<
" -nnz= " << nnz << std::endl;
253 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing(
"row_map"), numRows + 1);
254 typename row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(row_map);
256 entries_type entries(Kokkos::ViewAllocateWithoutInitializing(
"entries"), nnz);
257 typename entries_type::host_mirror_type entries_h = Kokkos::create_mirror_view(entries);
259 values_type values(Kokkos::ViewAllocateWithoutInitializing(
"values"), nnz);
260 typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
265 Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
270 const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
271 const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
272 const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
274 const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
275 const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
282 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
283 for (
LO l = 0; l < blkSize; ++l) {
284 for (
LO k = 0; k < 3; ++k) {
285 for (
LO j = 0; j < 3; ++j) {
286 for (
LO i = 0; i < 3; ++i) {
287 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
288 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
293 for (
LO l = 0; l < blkSize; ++l) {
294 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
296 for (
int dim = 0; dim < numDimensions; ++dim) {
297 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
301 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
302 rowIdx = coordRowIdx * blkSize;
303 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
304 columnOffset = coordColumnOffset * blkSize;
305 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
306 for (
LO l = 0; l < blkSize; ++l) {
307 for (
LO k = 0; k < 3; ++k) {
308 for (
LO j = 0; j < 3; ++j) {
309 for (
LO i = 0; i < 3; ++i) {
310 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
311 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
316 for (
LO l = 0; l < blkSize; ++l) {
317 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
319 for (
int dim = 0; dim < numDimensions; ++dim) {
320 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
324 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
325 rowIdx = coordRowIdx * blkSize;
326 coordColumnOffset = (lFineNodesPerDim[0] - 1);
327 columnOffset = coordColumnOffset * blkSize;
328 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
329 for (
LO l = 0; l < blkSize; ++l) {
330 for (
LO k = 0; k < 3; ++k) {
331 for (
LO j = 0; j < 3; ++j) {
332 for (
LO i = 0; i < 3; ++i) {
333 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
334 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
339 for (
LO l = 0; l < blkSize; ++l) {
340 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
342 for (
int dim = 0; dim < numDimensions; ++dim) {
343 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
347 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
348 rowIdx = coordRowIdx * blkSize;
349 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
350 columnOffset = coordColumnOffset * blkSize;
351 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
352 for (
LO l = 0; l < blkSize; ++l) {
353 for (
LO k = 0; k < 3; ++k) {
354 for (
LO j = 0; j < 3; ++j) {
355 for (
LO i = 0; i < 3; ++i) {
356 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
357 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
362 for (
LO l = 0; l < blkSize; ++l) {
363 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
365 for (
int dim = 0; dim < numDimensions; ++dim) {
366 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
370 coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
371 rowIdx = coordRowIdx * blkSize;
372 coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
373 columnOffset = coordColumnOffset * blkSize;
374 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
375 for (
LO l = 0; l < blkSize; ++l) {
376 for (
LO k = 0; k < 3; ++k) {
377 for (
LO j = 0; j < 3; ++j) {
378 for (
LO i = 0; i < 3; ++i) {
379 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
380 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
385 for (
LO l = 0; l < blkSize; ++l) {
386 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
388 for (
int dim = 0; dim < numDimensions; ++dim) {
389 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
393 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
394 rowIdx = coordRowIdx * blkSize;
395 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
396 columnOffset = coordColumnOffset * blkSize;
397 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
398 for (
LO l = 0; l < blkSize; ++l) {
399 for (
LO k = 0; k < 3; ++k) {
400 for (
LO j = 0; j < 3; ++j) {
401 for (
LO i = 0; i < 3; ++i) {
402 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
403 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
408 for (
LO l = 0; l < blkSize; ++l) {
409 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
411 for (
int dim = 0; dim < numDimensions; ++dim) {
412 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
416 coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
417 rowIdx = coordRowIdx * blkSize;
418 coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
419 columnOffset = coordColumnOffset * blkSize;
420 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
421 cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
423 for (
LO l = 0; l < blkSize; ++l) {
424 for (
LO k = 0; k < 3; ++k) {
425 for (
LO j = 0; j < 3; ++j) {
426 for (
LO i = 0; i < 3; ++i) {
427 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
428 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
433 for (
LO l = 0; l < blkSize; ++l) {
434 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
436 for (
int dim = 0; dim < numDimensions; ++dim) {
437 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
441 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
442 rowIdx = coordRowIdx * blkSize;
443 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
444 columnOffset = coordColumnOffset * blkSize;
445 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
446 for (
LO l = 0; l < blkSize; ++l) {
447 for (
LO k = 0; k < 3; ++k) {
448 for (
LO j = 0; j < 3; ++j) {
449 for (
LO i = 0; i < 3; ++i) {
450 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
451 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
456 for (
LO l = 0; l < blkSize; ++l) {
457 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
459 for (
int dim = 0; dim < numDimensions; ++dim) {
460 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
465 if (lCoarseNodesPerDim[0] - 2 > 0) {
466 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
467 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
469 coordRowIdx = (edgeIdx + 1);
470 rowIdx = coordRowIdx * blkSize;
471 coordColumnOffset = (edgeIdx + 1) * 3;
472 columnOffset = coordColumnOffset * blkSize;
473 entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
474 for (
LO l = 0; l < blkSize; ++l) {
475 for (
LO k = 0; k < 3; ++k) {
476 for (
LO j = 0; j < 3; ++j) {
477 for (
LO i = 0; i < 5; ++i) {
478 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
479 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
484 for (
LO l = 0; l < blkSize; ++l) {
485 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
487 for (
int dim = 0; dim < numDimensions; ++dim) {
488 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
492 coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
493 rowIdx = coordRowIdx * blkSize;
494 coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
495 columnOffset = coordColumnOffset * blkSize;
496 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
497 for (
LO l = 0; l < blkSize; ++l) {
498 for (
LO k = 0; k < 3; ++k) {
499 for (
LO j = 0; j < 3; ++j) {
500 for (
LO i = 0; i < 5; ++i) {
501 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
502 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
507 for (
LO l = 0; l < blkSize; ++l) {
508 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
510 for (
int dim = 0; dim < numDimensions; ++dim) {
511 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
515 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
516 rowIdx = coordRowIdx * blkSize;
517 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
518 columnOffset = coordColumnOffset * blkSize;
519 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
520 for (
LO l = 0; l < blkSize; ++l) {
521 for (
LO k = 0; k < 3; ++k) {
522 for (
LO j = 0; j < 3; ++j) {
523 for (
LO i = 0; i < 5; ++i) {
524 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
525 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
530 for (
LO l = 0; l < blkSize; ++l) {
531 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
533 for (
int dim = 0; dim < numDimensions; ++dim) {
534 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
538 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
539 rowIdx = coordRowIdx * blkSize;
540 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
541 columnOffset = coordColumnOffset * blkSize;
542 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
543 for (
LO l = 0; l < blkSize; ++l) {
544 for (
LO k = 0; k < 3; ++k) {
545 for (
LO j = 0; j < 3; ++j) {
546 for (
LO i = 0; i < 5; ++i) {
547 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
548 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
553 for (
LO l = 0; l < blkSize; ++l) {
554 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
556 for (
int dim = 0; dim < numDimensions; ++dim) {
557 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
563 if (lCoarseNodesPerDim[1] - 2 > 0) {
564 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
565 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
567 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
568 rowIdx = coordRowIdx * blkSize;
569 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
570 columnOffset = coordColumnOffset * blkSize;
571 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
572 for (
LO l = 0; l < blkSize; ++l) {
573 for (
LO k = 0; k < 3; ++k) {
574 for (
LO j = 0; j < 5; ++j) {
575 for (
LO i = 0; i < 3; ++i) {
576 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
577 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
582 for (
LO l = 0; l < blkSize; ++l) {
583 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
585 for (
int dim = 0; dim < numDimensions; ++dim) {
586 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
590 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
591 rowIdx = coordRowIdx * blkSize;
592 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
593 columnOffset = coordColumnOffset * blkSize;
594 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
595 for (
LO l = 0; l < blkSize; ++l) {
596 for (
LO k = 0; k < 3; ++k) {
597 for (
LO j = 0; j < 5; ++j) {
598 for (
LO i = 0; i < 3; ++i) {
599 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
600 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
605 for (
LO l = 0; l < blkSize; ++l) {
606 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
608 for (
int dim = 0; dim < numDimensions; ++dim) {
609 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
613 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
614 rowIdx = coordRowIdx * blkSize;
615 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
616 columnOffset = coordColumnOffset * blkSize;
617 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
618 for (
LO l = 0; l < blkSize; ++l) {
619 for (
LO k = 0; k < 3; ++k) {
620 for (
LO j = 0; j < 5; ++j) {
621 for (
LO i = 0; i < 3; ++i) {
622 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
623 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
628 for (
LO l = 0; l < blkSize; ++l) {
629 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
631 for (
int dim = 0; dim < numDimensions; ++dim) {
632 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
636 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
637 rowIdx = coordRowIdx * blkSize;
638 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
639 columnOffset = coordColumnOffset * blkSize;
640 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
641 for (
LO l = 0; l < blkSize; ++l) {
642 for (
LO k = 0; k < 3; ++k) {
643 for (
LO j = 0; j < 5; ++j) {
644 for (
LO i = 0; i < 3; ++i) {
645 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
646 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
651 for (
LO l = 0; l < blkSize; ++l) {
652 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
654 for (
int dim = 0; dim < numDimensions; ++dim) {
655 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
661 if (lCoarseNodesPerDim[2] - 2 > 0) {
662 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
663 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
665 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
666 rowIdx = coordRowIdx * blkSize;
667 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
668 columnOffset = coordColumnOffset * blkSize;
669 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
670 for (
LO l = 0; l < blkSize; ++l) {
671 for (
LO k = 0; k < 5; ++k) {
672 for (
LO j = 0; j < 3; ++j) {
673 for (
LO i = 0; i < 3; ++i) {
674 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
675 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
680 for (
LO l = 0; l < blkSize; ++l) {
681 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
683 for (
int dim = 0; dim < numDimensions; ++dim) {
684 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
688 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
689 rowIdx = coordRowIdx * blkSize;
690 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
691 columnOffset = coordColumnOffset * blkSize;
692 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
693 for (
LO l = 0; l < blkSize; ++l) {
694 for (
LO k = 0; k < 5; ++k) {
695 for (
LO j = 0; j < 3; ++j) {
696 for (
LO i = 0; i < 3; ++i) {
697 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
698 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
703 for (
LO l = 0; l < blkSize; ++l) {
704 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
706 for (
int dim = 0; dim < numDimensions; ++dim) {
707 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
711 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
712 rowIdx = coordRowIdx * blkSize;
713 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
714 columnOffset = coordColumnOffset * blkSize;
715 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
716 for (
LO l = 0; l < blkSize; ++l) {
717 for (
LO k = 0; k < 5; ++k) {
718 for (
LO j = 0; j < 3; ++j) {
719 for (
LO i = 0; i < 3; ++i) {
720 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
721 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
726 for (
LO l = 0; l < blkSize; ++l) {
727 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
729 for (
int dim = 0; dim < numDimensions; ++dim) {
730 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
734 coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
735 rowIdx = coordRowIdx * blkSize;
736 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
737 columnOffset = coordColumnOffset * blkSize;
738 entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
739 for (
LO l = 0; l < blkSize; ++l) {
740 for (
LO k = 0; k < 5; ++k) {
741 for (
LO j = 0; j < 3; ++j) {
742 for (
LO i = 0; i < 3; ++i) {
743 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
744 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
749 for (
LO l = 0; l < blkSize; ++l) {
750 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
752 for (
int dim = 0; dim < numDimensions; ++dim) {
753 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
759 Kokkos::deep_copy(row_map, row_map_h);
760 Kokkos::deep_copy(entries, entries_h);
761 Kokkos::deep_copy(values, values_h);
764 LOTupleView lFineNodesPerDim_d(
"lFineNodesPerDim");
765 LOTupleView lCoarseNodesPerDim_d(
"lCoarseNodesPerDim");
767 typename Kokkos::View<LO[3], device_type>::host_mirror_type lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
768 typename Kokkos::View<LO[3], device_type>::host_mirror_type lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
770 for (
int dim = 0; dim < numDimensions; ++dim) {
771 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
772 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
775 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
776 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
779 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
780 Kokkos::parallel_for(
781 "Faces in 0-1 plane region R",
782 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
783 KOKKOS_LAMBDA(
const LO faceIdx) {
784 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
785 LO gridIdx[3] = {0, 0, 0};
786 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
790 for (
LO i = 0; i < faceIdx; i++) {
792 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
799 coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
800 rowIdx = coordRowIdx * blkSize;
801 coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
802 columnOffset = coordColumnOffset * blkSize;
803 entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
804 for (
LO l = 0; l < blkSize; ++l) {
805 for (
LO k = 0; k < 3; ++k) {
806 for (
LO j = 0; j < 5; ++j) {
807 for (
LO i = 0; i < 5; ++i) {
808 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
809 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
814 for (
LO l = 0; l < blkSize; ++l) {
815 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
817 for (
int dim = 0; dim < numDimensions; ++dim) {
818 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
822 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
823 rowIdx = coordRowIdx * blkSize;
824 coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
825 columnOffset = coordColumnOffset * blkSize;
826 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
827 for (
LO l = 0; l < blkSize; ++l) {
828 for (
LO k = 0; k < 3; ++k) {
829 for (
LO j = 0; j < 5; ++j) {
830 for (
LO i = 0; i < 5; ++i) {
831 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
832 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
837 for (
LO l = 0; l < blkSize; ++l) {
838 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
840 for (
int dim = 0; dim < numDimensions; ++dim) {
841 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
847 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
848 Kokkos::parallel_for(
849 "Faces in 0-2 plane region R",
850 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
851 KOKKOS_LAMBDA(
const LO faceIdx) {
852 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
853 LO gridIdx[3] = {0, 0, 0};
854 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
858 for (
LO i = 0; i < faceIdx; i++) {
860 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
867 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
868 rowIdx = coordRowIdx * blkSize;
869 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
870 columnOffset = coordColumnOffset * blkSize;
871 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
872 for (
LO l = 0; l < blkSize; ++l) {
873 for (
LO k = 0; k < 5; ++k) {
874 for (
LO j = 0; j < 3; ++j) {
875 for (
LO i = 0; i < 5; ++i) {
876 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
877 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
882 for (
LO l = 0; l < blkSize; ++l) {
883 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
885 for (
int dim = 0; dim < numDimensions; ++dim) {
886 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
890 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
891 rowIdx = coordRowIdx * blkSize;
892 coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
893 columnOffset = coordColumnOffset * blkSize;
894 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
895 for (
LO l = 0; l < blkSize; ++l) {
896 for (
LO k = 0; k < 5; ++k) {
897 for (
LO j = 0; j < 3; ++j) {
898 for (
LO i = 0; i < 5; ++i) {
899 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
900 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
905 for (
LO l = 0; l < blkSize; ++l) {
906 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
908 for (
int dim = 0; dim < numDimensions; ++dim) {
909 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
915 if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
916 Kokkos::parallel_for(
917 "Faces in 1-2 plane region R",
918 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
919 KOKKOS_LAMBDA(
const LO faceIdx) {
920 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
921 LO gridIdx[3] = {0, 0, 0};
922 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
926 for (
LO i = 0; i < faceIdx; i++) {
928 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
935 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
936 rowIdx = coordRowIdx * blkSize;
937 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
938 columnOffset = coordColumnOffset * blkSize;
939 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
940 for (
LO l = 0; l < blkSize; ++l) {
941 for (
LO k = 0; k < 5; ++k) {
942 for (
LO j = 0; j < 5; ++j) {
943 for (
LO i = 0; i < 3; ++i) {
944 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
945 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
950 for (
LO l = 0; l < blkSize; ++l) {
951 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
953 for (
int dim = 0; dim < numDimensions; ++dim) {
954 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
958 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
959 rowIdx = coordRowIdx * blkSize;
960 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
961 columnOffset = coordColumnOffset * blkSize;
962 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
963 for (
LO l = 0; l < blkSize; ++l) {
964 for (
LO k = 0; k < 5; ++k) {
965 for (
LO j = 0; j < 5; ++j) {
966 for (
LO i = 0; i < 3; ++i) {
967 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
968 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
973 for (
LO l = 0; l < blkSize; ++l) {
974 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
976 for (
int dim = 0; dim < numDimensions; ++dim) {
977 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
982 if (numInteriors > 0) {
987 LO countRowEntries = 0;
988 Kokkos::View<LO[125]> coordColumnOffsets_d(
"coordColOffset");
989 auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
991 for (
LO k = -2; k < 3; ++k) {
992 for (
LO j = -2; j < 3; ++j) {
993 for (
LO i = -2; i < 3; ++i) {
994 coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
999 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1002 Kokkos::View<impl_scalar_type*> interiorValues_d(
"interiorValues", 125);
1003 auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1004 for (
LO k = 0; k < 5; ++k) {
1005 for (
LO j = 0; j < 5; ++j) {
1006 for (
LO i = 0; i < 5; ++i) {
1007 interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1012 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1014 Kokkos::parallel_for(
1015 "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1016 KOKKOS_LAMBDA(
const LO interiorIdx) {
1017 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1025 for (
LO i = 0; i < interiorIdx; i++) {
1027 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1030 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1037 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1038 rowIdx = coordRowIdx * blkSize;
1039 coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1040 columnOffset = coordColumnOffset * blkSize;
1041 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1042 for (
LO l = 0; l < blkSize; ++l) {
1043 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1048 for (
LO l = 0; l < blkSize; ++l) {
1049 for (
LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1050 entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1051 values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1054 for (
int dim = 0; dim < numDimensions; ++dim) {
1055 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1061 local_graph_type localGraph(entries, row_map);
1062 local_matrix_type localR(
"R", numCols, values, localGraph);
1064 R = MatrixFactory::Build(localR,
1075 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1076 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static const NoFactory * get()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
bool isSublist(const std::string &name) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
RCP< const ParameterList > GetValidParameterList() const
Input.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
int GetLevelID() const
Return level number.
std::string toString(const T &t)