46 #ifndef MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_REGIONRFACTORY_KOKKOS_DEF_HPP
49 #include "Kokkos_UnorderedMap.hpp"
60 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
66 "Generating factory of the matrix A");
68 "Number of spatial dimensions in the problem.");
70 "Number of local nodes per spatial dimension on the fine grid.");
72 "Fine level nullspace used to construct the coarse level nullspace.");
74 "Fine level coordinates used to construct piece-wise linear prolongator and coarse level coordinates.");
75 validParamList->
set<
bool>(
"keep coarse coords",
false,
"Flag to keep coordinates for special coarse grid solve");
77 return validParamList;
80 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
83 Input(fineLevel,
"A");
84 Input(fineLevel,
"numDimensions");
85 Input(fineLevel,
"lNodesPerDim");
86 Input(fineLevel,
"Nullspace");
87 Input(fineLevel,
"Coordinates");
91 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
96 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
97 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
103 *out <<
"Starting RegionRFactory_kokkos::Build." << std::endl;
106 const int numDimensions = Get<int>(fineLevel,
"numDimensions");
109 Array<LO> lNodesPerDim = Get<Array<LO> >(fineLevel,
"lNodesPerDim");
110 for (
int dim = 0; dim < numDimensions; ++dim) {
111 lFineNodesPerDim[dim] = lNodesPerDim[dim];
114 *out <<
"numDimensions " << numDimensions <<
" and lFineNodesPerDim: " << lFineNodesPerDim
118 if (numDimensions < 1 || numDimensions > 3) {
119 throw std::runtime_error(
"numDimensions must be 1, 2 or 3!");
121 for (
int dim = 0; dim < numDimensions; ++dim) {
122 if (lFineNodesPerDim[dim] % 3 != 1) {
123 throw std::runtime_error(
"The number of fine node in each direction need to be 3n+1");
128 const RCP<Matrix> A = Get<RCP<Matrix> >(fineLevel,
"A");
131 fineCoordinates = Get<RCP<realvaluedmultivector_type> >(fineLevel,
"Coordinates");
132 if (static_cast<int>(fineCoordinates->getNumVectors()) != numDimensions) {
133 throw std::runtime_error(
"The number of vectors in the coordinates is not equal to numDimensions!");
141 if (numDimensions == 1) {
142 throw std::runtime_error(
"RegionRFactory_kokkos no implemented for 1D case yet.");
143 }
else if (numDimensions == 2) {
144 throw std::runtime_error(
"RegionRFactory_kokkos no implemented for 2D case yet.");
145 }
else if (numDimensions == 3) {
146 Build3D(numDimensions, lFineNodesPerDim, A, fineCoordinates,
147 R, coarseCoordinates, lCoarseNodesPerDim);
154 if (pL.
isSublist(
"matrixmatrix: kernel params"))
160 *out <<
"Compute P=R^t" << std::endl;
162 Tparams->
set(
"compute global constants: temporaries", Tparams->
get(
"compute global constants: temporaries",
false));
163 Tparams->
set(
"compute global constants", Tparams->
get(
"compute global constants",
false));
167 *out <<
"Compute coarse nullspace" << std::endl;
168 RCP<MultiVector> fineNullspace = Get<RCP<MultiVector> >(fineLevel,
"Nullspace");
169 RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(R->getRowMap(),
170 fineNullspace->getNumVectors());
174 *out <<
"Set data on coarse level" << std::endl;
175 Set(coarseLevel,
"numDimensions", numDimensions);
176 Set(coarseLevel,
"lNodesPerDim", lCoarseNodesPerDim);
177 Set(coarseLevel,
"Nullspace", coarseNullspace);
178 Set(coarseLevel,
"Coordinates", coarseCoordinates);
179 if (pL.
get<
bool>(
"keep coarse coords")) {
183 R->SetFixedBlockSize(A->GetFixedBlockSize());
184 P->SetFixedBlockSize(A->GetFixedBlockSize());
186 Set(coarseLevel,
"R", R);
187 Set(coarseLevel,
"P", P);
191 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
200 using local_matrix_type =
typename CrsMatrix::local_matrix_type;
201 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
202 using row_map_type =
typename local_matrix_type::row_map_type::non_const_type;
203 using entries_type =
typename local_matrix_type::index_type::non_const_type;
204 using values_type =
typename local_matrix_type::values_type::non_const_type;
205 using impl_scalar_type =
typename Kokkos::ArithTraits<Scalar>::val_type;
209 if (
const char* dbg = std::getenv(
"MUELU_REGIONRFACTORY_DEBUG")) {
210 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
217 for (
int dim = 0; dim < numDimensions; ++dim) {
218 lCoarseNodesPerDim[dim] = lFineNodesPerDim[dim] / 3 + 1;
220 *out <<
"lCoarseNodesPerDim " << lCoarseNodesPerDim << std::endl;
223 const LO blkSize = A->GetFixedBlockSize();
224 *out <<
"blkSize " << blkSize << std::endl;
228 const LO numRows = blkSize * lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2];
229 const LO numCols = blkSize * lFineNodesPerDim[0] * lFineNodesPerDim[1] * lFineNodesPerDim[2];
234 RCP<Map> rowMap = MapFactory::Build(A->getRowMap()->lib(),
237 A->getRowMap()->getIndexBase(),
238 A->getRowMap()->getComm());
240 RCP<Map> coordRowMap = MapFactory::Build(A->getRowMap()->lib(),
242 lCoarseNodesPerDim[0] * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[2],
243 A->getRowMap()->getIndexBase(),
244 A->getRowMap()->getComm());
250 auto fineCoordsView = fineCoordinates->getDeviceLocalView(Xpetra::Access::ReadOnly);
251 auto coarseCoordsView = coarseCoordinates->getDeviceLocalView(Xpetra::Access::OverwriteAll);
255 for (
int dim = 0; dim < numDimensions; ++dim) {
256 fineCoordData[dim] = fineCoordinates->getData(dim);
257 coarseCoordData[dim] = coarseCoordinates->getDataNonConst(dim);
264 const LO cornerStencilLength = 27;
265 const LO edgeStencilLength = 45;
266 const LO faceStencilLength = 75;
267 const LO interiorStencilLength = 125;
270 const LO numCorners = 8;
271 const LO numEdges = 4 * (lCoarseNodesPerDim[0] - 2) + 4 * (lCoarseNodesPerDim[1] - 2) + 4 * (lCoarseNodesPerDim[2] - 2);
272 const LO numFaces = 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) + 2 * (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[2] - 2) + 2 * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
273 const LO numInteriors = (lCoarseNodesPerDim[0] - 2) * (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[2] - 2);
275 const LO nnz = (numCorners * cornerStencilLength + numEdges * edgeStencilLength + numFaces * faceStencilLength + numInteriors * interiorStencilLength) * blkSize;
280 *out <<
"R statistics:" << std::endl
281 <<
" -numRows= " << numRows << std::endl
282 <<
" -numCols= " << numCols << std::endl
283 <<
" -nnz= " << nnz << std::endl;
285 row_map_type row_map(Kokkos::ViewAllocateWithoutInitializing(
"row_map"), numRows + 1);
286 typename row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(row_map);
288 entries_type entries(Kokkos::ViewAllocateWithoutInitializing(
"entries"), nnz);
289 typename entries_type::HostMirror entries_h = Kokkos::create_mirror_view(entries);
291 values_type values(Kokkos::ViewAllocateWithoutInitializing(
"values"), nnz);
292 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
297 Array<SC> coeffs({1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0});
302 const LO edgeLineOffset = 2 * cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength;
303 const LO faceLineOffset = 2 * edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength;
304 const LO interiorLineOffset = 2 * faceStencilLength + (lCoarseNodesPerDim[0] - 2) * interiorStencilLength;
306 const LO facePlaneOffset = 2 * edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset;
307 const LO interiorPlaneOffset = 2 * faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset;
314 LO coordRowIdx = 0, rowIdx = 0, coordColumnOffset = 0, columnOffset = 0, entryOffset = 0;
315 for (
LO l = 0; l < blkSize; ++l) {
316 for (
LO k = 0; k < 3; ++k) {
317 for (
LO j = 0; j < 3; ++j) {
318 for (
LO i = 0; i < 3; ++i) {
319 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
320 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i + 2];
325 for (
LO l = 0; l < blkSize; ++l) {
326 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
328 for (
int dim = 0; dim < numDimensions; ++dim) {
329 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
333 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
334 rowIdx = coordRowIdx * blkSize;
335 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
336 columnOffset = coordColumnOffset * blkSize;
337 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
338 for (
LO l = 0; l < blkSize; ++l) {
339 for (
LO k = 0; k < 3; ++k) {
340 for (
LO j = 0; j < 3; ++j) {
341 for (
LO i = 0; i < 3; ++i) {
342 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
343 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
348 for (
LO l = 0; l < blkSize; ++l) {
349 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
351 for (
int dim = 0; dim < numDimensions; ++dim) {
352 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
356 coordRowIdx = (lCoarseNodesPerDim[0] - 1);
357 rowIdx = coordRowIdx * blkSize;
358 coordColumnOffset = (lFineNodesPerDim[0] - 1);
359 columnOffset = coordColumnOffset * blkSize;
360 entryOffset = (cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) * blkSize;
361 for (
LO l = 0; l < blkSize; ++l) {
362 for (
LO k = 0; k < 3; ++k) {
363 for (
LO j = 0; j < 3; ++j) {
364 for (
LO i = 0; i < 3; ++i) {
365 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
366 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
371 for (
LO l = 0; l < blkSize; ++l) {
372 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
374 for (
int dim = 0; dim < numDimensions; ++dim) {
375 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
379 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
380 rowIdx = coordRowIdx * blkSize;
381 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
382 columnOffset = coordColumnOffset * blkSize;
383 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
384 for (
LO l = 0; l < blkSize; ++l) {
385 for (
LO k = 0; k < 3; ++k) {
386 for (
LO j = 0; j < 3; ++j) {
387 for (
LO i = 0; i < 3; ++i) {
388 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
389 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
394 for (
LO l = 0; l < blkSize; ++l) {
395 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
397 for (
int dim = 0; dim < numDimensions; ++dim) {
398 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
402 coordRowIdx = (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0];
403 rowIdx = coordRowIdx * blkSize;
404 coordColumnOffset = (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0];
405 columnOffset = coordColumnOffset * blkSize;
406 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset) * blkSize;
407 for (
LO l = 0; l < blkSize; ++l) {
408 for (
LO k = 0; k < 3; ++k) {
409 for (
LO j = 0; j < 3; ++j) {
410 for (
LO i = 0; i < 3; ++i) {
411 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
412 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
417 for (
LO l = 0; l < blkSize; ++l) {
418 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
420 for (
int dim = 0; dim < numDimensions; ++dim) {
421 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
425 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
426 rowIdx = coordRowIdx * blkSize;
427 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
428 columnOffset = coordColumnOffset * blkSize;
429 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
430 for (
LO l = 0; l < blkSize; ++l) {
431 for (
LO k = 0; k < 3; ++k) {
432 for (
LO j = 0; j < 3; ++j) {
433 for (
LO i = 0; i < 3; ++i) {
434 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
435 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
440 for (
LO l = 0; l < blkSize; ++l) {
441 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
443 for (
int dim = 0; dim < numDimensions; ++dim) {
444 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
448 coordRowIdx = (lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
449 rowIdx = coordRowIdx * blkSize;
450 coordColumnOffset = (lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
451 columnOffset = coordColumnOffset * blkSize;
452 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset +
453 cornerStencilLength + (lCoarseNodesPerDim[0] - 2) * edgeStencilLength) *
455 for (
LO l = 0; l < blkSize; ++l) {
456 for (
LO k = 0; k < 3; ++k) {
457 for (
LO j = 0; j < 3; ++j) {
458 for (
LO i = 0; i < 3; ++i) {
459 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
460 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
465 for (
LO l = 0; l < blkSize; ++l) {
466 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
468 for (
int dim = 0; dim < numDimensions; ++dim) {
469 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
473 coordRowIdx += (lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
474 rowIdx = coordRowIdx * blkSize;
475 coordColumnOffset += (lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0];
476 columnOffset = coordColumnOffset * blkSize;
477 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset) * blkSize;
478 for (
LO l = 0; l < blkSize; ++l) {
479 for (
LO k = 0; k < 3; ++k) {
480 for (
LO j = 0; j < 3; ++j) {
481 for (
LO i = 0; i < 3; ++i) {
482 entries_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + (i - 2)) * blkSize + l;
483 values_h(entryOffset + k * 9 + j * 3 + i + cornerStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
488 for (
LO l = 0; l < blkSize; ++l) {
489 row_map_h(rowIdx + 1 + l) = entryOffset + cornerStencilLength * (l + 1);
491 for (
int dim = 0; dim < numDimensions; ++dim) {
492 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
497 if (lCoarseNodesPerDim[0] - 2 > 0) {
498 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
499 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[0] - 2; ++edgeIdx) {
501 coordRowIdx = (edgeIdx + 1);
502 rowIdx = coordRowIdx * blkSize;
503 coordColumnOffset = (edgeIdx + 1) * 3;
504 columnOffset = coordColumnOffset * blkSize;
505 entryOffset = (cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
506 for (
LO l = 0; l < blkSize; ++l) {
507 for (
LO k = 0; k < 3; ++k) {
508 for (
LO j = 0; j < 3; ++j) {
509 for (
LO i = 0; i < 5; ++i) {
510 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
511 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j + 2] * coeffs[i];
516 for (
LO l = 0; l < blkSize; ++l) {
517 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
519 for (
int dim = 0; dim < numDimensions; ++dim) {
520 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
524 coordRowIdx = ((lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
525 rowIdx = coordRowIdx * blkSize;
526 coordColumnOffset = ((lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
527 columnOffset = coordColumnOffset * blkSize;
528 entryOffset = (edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
529 for (
LO l = 0; l < blkSize; ++l) {
530 for (
LO k = 0; k < 3; ++k) {
531 for (
LO j = 0; j < 3; ++j) {
532 for (
LO i = 0; i < 5; ++i) {
533 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
534 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
539 for (
LO l = 0; l < blkSize; ++l) {
540 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
542 for (
int dim = 0; dim < numDimensions; ++dim) {
543 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
547 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + edgeIdx + 1);
548 rowIdx = coordRowIdx * blkSize;
549 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
550 columnOffset = coordColumnOffset * blkSize;
551 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
552 for (
LO l = 0; l < blkSize; ++l) {
553 for (
LO k = 0; k < 3; ++k) {
554 for (
LO j = 0; j < 3; ++j) {
555 for (
LO i = 0; i < 5; ++i) {
556 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
557 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
562 for (
LO l = 0; l < blkSize; ++l) {
563 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
565 for (
int dim = 0; dim < numDimensions; ++dim) {
566 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
570 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0] + edgeIdx + 1);
571 rowIdx = coordRowIdx * blkSize;
572 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0] + (edgeIdx + 1) * 3);
573 columnOffset = coordColumnOffset * blkSize;
574 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + (lCoarseNodesPerDim[1] - 2) * faceLineOffset + cornerStencilLength + edgeIdx * edgeStencilLength) * blkSize;
575 for (
LO l = 0; l < blkSize; ++l) {
576 for (
LO k = 0; k < 3; ++k) {
577 for (
LO j = 0; j < 3; ++j) {
578 for (
LO i = 0; i < 5; ++i) {
579 entries_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
580 values_h(entryOffset + k * 15 + j * 5 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
585 for (
LO l = 0; l < blkSize; ++l) {
586 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
588 for (
int dim = 0; dim < numDimensions; ++dim) {
589 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
595 if (lCoarseNodesPerDim[1] - 2 > 0) {
596 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
597 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[1] - 2; ++edgeIdx) {
599 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[0];
600 rowIdx = coordRowIdx * blkSize;
601 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[0];
602 columnOffset = coordColumnOffset * blkSize;
603 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
604 for (
LO l = 0; l < blkSize; ++l) {
605 for (
LO k = 0; k < 3; ++k) {
606 for (
LO j = 0; j < 5; ++j) {
607 for (
LO i = 0; i < 3; ++i) {
608 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
609 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i + 2];
614 for (
LO l = 0; l < blkSize; ++l) {
615 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
617 for (
int dim = 0; dim < numDimensions; ++dim) {
618 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
622 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
623 rowIdx = coordRowIdx * blkSize;
624 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
625 columnOffset = coordColumnOffset * blkSize;
626 entryOffset = (edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
627 for (
LO l = 0; l < blkSize; ++l) {
628 for (
LO k = 0; k < 3; ++k) {
629 for (
LO j = 0; j < 5; ++j) {
630 for (
LO i = 0; i < 3; ++i) {
631 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + (k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
632 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k + 2] * coeffs[j] * coeffs[i];
637 for (
LO l = 0; l < blkSize; ++l) {
638 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
640 for (
int dim = 0; dim < numDimensions; ++dim) {
641 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
645 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0]);
646 rowIdx = coordRowIdx * blkSize;
647 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0]);
648 columnOffset = coordColumnOffset * blkSize;
649 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset) * blkSize;
650 for (
LO l = 0; l < blkSize; ++l) {
651 for (
LO k = 0; k < 3; ++k) {
652 for (
LO j = 0; j < 5; ++j) {
653 for (
LO i = 0; i < 3; ++i) {
654 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
655 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
660 for (
LO l = 0; l < blkSize; ++l) {
661 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
663 for (
int dim = 0; dim < numDimensions; ++dim) {
664 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
668 coordRowIdx = ((lCoarseNodesPerDim[2] - 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (edgeIdx + 1) * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
669 rowIdx = coordRowIdx * blkSize;
670 coordColumnOffset = ((lFineNodesPerDim[2] - 1) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (edgeIdx + 1) * 3 * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
671 columnOffset = coordColumnOffset * blkSize;
672 entryOffset = (facePlaneOffset + (lCoarseNodesPerDim[2] - 2) * interiorPlaneOffset + edgeLineOffset + edgeIdx * faceLineOffset + edgeStencilLength + (lCoarseNodesPerDim[0] - 2) * faceStencilLength) * blkSize;
673 for (
LO l = 0; l < blkSize; ++l) {
674 for (
LO k = 0; k < 3; ++k) {
675 for (
LO j = 0; j < 5; ++j) {
676 for (
LO i = 0; i < 3; ++i) {
677 entries_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
678 values_h(entryOffset + k * 15 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
683 for (
LO l = 0; l < blkSize; ++l) {
684 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
686 for (
int dim = 0; dim < numDimensions; ++dim) {
687 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
693 if (lCoarseNodesPerDim[2] - 2 > 0) {
694 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
695 for (
LO edgeIdx = 0; edgeIdx < lCoarseNodesPerDim[2] - 2; ++edgeIdx) {
697 coordRowIdx = (edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0];
698 rowIdx = coordRowIdx * blkSize;
699 coordColumnOffset = (edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0];
700 columnOffset = coordColumnOffset * blkSize;
701 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset) * blkSize;
702 for (
LO l = 0; l < blkSize; ++l) {
703 for (
LO k = 0; k < 5; ++k) {
704 for (
LO j = 0; j < 3; ++j) {
705 for (
LO i = 0; i < 3; ++i) {
706 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i) * blkSize + l;
707 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i + 2];
712 for (
LO l = 0; l < blkSize; ++l) {
713 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
715 for (
int dim = 0; dim < numDimensions; ++dim) {
716 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
720 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + lCoarseNodesPerDim[0] - 1);
721 rowIdx = coordRowIdx * blkSize;
722 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[0] - 1);
723 columnOffset = coordColumnOffset * blkSize;
724 entryOffset = (facePlaneOffset + faceLineOffset - edgeStencilLength + edgeIdx * interiorPlaneOffset) * blkSize;
725 for (
LO l = 0; l < blkSize; ++l) {
726 for (
LO k = 0; k < 5; ++k) {
727 for (
LO j = 0; j < 3; ++j) {
728 for (
LO i = 0; i < 3; ++i) {
729 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i - 2) * blkSize + l;
730 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j + 2] * coeffs[i];
735 for (
LO l = 0; l < blkSize; ++l) {
736 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
738 for (
int dim = 0; dim < numDimensions; ++dim) {
739 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
743 coordRowIdx = ((edgeIdx + 1) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] + (lCoarseNodesPerDim[1] - 1) * lCoarseNodesPerDim[0]);
744 rowIdx = coordRowIdx * blkSize;
745 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (lFineNodesPerDim[1] - 1) * lFineNodesPerDim[0]);
746 columnOffset = coordColumnOffset * blkSize;
747 entryOffset = (facePlaneOffset + edgeIdx * interiorPlaneOffset + faceLineOffset + (lCoarseNodesPerDim[1] - 2) * interiorLineOffset) * blkSize;
748 for (
LO l = 0; l < blkSize; ++l) {
749 for (
LO k = 0; k < 5; ++k) {
750 for (
LO j = 0; j < 3; ++j) {
751 for (
LO i = 0; i < 3; ++i) {
752 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i) * blkSize + l;
753 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i + 2];
758 for (
LO l = 0; l < blkSize; ++l) {
759 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
761 for (
int dim = 0; dim < numDimensions; ++dim) {
762 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
766 coordRowIdx = ((edgeIdx + 2) * lCoarseNodesPerDim[1] * lCoarseNodesPerDim[0] - 1);
767 rowIdx = coordRowIdx * blkSize;
768 coordColumnOffset = ((edgeIdx + 1) * 3 * lFineNodesPerDim[1] * lFineNodesPerDim[0] + lFineNodesPerDim[1] * lFineNodesPerDim[0] - 1);
769 columnOffset = coordColumnOffset * blkSize;
770 entryOffset = (facePlaneOffset + (edgeIdx + 1) * interiorPlaneOffset - edgeStencilLength) * blkSize;
771 for (
LO l = 0; l < blkSize; ++l) {
772 for (
LO k = 0; k < 5; ++k) {
773 for (
LO j = 0; j < 3; ++j) {
774 for (
LO i = 0; i < 3; ++i) {
775 entries_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim[1] * lFineNodesPerDim[0] + (j - 2) * lFineNodesPerDim[0] + i - 2) * blkSize + l;
776 values_h(entryOffset + k * 9 + j * 3 + i + edgeStencilLength * l) = coeffs[k] * coeffs[j] * coeffs[i];
781 for (
LO l = 0; l < blkSize; ++l) {
782 row_map_h(rowIdx + 1 + l) = entryOffset + edgeStencilLength * (l + 1);
784 for (
int dim = 0; dim < numDimensions; ++dim) {
785 coarseCoordData[dim][coordRowIdx] = fineCoordData[dim][coordColumnOffset];
791 Kokkos::deep_copy(row_map, row_map_h);
792 Kokkos::deep_copy(entries, entries_h);
793 Kokkos::deep_copy(values, values_h);
796 LOTupleView lFineNodesPerDim_d(
"lFineNodesPerDim");
797 LOTupleView lCoarseNodesPerDim_d(
"lCoarseNodesPerDim");
799 typename Kokkos::View<LO[3], device_type>::HostMirror lCoarseNodesPerDim_h = Kokkos::create_mirror_view(lCoarseNodesPerDim_d);
800 typename Kokkos::View<LO[3], device_type>::HostMirror lFineNodesPerDim_h = Kokkos::create_mirror_view(lFineNodesPerDim_d);
802 for (
int dim = 0; dim < numDimensions; ++dim) {
803 lCoarseNodesPerDim_h(dim) = lCoarseNodesPerDim[dim];
804 lFineNodesPerDim_h(dim) = lFineNodesPerDim[dim];
807 Kokkos::deep_copy(lCoarseNodesPerDim_d, lCoarseNodesPerDim_h);
808 Kokkos::deep_copy(lFineNodesPerDim_d, lFineNodesPerDim_h);
811 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[1] - 2 > 0)) {
812 Kokkos::parallel_for(
813 "Faces in 0-1 plane region R",
814 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[1] - 2) * (lCoarseNodesPerDim[0] - 2)),
815 KOKKOS_LAMBDA(
const LO faceIdx) {
816 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
817 LO gridIdx[3] = {0, 0, 0};
818 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
822 for (
LO i = 0; i < faceIdx; i++) {
824 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
831 coordRowIdx = ((gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
832 rowIdx = coordRowIdx * blkSize;
833 coordColumnOffset = 3 * ((gridIdx[1] + 1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1);
834 columnOffset = coordColumnOffset * blkSize;
835 entryOffset = (edgeLineOffset + edgeStencilLength + gridIdx[1] * faceLineOffset + gridIdx[0] * faceStencilLength) * blkSize;
836 for (
LO l = 0; l < blkSize; ++l) {
837 for (
LO k = 0; k < 3; ++k) {
838 for (
LO j = 0; j < 5; ++j) {
839 for (
LO i = 0; i < 5; ++i) {
840 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + (k * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
841 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k + 2] * coeffs_d[j] * coeffs_d[i];
846 for (
LO l = 0; l < blkSize; ++l) {
847 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
849 for (
int dim = 0; dim < numDimensions; ++dim) {
850 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
854 coordRowIdx += (lCoarseNodesPerDim_d(2) - 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0);
855 rowIdx = coordRowIdx * blkSize;
856 coordColumnOffset += (lFineNodesPerDim_d(2) - 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0);
857 columnOffset = coordColumnOffset * blkSize;
858 entryOffset += (facePlaneOffset + (lCoarseNodesPerDim_d(2) - 2) * interiorPlaneOffset) * blkSize;
859 for (
LO l = 0; l < blkSize; ++l) {
860 for (
LO k = 0; k < 3; ++k) {
861 for (
LO j = 0; j < 5; ++j) {
862 for (
LO i = 0; i < 5; ++i) {
863 entries(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
864 values(entryOffset + k * 25 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
869 for (
LO l = 0; l < blkSize; ++l) {
870 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
872 for (
int dim = 0; dim < numDimensions; ++dim) {
873 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
879 if ((lCoarseNodesPerDim[0] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
880 Kokkos::parallel_for(
881 "Faces in 0-2 plane region R",
882 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[0] - 2)),
883 KOKKOS_LAMBDA(
const LO faceIdx) {
884 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
885 LO gridIdx[3] = {0, 0, 0};
886 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
890 for (
LO i = 0; i < faceIdx; i++) {
892 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
899 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[0] + 1));
900 rowIdx = coordRowIdx * blkSize;
901 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + gridIdx[0] + 1) * 3;
902 columnOffset = coordColumnOffset * blkSize;
903 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + edgeStencilLength + gridIdx[0] * faceStencilLength) * blkSize;
904 for (
LO l = 0; l < blkSize; ++l) {
905 for (
LO k = 0; k < 5; ++k) {
906 for (
LO j = 0; j < 3; ++j) {
907 for (
LO i = 0; i < 5; ++i) {
908 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + j * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
909 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j + 2] * coeffs_d[i];
914 for (
LO l = 0; l < blkSize; ++l) {
915 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
917 for (
int dim = 0; dim < numDimensions; ++dim) {
918 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
922 coordRowIdx += (lCoarseNodesPerDim_d(1) - 1) * lCoarseNodesPerDim_d(0);
923 rowIdx = coordRowIdx * blkSize;
924 coordColumnOffset += (lFineNodesPerDim_d(1) - 1) * lFineNodesPerDim_d(0);
925 columnOffset = coordColumnOffset * blkSize;
926 entryOffset += (faceLineOffset + (lCoarseNodesPerDim_d(1) - 2) * interiorLineOffset) * blkSize;
927 for (
LO l = 0; l < blkSize; ++l) {
928 for (
LO k = 0; k < 5; ++k) {
929 for (
LO j = 0; j < 3; ++j) {
930 for (
LO i = 0; i < 5; ++i) {
931 entries(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
932 values(entryOffset + k * 15 + j * 5 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
937 for (
LO l = 0; l < blkSize; ++l) {
938 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
940 for (
int dim = 0; dim < numDimensions; ++dim) {
941 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
947 if ((lCoarseNodesPerDim[1] - 2 > 0) && (lCoarseNodesPerDim[2] - 2 > 0)) {
948 Kokkos::parallel_for(
949 "Faces in 1-2 plane region R",
950 Kokkos::RangePolicy<typename device_type::execution_space>(0, (lCoarseNodesPerDim[2] - 2) * (lCoarseNodesPerDim[1] - 2)),
951 KOKKOS_LAMBDA(
const LO faceIdx) {
952 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
953 LO gridIdx[3] = {0, 0, 0};
954 impl_scalar_type coeffs_d[5] = {1.0 / 3.0, 2.0 / 3.0, 1.0, 2.0 / 3.0, 1.0 / 3.0};
958 for (
LO i = 0; i < faceIdx; i++) {
960 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
967 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(1) * lCoarseNodesPerDim_d(0) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0));
968 rowIdx = coordRowIdx * blkSize;
969 coordColumnOffset = ((gridIdx[2] + 1) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * lFineNodesPerDim_d(0)) * 3;
970 columnOffset = coordColumnOffset * blkSize;
971 entryOffset = (facePlaneOffset + gridIdx[2] * interiorPlaneOffset + faceLineOffset + gridIdx[1] * interiorLineOffset) * blkSize;
972 for (
LO l = 0; l < blkSize; ++l) {
973 for (
LO k = 0; k < 5; ++k) {
974 for (
LO j = 0; j < 5; ++j) {
975 for (
LO i = 0; i < 3; ++i) {
976 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i) * blkSize + l;
977 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i + 2];
982 for (
LO l = 0; l < blkSize; ++l) {
983 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
985 for (
int dim = 0; dim < numDimensions; ++dim) {
986 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
990 coordRowIdx += (lCoarseNodesPerDim_d(0) - 1);
991 rowIdx = coordRowIdx * blkSize;
992 coordColumnOffset += (lFineNodesPerDim_d(0) - 1);
993 columnOffset = coordColumnOffset * blkSize;
994 entryOffset += (faceStencilLength + (lCoarseNodesPerDim_d(0) - 2) * interiorStencilLength) * blkSize;
995 for (
LO l = 0; l < blkSize; ++l) {
996 for (
LO k = 0; k < 5; ++k) {
997 for (
LO j = 0; j < 5; ++j) {
998 for (
LO i = 0; i < 3; ++i) {
999 entries(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = columnOffset + ((k - 2) * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (j - 2) * lFineNodesPerDim_d(0) + i - 2) * blkSize + l;
1000 values(entryOffset + k * 15 + j * 3 + i + faceStencilLength * l) = coeffs_d[k] * coeffs_d[j] * coeffs_d[i];
1005 for (
LO l = 0; l < blkSize; ++l) {
1006 row_map(rowIdx + 1 + l) = entryOffset + faceStencilLength * (l + 1);
1008 for (
int dim = 0; dim < numDimensions; ++dim) {
1009 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1014 if (numInteriors > 0) {
1019 LO countRowEntries = 0;
1020 Kokkos::View<LO[125]> coordColumnOffsets_d(
"coordColOffset");
1021 auto coordColumnOffsets_h = Kokkos::create_mirror_view(coordColumnOffsets_d);
1023 for (
LO k = -2; k < 3; ++k) {
1024 for (
LO j = -2; j < 3; ++j) {
1025 for (
LO i = -2; i < 3; ++i) {
1026 coordColumnOffsets_h(countRowEntries) = k * lFineNodesPerDim[1] * lFineNodesPerDim[0] + j * lFineNodesPerDim[0] + i;
1031 Kokkos::deep_copy(coordColumnOffsets_d, coordColumnOffsets_h);
1034 Kokkos::View<impl_scalar_type*> interiorValues_d(
"interiorValues", 125);
1035 auto interiorValues_h = Kokkos::create_mirror_view(interiorValues_d);
1036 for (
LO k = 0; k < 5; ++k) {
1037 for (
LO j = 0; j < 5; ++j) {
1038 for (
LO i = 0; i < 5; ++i) {
1039 interiorValues_h(countValues) = coeffs[k] * coeffs[j] * coeffs[i];
1044 Kokkos::deep_copy(interiorValues_d, interiorValues_h);
1046 Kokkos::parallel_for(
1047 "interior idx region R", Kokkos::RangePolicy<typename device_type::execution_space>(0, numInteriors),
1048 KOKKOS_LAMBDA(
const LO interiorIdx) {
1049 LO coordRowIdx, rowIdx, coordColumnOffset, columnOffset, entryOffset;
1057 for (
LO i = 0; i < interiorIdx; i++) {
1059 if (gridIdx[0] == lCoarseNodesPerDim_d(0) - 2) {
1062 if (gridIdx[1] == lCoarseNodesPerDim_d(1) - 2) {
1069 coordRowIdx = ((gridIdx[2] + 1) * lCoarseNodesPerDim_d(0) * lCoarseNodesPerDim_d(1) + (gridIdx[1] + 1) * lCoarseNodesPerDim_d(0) + gridIdx[0] + 1);
1070 rowIdx = coordRowIdx * blkSize;
1071 coordColumnOffset = ((gridIdx[2] + 1) * 3 * lFineNodesPerDim_d(1) * lFineNodesPerDim_d(0) + (gridIdx[1] + 1) * 3 * lFineNodesPerDim_d(0) + (gridIdx[0] + 1) * 3);
1072 columnOffset = coordColumnOffset * blkSize;
1073 entryOffset = (facePlaneOffset + faceLineOffset + faceStencilLength + gridIdx[2] * interiorPlaneOffset + gridIdx[1] * interiorLineOffset + gridIdx[0] * interiorStencilLength) * blkSize;
1074 for (
LO l = 0; l < blkSize; ++l) {
1075 row_map(rowIdx + 1 + l) = entryOffset + interiorStencilLength * (l + 1);
1080 for (
LO l = 0; l < blkSize; ++l) {
1081 for (
LO entryIdx = 0; entryIdx < interiorStencilLength; ++entryIdx) {
1082 entries(entryOffset + entryIdx + interiorStencilLength * l) = columnOffset + coordColumnOffsets_d(entryIdx) * blkSize + l;
1083 values(entryOffset + entryIdx + interiorStencilLength * l) = interiorValues_d(entryIdx);
1086 for (
int dim = 0; dim < numDimensions; ++dim) {
1087 coarseCoordsView(coordRowIdx, dim) = fineCoordsView(coordColumnOffset, dim);
1093 local_graph_type localGraph(entries, row_map);
1094 local_matrix_type localR(
"R", numCols, values, localGraph);
1096 R = MatrixFactory::Build(localR,
1107 #define MUELU_REGIONRFACTORY_KOKKOS_SHORT
1108 #endif // MUELU_REGIONRFACTORY_DEF_HPP
typename Kokkos::View< LO[3], device_type > LOTupleView
MueLu::DefaultLocalOrdinal LocalOrdinal
static Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Build(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t NumVectors, bool zeroOut=true)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
static const NoFactory * get()
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Build3D(const int numDimensions, Array< LO > &lFineNodesPerDim, const RCP< Matrix > &A, const RCP< realvaluedmultivector_type > &fineCoordinates, RCP< Matrix > &R, RCP< realvaluedmultivector_type > &coarseCoordinates, Array< LO > &lCoarseNodesPerDim) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
bool isSublist(const std::string &name) const
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
RCP< const ParameterList > GetValidParameterList() const
Input.
ParameterList & sublist(const std::string &name, bool mustAlreadyExist=false, const std::string &docString="")
int GetLevelID() const
Return level number.
std::string toString(const T &t)