10 #ifndef TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
11 #define TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
15 #include "Tpetra_BlockCrsMatrix.hpp"
16 #include "Tpetra_CrsMatrix.hpp"
17 #include "Tpetra_HashTable.hpp"
18 #include "Tpetra_Import.hpp"
19 #include "Tpetra_Map.hpp"
20 #include "Tpetra_MultiVector.hpp"
21 #include "Teuchos_ParameterList.hpp"
22 #include "Teuchos_ScalarTraits.hpp"
28 template <
class Scalar,
class LO,
class GO,
class Node>
30 Teuchos::ParameterList pl;
32 out.open(fileName.c_str());
36 template <
class Scalar,
class LO,
class GO,
class Node>
39 out.open(fileName.c_str());
43 template <
class Scalar,
class LO,
class GO,
class Node>
45 Teuchos::ParameterList pl;
49 template <
class Scalar,
class LO,
class GO,
class Node>
54 typedef Teuchos::OrdinalTraits<GO> TOT;
61 RCP<const map_type>
const rowMap = A.
getRowMap();
62 RCP<const Teuchos::Comm<int>> comm = rowMap->getComm();
63 const int myRank = comm->getRank();
64 const size_t numProcs = comm->getSize();
67 bool alwaysUseParallelAlgorithm =
false;
68 if (params.isParameter(
"always use parallel algorithm"))
69 alwaysUseParallelAlgorithm = params.get<
bool>(
"always use parallel algorithm");
70 bool printMatrixMarketHeader =
true;
71 if (params.isParameter(
"print MatrixMarket header"))
72 printMatrixMarketHeader = params.get<
bool>(
"print MatrixMarket header");
74 if (printMatrixMarketHeader && myRank == 0) {
75 std::time_t now = std::time(NULL);
77 const std::string dataTypeStr =
78 Teuchos::ScalarTraits<Scalar>::isComplex ?
"complex" :
"real";
88 os <<
"%%MatrixMarket matrix coordinate " << dataTypeStr <<
" general" << std::endl;
89 os <<
"% time stamp: " << ctime(&now);
90 os <<
"% written from " << numProcs <<
" processes" << std::endl;
91 os <<
"% point representation of Tpetra::BlockCrsMatrix" << std::endl;
94 os <<
"% " << numRows <<
" block rows, " << numCols <<
" block columns" << std::endl;
96 os <<
"% block size " << blockSize << std::endl;
97 os << numRows * blockSize <<
" " << numCols * blockSize <<
" " << A.
getGlobalNumEntries() * blockSize * blockSize << std::endl;
100 if (numProcs == 1 && !alwaysUseParallelAlgorithm) {
103 size_t numRows = rowMap->getLocalNumElements();
106 RCP<const map_type> allMeshGidsMap = rcp(
new map_type(TOT::invalid(), numRows, A.
getIndexBase(), comm));
109 mv_type allMeshGids(allMeshGidsMap, 1);
110 Teuchos::ArrayRCP<GO> allMeshGidsData = allMeshGids.getDataNonConst(0);
112 for (
size_t i = 0; i < numRows; i++)
113 allMeshGidsData[i] = rowMap->getGlobalElement(i);
114 allMeshGidsData = Teuchos::null;
117 size_t stripSize = allMeshGids.getGlobalLength() / numProcs;
118 size_t remainder = allMeshGids.getGlobalLength() % numProcs;
120 size_t curStripSize = 0;
121 Teuchos::Array<GO> importMeshGidList;
122 for (
size_t i = 0; i < numProcs; i++) {
124 curStripSize = stripSize;
125 if (i < remainder) curStripSize++;
126 importMeshGidList.resize(curStripSize);
127 for (
size_t j = 0; j < curStripSize; j++) importMeshGidList[j] = j + curStart + A.
getIndexBase();
128 curStart += curStripSize;
131 TEUCHOS_TEST_FOR_EXCEPTION(myRank > 0 && curStripSize != 0,
132 std::runtime_error,
"Tpetra::blockCrsMatrixWriter: (pid " << myRank <<
") map size should be zero, but is " << curStripSize);
133 RCP<map_type> importMeshGidMap = rcp(
new map_type(TOT::invalid(), importMeshGidList(), A.
getIndexBase(), comm));
134 import_type gidImporter(allMeshGidsMap, importMeshGidMap);
135 mv_type importMeshGids(importMeshGidMap, 1);
136 importMeshGids.doImport(allMeshGids, gidImporter,
INSERT);
142 Teuchos::ArrayRCP<const GO> importMeshGidsData = importMeshGids.getData(0);
143 Teuchos::Array<GO> importMeshGidsGO;
144 importMeshGidsGO.reserve(importMeshGidsData.size());
145 for (
typename Teuchos::ArrayRCP<const GO>::size_type j = 0; j < importMeshGidsData.size(); ++j)
146 importMeshGidsGO.push_back(importMeshGidsData[j]);
147 RCP<const map_type> importMap = rcp(
new map_type(TOT::invalid(), importMeshGidsGO(), rowMap->getIndexBase(), comm));
149 import_type importer(rowMap, importMap);
151 RCP<crs_graph_type> graph =
createCrsGraph(importMap, numEntriesPerRow);
152 RCP<const map_type> domainMap = A.getCrsGraph().
getDomainMap();
153 graph->doImport(A.getCrsGraph(), importer,
INSERT);
154 graph->fillComplete(domainMap, importMap);
156 block_crs_matrix_type importA(*graph, A.
getBlockSize());
157 importA.doImport(A, importer,
INSERT);
165 template <
class Scalar,
class LO,
class GO,
class Node>
170 using bcrs_local_inds_host_view_type =
typename bcrs_type::local_inds_host_view_type;
171 using bcrs_values_host_view_type =
typename bcrs_type::values_host_view_type;
172 using impl_scalar_type =
typename bcrs_type::impl_scalar_type;
175 RCP<const map_type> rowMap = A.
getRowMap();
176 RCP<const map_type> colMap = A.
getColMap();
177 RCP<const Teuchos::Comm<int>> comm = rowMap->getComm();
178 const int myRank = comm->getRank();
180 const size_t meshRowOffset = rowMap->getIndexBase();
181 const size_t meshColOffset = colMap->getIndexBase();
182 TEUCHOS_TEST_FOR_EXCEPTION(meshRowOffset != meshColOffset,
184 "Tpetra::writeMatrixStrip: "
185 "mesh row index base != mesh column index base");
189 std::runtime_error,
"Tpetra::writeMatrixStrip: pid " << myRank <<
" should have 0 rows but has " << A.
getLocalNumRows());
191 std::runtime_error,
"Tpetra::writeMatrixStrip: pid " << myRank <<
" should have 0 columns but has " << A.
getLocalNumCols());
196 "Tpetra::writeMatrixStrip: "
197 "number of rows on pid 0 does not match global number of rows");
202 bool precisionChanged =
false;
203 int oldPrecision = 0;
204 if (params.isParameter(
"precision")) {
205 oldPrecision = os.precision(params.get<
int>(
"precision"));
206 precisionChanged =
true;
209 if (params.isParameter(
"zero-based indexing")) {
210 if (params.get<
bool>(
"zero-based indexing") ==
true)
215 for (localRowInd = 0; localRowInd < numLocalRows; ++localRowInd) {
217 bcrs_local_inds_host_view_type localColInds;
218 bcrs_values_host_view_type vals;
221 numEntries = localColInds.extent(0);
222 GO globalMeshRowID = rowMap->getGlobalElement(localRowInd) - meshRowOffset;
224 for (LO k = 0; k < numEntries; ++k) {
225 GO globalMeshColID = colMap->getGlobalElement(localColInds[k]) - meshColOffset;
226 const impl_scalar_type *curBlock = vals.data() + blockSize * blockSize * k;
228 for (LO j = 0; j < blockSize; ++j) {
229 GO globalPointRowID = globalMeshRowID * blockSize + j + pointOffset;
230 for (LO i = 0; i < blockSize; ++i) {
231 GO globalPointColID = globalMeshColID * blockSize + i + pointOffset;
232 const impl_scalar_type curVal = curBlock[i + j * blockSize];
234 os << globalPointRowID <<
" " << globalPointColID <<
" ";
235 if (Teuchos::ScalarTraits<impl_scalar_type>::isComplex) {
238 os << Teuchos::ScalarTraits<impl_scalar_type>::real(curVal) <<
" "
239 << Teuchos::ScalarTraits<impl_scalar_type>::imag(curVal);
248 if (precisionChanged)
249 os.precision(oldPrecision);
250 TEUCHOS_TEST_FOR_EXCEPTION(err != 0,
252 "Tpetra::writeMatrixStrip: "
253 "error getting view of local row "
258 template <
class Scalar,
class LO,
class GO,
class Node>
259 Teuchos::RCP<Tpetra::CrsGraph<LO, GO, Node>>
261 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::getBlockCrsGraph", getBlockCrsGraph0);
270 using Teuchos::Array;
271 using Teuchos::ArrayView;
278 using local_graph_device_type =
typename crs_matrix_type::local_graph_device_type;
279 using row_map_type =
typename local_graph_device_type::row_map_type::non_const_type;
280 using entries_type =
typename local_graph_device_type::entries_type::non_const_type;
282 using offset_type =
typename row_map_type::non_const_value_type;
284 using execution_space =
typename Node::execution_space;
285 using range_type = Kokkos::RangePolicy<execution_space, LO>;
287 const map_type &pointRowMap = *(pointMatrix.
getRowMap());
288 RCP<const map_type> meshRowMap, meshColMap, meshDomainMap, meshRangeMap;
290 const map_type &pointColMap = *(pointMatrix.
getColMap());
291 const map_type &pointDomainMap = *(pointMatrix.
getDomainMap());
292 const map_type &pointRangeMap = *(pointMatrix.
getRangeMap());
295 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::getBlockCrsGraph::createMeshMaps", getBlockCrsGraph1);
296 meshRowMap = createMeshMap<LO, GO, Node>(blockSize, pointRowMap, use_LID);
297 meshColMap = createMeshMap<LO, GO, Node>(blockSize, pointColMap, use_LID);
298 meshDomainMap = createMeshMap<LO, GO, Node>(blockSize, pointDomainMap, use_LID);
299 meshRangeMap = createMeshMap<LO, GO, Node>(blockSize, pointRangeMap, use_LID);
300 Kokkos::DefaultExecutionSpace().fence();
303 if (meshColMap.is_null())
throw std::runtime_error(
"ERROR: Cannot create mesh colmap");
305 auto localMeshColMap = meshColMap->getLocalMap();
306 auto localPointColMap = pointColMap.getLocalMap();
307 auto localPointRowMap = pointRowMap.getLocalMap();
309 RCP<crs_graph_type> meshCrsGraph;
311 const offset_type bs2 = blockSize * blockSize;
314 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::getBlockCrsGraph::LID", getBlockCrsGraph2);
315 auto pointLocalGraph = pointMatrix.
getCrsGraph()->getLocalGraphDevice();
316 auto pointRowptr = pointLocalGraph.row_map;
317 auto pointColind = pointLocalGraph.entries;
319 TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0,
321 "Tpetra::getBlockCrsGraph: "
322 "local number of non zero entries is not a multiple of blockSize^2 ");
324 LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0) - 1) / blockSize;
325 row_map_type blockRowptr(
"blockRowptr", block_rows + 1);
326 entries_type blockColind(
"blockColind", pointColind.extent(0) / (bs2));
328 Kokkos::parallel_for(
329 "fillMesh", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
330 const LO offset_b = pointRowptr(i * blockSize) / bs2;
331 const LO offset_b_max = pointRowptr((i + 1) * blockSize) / bs2;
333 if (i == block_rows - 1)
334 blockRowptr(i + 1) = offset_b_max;
335 blockRowptr(i) = offset_b;
337 const LO offset_p = pointRowptr(i * blockSize);
339 for (LO k = 0; k < offset_b_max - offset_b; ++k) {
340 blockColind(offset_b + k) = pointColind(offset_p + k * blockSize) / blockSize;
344 meshCrsGraph = rcp(
new crs_graph_type(meshRowMap, meshColMap, blockRowptr, blockColind));
345 meshCrsGraph->fillComplete(meshDomainMap, meshRangeMap);
346 Kokkos::DefaultExecutionSpace().fence();
348 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::getBlockCrsGraph::GID", getBlockCrsGraph3);
349 auto pointLocalGraph = pointMatrix.
getCrsGraph()->getLocalGraphDevice();
350 auto pointRowptr = pointLocalGraph.row_map;
351 auto pointColind = pointLocalGraph.entries;
353 TEUCHOS_TEST_FOR_EXCEPTION(pointColind.extent(0) % bs2 != 0,
355 "Tpetra::getBlockCrsGraph: "
356 "local number of non zero entries is not a multiple of blockSize^2 ");
358 LO block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0) - 1) / blockSize;
359 row_map_type blockRowptr(
"blockRowptr", block_rows + 1);
360 entries_type blockColind(
"blockColind", pointColind.extent(0) / (bs2));
362 Kokkos::parallel_for(
363 "fillMesh", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
364 const LO offset_b = pointRowptr(i * blockSize) / bs2;
365 const LO offset_b_max = pointRowptr((i + 1) * blockSize) / bs2;
367 if (i == block_rows - 1)
368 blockRowptr(i + 1) = offset_b_max;
369 blockRowptr(i) = offset_b;
371 const LO offset_p = pointRowptr(i * blockSize);
372 const LO offset_p_max = pointRowptr((i + 1) * blockSize);
375 for (LO p_i = 0; p_i < offset_p_max - offset_p; ++p_i) {
376 auto bcol_GID = localPointColMap.getGlobalElement(pointColind(offset_p + p_i)) / blockSize;
377 auto bcol_LID = localMeshColMap.getLocalElement(bcol_GID);
379 bool visited =
false;
380 for (LO k = 0; k < filled_block; ++k) {
381 if (blockColind(offset_b + k) == bcol_LID)
385 blockColind(offset_b + filled_block) = bcol_LID;
391 meshCrsGraph = rcp(
new crs_graph_type(meshRowMap, meshColMap, blockRowptr, blockColind));
392 meshCrsGraph->fillComplete(meshDomainMap, meshRangeMap);
393 Kokkos::DefaultExecutionSpace().fence();
399 template <
class Scalar,
class LO,
class GO,
class Node>
400 Teuchos::RCP<BlockCrsMatrix<Scalar, LO, GO, Node>>
402 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::convertToBlockCrsMatrix", convertToBlockCrsMatrix0);
411 using Teuchos::Array;
412 using Teuchos::ArrayView;
418 using local_graph_device_type =
typename crs_matrix_type::local_graph_device_type;
419 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
420 using row_map_type =
typename local_graph_device_type::row_map_type::non_const_type;
421 using values_type =
typename local_matrix_device_type::values_type::non_const_type;
423 using offset_type =
typename row_map_type::non_const_value_type;
425 using execution_space =
typename Node::execution_space;
426 using range_type = Kokkos::RangePolicy<execution_space, LO>;
428 RCP<block_crs_matrix_type> blockMatrix;
430 const offset_type bs2 = blockSize * blockSize;
435 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::convertToBlockCrsMatrix::LID", convertToBlockCrsMatrix1);
436 auto pointLocalGraph = pointMatrix.
getCrsGraph()->getLocalGraphDevice();
437 auto pointRowptr = pointLocalGraph.row_map;
438 auto pointColind = pointLocalGraph.entries;
440 offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0) - 1) / blockSize;
441 values_type blockValues(
"values", meshCrsGraph->getLocalNumEntries() * bs2);
443 auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
445 Kokkos::parallel_for(
446 "copyblockValues", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
447 const offset_type blkBeg = blockRowptr[i];
448 const offset_type numBlocks = blockRowptr[i + 1] - blkBeg;
451 for (offset_type block = 0; block < numBlocks; block++) {
453 for (LO little_row = 0; little_row < blockSize; little_row++) {
454 offset_type point_row_offset = pointRowptr[i * blockSize + little_row];
455 for (LO little_col = 0; little_col < blockSize; little_col++) {
456 blockValues((blkBeg + block) * bs2 + little_row * blockSize + little_col) =
457 pointValues[point_row_offset + block * blockSize + little_col];
462 blockMatrix = rcp(
new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
463 Kokkos::DefaultExecutionSpace().fence();
465 TEUCHOS_FUNC_TIME_MONITOR_DIFF(
"Tpetra::convertToBlockCrsMatrix::GID", convertToBlockCrsMatrix2);
466 auto localMeshColMap = meshCrsGraph->getColMap()->getLocalMap();
467 auto localPointColMap = pointMatrix.
getColMap()->getLocalMap();
469 values_type blockValues(
"values", meshCrsGraph->getLocalNumEntries() * bs2);
471 auto pointLocalGraph = pointMatrix.
getCrsGraph()->getLocalGraphDevice();
472 auto pointRowptr = pointLocalGraph.row_map;
473 auto pointColind = pointLocalGraph.entries;
475 offset_type block_rows = pointRowptr.extent(0) == 0 ? 0 : (pointRowptr.extent(0) - 1) / blockSize;
477 auto blockRowptr = meshCrsGraph->getLocalGraphDevice().row_map;
478 auto blockColind = meshCrsGraph->getLocalGraphDevice().entries;
480 row_map_type pointGColind(
"pointGColind", pointColind.extent(0));
482 Kokkos::parallel_for(
483 "computePointGColind", range_type(0, pointColind.extent(0)), KOKKOS_LAMBDA(
const LO i) {
484 pointGColind(i) = localPointColMap.getGlobalElement(pointColind(i));
487 row_map_type blockGColind(
"blockGColind", blockColind.extent(0));
489 Kokkos::parallel_for(
490 "computeBlockGColind", range_type(0, blockGColind.extent(0)), KOKKOS_LAMBDA(
const LO i) {
491 blockGColind(i) = localMeshColMap.getGlobalElement(blockColind(i));
494 Kokkos::parallel_for(
495 "copyblockValues", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
496 const offset_type blkBeg = blockRowptr[i];
497 const offset_type numBlocks = blockRowptr[i + 1] - blkBeg;
499 for (offset_type point_i = 0; point_i < pointRowptr[i * blockSize + 1] - pointRowptr[i * blockSize]; point_i++) {
500 offset_type block_inv =
static_cast<offset_type
>(-1);
501 offset_type little_col_inv =
static_cast<offset_type
>(-1);
502 for (offset_type block_2 = 0; block_2 < numBlocks; block_2++) {
503 for (LO little_col_2 = 0; little_col_2 < blockSize; little_col_2++) {
504 if (blockGColind(blkBeg + block_2) * blockSize + little_col_2 == pointGColind(pointRowptr[i * blockSize] + point_i)) {
506 little_col_inv = little_col_2;
510 if (block_inv != static_cast<offset_type>(-1))
514 for (LO little_row = 0; little_row < blockSize; little_row++) {
515 blockValues((blkBeg + block_inv) * bs2 + little_row * blockSize + little_col_inv) = pointValues[pointRowptr[i * blockSize + little_row] + point_i];
520 blockMatrix = rcp(
new block_crs_matrix_type(*meshCrsGraph, blockValues, blockSize));
521 Kokkos::DefaultExecutionSpace().fence();
527 template <
class Scalar,
class LO,
class GO,
class Node>
528 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node>>
531 using execution_space =
typename Node::execution_space;
532 using dev_row_view_t =
typename crs_t::local_graph_device_type::row_map_type::non_const_type;
533 using dev_col_view_t =
typename crs_t::local_graph_device_type::entries_type::non_const_type;
534 using dev_val_view_t =
typename crs_t::local_matrix_device_type::values_type::non_const_type;
535 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
536 using team_policy = Kokkos::TeamPolicy<execution_space>;
537 using member_type =
typename team_policy::member_type;
538 using scratch_view = Kokkos::View<bool *, typename execution_space::scratch_memory_space>;
539 using Ordinal =
typename dev_row_view_t::non_const_value_type;
543 auto row_ptrs = local.graph.row_map;
544 auto col_inds = local.graph.entries;
545 auto values = local.values;
553 dev_row_view_t new_rowmap(
"new_rowmap", nrows + 1);
554 const auto blocks_per_row = nrows / blockSize;
555 dev_row_view_t active_block_row_map(
"active_block_row_map", blocks_per_row + 1);
556 const int max_threads = execution_space().concurrency();
557 assert(blockSize > 1);
558 assert(nrows % blockSize == 0);
559 const int mem_level = 1;
560 const int bytes = scratch_view::shmem_size(blocks_per_row);
562 if (max_threads >= blockSize) {
564 team_policy tp(blocks_per_row, blockSize);
567 Kokkos::parallel_for(
568 "countActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(
const member_type &team) {
569 Ordinal block_row = team.league_rank();
571 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
573 Kokkos::PerTeam(team), [&]() {
574 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
575 row_block_active(row_block_idx) =
false;
582 Kokkos::parallel_for(
583 Kokkos::TeamThreadRange(team, blockSize), [&](Ordinal block_offset) {
584 Ordinal row = block_row * blockSize + block_offset;
585 Ordinal row_itr = row_ptrs(row);
586 Ordinal row_end = row_ptrs(row + 1);
588 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
589 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
590 const Ordinal first_possible_col_in_next_block = (row_block_idx + 1) * blockSize;
591 Ordinal curr_nnz_col = col_inds(row_itr);
592 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
594 row_block_active(row_block_idx) =
true;
596 if (row_itr == row_end)
break;
597 curr_nnz_col = col_inds(row_itr);
605 Kokkos::PerTeam(team), [&]() {
607 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
608 if (row_block_active(row_block_idx)) {
612 active_block_row_map(block_row) = count;
618 team_policy tp(blocks_per_row, 1);
621 Kokkos::parallel_for(
622 "countActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(
const member_type &team) {
623 Ordinal block_row = team.league_rank();
625 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
626 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
627 row_block_active(row_block_idx) =
false;
631 for (
int block_offset = 0; block_offset < blockSize; ++block_offset) {
632 Ordinal row = block_row * blockSize + block_offset;
633 Ordinal row_itr = row_ptrs(row);
634 Ordinal row_end = row_ptrs(row + 1);
636 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
637 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
638 const Ordinal first_possible_col_in_next_block = (row_block_idx + 1) * blockSize;
639 Ordinal curr_nnz_col = col_inds(row_itr);
640 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
642 row_block_active(row_block_idx) =
true;
644 if (row_itr == row_end)
break;
645 curr_nnz_col = col_inds(row_itr);
651 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
652 if (row_block_active(row_block_idx)) {
656 active_block_row_map(block_row) = count;
660 Ordinal nnz_block_count = 0;
661 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
662 execution_space>(active_block_row_map.extent(0), active_block_row_map, nnz_block_count);
663 dev_col_view_t block_col_ids(
"block_col_ids", nnz_block_count);
666 if (max_threads >= blockSize) {
668 team_policy tp(blocks_per_row, blockSize);
670 Kokkos::parallel_for(
671 "findActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(
const member_type &team) {
672 Ordinal block_row = team.league_rank();
674 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
676 Kokkos::PerTeam(team), [&]() {
677 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
678 row_block_active(row_block_idx) =
false;
685 Kokkos::parallel_for(
686 Kokkos::TeamThreadRange(team, blockSize), [&](Ordinal block_offset) {
687 Ordinal row = block_row * blockSize + block_offset;
688 Ordinal row_itr = row_ptrs(row);
689 Ordinal row_end = row_ptrs(row + 1);
691 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
692 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
693 const Ordinal first_possible_col_in_next_block = (row_block_idx + 1) * blockSize;
694 Ordinal curr_nnz_col = col_inds(row_itr);
695 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
697 row_block_active(row_block_idx) =
true;
699 if (row_itr == row_end)
break;
700 curr_nnz_col = col_inds(row_itr);
708 Kokkos::PerTeam(team), [&]() {
709 Ordinal offset = active_block_row_map[block_row];
710 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
711 if (row_block_active(row_block_idx)) {
712 block_col_ids(offset) = row_block_idx;
719 team_policy tp(blocks_per_row, 1);
721 Kokkos::parallel_for(
722 "findActiveBlocks", tp.set_scratch_size(mem_level, Kokkos::PerTeam(bytes)), KOKKOS_LAMBDA(
const member_type &team) {
723 Ordinal block_row = team.league_rank();
725 scratch_view row_block_active(team.team_scratch(mem_level), blocks_per_row);
726 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
727 row_block_active(row_block_idx) =
false;
731 for (
int block_offset = 0; block_offset < blockSize; ++block_offset) {
732 Ordinal row = block_row * blockSize + block_offset;
733 Ordinal row_itr = row_ptrs(row);
734 Ordinal row_end = row_ptrs(row + 1);
736 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
737 const Ordinal first_possible_col_in_block = row_block_idx * blockSize;
738 const Ordinal first_possible_col_in_next_block = (row_block_idx + 1) * blockSize;
739 Ordinal curr_nnz_col = col_inds(row_itr);
740 while (curr_nnz_col >= first_possible_col_in_block && curr_nnz_col < first_possible_col_in_next_block && row_itr < row_end) {
742 row_block_active(row_block_idx) =
true;
744 if (row_itr == row_end)
break;
745 curr_nnz_col = col_inds(row_itr);
750 Ordinal offset = active_block_row_map[block_row];
751 for (
size_t row_block_idx = 0; row_block_idx < blocks_per_row; ++row_block_idx) {
752 if (row_block_active(row_block_idx)) {
753 block_col_ids(offset) = row_block_idx;
761 Kokkos::parallel_for(
762 "sizing", range_type(0, nrows), KOKKOS_LAMBDA(
const size_t row) {
763 const auto block_row = row / blockSize;
764 const Ordinal block_row_begin = active_block_row_map(block_row);
765 const Ordinal block_row_end = active_block_row_map(block_row + 1);
766 const Ordinal row_nnz = (block_row_end - block_row_begin) * blockSize;
767 new_rowmap(row) = row_nnz;
770 Ordinal new_nnz_count = 0;
771 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
772 execution_space>(new_rowmap.extent(0), new_rowmap, new_nnz_count);
774 dev_col_view_t new_col_ids(
"new_col_ids", new_nnz_count);
775 dev_val_view_t new_vals(
"new_vals", new_nnz_count);
776 Kokkos::parallel_for(
777 "entries", range_type(0, nrows), KOKKOS_LAMBDA(
const size_t row) {
778 Ordinal row_itr = row_ptrs(row);
779 Ordinal row_end = row_ptrs(row + 1);
780 Ordinal row_itr_new = new_rowmap(row);
782 Ordinal block_row = row / blockSize;
783 Ordinal block_row_begin = active_block_row_map(block_row);
784 Ordinal block_row_end = active_block_row_map(block_row + 1);
786 for (Ordinal row_block_idx = block_row_begin; row_block_idx < block_row_end; ++row_block_idx) {
787 const Ordinal block_col = block_col_ids(row_block_idx);
788 const Ordinal first_possible_col_in_block = block_col * blockSize;
789 const Ordinal first_possible_col_in_next_block = (block_col + 1) * blockSize;
790 for (Ordinal possible_col = first_possible_col_in_block; possible_col < first_possible_col_in_next_block; ++possible_col, ++row_itr_new) {
791 new_col_ids(row_itr_new) = possible_col;
792 Ordinal curr_nnz_col = col_inds(row_itr);
793 if (possible_col == curr_nnz_col && row_itr < row_end) {
795 new_vals(row_itr_new) = values(row_itr);
803 auto crs_row_map = pointMatrix.
getRowMap();
804 auto crs_col_map = pointMatrix.
getColMap();
805 Teuchos::RCP<crs_t> result = Teuchos::rcp(
new crs_t(crs_row_map, crs_col_map, new_rowmap, new_col_ids, new_vals));
806 result->fillComplete();
810 template <
class Scalar,
class LO,
class GO,
class Node>
811 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LO, GO, Node>>
814 using dev_row_view_t =
typename crs_t::local_graph_device_type::row_map_type::non_const_type;
815 using dev_col_view_t =
typename crs_t::local_graph_device_type::entries_type::non_const_type;
816 using dev_val_view_t =
typename crs_t::local_matrix_device_type::values_type::non_const_type;
817 #if KOKKOS_VERSION >= 40799
818 using impl_scalar_t =
typename KokkosKernels::ArithTraits<Scalar>::val_type;
820 using impl_scalar_t =
typename Kokkos::ArithTraits<Scalar>::val_type;
822 #if KOKKOS_VERSION >= 40799
823 using STS = KokkosKernels::ArithTraits<impl_scalar_t>;
825 using STS = Kokkos::ArithTraits<impl_scalar_t>;
827 using Ordinal =
typename dev_row_view_t::non_const_value_type;
828 using execution_space =
typename Node::execution_space;
829 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
832 auto local = pointMatrix.getLocalMatrixHost();
833 auto row_ptrs = local.graph.row_map;
834 auto col_inds = local.graph.entries;
835 auto values = local.values;
839 dev_row_view_t new_rowmap(
"new_rowmap", nrows + 1);
840 const impl_scalar_t zero = STS::zero();
841 Kokkos::parallel_for(
842 "sizing", range_type(0, nrows), KOKKOS_LAMBDA(
const size_t row) {
843 const Ordinal row_nnz_begin = row_ptrs(row);
844 Ordinal row_nnzs = 0;
845 for (Ordinal row_nnz = row_nnz_begin; row_nnz < row_ptrs(row + 1); ++row_nnz) {
846 const impl_scalar_t value = values(row_nnz);
851 new_rowmap(row) = row_nnzs;
854 Ordinal real_nnzs = 0;
855 KokkosKernels::Impl::kk_exclusive_parallel_prefix_sum<
856 execution_space>(new_rowmap.extent(0), new_rowmap, real_nnzs);
858 dev_col_view_t new_col_ids(
"new_col_ids", real_nnzs);
859 dev_val_view_t new_vals(
"new_vals", real_nnzs);
860 Kokkos::parallel_for(
861 "entries", range_type(0, nrows), KOKKOS_LAMBDA(
const size_t row) {
862 Ordinal row_nnzs = 0;
863 const Ordinal old_row_nnz_begin = row_ptrs(row);
864 const Ordinal new_row_nnz_begin = new_rowmap(row);
865 for (Ordinal old_row_nnz = old_row_nnz_begin; old_row_nnz < row_ptrs(row + 1); ++old_row_nnz) {
866 const impl_scalar_t value = values(old_row_nnz);
868 new_col_ids(new_row_nnz_begin + row_nnzs) = col_inds(old_row_nnz);
869 new_vals(new_row_nnz_begin + row_nnzs) = value;
876 auto crs_row_map = pointMatrix.
getRowMap();
877 auto crs_col_map = pointMatrix.
getColMap();
878 Teuchos::RCP<crs_t> result = Teuchos::rcp(
new crs_t(crs_row_map, crs_col_map, new_rowmap, new_col_ids, new_vals));
879 result->fillComplete();
883 template <
class LO,
class GO,
class Node>
884 Teuchos::RCP<const Tpetra::Map<LO, GO, Node>>
886 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
890 using execution_space =
typename Node::execution_space;
891 using range_type = Kokkos::RangePolicy<execution_space, size_t>;
894 LO block_rows = pointGlobalID.extent(0) / blockSize;
895 Kokkos::View<GO *, typename map_type::device_type> meshGlobalID(
"meshGlobalID", block_rows);
897 Kokkos::parallel_for(
898 "fillMeshMap", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
899 meshGlobalID(i) = pointGlobalID(i * blockSize) / blockSize;
902 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp(
new map_type(TOT::invalid(), meshGlobalID, 0, pointMap.
getComm()));
907 Teuchos::Array<GO> meshGids;
913 meshGids.reserve(pointGids.size());
915 for (
int i = 0; i < pointGids.size(); ++i) {
916 GO meshGid = (pointGids[i] - indexBase) / blockSize + indexBase;
917 if (hashTable.get(meshGid) == -1) {
918 hashTable.
add(meshGid, 1);
919 meshGids.push_back(meshGid);
923 Teuchos::RCP<const map_type> meshMap = Teuchos::rcp(
new map_type(TOT::invalid(), meshGids(), 0, pointMap.
getComm()));
928 template <
class LO,
class GO,
class Node>
929 Teuchos::RCP<const Tpetra::Map<LO, GO, Node>>
931 typedef Teuchos::OrdinalTraits<Tpetra::global_size_t> TOT;
936 Teuchos::Array<GO> pointGids(blockGids.size() * blockSize);
939 for (LO i = 0, ct = 0; i < (LO)blockGids.size(); i++) {
940 GO base = (blockGids[i] - indexBase) * blockSize + indexBase;
941 for (LO j = 0; j < blockSize; j++, ct++)
942 pointGids[i * blockSize + j] = base + j;
945 Teuchos::RCP<const map_type> pointMap = Teuchos::rcp(
new map_type(TOT::invalid(), pointGids(), 0, blockMap.
getComm()));
949 template <
class Scalar,
class LO,
class GO,
class Node>
950 Teuchos::RCP<CrsMatrix<Scalar, LO, GO, Node>>
952 using Teuchos::Array;
953 using Teuchos::ArrayView;
960 using crs_graph_type =
typename block_crs_matrix_type::crs_graph_type;
961 using local_graph_device_type =
typename crs_matrix_type::local_graph_device_type;
962 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
963 using row_map_type =
typename local_graph_device_type::row_map_type::non_const_type;
964 using entries_type =
typename local_graph_device_type::entries_type::non_const_type;
965 using values_type =
typename local_matrix_device_type::values_type::non_const_type;
967 using row_map_type_const =
typename local_graph_device_type::row_map_type;
968 using entries_type_const =
typename local_graph_device_type::entries_type;
970 using little_block_type =
typename block_crs_matrix_type::const_little_block_type;
971 using offset_type =
typename row_map_type::non_const_value_type;
972 using column_type =
typename entries_type::non_const_value_type;
974 using execution_space =
typename Node::execution_space;
975 using range_type = Kokkos::RangePolicy<execution_space, LO>;
978 const offset_type bs2 = blocksize * blocksize;
980 size_t point_nnz = block_nnz * bs2;
983 RCP<const map_type> pointDomainMap = blockMatrix.
getDomainMap();
984 RCP<const map_type> pointRangeMap = blockMatrix.
getRangeMap();
987 RCP<const map_type> blockRowMap = blockMatrix.
getRowMap();
988 RCP<const map_type> pointRowMap = createPointMap<LO, GO, Node>(blocksize, *blockRowMap);
990 RCP<const map_type> blockColMap = blockMatrix.
getColMap();
991 RCP<const map_type> pointColMap = createPointMap<LO, GO, Node>(blocksize, *blockColMap);
995 const crs_graph_type &blockGraph = blockMatrix.getCrsGraph();
996 LO point_rows = (LO)pointRowMap->getLocalNumElements();
997 LO block_rows = (LO)blockRowMap->getLocalNumElements();
998 auto blockValues = blockMatrix.getValuesDevice();
999 auto blockLocalGraph = blockGraph.getLocalGraphDevice();
1000 row_map_type_const blockRowptr = blockLocalGraph.row_map;
1001 entries_type_const blockColind = blockLocalGraph.entries;
1004 row_map_type rowptr(
"row_map", point_rows + 1);
1005 entries_type colind(
"entries", point_nnz);
1006 values_type values(
"values", point_nnz);
1009 Kokkos::parallel_for(
1010 "fillRowPtr", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
1011 offset_type base = blockRowptr[i];
1012 offset_type diff = blockRowptr[i + 1] - base;
1013 for (LO j = 0; j < blocksize; j++) {
1014 rowptr[i * blocksize + j] = base * bs2 + j * diff * blocksize;
1018 if (i == block_rows - 1) {
1019 rowptr[block_rows * blocksize] = blockRowptr[i + 1] * bs2;
1023 Kokkos::parallel_for(
1024 "copyEntriesAndValues", range_type(0, block_rows), KOKKOS_LAMBDA(
const LO i) {
1025 const offset_type blkBeg = blockRowptr[i];
1026 const offset_type numBlocks = blockRowptr[i + 1] - blkBeg;
1029 for (offset_type block = 0; block < numBlocks; block++) {
1030 column_type point_col_base = blockColind[blkBeg + block] * blocksize;
1031 little_block_type my_block(blockValues.data() + (blkBeg + block) * bs2, blocksize, blocksize);
1034 for (LO little_row = 0; little_row < blocksize; little_row++) {
1035 offset_type point_row_offset = rowptr[i * blocksize + little_row];
1036 for (LO little_col = 0; little_col < blocksize; little_col++) {
1037 colind[point_row_offset + block * blocksize + little_col] = point_col_base + little_col;
1038 values[point_row_offset + block * blocksize + little_col] = my_block(little_row, little_col);
1045 RCP<crs_matrix_type> pointCrsMatrix = rcp(
new crs_matrix_type(pointRowMap, pointColMap, 0));
1046 pointCrsMatrix->setAllValues(rowptr, colind, values);
1049 pointCrsMatrix->expertStaticFillComplete(pointDomainMap, pointRangeMap);
1051 return pointCrsMatrix;
1062 #define TPETRA_BLOCKCRSMATRIX_HELPERS_INSTANT(S, LO, GO, NODE) \
1063 template void blockCrsMatrixWriter(BlockCrsMatrix<S, LO, GO, NODE> const &A, std::string const &fileName); \
1064 template void blockCrsMatrixWriter(BlockCrsMatrix<S, LO, GO, NODE> const &A, std::string const &fileName, Teuchos::ParameterList const ¶ms); \
1065 template void blockCrsMatrixWriter(BlockCrsMatrix<S, LO, GO, NODE> const &A, std::ostream &os); \
1066 template void blockCrsMatrixWriter(BlockCrsMatrix<S, LO, GO, NODE> const &A, std::ostream &os, Teuchos::ParameterList const ¶ms); \
1067 template void writeMatrixStrip(BlockCrsMatrix<S, LO, GO, NODE> const &A, std::ostream &os, Teuchos::ParameterList const ¶ms); \
1068 template Teuchos::RCP<BlockCrsMatrix<S, LO, GO, NODE>> convertToBlockCrsMatrix(const CrsMatrix<S, LO, GO, NODE> &pointMatrix, const LO &blockSize, bool use_LID); \
1069 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE>> fillLogicalBlocks(const CrsMatrix<S, LO, GO, NODE> &pointMatrix, const LO &blockSize); \
1070 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE>> unfillFormerBlockCrs(const CrsMatrix<S, LO, GO, NODE> &pointMatrix); \
1071 template Teuchos::RCP<CrsMatrix<S, LO, GO, NODE>> convertToCrsMatrix(const BlockCrsMatrix<S, LO, GO, NODE> &blockMatrix); \
1072 template Teuchos::RCP<CrsGraph<LO, GO, NODE>> getBlockCrsGraph(const Tpetra::CrsMatrix<S, LO, GO, NODE> &pointMatrix, const LO &blockSize, bool use_local_ID);
1077 #define TPETRA_CREATEMESHMAP_INSTANT(LO, GO, NODE) \
1078 template Teuchos::RCP<const Map<LO, GO, NODE>> createMeshMap(const LO &blockSize, const Map<LO, GO, NODE> &pointMap, bool use_local_ID); \
1079 template Teuchos::RCP<const Map<LO, GO, NODE>> createPointMap(const LO &blockSize, const Map<LO, GO, NODE> &blockMap);
1081 #endif // TPETRA_BLOCKCRSMATRIX_HELPERS_DEF_HPP
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createPointMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &blockMap)
Helper function to generate a point map from a block map (with a given block size) GIDs associated wi...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
virtual size_t getLocalNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
void blockCrsMatrixWriter(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::string const &fileName)
Helper function to write a BlockCrsMatrix. Calls the 3-argument version.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
virtual LO getBlockSize() const override
The number of degrees of freedom per mesh point.
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, Node > > unfillFormerBlockCrs(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix)
Unfill all point entries in a logical block of a CrsMatrix with zeroes. This can be called after conv...
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
Teuchos::RCP< const Tpetra::Map< LO, GO, Node > > createMeshMap(LO const &blockSize, const Tpetra::Map< LO, GO, Node > &pointMap, bool use_local_ID=false)
Helper function to generate a mesh map from a point map. Important! It's assumed that point GIDs asso...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
One or more distributed dense vectors.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
global_indices_array_device_type getMyGlobalIndicesDevice() const
Return a view of the global indices owned by this process on the Map's device.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
size_t getLocalNumRows() const override
get the local number of block rows
void writeMatrixStrip(BlockCrsMatrix< Scalar, LO, GO, Node > const &A, std::ostream &os, Teuchos::ParameterList const ¶ms)
Helper function called by blockCrsMatrixWriter.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
Insert new values that don't currently exist.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_size_t getGlobalNumRows() const override
get the global number of block rows
TPETRA_DETAILS_ALWAYS_INLINE local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
Teuchos::RCP< Tpetra::CrsMatrix< Scalar, LO, GO, Node > > fillLogicalBlocks(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize)
Fill all point entries in a logical block of a CrsMatrix with zeroes. This should be called before co...
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
Teuchos::RCP< CrsMatrix< Scalar, LO, GO, Node > > convertToCrsMatrix(const Tpetra::BlockCrsMatrix< Scalar, LO, GO, Node > &blockMatrix)
Non-member constructor that creates a point CrsMatrix from an existing BlockCrsMatrix.
Teuchos::RCP< CrsGraph< LocalOrdinal, GlobalOrdinal, Node > > createCrsGraph(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node >> &map, size_t maxNumEntriesPerRow=0, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Nonmember function to create an empty CrsGraph given a row Map and the max number of entries allowed ...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< Tpetra::CrsGraph< LO, GO, Node > > getBlockCrsGraph(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize, bool use_local_ID=true)
Non-member constructor that creates the CrsGraph of a BlockCrsMatrix from an existing point CrsMatrix...
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Teuchos::RCP< BlockCrsMatrix< Scalar, LO, GO, Node > > convertToBlockCrsMatrix(const Tpetra::CrsMatrix< Scalar, LO, GO, Node > &pointMatrix, const LO &blockSize, bool use_local_ID=true)
Non-member constructor that creates a BlockCrsMatrix from an existing point CrsMatrix.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
virtual size_t getLocalNumCols() const override
The number of columns needed to apply the forward operator on this node.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.