10 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
11 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
13 #include <Xpetra_Map.hpp>
15 #include <Xpetra_MultiVectorFactory.hpp>
16 #include <Xpetra_MapFactory.hpp>
19 #include "KokkosKernels_Handle.hpp"
20 #include "KokkosSparse_spgemm.hpp"
21 #include "KokkosSparse_spmv.hpp"
25 #include "MueLu_Aggregates.hpp"
26 #include "MueLu_LWGraph.hpp"
27 #include "MueLu_LWGraph_kokkos.hpp"
32 #include "MueLu_Utilities.hpp"
36 namespace NotayUtils {
37 template <
class LocalOrdinal>
39 return min + as<LocalOrdinal>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
42 template <
class LocalOrdinal>
45 LO n = Teuchos::as<LO>(list.
size());
46 for (LO i = 0; i < n - 1; i++)
51 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
55 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
60 #undef SET_VALID_ENTRY
65 validParamList->
set<
RCP<const FactoryBase>>(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
67 return validParamList;
70 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 Input(currentLevel,
"A");
73 Input(currentLevel,
"Graph");
74 Input(currentLevel,
"DofsPerNode");
77 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
81 using MT =
typename STS::magnitudeType;
86 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
87 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
95 const MT kappa =
static_cast<MT
>(pL.
get<
double>(
"aggregation: Dirichlet threshold"));
98 "Pairwise requires kappa > 2"
99 " otherwise all rows are considered as Dirichlet rows.");
104 maxNumIter = pL.
get<
int>(
"aggregation: pairwise: size");
107 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
108 " must be a strictly positive integer");
112 graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
114 auto graph_k = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
115 graph = graph_k->copyToHost();
121 aggregates->setObjectLabel(
"PW");
126 std::vector<unsigned> aggStat(numRows,
READY);
128 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
130 "Pairwise only supports one dof per node");
137 std::string orderingStr = pL.
get<std::string>(
"aggregation: ordering");
144 ordering = O_NATURAL;
145 if (orderingStr ==
"random")
147 else if (orderingStr ==
"natural") {
148 }
else if (orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm")
149 ordering = O_CUTHILL_MCKEE;
159 for (
LO i = 0; i < numRows; i++)
160 orderingVector[i] = i;
161 if (ordering == O_RANDOM)
163 else if (ordering == O_CUTHILL_MCKEE) {
165 auto localVector = rcmVector->getData(0);
166 for (
LO i = 0; i < numRows; i++)
167 orderingVector[i] = localVector[i];
171 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
172 BuildInitialAggregates(pL, A, orderingVector(), kappa,
173 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
175 "Initial pairwise aggregation failed to aggregate all nodes");
176 LO numLocalAggregates = aggregates->GetNumAggregates();
177 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
178 << A->getLocalNumRows() / numLocalAggregates << std::endl;
187 LO numLocalDirichletNodes = numDirichletNodes;
188 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
189 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
190 for (
LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
192 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
193 localVertex2AggId(), intermediateP);
196 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
201 std::vector<std::vector<LO>> agg2vertex(numLocalAggregates);
202 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
203 for (
LO i = 0; i < (
LO)numRows; i++) {
206 LO agg = vertex2AggId[i];
207 agg2vertex[agg].push_back(i);
210 typename row_sum_type::host_mirror_type rowSum_h = Kokkos::create_mirror_view(rowSum);
211 for (
LO i = 0; i < numRows; i++) {
215 int agg = vertex2AggId[i];
216 std::vector<LO>& myagg = agg2vertex[agg];
218 size_t nnz = A->getNumEntriesInLocalRow(i);
221 A->getLocalRowView(i, indices, vals);
224 for (
LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
226 if (indices[colidx] < numRows) {
227 for (
LO j = 0; j < (
LO)myagg.size(); j++)
228 if (vertex2AggId[indices[colidx]] == agg)
232 *out <<
"- ADDING col " << indices[colidx] <<
" = " << vals[colidx] << std::endl;
233 mysum += vals[colidx];
235 *out <<
"- NOT ADDING col " << indices[colidx] <<
" = " << vals[colidx] << std::endl;
239 rowSum_h[agg] = mysum;
241 Kokkos::deep_copy(rowSum, rowSum_h);
246 for (
LO i = 0; i < numRows; i++)
247 localOrderingVector[i] = i;
248 if (ordering == O_RANDOM)
250 else if (ordering == O_CUTHILL_MCKEE) {
252 auto localVector = rcmVector->getData(0);
253 for (
LO i = 0; i < numRows; i++)
254 localOrderingVector[i] = localVector[i];
258 numLocalAggregates = 0;
259 numNonAggregatedNodes =
static_cast<LO>(coarseLocalA.numRows());
260 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
261 localVertex2AggId.resize(numNonAggregatedNodes, -1);
262 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
263 localAggStat, localVertex2AggId,
264 numLocalAggregates, numNonAggregatedNodes);
268 numLocalDirichletNodes = 0;
272 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
273 for (
size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
274 LO oldAggIdx = vertex2AggId[vertexIdx];
276 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
281 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
282 << A->getLocalNumRows() / numLocalAggregates << std::endl;
284 aggregates->SetNumAggregates(numLocalAggregates);
285 aggregates->AggregatesCrossProcessors(
false);
286 aggregates->ComputeAggregateSizes(
true );
289 Set(currentLevel,
"Aggregates", aggregates);
290 GetOStream(
Statistics0) << aggregates->description() << std::endl;
293 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
300 std::vector<unsigned>& aggStat,
301 LO& numNonAggregatedNodes,
302 LO& numDirichletNodes)
const {
303 Monitor m(*
this,
"BuildInitialAggregates");
305 using MT =
typename STS::magnitudeType;
309 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
310 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
319 const MT MT_TWO = MT_ONE + MT_ONE;
323 const MT kappa_init = kappa / (kappa - MT_TWO);
324 const LO numRows = aggStat.size();
325 const int myRank = A->getMap()->getComm()->getRank();
329 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
330 double tie_less = 1.0 - tie_criterion;
331 double tie_more = 1.0 + tie_criterion;
356 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
357 MT aii = STS::magnitude(D[row]);
358 MT rowsum = AbsRs[row];
360 if (aii >= kappa_init * rowsum) {
361 *out <<
"Flagging index " << row <<
" as dirichlet "
362 "aii >= kappa*rowsum = "
363 << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
365 --numNonAggregatedNodes;
371 LO aggIndex = LO_ZERO;
372 for (
LO i = 0; i < numRows; i++) {
373 LO current_idx = orderingVector[i];
375 if (aggStat[current_idx] !=
READY)
378 MT best_mu = MT_ZERO;
379 LO best_idx = LO_INVALID;
381 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
384 A->getLocalRowView(current_idx, indices, vals);
386 MT aii = STS::real(D[current_idx]);
387 MT si = STS::real(S[current_idx]);
388 for (
LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
390 LO col = indices[colidx];
391 SC val = vals[colidx];
392 if (current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
395 MT aij = STS::real(val);
396 MT ajj = STS::real(D[col]);
397 MT sj = -STS::real(S[col]);
398 if (aii - si + ajj - sj >= MT_ZERO) {
400 MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj);
401 MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj));
402 MT mu = mu_top / mu_bottom;
405 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
406 (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) {
409 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
410 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
411 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
413 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
414 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
415 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
418 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
419 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
420 <<
" = " << aii - si + ajj - sj << std::endl;
424 if (best_idx == LO_INVALID) {
425 *out <<
"No node buddy found for index " << current_idx
426 <<
" [agg " << aggIndex <<
"]\n"
432 aggStat[current_idx] =
ONEPT;
433 vertex2AggId[current_idx] = aggIndex;
434 procWinner[current_idx] = myRank;
435 numNonAggregatedNodes--;
440 if (best_mu <= kappa) {
441 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
445 vertex2AggId[current_idx] = aggIndex;
446 vertex2AggId[best_idx] = aggIndex;
447 procWinner[current_idx] = myRank;
448 procWinner[best_idx] = myRank;
449 numNonAggregatedNodes -= 2;
453 *out <<
"No buddy found for index " << current_idx <<
","
454 " but aggregating as singleton [agg "
455 << aggIndex <<
"]" << std::endl;
457 aggStat[current_idx] =
ONEPT;
458 vertex2AggId[current_idx] = aggIndex;
459 procWinner[current_idx] = myRank;
460 numNonAggregatedNodes--;
466 *out <<
"vertex2aggid :";
467 for (
int i = 0; i < static_cast<int>(vertex2AggId.
size()); ++i) {
468 *out << i <<
"(" << vertex2AggId[i] <<
")";
476 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
481 const typename Matrix::local_matrix_type& coarseA,
483 #
if KOKKOS_VERSION >= 40799
484 const Kokkos::View<
typename KokkosKernels::ArithTraits<Scalar>::val_type*,
486 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
489 typename Matrix::local_matrix_type::device_type>& rowSum,
490 std::vector<LocalOrdinal>& localAggStat,
492 LO& numLocalAggregates,
493 LO& numNonAggregatedNodes)
const {
494 Monitor m(*
this,
"BuildFurtherAggregates");
498 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
499 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
505 using value_type =
typename local_matrix_type::value_type;
506 #if KOKKOS_VERSION >= 40799
507 const value_type KAT_zero = KokkosKernels::ArithTraits<value_type>::zero();
509 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
518 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
519 double tie_less = 1.0 - tie_criterion;
520 double tie_more = 1.0 + tie_criterion;
522 typename row_sum_type::host_mirror_type rowSum_h = Kokkos::create_mirror_view(rowSum);
523 Kokkos::deep_copy(rowSum_h, rowSum);
528 const LO numRows =
static_cast<LO>(coarseA.numRows());
529 typename local_matrix_type::values_type::host_mirror_type diagA_h(
"diagA host", numRows);
530 typename local_matrix_type::row_map_type::host_mirror_type row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map);
531 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
532 typename local_matrix_type::index_type::host_mirror_type entries_h = Kokkos::create_mirror_view(coarseA.graph.entries);
533 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
534 typename local_matrix_type::values_type::host_mirror_type values_h = Kokkos::create_mirror_view(coarseA.values);
535 Kokkos::deep_copy(values_h, coarseA.values);
536 for (
LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
537 for (
LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
538 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
540 if (rowIdx == static_cast<LO>(entries_h(entryIdx))) {
541 diagA_h(rowIdx) = values_h(entryIdx);
546 for (
LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
547 if (localAggStat[currentIdx] !=
READY) {
555 for (
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
556 const LO colIdx =
static_cast<LO>(entries_h(entryIdx));
557 if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
564 if (aii - si + ajj - sj >= MT_zero) {
565 const magnitude_type mu_top = MT_two / (MT_one / aii + MT_one / ajj);
566 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
570 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
571 (mu < best_mu * tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
574 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
575 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
576 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
" mu = " << mu << std::endl;
578 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
579 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
580 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
583 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
584 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
585 <<
" = " << aii - si + ajj - sj << std::endl;
590 localAggStat[currentIdx] =
ONEPT;
591 localVertex2AggID[currentIdx] = numLocalAggregates;
592 --numNonAggregatedNodes;
593 ++numLocalAggregates;
595 if (best_mu <= kappa) {
596 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
599 localVertex2AggID[currentIdx] = numLocalAggregates;
600 --numNonAggregatedNodes;
603 localVertex2AggID[bestIdx] = numLocalAggregates;
604 --numNonAggregatedNodes;
606 ++numLocalAggregates;
608 *out <<
"No buddy found for index " << currentIdx <<
","
609 " but aggregating as singleton [agg "
610 << numLocalAggregates <<
"]" << std::endl;
612 localAggStat[currentIdx] =
ONEPT;
613 localVertex2AggID[currentIdx] = numLocalAggregates;
614 --numNonAggregatedNodes;
615 ++numLocalAggregates;
622 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
625 typename Matrix::local_matrix_type& onrankA)
const {
626 Monitor m(*
this,
"BuildOnRankLocalMatrix");
630 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
631 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
637 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
638 using values_type =
typename local_matrix_type::values_type;
639 using size_type =
typename local_graph_type::size_type;
640 using col_index_type =
typename local_graph_type::data_type;
641 using array_layout =
typename local_graph_type::array_layout;
642 using memory_traits =
typename local_graph_type::memory_traits;
643 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
644 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
649 const int numRows =
static_cast<int>(localA.numRows());
650 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
651 typename row_pointer_type::host_mirror_type rowPtr_h = Kokkos::create_mirror_view(rowPtr);
652 typename local_graph_type::row_map_type::host_mirror_type origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map);
653 typename local_graph_type::entries_type::host_mirror_type origColind_h = Kokkos::create_mirror_view(localA.graph.entries);
654 typename values_type::host_mirror_type origValues_h = Kokkos::create_mirror_view(localA.values);
655 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
656 Kokkos::deep_copy(origColind_h, localA.graph.entries);
657 Kokkos::deep_copy(origValues_h, localA.values);
661 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
662 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
663 if (origColind_h(entryIdx) < numRows) {
664 rowPtr_h(rowIdx + 1) += 1;
667 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
669 Kokkos::deep_copy(rowPtr, rowPtr_h);
671 const LO nnzOnrankA = rowPtr_h(numRows);
674 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
675 values_type values(
"onrankA values", rowPtr_h(numRows));
676 typename col_indices_type::host_mirror_type colInd_h = Kokkos::create_mirror_view(colInd);
677 typename values_type::host_mirror_type values_h = Kokkos::create_mirror_view(values);
679 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
681 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
682 if (origColind_h(entryIdx) < numRows) {
683 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
684 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
689 Kokkos::deep_copy(colInd, colInd_h);
690 Kokkos::deep_copy(values, values_h);
693 nnzOnrankA, values, rowPtr, colInd);
696 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
702 typename Matrix::local_matrix_type& intermediateP)
const {
703 Monitor m(*
this,
"BuildIntermediateProlongator");
707 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
708 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
714 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
715 using values_type =
typename local_matrix_type::values_type;
716 using size_type =
typename local_graph_type::size_type;
717 using col_index_type =
typename local_graph_type::data_type;
718 using array_layout =
typename local_graph_type::array_layout;
719 using memory_traits =
typename local_graph_type::memory_traits;
720 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
721 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
725 const int intermediatePnnz = numRows - numDirichletNodes;
726 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
727 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
728 values_type values(
"intermediateP values", intermediatePnnz);
729 typename row_pointer_type::host_mirror_type rowPtr_h = Kokkos::create_mirror_view(rowPtr);
730 typename col_indices_type::host_mirror_type colInd_h = Kokkos::create_mirror_view(colInd);
733 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
735 if (localVertex2AggID[rowIdx] == LO_INVALID) {
736 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
738 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
739 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
743 Kokkos::deep_copy(rowPtr, rowPtr_h);
744 Kokkos::deep_copy(colInd, colInd_h);
745 #if KOKKOS_VERSION >= 40799
746 Kokkos::deep_copy(values, KokkosKernels::ArithTraits<typename values_type::value_type>::one());
748 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
752 numRows, numLocalAggregates, intermediatePnnz,
753 values, rowPtr, colInd);
756 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
759 typename Matrix::local_matrix_type& coarseA)
const {
760 Monitor m(*
this,
"BuildCoarseLocalMatrix");
762 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
763 using values_type =
typename local_matrix_type::values_type;
764 using size_type =
typename local_graph_type::size_type;
765 using col_index_type =
typename local_graph_type::data_type;
766 using array_layout =
typename local_graph_type::array_layout;
767 using memory_traits =
typename local_graph_type::memory_traits;
768 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
769 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
772 localSpGEMM(coarseA, intermediateP,
"AP", AP);
785 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
786 intermediateP.numCols() + 1);
787 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
788 intermediateP.nnz());
789 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
790 intermediateP.nnz());
792 typename row_pointer_type::host_mirror_type rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
793 typename col_indices_type::host_mirror_type entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
794 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
795 Kokkos::deep_copy(rowPtrPt_h, 0);
796 for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
797 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
799 for (
LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
800 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
802 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
804 typename row_pointer_type::host_mirror_type rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
805 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
806 typename col_indices_type::host_mirror_type colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
807 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
808 typename values_type::host_mirror_type valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
809 Kokkos::deep_copy(valuesP_h, intermediateP.values);
810 typename col_indices_type::host_mirror_type colIndPt_h = Kokkos::create_mirror_view(colIndPt);
811 typename values_type::host_mirror_type valuesPt_h = Kokkos::create_mirror_view(valuesPt);
812 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
813 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
815 col_index_type colIdx = 0;
816 for (
LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
817 for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
818 colIdx = entries_h(entryIdxP);
819 for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
820 if (colIndPt_h(entryIdxPt) == invalidColumnIndex) {
821 colIndPt_h(entryIdxPt) = rowIdx;
822 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
829 Kokkos::deep_copy(colIndPt, colIndPt_h);
830 Kokkos::deep_copy(valuesPt, valuesPt_h);
833 intermediateP.numCols(),
834 intermediateP.numRows(),
836 valuesPt, rowPtrPt, colIndPt);
839 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
842 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
845 const typename Matrix::local_matrix_type& B,
846 const std::string matrixLabel,
847 typename Matrix::local_matrix_type& C)
const {
848 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
849 using values_type =
typename local_matrix_type::values_type;
850 using size_type =
typename local_graph_type::size_type;
851 using col_index_type =
typename local_graph_type::data_type;
852 using array_layout =
typename local_graph_type::array_layout;
853 using memory_space =
typename device_type::memory_space;
854 using memory_traits =
typename local_graph_type::memory_traits;
855 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
856 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
859 int team_work_size = 16;
860 std::string myalg(
"SPGEMM_KK_MEMORY");
861 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
862 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
863 typename col_indices_type::const_value_type,
864 typename values_type::const_value_type,
869 kh.create_spgemm_handle(alg_enum);
870 kh.set_team_work_size(team_work_size);
873 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
875 col_indices_type colIndC;
879 KokkosSparse::spgemm_symbolic(&kh, A.numRows(),
880 B.numRows(), B.numCols(),
881 A.graph.row_map, A.graph.entries,
false,
882 B.graph.row_map, B.graph.entries,
false,
886 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
888 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
889 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
893 KokkosSparse::spgemm_numeric(&kh, A.numRows(),
894 B.numRows(), B.numCols(),
895 A.graph.row_map, A.graph.entries, A.values,
false,
896 B.graph.row_map, B.graph.entries, B.values,
false,
897 rowPtrC, colIndC, valuesC);
898 kh.destroy_spgemm_handle();
900 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level ¤tLevel) const
Build aggregates.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static magnitudeType real(T a)
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void DeclareInput(Level ¤tLevel) const
Input.
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
bool isParameter(const std::string &name) const
Print statistics that do not involve significant additional computation.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
typename device_type::execution_space execution_space
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.