10 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
11 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
13 #include <Xpetra_Map.hpp>
15 #include <Xpetra_MultiVectorFactory.hpp>
16 #include <Xpetra_MapFactory.hpp>
19 #include "KokkosKernels_Handle.hpp"
20 #include "KokkosSparse_spgemm.hpp"
21 #include "KokkosSparse_spmv.hpp"
25 #include "MueLu_Aggregates.hpp"
26 #include "MueLu_LWGraph.hpp"
27 #include "MueLu_LWGraph_kokkos.hpp"
32 #include "MueLu_Utilities.hpp"
36 namespace NotayUtils {
37 template <
class LocalOrdinal>
39 return min + as<LocalOrdinal>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
42 template <
class LocalOrdinal>
45 LO n = Teuchos::as<LO>(list.
size());
46 for (LO i = 0; i < n - 1; i++)
51 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
55 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
60 #undef SET_VALID_ENTRY
65 validParamList->
set<
RCP<const FactoryBase>>(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
67 return validParamList;
70 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 Input(currentLevel,
"A");
73 Input(currentLevel,
"Graph");
74 Input(currentLevel,
"DofsPerNode");
77 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
81 using MT =
typename STS::magnitudeType;
86 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
87 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
95 const MT kappa =
static_cast<MT
>(pL.
get<
double>(
"aggregation: Dirichlet threshold"));
98 "Pairwise requires kappa > 2"
99 " otherwise all rows are considered as Dirichlet rows.");
104 maxNumIter = pL.
get<
int>(
"aggregation: pairwise: size");
107 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
108 " must be a strictly positive integer");
112 graph = Get<RCP<LWGraph>>(currentLevel,
"Graph");
114 auto graph_k = Get<RCP<LWGraph_kokkos>>(currentLevel,
"Graph");
115 graph = graph_k->copyToHost();
121 aggregates->setObjectLabel(
"PW");
126 std::vector<unsigned> aggStat(numRows,
READY);
128 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
130 "Pairwise only supports one dof per node");
137 std::string orderingStr = pL.
get<std::string>(
"aggregation: ordering");
144 ordering = O_NATURAL;
145 if (orderingStr ==
"random")
147 else if (orderingStr ==
"natural") {
148 }
else if (orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm")
149 ordering = O_CUTHILL_MCKEE;
159 for (
LO i = 0; i < numRows; i++)
160 orderingVector[i] = i;
161 if (ordering == O_RANDOM)
163 else if (ordering == O_CUTHILL_MCKEE) {
165 auto localVector = rcmVector->getData(0);
166 for (
LO i = 0; i < numRows; i++)
167 orderingVector[i] = localVector[i];
171 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
172 BuildInitialAggregates(pL, A, orderingVector(), kappa,
173 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
175 "Initial pairwise aggregation failed to aggregate all nodes");
176 LO numLocalAggregates = aggregates->GetNumAggregates();
177 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
178 << A->getLocalNumRows() / numLocalAggregates << std::endl;
187 LO numLocalDirichletNodes = numDirichletNodes;
188 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
189 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
190 for (
LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
192 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
193 localVertex2AggId(), intermediateP);
196 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
201 std::vector<std::vector<LO>> agg2vertex(numLocalAggregates);
202 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
203 for (
LO i = 0; i < (
LO)numRows; i++) {
206 LO agg = vertex2AggId[i];
207 agg2vertex[agg].push_back(i);
210 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
211 for (
LO i = 0; i < numRows; i++) {
215 int agg = vertex2AggId[i];
216 std::vector<LO>& myagg = agg2vertex[agg];
218 size_t nnz = A->getNumEntriesInLocalRow(i);
221 A->getLocalRowView(i, indices, vals);
224 for (
LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
226 if (indices[colidx] < numRows) {
227 for (
LO j = 0; j < (
LO)myagg.size(); j++)
228 if (vertex2AggId[indices[colidx]] == agg)
232 *out <<
"- ADDING col " << indices[colidx] <<
" = " << vals[colidx] << std::endl;
233 mysum += vals[colidx];
235 *out <<
"- NOT ADDING col " << indices[colidx] <<
" = " << vals[colidx] << std::endl;
239 rowSum_h[agg] = mysum;
241 Kokkos::deep_copy(rowSum, rowSum_h);
246 for (
LO i = 0; i < numRows; i++)
247 localOrderingVector[i] = i;
248 if (ordering == O_RANDOM)
250 else if (ordering == O_CUTHILL_MCKEE) {
252 auto localVector = rcmVector->getData(0);
253 for (
LO i = 0; i < numRows; i++)
254 localOrderingVector[i] = localVector[i];
258 numLocalAggregates = 0;
259 numNonAggregatedNodes =
static_cast<LO>(coarseLocalA.numRows());
260 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
261 localVertex2AggId.resize(numNonAggregatedNodes, -1);
262 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
263 localAggStat, localVertex2AggId,
264 numLocalAggregates, numNonAggregatedNodes);
268 numLocalDirichletNodes = 0;
272 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
273 for (
size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
274 LO oldAggIdx = vertex2AggId[vertexIdx];
276 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
281 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
282 << A->getLocalNumRows() / numLocalAggregates << std::endl;
284 aggregates->SetNumAggregates(numLocalAggregates);
285 aggregates->AggregatesCrossProcessors(
false);
286 aggregates->ComputeAggregateSizes(
true );
289 Set(currentLevel,
"Aggregates", aggregates);
290 GetOStream(
Statistics0) << aggregates->description() << std::endl;
293 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
300 std::vector<unsigned>& aggStat,
301 LO& numNonAggregatedNodes,
302 LO& numDirichletNodes)
const {
303 Monitor m(*
this,
"BuildInitialAggregates");
305 using MT =
typename STS::magnitudeType;
309 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
310 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
319 const MT MT_TWO = MT_ONE + MT_ONE;
323 const MT kappa_init = kappa / (kappa - MT_TWO);
324 const LO numRows = aggStat.size();
325 const int myRank = A->getMap()->getComm()->getRank();
329 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
330 double tie_less = 1.0 - tie_criterion;
331 double tie_more = 1.0 + tie_criterion;
356 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
357 MT aii = STS::magnitude(D[row]);
358 MT rowsum = AbsRs[row];
360 if (aii >= kappa_init * rowsum) {
361 *out <<
"Flagging index " << row <<
" as dirichlet "
362 "aii >= kappa*rowsum = "
363 << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
365 --numNonAggregatedNodes;
371 LO aggIndex = LO_ZERO;
372 for (
LO i = 0; i < numRows; i++) {
373 LO current_idx = orderingVector[i];
375 if (aggStat[current_idx] !=
READY)
378 MT best_mu = MT_ZERO;
379 LO best_idx = LO_INVALID;
381 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
384 A->getLocalRowView(current_idx, indices, vals);
386 MT aii = STS::real(D[current_idx]);
387 MT si = STS::real(S[current_idx]);
388 for (
LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
390 LO col = indices[colidx];
391 SC val = vals[colidx];
392 if (current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
395 MT aij = STS::real(val);
396 MT ajj = STS::real(D[col]);
397 MT sj = -STS::real(S[col]);
398 if (aii - si + ajj - sj >= MT_ZERO) {
400 MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj);
401 MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj));
402 MT mu = mu_top / mu_bottom;
405 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
406 (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) {
409 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
410 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
411 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
413 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
414 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
415 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
418 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
419 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
420 <<
" = " << aii - si + ajj - sj << std::endl;
424 if (best_idx == LO_INVALID) {
425 *out <<
"No node buddy found for index " << current_idx
426 <<
" [agg " << aggIndex <<
"]\n"
432 aggStat[current_idx] =
ONEPT;
433 vertex2AggId[current_idx] = aggIndex;
434 procWinner[current_idx] = myRank;
435 numNonAggregatedNodes--;
440 if (best_mu <= kappa) {
441 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
445 vertex2AggId[current_idx] = aggIndex;
446 vertex2AggId[best_idx] = aggIndex;
447 procWinner[current_idx] = myRank;
448 procWinner[best_idx] = myRank;
449 numNonAggregatedNodes -= 2;
453 *out <<
"No buddy found for index " << current_idx <<
","
454 " but aggregating as singleton [agg "
455 << aggIndex <<
"]" << std::endl;
457 aggStat[current_idx] =
ONEPT;
458 vertex2AggId[current_idx] = aggIndex;
459 procWinner[current_idx] = myRank;
460 numNonAggregatedNodes--;
466 *out <<
"vertex2aggid :";
467 for (
int i = 0; i < static_cast<int>(vertex2AggId.
size()); ++i) {
468 *out << i <<
"(" << vertex2AggId[i] <<
")";
476 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
481 const typename Matrix::local_matrix_type& coarseA,
483 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
485 typename Matrix::local_matrix_type::device_type>& rowSum,
486 std::vector<LocalOrdinal>& localAggStat,
488 LO& numLocalAggregates,
489 LO& numNonAggregatedNodes)
const {
490 Monitor m(*
this,
"BuildFurtherAggregates");
494 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
495 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
501 using value_type =
typename local_matrix_type::value_type;
502 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
510 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
511 double tie_less = 1.0 - tie_criterion;
512 double tie_more = 1.0 + tie_criterion;
514 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
515 Kokkos::deep_copy(rowSum_h, rowSum);
520 const LO numRows =
static_cast<LO>(coarseA.numRows());
521 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
522 typename local_matrix_type::row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map);
523 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
524 typename local_matrix_type::index_type::HostMirror entries_h = Kokkos::create_mirror_view(coarseA.graph.entries);
525 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
526 typename local_matrix_type::values_type::HostMirror values_h = Kokkos::create_mirror_view(coarseA.values);
527 Kokkos::deep_copy(values_h, coarseA.values);
528 for (
LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
529 for (
LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
530 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
532 if (rowIdx == static_cast<LO>(entries_h(entryIdx))) {
533 diagA_h(rowIdx) = values_h(entryIdx);
538 for (
LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
539 if (localAggStat[currentIdx] !=
READY) {
547 for (
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
548 const LO colIdx =
static_cast<LO>(entries_h(entryIdx));
549 if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
556 if (aii - si + ajj - sj >= MT_zero) {
557 const magnitude_type mu_top = MT_two / (MT_one / aii + MT_one / ajj);
558 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
562 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
563 (mu < best_mu * tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
566 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
567 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
568 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
" mu = " << mu << std::endl;
570 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
571 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
572 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
575 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
576 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
577 <<
" = " << aii - si + ajj - sj << std::endl;
582 localAggStat[currentIdx] =
ONEPT;
583 localVertex2AggID[currentIdx] = numLocalAggregates;
584 --numNonAggregatedNodes;
585 ++numLocalAggregates;
587 if (best_mu <= kappa) {
588 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
591 localVertex2AggID[currentIdx] = numLocalAggregates;
592 --numNonAggregatedNodes;
595 localVertex2AggID[bestIdx] = numLocalAggregates;
596 --numNonAggregatedNodes;
598 ++numLocalAggregates;
600 *out <<
"No buddy found for index " << currentIdx <<
","
601 " but aggregating as singleton [agg "
602 << numLocalAggregates <<
"]" << std::endl;
604 localAggStat[currentIdx] =
ONEPT;
605 localVertex2AggID[currentIdx] = numLocalAggregates;
606 --numNonAggregatedNodes;
607 ++numLocalAggregates;
614 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
617 typename Matrix::local_matrix_type& onrankA)
const {
618 Monitor m(*
this,
"BuildOnRankLocalMatrix");
622 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
623 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
629 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
630 using values_type =
typename local_matrix_type::values_type;
631 using size_type =
typename local_graph_type::size_type;
632 using col_index_type =
typename local_graph_type::data_type;
633 using array_layout =
typename local_graph_type::array_layout;
634 using memory_traits =
typename local_graph_type::memory_traits;
635 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
636 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
641 const int numRows =
static_cast<int>(localA.numRows());
642 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
643 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
644 typename local_graph_type::row_map_type::HostMirror origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map);
645 typename local_graph_type::entries_type::HostMirror origColind_h = Kokkos::create_mirror_view(localA.graph.entries);
646 typename values_type::HostMirror origValues_h = Kokkos::create_mirror_view(localA.values);
647 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
648 Kokkos::deep_copy(origColind_h, localA.graph.entries);
649 Kokkos::deep_copy(origValues_h, localA.values);
653 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
654 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
655 if (origColind_h(entryIdx) < numRows) {
656 rowPtr_h(rowIdx + 1) += 1;
659 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
661 Kokkos::deep_copy(rowPtr, rowPtr_h);
663 const LO nnzOnrankA = rowPtr_h(numRows);
666 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
667 values_type values(
"onrankA values", rowPtr_h(numRows));
668 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
669 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
671 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
673 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
674 if (origColind_h(entryIdx) < numRows) {
675 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
676 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
681 Kokkos::deep_copy(colInd, colInd_h);
682 Kokkos::deep_copy(values, values_h);
685 nnzOnrankA, values, rowPtr, colInd);
688 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
694 typename Matrix::local_matrix_type& intermediateP)
const {
695 Monitor m(*
this,
"BuildIntermediateProlongator");
699 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
700 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
706 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
707 using values_type =
typename local_matrix_type::values_type;
708 using size_type =
typename local_graph_type::size_type;
709 using col_index_type =
typename local_graph_type::data_type;
710 using array_layout =
typename local_graph_type::array_layout;
711 using memory_traits =
typename local_graph_type::memory_traits;
712 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
713 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
717 const int intermediatePnnz = numRows - numDirichletNodes;
718 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
719 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
720 values_type values(
"intermediateP values", intermediatePnnz);
721 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
722 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
725 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
727 if (localVertex2AggID[rowIdx] == LO_INVALID) {
728 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
730 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
731 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
735 Kokkos::deep_copy(rowPtr, rowPtr_h);
736 Kokkos::deep_copy(colInd, colInd_h);
737 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
740 numRows, numLocalAggregates, intermediatePnnz,
741 values, rowPtr, colInd);
744 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
747 typename Matrix::local_matrix_type& coarseA)
const {
748 Monitor m(*
this,
"BuildCoarseLocalMatrix");
750 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
751 using values_type =
typename local_matrix_type::values_type;
752 using size_type =
typename local_graph_type::size_type;
753 using col_index_type =
typename local_graph_type::data_type;
754 using array_layout =
typename local_graph_type::array_layout;
755 using memory_traits =
typename local_graph_type::memory_traits;
756 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
757 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
760 localSpGEMM(coarseA, intermediateP,
"AP", AP);
773 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
774 intermediateP.numCols() + 1);
775 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
776 intermediateP.nnz());
777 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
778 intermediateP.nnz());
780 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
781 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
782 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
783 Kokkos::deep_copy(rowPtrPt_h, 0);
784 for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
785 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
787 for (
LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
788 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
790 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
792 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
793 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
794 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
795 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
796 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
797 Kokkos::deep_copy(valuesP_h, intermediateP.values);
798 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
799 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
800 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
801 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
803 col_index_type colIdx = 0;
804 for (
LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
805 for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
806 colIdx = entries_h(entryIdxP);
807 for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
808 if (colIndPt_h(entryIdxPt) == invalidColumnIndex) {
809 colIndPt_h(entryIdxPt) = rowIdx;
810 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
817 Kokkos::deep_copy(colIndPt, colIndPt_h);
818 Kokkos::deep_copy(valuesPt, valuesPt_h);
821 intermediateP.numCols(),
822 intermediateP.numRows(),
824 valuesPt, rowPtrPt, colIndPt);
827 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
830 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
833 const typename Matrix::local_matrix_type& B,
834 const std::string matrixLabel,
835 typename Matrix::local_matrix_type& C)
const {
836 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
837 using values_type =
typename local_matrix_type::values_type;
838 using size_type =
typename local_graph_type::size_type;
839 using col_index_type =
typename local_graph_type::data_type;
840 using array_layout =
typename local_graph_type::array_layout;
841 using memory_space =
typename device_type::memory_space;
842 using memory_traits =
typename local_graph_type::memory_traits;
843 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
844 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
847 int team_work_size = 16;
848 std::string myalg(
"SPGEMM_KK_MEMORY");
849 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
850 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
851 typename col_indices_type::const_value_type,
852 typename values_type::const_value_type,
857 kh.create_spgemm_handle(alg_enum);
858 kh.set_team_work_size(team_work_size);
861 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
863 col_indices_type colIndC;
867 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
868 B.numRows(), B.numCols(),
869 A.graph.row_map, A.graph.entries,
false,
870 B.graph.row_map, B.graph.entries,
false,
874 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
876 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
877 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
881 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
882 B.numRows(), B.numCols(),
883 A.graph.row_map, A.graph.entries, A.values,
false,
884 B.graph.row_map, B.graph.entries, B.values,
false,
885 rowPtrC, colIndC, valuesC);
886 kh.destroy_spgemm_handle();
888 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level ¤tLevel) const
Build aggregates.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static magnitudeType real(T a)
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void DeclareInput(Level ¤tLevel) const
Input.
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
bool isParameter(const std::string &name) const
Print statistics that do not involve significant additional computation.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
typename device_type::execution_space execution_space
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.