46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
49 #include <Xpetra_Map.hpp>
50 #include <Xpetra_Vector.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
61 #include "MueLu_Aggregates.hpp"
67 #include "MueLu_Utilities.hpp"
69 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
70 #include "MueLu_Utilities_kokkos.hpp"
75 namespace NotayUtils {
76 template <
class LocalOrdinal>
78 return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
81 template <
class LocalOrdinal>
84 LO n = Teuchos::as<LO>(list.
size());
85 for(LO i = 0; i < n-1; i++)
90 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
96 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
102 #undef SET_VALID_ENTRY
107 validParamList->
set<
RCP<const FactoryBase> >(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
108 validParamList->
set<
RCP<const FactoryBase> >(
"AggregateQualities", null,
"Generating factory for variable \'AggregateQualities\'");
111 return validParamList;
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
118 Input(currentLevel,
"A");
119 Input(currentLevel,
"Graph");
120 Input(currentLevel,
"DofsPerNode");
121 if (pL.
get<
bool>(
"aggregation: compute aggregate qualities")) {
122 Input(currentLevel,
"AggregateQualities");
129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
133 using MT =
typename STS::magnitudeType;
138 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
139 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
147 const MT kappa =
static_cast<MT
>(pL.
get<
double>(
"aggregation: Dirichlet threshold"));
150 "Pairwise requires kappa > 2"
151 " otherwise all rows are considered as Dirichlet rows.");
156 maxNumIter = pL.
get<
int>(
"aggregation: pairwise: size");
159 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
160 " must be a strictly positive integer");
174 std::vector<unsigned> aggStat(numRows,
READY);
177 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
179 "Pairwise only supports one dof per node");
186 std::string orderingStr = pL.
get<std::string>(
"aggregation: ordering");
193 ordering = O_NATURAL;
194 if (orderingStr ==
"random" ) ordering = O_RANDOM;
195 else if(orderingStr ==
"natural") {}
196 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
197 else if(orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm") ordering = O_CUTHILL_MCKEE;
208 for (LO i = 0; i < numRows; i++)
209 orderingVector[i] = i;
210 if (ordering == O_RANDOM)
212 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
213 else if (ordering == O_CUTHILL_MCKEE) {
215 auto localVector = rcmVector->getData(0);
216 for (LO i = 0; i < numRows; i++)
217 orderingVector[i] = localVector[i];
222 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
223 BuildInitialAggregates(pL, A, orderingVector(), kappa,
224 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
226 "Initial pairwise aggregation failed to aggregate all nodes");
228 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
229 << A->getNodeNumRows() / numLocalAggregates << std::endl;
238 LO numLocalDirichletNodes = numDirichletNodes;
240 BuildOnRankLocalMatrix(A->getLocalMatrix(), coarseLocalA);
241 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
243 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
244 localVertex2AggId(), intermediateP);
247 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
252 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
254 for(LO i=0; i<(LO)numRows; i++) {
257 LO agg=vertex2AggId[i];
258 agg2vertex[agg].push_back(i);
261 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
262 for(LO i = 0; i < numRows; i++) {
266 int agg = vertex2AggId[i];
267 std::vector<LO> & myagg = agg2vertex[agg];
269 size_t nnz = A->getNumEntriesInLocalRow(i);
272 A->getLocalRowView(i, indices, vals);
275 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
277 if(indices[colidx] < numRows) {
278 for(LO j=0; j<(LO)myagg.size(); j++)
279 if (vertex2AggId[indices[colidx]] == agg)
283 *out <<
"- ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
284 mysum += vals[colidx];
287 *out <<
"- NOT ADDING col "<<indices[colidx]<<
" = "<<vals[colidx] << std::endl;
291 rowSum_h[agg] = mysum;
298 for (LO i = 0; i < numRows; i++)
299 localOrderingVector[i] = i;
300 if (ordering == O_RANDOM)
302 #if defined(HAVE_MUELU_KOKKOS_REFACTOR)
303 else if (ordering == O_CUTHILL_MCKEE) {
305 auto localVector = rcmVector->getData(0);
306 for (LO i = 0; i < numRows; i++)
307 localOrderingVector[i] = localVector[i];
312 numLocalAggregates = 0;
313 numNonAggregatedNodes =
static_cast<LO
>(coarseLocalA.numRows());
314 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
315 localVertex2AggId.resize(numNonAggregatedNodes, -1);
316 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
317 localAggStat, localVertex2AggId,
318 numLocalAggregates, numNonAggregatedNodes);
322 numLocalDirichletNodes = 0;
326 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
327 for(
size_t vertexIdx = 0; vertexIdx < A->getNodeNumRows(); ++vertexIdx) {
328 LO oldAggIdx = vertex2AggId[vertexIdx];
330 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
335 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
336 << A->getNodeNumRows() / numLocalAggregates << std::endl;
343 Set(currentLevel,
"Aggregates", aggregates);
348 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
355 std::vector<unsigned>& aggStat,
356 LO& numNonAggregatedNodes,
357 LO& numDirichletNodes)
const {
359 Monitor m(*
this,
"BuildInitialAggregates");
361 using MT =
typename STS::magnitudeType;
362 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
365 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
366 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
376 const MT MT_TWO = MT_ONE + MT_ONE;
380 const MT kappa_init = kappa / (kappa - MT_TWO);
381 const LO numRows = aggStat.size();
382 const int myRank = A->getMap()->getComm()->getRank();
386 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
387 double tie_less = 1.0 - tie_criterion;
388 double tie_more = 1.0 + tie_criterion;
413 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
414 MT aii = STS::magnitude(D[row]);
415 MT rowsum = AbsRs[row];
417 if(aii >= kappa_init * rowsum) {
418 *out <<
"Flagging index " << row <<
" as dirichlet "
419 "aii >= kappa*rowsum = " << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
421 --numNonAggregatedNodes;
428 LO aggIndex = LO_ZERO;
429 for(LO i = 0; i < numRows; i++) {
430 LO current_idx = orderingVector[i];
432 if(aggStat[current_idx] !=
READY)
435 MT best_mu = MT_ZERO;
436 LO best_idx = LO_INVALID;
438 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
441 A->getLocalRowView(current_idx, indices, vals);
443 MT aii = STS::real(D[current_idx]);
444 MT si = STS::real(S[current_idx]);
445 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
447 LO col = indices[colidx];
448 SC val = vals[colidx];
449 if(current_idx == col || aggStat[col] !=
READY || col > numRows || val == SC_ZERO)
452 MT aij = STS::real(val);
453 MT ajj = STS::real(D[col]);
454 MT sj = - STS::real(S[col]);
455 if(aii - si + ajj - sj >= MT_ZERO) {
457 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
458 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
459 MT mu = mu_top / mu_bottom;
462 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
463 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
466 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
467 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
468 <<
" = " << aii - si + ajj - sj<<
", aij = "<<aij <<
", mu = " << mu << std::endl;
471 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
472 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
473 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
477 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
478 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
479 <<
" = " << aii - si + ajj - sj << std::endl;
483 if(best_idx == LO_INVALID) {
484 *out <<
"No node buddy found for index " << current_idx
485 <<
" [agg " << aggIndex <<
"]\n" << std::endl;
490 aggStat[current_idx] =
ONEPT;
491 vertex2AggId[current_idx] = aggIndex;
492 procWinner[current_idx] = myRank;
493 numNonAggregatedNodes--;
498 if(best_mu <= kappa) {
499 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
503 vertex2AggId[current_idx] = aggIndex;
504 vertex2AggId[best_idx] = aggIndex;
505 procWinner[current_idx] = myRank;
506 procWinner[best_idx] = myRank;
507 numNonAggregatedNodes-=2;
511 *out <<
"No buddy found for index " << current_idx <<
","
512 " but aggregating as singleton [agg " << aggIndex <<
"]" << std::endl;
514 aggStat[current_idx] =
ONEPT;
515 vertex2AggId[current_idx] = aggIndex;
516 procWinner[current_idx] = myRank;
517 numNonAggregatedNodes--;
523 *out <<
"vertex2aggid :";
524 for(
int i = 0; i < static_cast<int>(vertex2AggId.
size()); ++i) {
525 *out << i <<
"(" << vertex2AggId[i] <<
")";
533 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
538 const typename Matrix::local_matrix_type& coarseA,
540 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
542 typename Matrix::local_matrix_type::device_type>& rowSum,
543 std::vector<LocalOrdinal>& localAggStat,
545 LO& numLocalAggregates,
546 LO& numNonAggregatedNodes)
const {
547 Monitor m(*
this,
"BuildFurtherAggregates");
551 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
552 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
558 using value_type =
typename local_matrix_type::value_type;
559 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
567 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
568 double tie_less = 1.0 - tie_criterion;
569 double tie_more = 1.0 + tie_criterion;
571 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
577 const LO numRows =
static_cast<LO
>(coarseA.numRows());
578 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
579 typename local_matrix_type::row_map_type::HostMirror row_map_h
580 = Kokkos::create_mirror_view(coarseA.graph.row_map);
582 typename local_matrix_type::index_type::HostMirror entries_h
583 = Kokkos::create_mirror_view(coarseA.graph.entries);
585 typename local_matrix_type::values_type::HostMirror values_h
586 = Kokkos::create_mirror_view(coarseA.values);
588 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
589 for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
590 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
592 if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
593 diagA_h(rowIdx) = values_h(entryIdx);
598 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
599 if(localAggStat[currentIdx] !=
READY) {
607 for(
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
608 const LO colIdx =
static_cast<LO
>(entries_h(entryIdx));
609 if(currentIdx == colIdx || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero || colIdx > numRows) {
616 if(aii - si + ajj - sj >= MT_zero) {
617 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
618 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
622 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
623 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
626 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
627 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
628 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
" mu = " << mu << std::endl;
631 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
632 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
633 <<
" = " << aii - si + ajj - sj <<
", aij = "<<aij<<
", mu = " << mu << std::endl;
636 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
637 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
638 <<
" = " << aii - si + ajj - sj << std::endl;
643 localAggStat[currentIdx] =
ONEPT;
644 localVertex2AggID[currentIdx] = numLocalAggregates;
645 --numNonAggregatedNodes;
646 ++numLocalAggregates;
648 if(best_mu <= kappa) {
649 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
652 localVertex2AggID[currentIdx] = numLocalAggregates;
653 --numNonAggregatedNodes;
656 localVertex2AggID[bestIdx] = numLocalAggregates;
657 --numNonAggregatedNodes;
659 ++numLocalAggregates;
661 *out <<
"No buddy found for index " << currentIdx <<
","
662 " but aggregating as singleton [agg " << numLocalAggregates <<
"]" << std::endl;
664 localAggStat[currentIdx] =
ONEPT;
665 localVertex2AggID[currentIdx] = numLocalAggregates;
666 --numNonAggregatedNodes;
667 ++numLocalAggregates;
674 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
677 typename Matrix::local_matrix_type& onrankA)
const {
678 Monitor m(*
this,
"BuildOnRankLocalMatrix");
682 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
683 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
689 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
690 using values_type =
typename local_matrix_type::values_type;
691 using size_type =
typename local_graph_type::size_type;
692 using col_index_type =
typename local_graph_type::data_type;
693 using array_layout =
typename local_graph_type::array_layout;
694 using memory_traits =
typename local_graph_type::memory_traits;
701 const int numRows =
static_cast<int>(localA.numRows());
702 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
703 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
704 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
705 = Kokkos::create_mirror_view(localA.graph.row_map);
706 typename local_graph_type::entries_type::HostMirror origColind_h
707 = Kokkos::create_mirror_view(localA.graph.entries);
708 typename values_type::HostMirror origValues_h
709 = Kokkos::create_mirror_view(localA.values);
716 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
717 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
718 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
720 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
724 const LO nnzOnrankA = rowPtr_h(numRows);
727 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
728 values_type values(
"onrankA values", rowPtr_h(numRows));
729 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
730 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
732 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
734 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
735 if(origColind_h(entryIdx) < numRows) {
736 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
737 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
746 nnzOnrankA, values, rowPtr, colInd);
749 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
755 typename Matrix::local_matrix_type& intermediateP)
const {
756 Monitor m(*
this,
"BuildIntermediateProlongator");
760 if(
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
761 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
767 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
768 using values_type =
typename local_matrix_type::values_type;
769 using size_type =
typename local_graph_type::size_type;
770 using col_index_type =
typename local_graph_type::data_type;
771 using array_layout =
typename local_graph_type::array_layout;
772 using memory_traits =
typename local_graph_type::memory_traits;
778 const int intermediatePnnz = numRows - numDirichletNodes;
779 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
780 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
781 values_type values(
"intermediateP values", intermediatePnnz);
782 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
783 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
786 for(
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
788 if(localVertex2AggID[rowIdx] == LO_INVALID) {
789 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
791 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
792 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
798 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
801 numRows, numLocalAggregates, intermediatePnnz,
802 values, rowPtr, colInd);
805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
808 typename Matrix::local_matrix_type& coarseA)
const {
809 Monitor m(*
this,
"BuildCoarseLocalMatrix");
811 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
812 using values_type =
typename local_matrix_type::values_type;
813 using size_type =
typename local_graph_type::size_type;
814 using col_index_type =
typename local_graph_type::data_type;
815 using array_layout =
typename local_graph_type::array_layout;
816 using memory_traits =
typename local_graph_type::memory_traits;
821 localSpGEMM(coarseA, intermediateP,
"AP", AP);
834 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
835 intermediateP.numCols() + 1);
836 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
837 intermediateP.nnz());
838 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
839 intermediateP.nnz());
841 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
843 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
844 rowPtrPt_h(intermediateP.graph.entries(entryIdx) + 1) += 1;
846 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
847 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
851 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
853 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
855 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
857 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
858 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
859 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
862 col_index_type colIdx = 0;
863 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
864 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
865 colIdx = intermediateP.graph.entries(entryIdxP);
866 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
867 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
868 colIndPt_h(entryIdxPt) = rowIdx;
869 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
880 intermediateP.numCols(),
881 intermediateP.numRows(),
883 valuesPt, rowPtrPt, colIndPt);
886 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
889 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
892 const typename Matrix::local_matrix_type& B,
893 const std::string matrixLabel,
894 typename Matrix::local_matrix_type& C)
const {
896 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
897 using values_type =
typename local_matrix_type::values_type;
898 using size_type =
typename local_graph_type::size_type;
899 using col_index_type =
typename local_graph_type::data_type;
900 using array_layout =
typename local_graph_type::array_layout;
901 using memory_space =
typename device_type::memory_space;
902 using memory_traits =
typename local_graph_type::memory_traits;
907 int team_work_size = 16;
908 std::string myalg(
"SPGEMM_KK_MEMORY");
909 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
910 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
911 typename col_indices_type::const_value_type,
912 typename values_type::const_value_type,
916 kh.create_spgemm_handle(alg_enum);
917 kh.set_team_work_size(team_work_size);
920 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
922 col_indices_type colIndC;
926 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
927 B.numRows(), B.numCols(),
928 A.graph.row_map, A.graph.entries,
false,
929 B.graph.row_map, B.graph.entries,
false,
933 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
935 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
936 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
940 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
941 B.numRows(), B.numCols(),
942 A.graph.row_map, A.graph.entries, A.values,
false,
943 B.graph.row_map, B.graph.entries, B.values,
false,
944 rowPtrC, colIndC, valuesC);
945 kh.destroy_spgemm_handle();
947 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultLocalOrdinal LocalOrdinal
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
void Build(Level ¤tLevel) const
Build aggregates.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
virtual size_t GetNodeNumVertices() const =0
Return number of vertices owned by the calling node.
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static magnitudeType real(T a)
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
void DeclareInput(Level ¤tLevel) const
Input.
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
Teuchos::ArrayRCP< LO > ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
bool isParameter(const std::string &name) const
Print statistics that do not involve significant additional computation.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
typename device_type::execution_space execution_space
LO GetNumAggregates() const
returns the number of aggregates of the current processor. Note: could/should be renamed to GetNumLoc...
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
virtual void setObjectLabel(const std::string &objectLabel)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
std::string description() const
Return a simple one-line description of this object.
void AggregatesCrossProcessors(const bool &flag)
Record whether aggregates include DOFs from other processes.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.