46 #ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47 #define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
49 #include <Xpetra_Map.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
52 #include <Xpetra_MapFactory.hpp>
55 #include "KokkosKernels_Handle.hpp"
56 #include "KokkosSparse_spgemm.hpp"
57 #include "KokkosSparse_spmv.hpp"
61 #include "MueLu_Aggregates.hpp"
62 #include "MueLu_LWGraph.hpp"
67 #include "MueLu_Utilities.hpp"
71 namespace NotayUtils {
72 template <
class LocalOrdinal>
74 return min + as<LocalOrdinal>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
77 template <
class LocalOrdinal>
80 LO n = Teuchos::as<LO>(list.
size());
81 for (LO i = 0; i < n - 1; i++)
86 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
90 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
96 #undef SET_VALID_ENTRY
101 validParamList->
set<
RCP<const FactoryBase> >(
"DofsPerNode", null,
"Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
102 validParamList->
set<
RCP<const FactoryBase> >(
"AggregateQualities", null,
"Generating factory for variable \'AggregateQualities\'");
104 return validParamList;
107 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
111 Input(currentLevel,
"A");
112 Input(currentLevel,
"Graph");
113 Input(currentLevel,
"DofsPerNode");
114 if (pL.
get<
bool>(
"aggregation: compute aggregate qualities")) {
115 Input(currentLevel,
"AggregateQualities");
119 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
123 using MT =
typename STS::magnitudeType;
128 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
129 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
137 const MT kappa =
static_cast<MT
>(pL.
get<
double>(
"aggregation: Dirichlet threshold"));
140 "Pairwise requires kappa > 2"
141 " otherwise all rows are considered as Dirichlet rows.");
146 maxNumIter = pL.
get<
int>(
"aggregation: pairwise: size");
149 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
150 " must be a strictly positive integer");
157 aggregates->setObjectLabel(
"PW");
162 std::vector<unsigned> aggStat(numRows,
READY);
164 const int DofsPerNode = Get<int>(currentLevel,
"DofsPerNode");
166 "Pairwise only supports one dof per node");
173 std::string orderingStr = pL.
get<std::string>(
"aggregation: ordering");
180 ordering = O_NATURAL;
181 if (orderingStr ==
"random")
183 else if (orderingStr ==
"natural") {
184 }
else if (orderingStr ==
"cuthill-mckee" || orderingStr ==
"cm")
185 ordering = O_CUTHILL_MCKEE;
195 for (
LO i = 0; i < numRows; i++)
196 orderingVector[i] = i;
197 if (ordering == O_RANDOM)
199 else if (ordering == O_CUTHILL_MCKEE) {
201 auto localVector = rcmVector->getData(0);
202 for (
LO i = 0; i < numRows; i++)
203 orderingVector[i] = localVector[i];
207 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
208 BuildInitialAggregates(pL, A, orderingVector(), kappa,
209 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
211 "Initial pairwise aggregation failed to aggregate all nodes");
212 LO numLocalAggregates = aggregates->GetNumAggregates();
213 GetOStream(
Statistics0) <<
"Init : " << numLocalAggregates <<
" - "
214 << A->getLocalNumRows() / numLocalAggregates << std::endl;
223 LO numLocalDirichletNodes = numDirichletNodes;
224 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
225 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
226 for (
LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
228 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
229 localVertex2AggId(), intermediateP);
232 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
237 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
238 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
239 for (
LO i = 0; i < (
LO)numRows; i++) {
242 LO agg = vertex2AggId[i];
243 agg2vertex[agg].push_back(i);
246 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
247 for (
LO i = 0; i < numRows; i++) {
251 int agg = vertex2AggId[i];
252 std::vector<LO>& myagg = agg2vertex[agg];
254 size_t nnz = A->getNumEntriesInLocalRow(i);
257 A->getLocalRowView(i, indices, vals);
260 for (
LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
262 if (indices[colidx] < numRows) {
263 for (
LO j = 0; j < (
LO)myagg.size(); j++)
264 if (vertex2AggId[indices[colidx]] == agg)
268 *out <<
"- ADDING col " << indices[colidx] <<
" = " << vals[colidx] << std::endl;
269 mysum += vals[colidx];
271 *out <<
"- NOT ADDING col " << indices[colidx] <<
" = " << vals[colidx] << std::endl;
275 rowSum_h[agg] = mysum;
277 Kokkos::deep_copy(rowSum, rowSum_h);
282 for (
LO i = 0; i < numRows; i++)
283 localOrderingVector[i] = i;
284 if (ordering == O_RANDOM)
286 else if (ordering == O_CUTHILL_MCKEE) {
288 auto localVector = rcmVector->getData(0);
289 for (
LO i = 0; i < numRows; i++)
290 localOrderingVector[i] = localVector[i];
294 numLocalAggregates = 0;
295 numNonAggregatedNodes =
static_cast<LO>(coarseLocalA.numRows());
296 std::vector<LO> localAggStat(numNonAggregatedNodes,
READY);
297 localVertex2AggId.resize(numNonAggregatedNodes, -1);
298 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
299 localAggStat, localVertex2AggId,
300 numLocalAggregates, numNonAggregatedNodes);
304 numLocalDirichletNodes = 0;
308 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
309 for (
size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
310 LO oldAggIdx = vertex2AggId[vertexIdx];
312 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
317 GetOStream(
Statistics0) <<
"Iter " << aggregationIter <<
": " << numLocalAggregates <<
" - "
318 << A->getLocalNumRows() / numLocalAggregates << std::endl;
320 aggregates->SetNumAggregates(numLocalAggregates);
321 aggregates->AggregatesCrossProcessors(
false);
322 aggregates->ComputeAggregateSizes(
true );
325 Set(currentLevel,
"Aggregates", aggregates);
326 GetOStream(
Statistics0) << aggregates->description() << std::endl;
329 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
336 std::vector<unsigned>& aggStat,
337 LO& numNonAggregatedNodes,
338 LO& numDirichletNodes)
const {
339 Monitor m(*
this,
"BuildInitialAggregates");
341 using MT =
typename STS::magnitudeType;
345 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
346 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
355 const MT MT_TWO = MT_ONE + MT_ONE;
359 const MT kappa_init = kappa / (kappa - MT_TWO);
360 const LO numRows = aggStat.size();
361 const int myRank = A->getMap()->getComm()->getRank();
365 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
366 double tie_less = 1.0 - tie_criterion;
367 double tie_more = 1.0 + tie_criterion;
392 for (
LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
393 MT aii = STS::magnitude(D[row]);
394 MT rowsum = AbsRs[row];
396 if (aii >= kappa_init * rowsum) {
397 *out <<
"Flagging index " << row <<
" as dirichlet "
398 "aii >= kappa*rowsum = "
399 << aii <<
" >= " << kappa_init <<
" " << rowsum << std::endl;
401 --numNonAggregatedNodes;
407 LO aggIndex = LO_ZERO;
408 for (
LO i = 0; i < numRows; i++) {
409 LO current_idx = orderingVector[i];
411 if (aggStat[current_idx] !=
READY)
414 MT best_mu = MT_ZERO;
415 LO best_idx = LO_INVALID;
417 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
420 A->getLocalRowView(current_idx, indices, vals);
422 MT aii = STS::real(D[current_idx]);
423 MT si = STS::real(S[current_idx]);
424 for (
LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
426 LO col = indices[colidx];
427 SC val = vals[colidx];
428 if (current_idx == col || col >= numRows || aggStat[col] !=
READY || val == SC_ZERO)
431 MT aij = STS::real(val);
432 MT ajj = STS::real(D[col]);
433 MT sj = -STS::real(S[col]);
434 if (aii - si + ajj - sj >= MT_ZERO) {
436 MT mu_top = MT_TWO / (MT_ONE / aii + MT_ONE / ajj);
437 MT mu_bottom = -aij + MT_ONE / (MT_ONE / (aii - si) + MT_ONE / (ajj - sj));
438 MT mu = mu_top / mu_bottom;
441 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
442 (mu < best_mu * tie_more && orderingVector[col] < orderingVector[best_idx]))) {
445 *out <<
"[" << current_idx <<
"] Column UPDATED " << col <<
": "
446 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
447 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
449 *out <<
"[" << current_idx <<
"] Column NOT UPDATED " << col <<
": "
450 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
451 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
454 *out <<
"[" << current_idx <<
"] Column FAILED " << col <<
": "
455 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
456 <<
" = " << aii - si + ajj - sj << std::endl;
460 if (best_idx == LO_INVALID) {
461 *out <<
"No node buddy found for index " << current_idx
462 <<
" [agg " << aggIndex <<
"]\n"
468 aggStat[current_idx] =
ONEPT;
469 vertex2AggId[current_idx] = aggIndex;
470 procWinner[current_idx] = myRank;
471 numNonAggregatedNodes--;
476 if (best_mu <= kappa) {
477 *out <<
"Node buddies (" << current_idx <<
"," << best_idx <<
") [agg " << aggIndex <<
"]" << std::endl;
481 vertex2AggId[current_idx] = aggIndex;
482 vertex2AggId[best_idx] = aggIndex;
483 procWinner[current_idx] = myRank;
484 procWinner[best_idx] = myRank;
485 numNonAggregatedNodes -= 2;
489 *out <<
"No buddy found for index " << current_idx <<
","
490 " but aggregating as singleton [agg "
491 << aggIndex <<
"]" << std::endl;
493 aggStat[current_idx] =
ONEPT;
494 vertex2AggId[current_idx] = aggIndex;
495 procWinner[current_idx] = myRank;
496 numNonAggregatedNodes--;
502 *out <<
"vertex2aggid :";
503 for (
int i = 0; i < static_cast<int>(vertex2AggId.
size()); ++i) {
504 *out << i <<
"(" << vertex2AggId[i] <<
")";
512 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
517 const typename Matrix::local_matrix_type& coarseA,
519 const Kokkos::View<
typename Kokkos::ArithTraits<Scalar>::val_type*,
521 typename Matrix::local_matrix_type::device_type>& rowSum,
522 std::vector<LocalOrdinal>& localAggStat,
524 LO& numLocalAggregates,
525 LO& numNonAggregatedNodes)
const {
526 Monitor m(*
this,
"BuildFurtherAggregates");
530 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
531 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
537 using value_type =
typename local_matrix_type::value_type;
538 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
546 double tie_criterion = params.
get<
double>(
"aggregation: pairwise: tie threshold");
547 double tie_less = 1.0 - tie_criterion;
548 double tie_more = 1.0 + tie_criterion;
550 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
551 Kokkos::deep_copy(rowSum_h, rowSum);
556 const LO numRows =
static_cast<LO>(coarseA.numRows());
557 typename local_matrix_type::values_type::HostMirror diagA_h(
"diagA host", numRows);
558 typename local_matrix_type::row_map_type::HostMirror row_map_h = Kokkos::create_mirror_view(coarseA.graph.row_map);
559 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
560 typename local_matrix_type::index_type::HostMirror entries_h = Kokkos::create_mirror_view(coarseA.graph.entries);
561 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
562 typename local_matrix_type::values_type::HostMirror values_h = Kokkos::create_mirror_view(coarseA.values);
563 Kokkos::deep_copy(values_h, coarseA.values);
564 for (
LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
565 for (
LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
566 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
568 if (rowIdx == static_cast<LO>(entries_h(entryIdx))) {
569 diagA_h(rowIdx) = values_h(entryIdx);
574 for (
LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
575 if (localAggStat[currentIdx] !=
READY) {
583 for (
auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
584 const LO colIdx =
static_cast<LO>(entries_h(entryIdx));
585 if (currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] !=
READY || values_h(entryIdx) == KAT_zero) {
592 if (aii - si + ajj - sj >= MT_zero) {
593 const magnitude_type mu_top = MT_two / (MT_one / aii + MT_one / ajj);
594 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
598 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
599 (mu < best_mu * tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
602 *out <<
"[" << currentIdx <<
"] Column UPDATED " << colIdx <<
": "
603 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
604 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
" mu = " << mu << std::endl;
606 *out <<
"[" << currentIdx <<
"] Column NOT UPDATED " << colIdx <<
": "
607 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
608 <<
" = " << aii - si + ajj - sj <<
", aij = " << aij <<
", mu = " << mu << std::endl;
611 *out <<
"[" << currentIdx <<
"] Column FAILED " << colIdx <<
": "
612 <<
"aii - si + ajj - sj = " << aii <<
" - " << si <<
" + " << ajj <<
" - " << sj
613 <<
" = " << aii - si + ajj - sj << std::endl;
618 localAggStat[currentIdx] =
ONEPT;
619 localVertex2AggID[currentIdx] = numLocalAggregates;
620 --numNonAggregatedNodes;
621 ++numLocalAggregates;
623 if (best_mu <= kappa) {
624 *out <<
"Node buddies (" << currentIdx <<
"," << bestIdx <<
") [agg " << numLocalAggregates <<
"]" << std::endl;
627 localVertex2AggID[currentIdx] = numLocalAggregates;
628 --numNonAggregatedNodes;
631 localVertex2AggID[bestIdx] = numLocalAggregates;
632 --numNonAggregatedNodes;
634 ++numLocalAggregates;
636 *out <<
"No buddy found for index " << currentIdx <<
","
637 " but aggregating as singleton [agg "
638 << numLocalAggregates <<
"]" << std::endl;
640 localAggStat[currentIdx] =
ONEPT;
641 localVertex2AggID[currentIdx] = numLocalAggregates;
642 --numNonAggregatedNodes;
643 ++numLocalAggregates;
650 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
653 typename Matrix::local_matrix_type& onrankA)
const {
654 Monitor m(*
this,
"BuildOnRankLocalMatrix");
658 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
659 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
665 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
666 using values_type =
typename local_matrix_type::values_type;
667 using size_type =
typename local_graph_type::size_type;
668 using col_index_type =
typename local_graph_type::data_type;
669 using array_layout =
typename local_graph_type::array_layout;
670 using memory_traits =
typename local_graph_type::memory_traits;
671 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
672 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
677 const int numRows =
static_cast<int>(localA.numRows());
678 row_pointer_type rowPtr(
"onrankA row pointer", numRows + 1);
679 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
680 typename local_graph_type::row_map_type::HostMirror origRowPtr_h = Kokkos::create_mirror_view(localA.graph.row_map);
681 typename local_graph_type::entries_type::HostMirror origColind_h = Kokkos::create_mirror_view(localA.graph.entries);
682 typename values_type::HostMirror origValues_h = Kokkos::create_mirror_view(localA.values);
683 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
684 Kokkos::deep_copy(origColind_h, localA.graph.entries);
685 Kokkos::deep_copy(origValues_h, localA.values);
689 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
690 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
691 if (origColind_h(entryIdx) < numRows) {
692 rowPtr_h(rowIdx + 1) += 1;
695 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
697 Kokkos::deep_copy(rowPtr, rowPtr_h);
699 const LO nnzOnrankA = rowPtr_h(numRows);
702 col_indices_type colInd(
"onrankA column indices", rowPtr_h(numRows));
703 values_type values(
"onrankA values", rowPtr_h(numRows));
704 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
705 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
707 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
709 for (size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
710 if (origColind_h(entryIdx) < numRows) {
711 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
712 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
717 Kokkos::deep_copy(colInd, colInd_h);
718 Kokkos::deep_copy(values, values_h);
721 nnzOnrankA, values, rowPtr, colInd);
724 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
730 typename Matrix::local_matrix_type& intermediateP)
const {
731 Monitor m(*
this,
"BuildIntermediateProlongator");
735 if (
const char* dbg = std::getenv(
"MUELU_PAIRWISEAGGREGATION_DEBUG")) {
736 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
742 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
743 using values_type =
typename local_matrix_type::values_type;
744 using size_type =
typename local_graph_type::size_type;
745 using col_index_type =
typename local_graph_type::data_type;
746 using array_layout =
typename local_graph_type::array_layout;
747 using memory_traits =
typename local_graph_type::memory_traits;
748 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
749 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
753 const int intermediatePnnz = numRows - numDirichletNodes;
754 row_pointer_type rowPtr(
"intermediateP row pointer", numRows + 1);
755 col_indices_type colInd(
"intermediateP column indices", intermediatePnnz);
756 values_type values(
"intermediateP values", intermediatePnnz);
757 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
758 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
761 for (
int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
763 if (localVertex2AggID[rowIdx] == LO_INVALID) {
764 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
766 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
767 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
771 Kokkos::deep_copy(rowPtr, rowPtr_h);
772 Kokkos::deep_copy(colInd, colInd_h);
773 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
776 numRows, numLocalAggregates, intermediatePnnz,
777 values, rowPtr, colInd);
780 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
783 typename Matrix::local_matrix_type& coarseA)
const {
784 Monitor m(*
this,
"BuildCoarseLocalMatrix");
786 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
787 using values_type =
typename local_matrix_type::values_type;
788 using size_type =
typename local_graph_type::size_type;
789 using col_index_type =
typename local_graph_type::data_type;
790 using array_layout =
typename local_graph_type::array_layout;
791 using memory_traits =
typename local_graph_type::memory_traits;
792 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
793 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
796 localSpGEMM(coarseA, intermediateP,
"AP", AP);
809 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt row pointer"),
810 intermediateP.numCols() + 1);
811 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt column indices"),
812 intermediateP.nnz());
813 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing(
"Pt values"),
814 intermediateP.nnz());
816 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
817 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
818 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
819 Kokkos::deep_copy(rowPtrPt_h, 0);
820 for (size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
821 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
823 for (
LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
824 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
826 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
828 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
829 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
830 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
831 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
832 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
833 Kokkos::deep_copy(valuesP_h, intermediateP.values);
834 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
835 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
836 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
837 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
839 col_index_type colIdx = 0;
840 for (
LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
841 for (size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
842 colIdx = entries_h(entryIdxP);
843 for (size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
844 if (colIndPt_h(entryIdxPt) == invalidColumnIndex) {
845 colIndPt_h(entryIdxPt) = rowIdx;
846 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
853 Kokkos::deep_copy(colIndPt, colIndPt_h);
854 Kokkos::deep_copy(valuesPt, valuesPt_h);
857 intermediateP.numCols(),
858 intermediateP.numRows(),
860 valuesPt, rowPtrPt, colIndPt);
863 localSpGEMM(intermediatePt, AP,
"coarseA", coarseA);
866 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
869 const typename Matrix::local_matrix_type& B,
870 const std::string matrixLabel,
871 typename Matrix::local_matrix_type& C)
const {
872 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
873 using values_type =
typename local_matrix_type::values_type;
874 using size_type =
typename local_graph_type::size_type;
875 using col_index_type =
typename local_graph_type::data_type;
876 using array_layout =
typename local_graph_type::array_layout;
877 using memory_space =
typename device_type::memory_space;
878 using memory_traits =
typename local_graph_type::memory_traits;
879 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
880 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
883 int team_work_size = 16;
884 std::string myalg(
"SPGEMM_KK_MEMORY");
885 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
886 KokkosKernels::Experimental::KokkosKernelsHandle<
typename row_pointer_type::const_value_type,
887 typename col_indices_type::const_value_type,
888 typename values_type::const_value_type,
893 kh.create_spgemm_handle(alg_enum);
894 kh.set_team_work_size(team_work_size);
897 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing(
"C row pointer"),
899 col_indices_type colIndC;
903 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
904 B.numRows(), B.numCols(),
905 A.graph.row_map, A.graph.entries,
false,
906 B.graph.row_map, B.graph.entries,
false,
910 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
912 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing(
"C column inds"), nnzC);
913 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing(
"C values"), nnzC);
917 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
918 B.numRows(), B.numCols(),
919 A.graph.row_map, A.graph.entries, A.values,
false,
920 B.graph.row_map, B.graph.entries, B.values,
false,
921 rowPtrC, colIndC, valuesC);
922 kh.destroy_spgemm_handle();
924 C =
local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
MueLu::DefaultLocalOrdinal LocalOrdinal
void Build(Level ¤tLevel) const
Build aggregates.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
typename Matrix::local_matrix_type local_matrix_type
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static magnitudeType real(T a)
void BuildInitialAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void DeclareInput(Level ¤tLevel) const
Input.
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
void BuildFurtherAggregates(const Teuchos::ParameterList ¶ms, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
bool isParameter(const std::string &name) const
Print statistics that do not involve significant additional computation.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
typename device_type::execution_space execution_space
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
static RCP< Vector > GetMatrixOverlappedDiagonal(const Matrix &A)
Extract Overlapped Matrix Diagonal.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.