46 #ifndef MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
56 #include "MueLu_Aggregates.hpp"
58 #include "MueLu_LWGraph_kokkos.hpp"
67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
73 LO& numNonAggregatedNodes)
const {
75 if (params.
get<
bool>(
"aggregation: deterministic")) {
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
86 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
91 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
92 LO& numNonAggregatedNodes)
const {
93 bool error_on_isolated = params.
get<
bool>(
"aggregation: error on nodes with no on-rank neighbors");
94 bool makeNonAdjAggs = params.
get<
bool>(
"aggregation: phase3 avoid singletons");
97 const int myRank = graph.
GetComm()->getRank();
99 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
100 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
104 auto lclLWGraph = graph;
106 Kokkos::View<LO, device_type> numAggregates(
"numAggregates");
109 Kokkos::View<unsigned*, device_type> aggStatOld(Kokkos::ViewAllocateWithoutInitializing(
"Initial aggregation status"), aggStat.extent(0));
110 Kokkos::deep_copy(aggStatOld, aggStat);
111 Kokkos::View<LO, device_type> numNonAggregated(
"numNonAggregated");
112 Kokkos::deep_copy(numNonAggregated, numNonAggregatedNodes);
113 for (
int color = 1; color < numColors + 1; ++color) {
114 Kokkos::parallel_for(
115 "Aggregation Phase 3: aggregates clean-up",
116 Kokkos::RangePolicy<execution_space>(0, numRows),
117 KOKKOS_LAMBDA(
const LO nodeIdx) {
119 if ((colors(nodeIdx) != color) ||
121 (aggStatOld(nodeIdx) ==
IGNORED)) {
126 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
131 bool isNewAggregate =
false;
132 for (
int neigh = 0; neigh < neighbors.length; ++neigh) {
133 neighIdx = neighbors(neigh);
135 if ((neighIdx != nodeIdx) &&
136 lclLWGraph.isLocalNeighborVertex(neighIdx) &&
137 (aggStatOld(neighIdx) ==
READY)) {
138 isNewAggregate =
true;
144 if (isNewAggregate) {
147 const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1);
149 procWinner(nodeIdx, 0) = myRank;
150 vertex2AggId(nodeIdx, 0) = aggId;
152 Kokkos::atomic_decrement(&numNonAggregated());
153 for (
int neigh = 0; neigh < neighbors.length; ++neigh) {
154 neighIdx = neighbors(neigh);
155 if ((neighIdx != nodeIdx) &&
156 lclLWGraph.isLocalNeighborVertex(neighIdx) &&
157 (aggStatOld(neighIdx) ==
READY)) {
159 procWinner(neighIdx, 0) = myRank;
160 vertex2AggId(neighIdx, 0) = aggId;
161 Kokkos::atomic_decrement(&numNonAggregated());
169 for (
int neigh = 0; neigh < neighbors.length; ++neigh) {
170 neighIdx = neighbors(neigh);
171 if (lclLWGraph.isLocalNeighborVertex(neighIdx) &&
174 procWinner(nodeIdx, 0) = myRank;
175 vertex2AggId(nodeIdx, 0) = vertex2AggId(neighIdx, 0);
176 Kokkos::atomic_decrement(&numNonAggregated());
183 if (makeNonAdjAggs) {
184 for (
LO otherNodeIdx = 0; otherNodeIdx < numRows; ++otherNodeIdx) {
185 if ((otherNodeIdx != nodeIdx) &&
188 procWinner(nodeIdx, 0) = myRank;
189 vertex2AggId(nodeIdx, 0) = vertex2AggId(otherNodeIdx, 0);
190 Kokkos::atomic_decrement(&numNonAggregated());
198 if (!error_on_isolated) {
199 const LO aggId = Kokkos::atomic_fetch_add(&numAggregates(), 1);
201 procWinner(nodeIdx, 0) = myRank;
202 vertex2AggId(nodeIdx, 0) = aggId;
203 Kokkos::atomic_decrement(&numNonAggregated());
209 Kokkos::deep_copy(aggStatOld, aggStat);
212 auto numNonAggregated_h = Kokkos::create_mirror_view(numNonAggregated);
213 Kokkos::deep_copy(numNonAggregated_h, numNonAggregated);
214 numNonAggregatedNodes = numNonAggregated_h();
215 if ((error_on_isolated) && (numNonAggregatedNodes > 0)) {
217 std::ostringstream oss;
218 oss <<
"MueLu::AggregationPhase3Algorithm::BuildAggregates: MueLu has detected a non-Dirichlet node that has no on-rank neighbors and is terminating (by user request). " << std::endl;
219 oss <<
"If this error is being generated at level 0, this is due to an initial partitioning problem in your matrix." << std::endl;
220 oss <<
"If this error is being generated at any other level, try turning on repartitioning, which may fix this problem." << std::endl;
225 auto numAggregates_h = Kokkos::create_mirror_view(numAggregates);
226 Kokkos::deep_copy(numAggregates_h, numAggregates);
232 #endif // MUELU_AGGREGATIONPHASE3ALGORITHM_KOKKOS_DEF_HPP
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
void BuildAggregates(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
Timer to be used in non-factories.
Exception throws to report errors in the internal logical of the program.
void BuildAggregatesRandom(const ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
const RCP< const Teuchos::Comm< int > > GetComm() const
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.