10 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE2BALGORITHM_DEF_HPP_
13 #include <Teuchos_Comm.hpp>
14 #include <Teuchos_CommHelpers.hpp>
20 #include "MueLu_Aggregates.hpp"
22 #include "MueLu_LWGraph.hpp"
29 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
31 Monitor m(*
this,
"BuildAggregatesNonKokkos");
32 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2b");
35 const int myRank = graph.
GetComm()->getRank();
42 const LO defaultConnectWeight = 100;
43 const LO penaltyConnectWeight = 10;
45 std::vector<LO> aggWeight(numLocalAggregates, 0);
46 std::vector<LO> connectWeight(numRows, defaultConnectWeight);
47 std::vector<LO> aggPenalties(numRows, 0);
55 for (
int k = 0; k < 2; k++) {
56 for (
LO i = 0; i < numRows; i++) {
57 if (aggStat[i] !=
READY)
62 for (
int j = 0; j < neighOfINode.length; j++) {
63 LO neigh = neighOfINode(j);
67 aggWeight[vertex2AggId[neigh]] += connectWeight[neigh];
70 int bestScore = -100000;
74 for (
int j = 0; j < neighOfINode.length; j++) {
75 LO neigh = neighOfINode(j);
76 int aggId = vertex2AggId[neigh];
80 int score = aggWeight[aggId] - aggPenalties[aggId];
82 if (score > bestScore) {
85 bestConnect = connectWeight[neigh];
87 }
else if (aggId == bestAggId && connectWeight[neigh] > bestConnect) {
88 bestConnect = connectWeight[neigh];
98 vertex2AggId[i] = bestAggId;
99 procWinner[i] = myRank;
101 numNonAggregatedNodes--;
103 aggPenalties[bestAggId]++;
104 connectWeight[i] = bestConnect - penaltyConnectWeight;
112 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
118 LO& numNonAggregatedNodes)
const {
119 if (params.
get<
bool>(
"aggregation: deterministic")) {
120 Monitor m(*
this,
"BuildAggregatesDeterministic");
121 BuildAggregates<true>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
123 Monitor m(*
this,
"BuildAggregatesRandom");
124 BuildAggregates<false>(params, graph, aggregates, aggStat, numNonAggregatedNodes);
129 template <
class AggStatType,
class ProcWinnerType,
class Vertex2AggType,
class ColorsType,
class LocalGraphType,
class AggPenaltyType,
class LO,
bool deterministic,
bool matchMLbehavior>
145 ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& aggPenaltyUpdates_, AggPenaltyType& connectWeight_,
LO penaltyConnectWeight_,
LO color_,
LO rank_)
158 ExpansionFunctor(AggStatType& aggStat_, ProcWinnerType& procWinner_, Vertex2AggType& vertex2AggId_, ColorsType& colors_, LocalGraphType& lclLWGraph_, AggPenaltyType& aggPenalties_, AggPenaltyType& connectWeight_,
LO penaltyConnectWeight_,
LO color_,
LO rank_)
170 KOKKOS_INLINE_FUNCTION
175 int bestScore = -100000;
177 int bestConnect = -1;
179 auto neighOfINode =
lclLWGraph.getNeighborVertices(i);
181 for (
int j = 0; j < neighOfINode.length; j++) {
182 LO neigh = neighOfINode(j);
184 if (
lclLWGraph.isLocalNeighborVertex(neigh) &&
188 for (
int k = 0; k < neighOfINode.length; k++) {
189 LO neigh2 = neighOfINode(k);
190 if (
lclLWGraph.isLocalNeighborVertex(neigh2) &&
196 if (matchMLbehavior && (aggWeight == 0))
201 if (score > bestScore) {
206 }
else if (aggId == bestAggId &&
212 if (bestScore >= 0) {
217 if constexpr (deterministic) {
228 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
229 template <
bool deterministic>
235 LO& numNonAggregatedNodes)
const {
239 bool matchMLbehavior = params.
get<
bool>(
"aggregation: match ML phase2b");
242 const int myRank = graph.
GetComm()->getRank();
244 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
245 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
250 const LO defaultConnectWeight = 100;
251 const LO penaltyConnectWeight = 10;
253 Kokkos::View<LO*, device_type> connectWeight(Kokkos::ViewAllocateWithoutInitializing(
"connectWeight"), numRows);
254 Kokkos::View<LO*, device_type> aggPenalties(
"aggPenalties", numLocalAggregates);
255 Kokkos::View<LO*, device_type> aggPenaltyUpdates;
257 aggPenaltyUpdates = Kokkos::View<LO*, device_type>(
"aggPenaltyUpdates", numLocalAggregates);
259 Kokkos::deep_copy(connectWeight, defaultConnectWeight);
269 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
270 if (maxNodesPerAggregate == std::numeric_limits<int>::max()) {
274 for (
LO color = 1; color <= numColors; ++color) {
277 LO numAggregated = 0;
279 if constexpr (deterministic) {
280 if (matchMLbehavior) {
281 auto functor =
ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
283 Kokkos::parallel_reduce(
"Aggregation Phase 2b: aggregates expansion",
284 Kokkos::RangePolicy<execution_space>(0, numRows),
288 auto functor =
ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, true, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, aggPenaltyUpdates, connectWeight, penaltyConnectWeight, color, myRank);
290 Kokkos::parallel_reduce(
"Aggregation Phase 2b: aggregates expansion",
291 Kokkos::RangePolicy<execution_space>(0, numRows),
296 if (matchMLbehavior) {
297 auto functor =
ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, true>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
299 Kokkos::parallel_reduce(
"Aggregation Phase 2b: aggregates expansion",
300 Kokkos::RangePolicy<execution_space>(0, numRows),
304 auto functor =
ExpansionFunctor<decltype(aggStat), decltype(procWinner), decltype(vertex2AggId), decltype(colors), decltype(graph), decltype(aggPenalties), LO, false, false>(aggStat, procWinner, vertex2AggId, colors, graph, aggPenalties, connectWeight, penaltyConnectWeight, color, myRank);
306 Kokkos::parallel_reduce(
"Aggregation Phase 2b: aggregates expansion",
307 Kokkos::RangePolicy<execution_space>(0, numRows),
313 if constexpr (deterministic) {
314 Kokkos::parallel_for(
315 "Aggregation Phase 2b: updating agg penalties",
316 Kokkos::RangePolicy<execution_space>(0, numLocalAggregates),
317 KOKKOS_LAMBDA(
const LO agg) {
318 aggPenalties(agg) += aggPenaltyUpdates(agg);
319 aggPenaltyUpdates(agg) = 0;
323 numNonAggregatedNodes -= numAggregated;
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &aggPenaltyUpdates_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
Vertex2AggType vertex2AggId
typename device_type::execution_space execution_space
void BuildAggregatesNonKokkos(const ParameterList &params, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
ExpansionFunctor(AggStatType &aggStat_, ProcWinnerType &procWinner_, Vertex2AggType &vertex2AggId_, ColorsType &colors_, LocalGraphType &lclLWGraph_, AggPenaltyType &aggPenalties_, AggPenaltyType &connectWeight_, LO penaltyConnectWeight_, LO color_, LO rank_)
KOKKOS_INLINE_FUNCTION void operator()(const LO &i, LO &tmpNumAggregated) const
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const ParameterList &params, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
AggPenaltyType aggPenalties
LocalGraphType lclLWGraph
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
Timer to be used in non-factories.
AggPenaltyType connectWeight
ProcWinnerType procWinner
Lightweight MueLu representation of a compressed row storage graph.
AggPenaltyType aggPenaltyUpdates
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType