46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
54 #include <Xpetra_Vector.hpp>
58 #include "MueLu_Aggregates_kokkos.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
63 #include "Kokkos_Sort.hpp"
// Dispatcher for phase 2a aggregation.
// Reads the boolean parameter "aggregation: deterministic" and forwards
// params, graph, aggregates, aggStat and numNonAggregatedNodes to either
// BuildAggregatesDeterministic or BuildAggregatesRandom. A Monitor labelled
// with the name of the chosen routine is constructed right before each call.
// NOTE(review): this listing skips several original lines, so the declaration
// of aggStat, the branch separating the two calls (presumably an else) and the
// closing braces are not visible here — confirm against the full file.
67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
68 void AggregationPhase2aAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(
const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
// Deterministic variant selected through the parameter list.
75 if(params.get<
bool>(
"aggregation: deterministic")) {
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// Second Monitor/call pair: the random variant (presumably the else branch; the branch keyword is not visible here).
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// Phase 2a aggregation, non-deterministic variant.
// For each color, every vertex that is READY and carries that color is treated
// as a root candidate. If enough of its local neighbors are READY, it claims a
// new aggregate index with an atomic fetch-add and writes that index and this
// rank into vertex2AggId / procWinner for itself (when includeRootInAgg is set)
// and for its selected neighbors.
//   params                - supplies "aggregation: min agg size",
//                           "aggregation: max agg size" and
//                           "aggregation: phase2a include root".
//   graph                 - supplies vertex count, communicator and neighbor lists.
//   aggregates            - supplies vertex2AggId, procWinner, graph colors and
//                           the aggregate count, which is written back at the end.
//   numNonAggregatedNodes - updated with the per-color reduction result.
// NOTE(review): this listing skips many original lines (the aggStat parameter,
// the kernel launch, the declarations of numLocalAggregates, aggSize and
// numNeighbors, several loop bodies and all closing braces), so the comments
// below only describe what is visible.
85 template <
class LO,
class GO,
class Node>
86 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(
const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes)
const
// Size limits and root-inclusion flag come from the parameter list.
93 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
94 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
95 bool includeRootInAgg = params.get<
bool>(
"aggregation: phase2a include root");
97 const LO numRows = graph.GetNodeNumVertices();
98 const int myRank = graph.GetComm()->getRank();
// Local views of the per-vertex aggregate id and owning-process arrays.
100 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
101 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
102 auto colors = aggregates.GetGraphColors();
103 const LO numColors = aggregates.GetGraphNumColors();
105 LO numLocalNodes = numRows;
106 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// factor = (aggregated fraction)^aggFactor; it scales the neighbor-count
// threshold used in the acceptance test below.
// NOTE(review): the +1 in the denominator presumably guards against division by zero — confirm.
108 const double aggFactor = 0.5;
109 double factor =
static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
110 factor =
pow(factor, aggFactor);
// Host mirror of the aggregate counter, seeded with the current aggregate count.
// NOTE(review): the left-hand side of the mirror declaration is not visible in this listing.
118 Kokkos::create_mirror_view(numLocalAggregates);
119 h_numLocalAggregates() = aggregates.GetNumAggregates();
// Colors are visited one at a time; note that this loop starts at color 2.
124 for(
int color = 2; color < numColors + 1; ++color) {
125 LO tmpNumNonAggregatedNodes = 0;
// Kernel body: one invocation per root candidate; the second argument is the
// reduction accumulator that ends up in tmpNumNonAggregatedNodes.
128 KOKKOS_LAMBDA (
const LO rootCandidate, LO& lNumNonAggregatedNodes) {
// Only READY vertices of the current color are considered as roots.
129 if(aggStat(rootCandidate) ==
READY &&
130 colors(rootCandidate) == color) {
133 if (includeRootInAgg)
138 auto neighbors = graph.getNeighborVertices(rootCandidate);
// First pass over neighbors: tests for local, READY neighbors while aggSize is
// below the maximum. NOTE(review): the loop body is not visible; presumably it
// counts aggSize/numNeighbors — confirm.
143 for(
int j = 0; j < neighbors.length; ++j) {
144 LO neigh = neighbors(j);
145 if(neigh != rootCandidate) {
146 if(graph.isLocalNeighborVertex(neigh) &&
147 (aggStat(neigh) ==
READY) &&
148 (aggSize < maxNodesPerAggregate)) {
// Acceptance test: the tentative aggregate must exceed the minimum size and
// (excluding the root when it is counted) exceed factor*numNeighbors.
157 if(aggSize > minNodesPerAggregate &&
158 ((includeRootInAgg && aggSize-1 > factor*numNeighbors) ||
159 (!includeRootInAgg && aggSize > factor*numNeighbors))) {
// Claim a fresh aggregate index atomically.
162 LO aggIndex = Kokkos::
163 atomic_fetch_add(&numLocalAggregates(), 1);
165 LO numAggregated = 0;
// When the root is part of the aggregate, record its aggregate id and owner
// and decrement the non-aggregated count.
167 if (includeRootInAgg) {
170 vertex2AggId(rootCandidate, 0) = aggIndex;
171 procWinner(rootCandidate, 0) = myRank;
173 --lNumNonAggregatedNodes;
// Second pass over neighbors: assign local READY neighbors to the new aggregate
// while numAggregated is below aggSize.
176 for(
int neighIdx = 0; neighIdx < neighbors.length; ++neighIdx) {
177 LO neigh = neighbors(neighIdx);
178 if(neigh != rootCandidate) {
179 if(graph.isLocalNeighborVertex(neigh) &&
180 (aggStat(neigh) ==
READY) &&
181 (numAggregated < aggSize)) {
183 vertex2AggId(neigh, 0) = aggIndex;
184 procWinner(neigh, 0) = myRank;
187 --lNumNonAggregatedNodes;
// The per-color result (built from decrements, so non-positive) is added to the caller's counter.
193 }, tmpNumNonAggregatedNodes);
194 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Publish the updated aggregate count.
199 aggregates.SetNumAggregates(h_numLocalAggregates());
// Phase 2a aggregation, deterministic variant.
// For each color the work is split in two kernels: the first collects accepted
// root candidates into newRoots (slots handed out by an atomic fetch-add); the
// list is then sorted with Kokkos::sort, and the second kernel derives each new
// aggregate id from the root's position in the sorted list
// (numLocalAggregates() + newRootIndex).
//   params                - supplies "aggregation: min agg size" and
//                           "aggregation: max agg size".
//   graph                 - supplies vertex count, communicator and neighbor lists.
//   aggregates            - supplies vertex2AggId, procWinner, graph colors and
//                           the aggregate count, which is written back at the end.
//   numNonAggregatedNodes - updated with the per-color reduction result.
// NOTE(review): this listing skips many original lines (the aggStat parameter,
// the kernel launches, the declarations of numLocalAggregates, newRoots,
// numNewRoots, aggSize and numNeighbors, host/device copies and all closing
// braces), so the comments below only describe what is visible.
202 template <
class LO,
class GO,
class Node>
203 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
204 BuildAggregatesDeterministic(
const ParameterList& params,
205 const LWGraph_kokkos& graph,
206 Aggregates_kokkos& aggregates,
208 LO& numNonAggregatedNodes)
const
// Size limits come from the parameter list.
210 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
211 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
213 const LO numRows = graph.GetNodeNumVertices();
214 const int myRank = graph.GetComm()->getRank();
// Local views of the per-vertex aggregate id and owning-process arrays.
216 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
217 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
218 auto colors = aggregates.GetGraphColors();
219 const LO numColors = aggregates.GetGraphNumColors();
// Here the node count is taken from procWinner.size() rather than numRows.
221 LO numLocalNodes = procWinner.size();
222 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// factor = (aggregated fraction)^aggFactor; it scales the neighbor-count
// threshold used in the acceptance test below.
// NOTE(review): the +1 in the denominator presumably guards against division by zero — confirm.
224 const double aggFactor = 0.5;
225 double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
226 factor =
pow(factor, aggFactor);
// Host mirror of the aggregate counter, seeded with the current aggregate count.
// NOTE(review): the left-hand side of the mirror declaration is not visible in this listing.
230 Kokkos::create_mirror_view(numLocalAggregates);
231 h_numLocalAggregates() = aggregates.GetNumAggregates();
// Host mirror of the counter of roots accepted for the current color.
249 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
// Colors are visited one at a time; note that this loop starts at color 1.
250 for(
int color = 1; color < numColors + 1; ++color) {
// First kernel body: decide whether rootCandidate becomes a new root.
255 KOKKOS_LAMBDA(
const LO rootCandidate) {
// Only READY vertices of the current color are considered as roots.
256 if(aggStat(rootCandidate) ==
READY &&
257 colors(rootCandidate) == color) {
259 auto neighbors = graph.getNeighborVertices(rootCandidate);
// Pass over neighbors: tests for local, READY neighbors while aggSize is below
// the maximum. NOTE(review): the statements guarded by these conditions are not
// visible; presumably they count aggSize/numNeighbors — confirm.
263 for(
int j = 0; j < neighbors.length; ++j) {
264 LO neigh = neighbors(j);
265 if(neigh != rootCandidate)
267 if(graph.isLocalNeighborVertex(neigh) &&
268 aggStat(neigh) ==
READY &&
269 aggSize < maxNodesPerAggregate)
// Accepted roots are appended to newRoots at an atomically claimed slot.
278 if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
279 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
280 newRoots(newRootIndex) = rootCandidate;
// Only continue when at least one root was accepted for this color.
286 if(h_numNewRoots() > 0) {
// Sort the filled prefix of newRoots so that aggregate ids follow root order
// instead of the order in which slots were claimed.
288 Kokkos::sort(newRoots, 0, h_numNewRoots());
290 LO tmpNumNonAggregatedNodes = 0;
// Second kernel body: one invocation per sorted root; the second argument is the
// reduction accumulator that ends up in tmpNumNonAggregatedNodes.
294 KOKKOS_LAMBDA (
const LO newRootIndex, LO& lNumNonAggregatedNodes) {
295 LO root = newRoots(newRootIndex);
// Aggregate id is the current aggregate count plus the root's sorted position.
296 LO newAggID = numLocalAggregates() + newRootIndex;
297 auto neighbors = graph.getNeighborVertices(root);
300 vertex2AggId(root, 0) = newAggID;
// Assign local READY neighbors to the new aggregate while aggSize is below the maximum.
302 for(
int j = 0; j < neighbors.length; ++j) {
303 LO neigh = neighbors(j);
305 if(graph.isLocalNeighborVertex(neigh) &&
306 aggStat(neigh) ==
READY &&
307 aggSize < maxNodesPerAggregate) {
309 vertex2AggId(neigh, 0) = newAggID;
310 procWinner(neigh, 0) = myRank;
// Each root subtracts the size of its aggregate from the reduction value.
315 lNumNonAggregatedNodes -= aggSize;
316 }, tmpNumNonAggregatedNodes);
// Fold the per-color result into the caller's counter and advance the host aggregate count.
317 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
318 h_numLocalAggregates() += h_numNewRoots();
// Publish the updated aggregate count.
322 aggregates.SetNumAggregates(h_numLocalAggregates());
327 #endif // HAVE_MUELU_KOKKOS_REFACTOR
328 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
KOKKOS_INLINE_FUNCTION Kokkos::complex< RealType > pow(const complex< RealType > &x, const RealType &e)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)