46 #ifndef MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
58 #include "MueLu_Aggregates_kokkos.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
63 #include "Kokkos_Sort.hpp"
// Phase 2a entry point: chooses between the deterministic and the random
// implementation based on the boolean "aggregation: deterministic" parameter
// and forwards params, graph, aggregates, aggStat and numNonAggregatedNodes
// unchanged to the selected routine.
// NOTE(review): the embedded line numbers in this listing skip 72, 74, 78 and
// 81-84. The declaration of aggStat (used in both calls below), whatever
// separates the two calls (presumably an `else`), and the closing braces are
// not visible here - confirm against the original file.
67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
68 void AggregationPhase2aAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
69 BuildAggregates(
const ParameterList& params,
70 const LWGraph_kokkos& graph,
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
// Deterministic path: taken when the parameter list requests it.
75 if(params.get<
bool>(
"aggregation: deterministic")) {
// Monitor object labelled with the name of the routine about to run.
76 Monitor m(*
this,
"BuildAggregatesDeterministic");
77 BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// Random path, with its own Monitor label.
79 Monitor m(*
this,
"BuildAggregatesRandom");
80 BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
// Phase 2a, non-deterministic variant. For each color from 2 up to and
// including numColors, every vertex that is READY and has that color is
// treated as a root candidate; a candidate that passes the size test below
// obtains a fresh aggregate index through an atomic counter and its eligible
// neighbors are written into vertex2AggId / procWinner.
// numNonAggregatedNodes is updated in place, and the aggregate count is
// written back through aggregates.SetNumAggregates at the end.
// NOTE(review): this listing has gaps in its embedded line numbering. The
// aggStat parameter, the declarations of numLocalAggregates /
// h_numLocalAggregates, aggSize and numNeighbors, the head of the parallel
// call that owns the lambda, any host/device copies, and all closing braces
// are not visible here - confirm against the original file.
85 template <
class LO,
class GO,
class Node>
86 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
87 BuildAggregatesRandom(
const ParameterList& params,
88 const LWGraph_kokkos& graph,
89 Aggregates_kokkos& aggregates,
91 LO& numNonAggregatedNodes)
const
// Memory and execution space are taken from the graph type.
93 using memory_space =
typename LWGraph_kokkos::memory_space;
94 using execution_space =
typename LWGraph_kokkos::execution_space;
// Aggregate size bounds come from the parameter list.
96 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
97 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
99 const LO numRows = graph.GetNodeNumVertices();
// Rank of this process; written into procWinner for every vertex claimed below.
100 const int myRank = graph.GetComm()->getRank();
// Local views of the aggregate bookkeeping, plus the graph coloring.
102 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
103 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
104 auto colors = aggregates.GetGraphColors();
105 const LO numColors = aggregates.GetGraphNumColors();
// Here the local node count is the number of graph vertices.
107 LO numLocalNodes = numRows;
108 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// factor = (numLocalAggregated / (numLocalNodes + 1)) ^ aggFactor.
// The +1 keeps the denominator non-zero when there are no local nodes.
110 const double aggFactor = 0.5;
111 double factor =
static_cast<double>(numLocalAggregated)/(numLocalNodes+1);
112 factor =
pow(factor, aggFactor);
// Seed the host mirror of the aggregate counter with the current count.
// NOTE(review): the left-hand side of the create_mirror_view line and the
// copy to the device view are not visible in this listing.
120 Kokkos::create_mirror_view(numLocalAggregates);
121 h_numLocalAggregates() = aggregates.GetNumAggregates();
// Colors 2 .. numColors (inclusive) are processed one at a time.
126 for(
int color = 2; color < numColors + 1; ++color) {
// Receives this color's (non-positive) change in the non-aggregated count.
127 LO tmpNumNonAggregatedNodes = 0;
// Kernel body: one invocation per vertex, with a per-invocation reduction
// argument (presumably passed to Kokkos::parallel_reduce - the call head is
// not visible here).
130 KOKKOS_LAMBDA (
const LO rootCandidate,
LO& lNumNonAggregatedNodes) {
// Only READY vertices of the current color can become roots.
131 if(aggStat(rootCandidate) ==
READY &&
132 colors(rootCandidate) == color) {
135 auto neighbors = graph.getNeighborVertices(rootCandidate);
// First pass over the neighbors: selects local, READY neighbors other than
// the candidate itself while aggSize is below the maximum.
// NOTE(review): the statement guarded by this condition is not visible;
// presumably it increments aggSize - confirm.
140 for(
int j = 0; j < neighbors.length; ++j) {
141 LO neigh = neighbors(j);
142 if(neigh != rootCandidate) {
143 if(graph.isLocalNeighborVertex(neigh) &&
144 aggStat(neigh) ==
READY &&
145 aggSize < maxNodesPerAggregate) {
// Acceptance test: strictly larger than the minimum size and strictly
// larger than factor * numNeighbors.
155 if(aggSize > minNodesPerAggregate &&
156 aggSize > factor*numNeighbors) {
// Reserve a new aggregate index; atomic because other invocations may be
// doing the same concurrently.
159 LO aggIndex = Kokkos::
160 atomic_fetch_add(&numLocalAggregates(), 1);
// Second pass over the neighbors with the same eligibility test: each
// selected neighbor is recorded as belonging to aggIndex and as owned by
// this rank.
162 for(
int j = 0; j < neighbors.length; ++j) {
163 LO neigh = neighbors(j);
164 if(neigh != rootCandidate) {
165 if(graph.isLocalNeighborVertex(neigh) &&
166 aggStat(neigh) ==
READY &&
167 aggSize < maxNodesPerAggregate) {
169 vertex2AggId(neigh, 0) = aggIndex;
170 procWinner(neigh, 0) = myRank;
// The reduction value goes down by the size of the aggregate just formed.
174 lNumNonAggregatedNodes -= aggSize;
177 }, tmpNumNonAggregatedNodes);
// Adding the non-positive per-color result lowers the caller's counter.
178 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Publish the aggregate count held in the host mirror.
183 aggregates.SetNumAggregates(h_numLocalAggregates());
// Phase 2a, deterministic variant. For each color from 1 up to and including
// numColors the work is split in two kernels: the first collects the vertices
// that qualify as new roots into newRoots (slots handed out by an atomic
// counter), then the collected entries are sorted, and the second kernel
// assigns aggregate id numLocalAggregates() + (position in newRoots) to each
// root and its eligible neighbors.
// numNonAggregatedNodes is updated in place, and the aggregate count is
// written back through aggregates.SetNumAggregates at the end.
// NOTE(review): this listing has gaps in its embedded line numbering. The
// aggStat parameter, the declarations of numLocalAggregates /
// h_numLocalAggregates, newRoots, numNewRoots, aggSize and numNeighbors, the
// heads of the parallel calls that own the lambdas, any host/device copies,
// and all closing braces are not visible here - confirm against the original
// file.
186 template <
class LO,
class GO,
class Node>
187 void AggregationPhase2aAlgorithm_kokkos<LO, GO, Node>::
188 BuildAggregatesDeterministic(
const ParameterList& params,
189 const LWGraph_kokkos& graph,
190 Aggregates_kokkos& aggregates,
192 LO& numNonAggregatedNodes)
const
// Memory and execution space are taken from the graph type.
194 using memory_space =
typename LWGraph_kokkos::memory_space;
195 using execution_space =
typename LWGraph_kokkos::execution_space;
// Aggregate size bounds come from the parameter list.
197 const int minNodesPerAggregate = params.get<
int>(
"aggregation: min agg size");
198 const int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
200 const LO numRows = graph.GetNodeNumVertices();
// Rank of this process; written into procWinner for every neighbor claimed below.
201 const int myRank = graph.GetComm()->getRank();
// Local views of the aggregate bookkeeping, plus the graph coloring.
203 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
204 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
205 auto colors = aggregates.GetGraphColors();
206 const LO numColors = aggregates.GetGraphNumColors();
// Here the local node count is the size of the procWinner view (numRows is
// not used for it).
208 LO numLocalNodes = procWinner.size();
209 LO numLocalAggregated = numLocalNodes - numNonAggregatedNodes;
// factor = (numLocalAggregated / (numLocalNodes + 1)) ^ aggFactor.
// The +1 keeps the denominator non-zero when there are no local nodes.
211 const double aggFactor = 0.5;
212 double factor = as<double>(numLocalAggregated)/(numLocalNodes+1);
213 factor =
pow(factor, aggFactor);
// Seed the host mirror of the aggregate counter with the current count.
// NOTE(review): the left-hand side of the create_mirror_view line and the
// copy to the device view are not visible in this listing.
217 Kokkos::create_mirror_view(numLocalAggregates);
218 h_numLocalAggregates() = aggregates.GetNumAggregates();
// Host-side mirror of the per-color root counter.
236 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
// Colors 1 .. numColors (inclusive) are processed one at a time.
237 for(
int color = 1; color < numColors + 1; ++color) {
// Kernel 1 body: one invocation per vertex; decides whether the vertex
// becomes a new root of the current color.
242 KOKKOS_LAMBDA(
const LO rootCandidate) {
// Only READY vertices of the current color can become roots.
243 if(aggStat(rootCandidate) ==
READY &&
244 colors(rootCandidate) == color) {
246 auto neighbors = graph.getNeighborVertices(rootCandidate);
// Pass over the neighbors: selects local, READY neighbors other than the
// candidate itself while aggSize is below the maximum.
// NOTE(review): the statement guarded by these conditions is not visible;
// presumably it increments aggSize - confirm.
250 for(
int j = 0; j < neighbors.length; ++j) {
251 LO neigh = neighbors(j);
252 if(neigh != rootCandidate)
254 if(graph.isLocalNeighborVertex(neigh) &&
255 aggStat(neigh) ==
READY &&
256 aggSize < maxNodesPerAggregate)
// Acceptance test: strictly larger than the minimum size and strictly
// larger than factor * numNeighbors. An accepted candidate is appended to
// newRoots at a slot obtained atomically, so the slot order depends on the
// order in which invocations reach this point.
265 if(aggSize > minNodesPerAggregate && aggSize > factor*numNeighbors) {
266 LO newRootIndex = Kokkos::atomic_fetch_add(&numNewRoots(), 1);
267 newRoots(newRootIndex) = rootCandidate;
// Nothing further to do for this color unless at least one root was recorded
// (the copy of numNewRoots into h_numNewRoots is not visible in this listing).
273 if(h_numNewRoots() > 0) {
// Sort the recorded roots, passing 0 and h_numNewRoots() as the bounds -
// presumably so that the ids assigned below do not depend on the order of
// the atomic increments above.
275 Kokkos::sort(newRoots, 0, h_numNewRoots());
// Receives this color's (non-positive) change in the non-aggregated count.
277 LO tmpNumNonAggregatedNodes = 0;
// Kernel 2 body: one invocation per recorded root, with a per-invocation
// reduction argument (presumably passed to Kokkos::parallel_reduce - the call
// head is not visible here).
281 KOKKOS_LAMBDA (
const LO newRootIndex,
LO& lNumNonAggregatedNodes) {
282 LO root = newRoots(newRootIndex);
// New id = current aggregate count + position of the root in newRoots.
283 LO newAggID = numLocalAggregates() + newRootIndex;
284 auto neighbors = graph.getNeighborVertices(root);
// The root itself is recorded as a member of the new aggregate.
287 vertex2AggId(root, 0) = newAggID;
// Each local, READY neighbor (while aggSize is below the maximum) is recorded
// as belonging to newAggID and as owned by this rank.
289 for(
int j = 0; j < neighbors.length; ++j) {
290 LO neigh = neighbors(j);
292 if(graph.isLocalNeighborVertex(neigh) &&
293 aggStat(neigh) ==
READY &&
294 aggSize < maxNodesPerAggregate) {
296 vertex2AggId(neigh, 0) = newAggID;
297 procWinner(neigh, 0) = myRank;
// The reduction value goes down by the size of the aggregate just formed.
302 lNumNonAggregatedNodes -= aggSize;
303 }, tmpNumNonAggregatedNodes);
// Adding the non-positive per-color result lowers the caller's counter.
304 numNonAggregatedNodes += tmpNumNonAggregatedNodes;
// Advance the host-side aggregate count by the number of roots of this color.
305 h_numLocalAggregates() += h_numNewRoots();
// Publish the aggregate count held in the host mirror.
309 aggregates.SetNumAggregates(h_numLocalAggregates());
314 #endif // HAVE_MUELU_KOKKOS_REFACTOR
315 #endif // MUELU_AGGREGATIONPHASE2AALGORITHM_KOKKOS_DEF_HPP
// NOTE(review): the four lines below follow the include guard's closing
// #endif and are bare signatures with no body and no terminating semicolon
// (parallel_for, parallel_reduce, pow for Kokkos::complex, and deep_copy for
// a View). They look like reference declarations attached to this listing
// rather than part of the header itself - confirm against the original file
// and drop them if they are extraction residue.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
KOKKOS_INLINE_FUNCTION Kokkos::complex< RealType > pow(const complex< RealType > &x, const RealType &e)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)