46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
54 #include <Teuchos_Comm.hpp>
55 #include <Teuchos_CommHelpers.hpp>
61 #include "MueLu_Aggregates_kokkos.hpp"
63 #include "MueLu_LWGraph_kokkos.hpp"
66 #include "Kokkos_Sort.hpp"
71 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
74 const LWGraph_kokkos& graph,
75 Aggregates_kokkos& aggregates,
77 LO& numNonAggregatedNodes)
const {
79 using memory_space =
typename LWGraph_kokkos::memory_space;
81 std::string orderingStr = params.
get<std::string>(
"aggregation: ordering");
82 int maxNeighAlreadySelected = params.
get<
int> (
"aggregation: max selected neighbors");
83 int minNodesPerAggregate = params.
get<
int> (
"aggregation: min agg size");
84 int maxNodesPerAggregate = params.
get<
int> (
"aggregation: max agg size");
86 Algorithm algorithm = Algorithm::Serial;
87 std::string algoParamName =
"aggregation: phase 1 algorithm";
90 algorithm = algorithmFromName(params.
get<std::string>(algoParamName));
94 Exceptions::RuntimeError,
95 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
100 if(algorithm == Algorithm::Distance2)
102 if(params.
get<
bool>(
"aggregation: deterministic"))
104 Monitor m(*
this,
"BuildAggregatesDeterministic");
105 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
106 aggregates, aggStat, numNonAggregatedNodes);
108 Monitor m(*
this,
"BuildAggregatesRandom");
109 BuildAggregatesDistance2(maxNodesPerAggregate, graph,
110 aggregates, aggStat, numNonAggregatedNodes);
115 Monitor m(*
this,
"BuildAggregatesSerial");
117 = Kokkos::create_mirror(aggStat);
119 std::vector<unsigned> aggstat;
120 aggstat.resize(aggStatHost.
extent(0));
121 for(
size_t idx = 0; idx < aggStatHost.
extent(0); ++idx) {
122 aggstat[idx] = aggStatHost(idx);
125 BuildAggregatesSerial(graph, aggregates, aggstat, numNonAggregatedNodes, minNodesPerAggregate,
126 maxNodesPerAggregate, maxNeighAlreadySelected, orderingStr);
128 for(
size_t idx = 0; idx < aggStatHost.
extent(0); ++idx) {
129 aggStatHost(idx) = aggstat[idx];
136 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
137 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
138 BuildAggregatesSerial(
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates,
139 std::vector<unsigned>& aggStat,
LO& numNonAggregatedNodes,
140 LO minNodesPerAggregate,
LO maxNodesPerAggregate,
141 LO maxNeighAlreadySelected, std::string& orderingStr)
const
149 ordering = O_NATURAL;
150 if (orderingStr ==
"natural") ordering = O_NATURAL;
151 if (orderingStr ==
"random" ) ordering = O_RANDOM;
152 if (orderingStr ==
"graph" ) ordering = O_GRAPH;
154 const LO numRows = graph.GetNodeNumVertices();
155 const int myRank = graph.GetComm()->getRank();
157 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
158 ArrayRCP<LO> procWinner = aggregates.GetProcWinner() ->getDataNonConst(0);
160 LO numLocalAggregates = aggregates.GetNumAggregates();
162 ArrayRCP<LO> randomVector;
163 if (ordering == O_RANDOM) {
164 randomVector = arcp<LO>(numRows);
165 for (
LO i = 0; i < numRows; i++)
167 RandomReorder(randomVector);
172 std::vector<int> aggList(graph.getNodeMaxNumRowEntries());
174 std::queue<LO> graphOrderQueue;
177 for (
LO i = 0; i < numRows; i++) {
179 LO rootCandidate = 0;
180 if (ordering == O_NATURAL) rootCandidate = i;
181 else if (ordering == O_RANDOM) rootCandidate = randomVector[i];
182 else if (ordering == O_GRAPH) {
184 if (graphOrderQueue.size() == 0) {
186 for (
LO jnode = 0; jnode < numRows; jnode++)
187 if (aggStat[jnode] ==
READY) {
188 graphOrderQueue.push(jnode);
192 if (graphOrderQueue.size() == 0) {
196 rootCandidate = graphOrderQueue.front();
197 graphOrderQueue.pop();
200 if (aggStat[rootCandidate] !=
READY)
205 aggList[aggSize++] = rootCandidate;
207 auto neighOfINode = graph.getNeighborVertices(rootCandidate);
213 if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
214 as<int>(neighOfINode.length) < minNodesPerAggregate) {
218 LO numAggregatedNeighbours = 0;
220 for (
int j = 0; j < as<int>(neighOfINode.length); j++) {
221 LO neigh = neighOfINode(j);
223 if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {
225 if (aggStat[neigh] ==
READY || aggStat[neigh] ==
NOTSEL) {
234 if (aggSize < as<size_t>(maxNodesPerAggregate))
235 aggList[aggSize++] = neigh;
238 numAggregatedNeighbours++;
244 if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&
245 (aggSize >= as<size_t>(minNodesPerAggregate))) {
248 aggregates.SetIsRoot(rootCandidate);
249 aggIndex = numLocalAggregates++;
251 for (
size_t k = 0; k < aggSize; k++) {
253 vertex2AggId[aggList[k]] = aggIndex;
254 procWinner [aggList[k]] = myRank;
257 numNonAggregatedNodes -= aggSize;
261 aggStat[rootCandidate] =
NOTSEL;
268 if (ordering == O_GRAPH) {
273 for (
size_t k = 0; k < aggSize; k++) {
274 auto neighOfJNode = graph.getNeighborVertices(aggList[k]);
276 for (
int j = 0; j < as<int>(neighOfJNode.length); j++) {
277 LO neigh = neighOfJNode(j);
279 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] ==
READY)
280 graphOrderQueue.push(neigh);
288 for (
LO i = 0; i < numRows; i++)
293 aggregates.SetNumAggregates(numLocalAggregates);
296 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
297 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
298 BuildAggregatesDistance2(
const LO maxAggSize,
299 const LWGraph_kokkos& graph,
300 Aggregates_kokkos& aggregates,
302 LO& numNonAggregatedNodes)
const
304 using memory_space =
typename LWGraph_kokkos::memory_space;
305 using execution_space =
typename LWGraph_kokkos::execution_space;
307 const LO numRows = graph.GetNodeNumVertices();
308 const int myRank = graph.GetComm()->getRank();
311 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
312 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
313 auto colors = aggregates.GetGraphColors();
315 LO numLocalAggregates = aggregates.GetNumAggregates();
317 LO tmpNumLocalAggregates = 0;
320 KOKKOS_LAMBDA (
const LO i,
LO& lnumLocalAggregates) {
321 if(colors(i) == 1 && aggStat(i) ==
READY) {
322 const LO idx = Kokkos::atomic_fetch_add (&aggCount(), 1);
323 vertex2AggId(i, 0) = idx;
325 ++lnumLocalAggregates;
326 procWinner(i, 0) = myRank;
328 }, tmpNumLocalAggregates);
329 numLocalAggregates += tmpNumLocalAggregates;
339 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
342 KOKKOS_LAMBDA (
const LO i) {
343 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
344 if(vertex2AggId(i, 0) >= 0)
345 aggSizesScatterViewAccess(vertex2AggId(i, 0)) += 1;
347 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
352 KOKKOS_LAMBDA (
const size_t i,
LO & lNumNonAggregatedNodes) {
354 && (aggStat(i) ==
READY || aggStat(i) ==
NOTSEL)) {
357 auto neighbors = graph.getNeighborVertices(i);
358 for(
LO j = 0; j < neighbors.length; ++j) {
359 auto nei = neighbors.colidx(j);
360 if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1
365 LO agg = vertex2AggId(nei, 0);
366 const LO aggSize = Kokkos::atomic_fetch_add (&aggSizesView(agg),
368 if(aggSize < maxAggSize) {
370 vertex2AggId(i, 0) = agg;
371 procWinner(i, 0) = myRank;
376 Kokkos::atomic_decrement(&aggSizesView(agg));
382 lNumNonAggregatedNodes++;
385 }, numNonAggregatedNodes);
388 aggregates.SetNumAggregates(numLocalAggregates);
391 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
392 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
393 BuildAggregatesDeterministic(
const LO maxAggSize,
394 const LWGraph_kokkos& graph,
395 Aggregates_kokkos& aggregates,
397 LO& numNonAggregatedNodes)
const
399 using graph_t =
typename LWGraph_kokkos::local_graph_type;
400 using memory_space =
typename graph_t::device_type::memory_space;
401 using execution_space =
typename graph_t::device_type::execution_space;
403 const LO numRows = graph.GetNodeNumVertices();
404 const int myRank = graph.GetComm()->getRank();
406 auto vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
407 auto procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
408 auto colors = aggregates.GetGraphColors();
410 LO numLocalAggregates = aggregates.GetNumAggregates();
413 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
414 h_nla() = numLocalAggregates;
420 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
425 KOKKOS_LAMBDA(
const LO i)
427 if(colors(i) == 1 && aggStat(i) ==
READY)
430 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
435 Kokkos::sort(newRoots, 0, h_numNewRoots());
436 LO numAggregated = 0;
439 KOKKOS_LAMBDA(
const LO rootIndex,
LO& lnumAggregated)
441 LO root = newRoots(rootIndex);
442 LO aggID = numLocalAggregatesView() + rootIndex;
444 vertex2AggId(root, 0) = aggID;
445 procWinner(root, 0) = myRank;
447 auto neighOfRoot = graph.getNeighborVertices(root);
448 for(
LO n = 0; n < neighOfRoot.length; n++)
450 LO neigh = neighOfRoot(n);
451 if (graph.isLocalNeighborVertex(neigh) && aggStat(neigh) ==
READY)
454 vertex2AggId(neigh, 0) = aggID;
455 procWinner(neigh, 0) = myRank;
458 if(aggSize == maxAggSize)
465 lnumAggregated += aggSize;
467 numNonAggregatedNodes -= numAggregated;
469 aggregates.SetNumAggregates(numLocalAggregates + h_numNewRoots());
472 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
473 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomReorder(ArrayRCP<LO> list)
const {
476 for(
int i = 0; i < n-1; i++)
477 std::swap(list[i], list[RandomOrdinal(i,n-1)]);
480 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
481 int AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomOrdinal(
int min,
int max)
const {
482 return min + as<int>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
487 #endif // HAVE_MUELU_KOKKOS_REFACTOR
488 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
T & get(const std::string &name, T def_value)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
bool isParameter(const std::string &name) const
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< std::is_integral< iType >::value, size_t >::type extent(const iType &r) const noexcept