46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
51 #include <Teuchos_Comm.hpp>
52 #include <Teuchos_CommHelpers.hpp>
58 #include "MueLu_LWGraph.hpp"
59 #include "MueLu_Aggregates.hpp"
63 #include "Kokkos_Sort.hpp"
64 #include <Kokkos_ScatterView.hpp>
68 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
71 LO& numNonAggregatedNodes)
const {
72 Monitor m(*
this,
"BuildAggregatesNonKokkos");
74 std::string orderingStr = params.
get<std::string>(
"aggregation: ordering");
75 int maxNeighAlreadySelected = params.
get<
int>(
"aggregation: max selected neighbors");
76 int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
77 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
78 bool matchMLBehavior = params.
get<
bool>(
"aggregation: match ML phase1");
81 "MueLu::UncoupledAggregationAlgorithm::BuildAggregatesNonKokkos: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
89 if (orderingStr ==
"natural") ordering = O_NATURAL;
90 if (orderingStr ==
"random") ordering = O_RANDOM;
91 if (orderingStr ==
"graph") ordering = O_GRAPH;
94 const int myRank = graph.
GetComm()->getRank();
102 if (ordering == O_RANDOM) {
103 randomVector = arcp<LO>(numRows);
104 for (
LO i = 0; i < numRows; i++)
113 std::queue<LO> graphOrderQueue;
116 for (
LO i = 0; i < numRows; i++) {
118 LO rootCandidate = 0;
119 if (ordering == O_NATURAL)
121 else if (ordering == O_RANDOM)
122 rootCandidate = randomVector[i];
123 else if (ordering == O_GRAPH) {
124 if (graphOrderQueue.size() == 0) {
126 for (
LO jnode = 0; jnode < numRows; jnode++)
127 if (aggStat[jnode] ==
READY) {
128 graphOrderQueue.push(jnode);
132 if (graphOrderQueue.size() == 0) {
136 rootCandidate = graphOrderQueue.front();
137 graphOrderQueue.pop();
140 if (aggStat[rootCandidate] !=
READY)
145 aggList[aggSize++] = rootCandidate;
153 if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
154 neighOfINode.length < minNodesPerAggregate) {
158 LO numAggregatedNeighbours = 0;
160 for (
int j = 0; j < neighOfINode.length; j++) {
161 LO neigh = neighOfINode(j);
164 if (aggStat[neigh] ==
READY || aggStat[neigh] ==
NOTSEL) {
173 if (aggSize < as<size_t>(maxNodesPerAggregate))
174 aggList[aggSize++] = neigh;
176 }
else if (!matchMLBehavior || aggStat[neigh] !=
IGNORED) {
179 numAggregatedNeighbours++;
185 if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&
186 (aggSize >= as<size_t>(minNodesPerAggregate))) {
190 aggIndex = numLocalAggregates++;
192 for (
size_t k = 0; k < aggSize; k++) {
194 vertex2AggId[aggList[k]] = aggIndex;
195 procWinner[aggList[k]] = myRank;
198 numNonAggregatedNodes -= aggSize;
202 aggStat[rootCandidate] =
NOTSEL;
209 if (ordering == O_GRAPH) {
214 for (
size_t k = 0; k < aggSize; k++) {
217 for (
int j = 0; j < neighOfJNode.length; j++) {
218 LO neigh = neighOfJNode(j);
221 graphOrderQueue.push(neigh);
229 for (
LO i = 0; i < numRows; i++)
237 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
241 for (
int i = 0; i < n - 1; i++)
245 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
247 return min + as<int>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
250 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
256 LO& numNonAggregatedNodes)
const {
257 int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
258 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
262 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
269 if (params.
get<
bool>(
"aggregation: deterministic")) {
270 Monitor m(*
this,
"BuildAggregatesDeterministic");
271 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
272 aggregates, aggStat, numNonAggregatedNodes);
274 Monitor m(*
this,
"BuildAggregatesRandom");
275 BuildAggregatesRandom(maxNodesPerAggregate, graph,
276 aggregates, aggStat, numNonAggregatedNodes);
281 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
287 LO& numNonAggregatedNodes)
const {
292 const int myRank = graph.
GetComm()->getRank();
295 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
296 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
299 auto lclLWGraph = graph;
301 LO numAggregatedNodes = 0;
303 Kokkos::View<LO, device_type> aggCount(
"aggCount");
304 Kokkos::deep_copy(aggCount, numLocalAggregates);
305 Kokkos::parallel_for(
306 "Aggregation Phase 1: initial reduction over color == 1",
307 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
308 KOKKOS_LAMBDA(
const LO nodeIdx) {
309 if (colors(nodeIdx) == 1 && aggStat(nodeIdx) ==
READY) {
310 const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1);
311 vertex2AggId(nodeIdx, 0) = aggIdx;
313 procWinner(nodeIdx, 0) = myRank;
319 numAggregatedNodes -= numLocalAggregates;
320 Kokkos::deep_copy(numLocalAggregates, aggCount);
321 numAggregatedNodes += numLocalAggregates;
327 Kokkos::View<LO*, device_type> aggSizesView(
"aggSizes", numLocalAggregates);
331 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
332 Kokkos::parallel_for(
333 "Aggregation Phase 1: compute initial aggregates size",
334 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
335 KOKKOS_LAMBDA(
const LO nodeIdx) {
336 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
337 if (vertex2AggId(nodeIdx, 0) >= 0)
338 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
340 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
343 LO tmpNumAggregatedNodes = 0;
344 Kokkos::parallel_reduce(
345 "Aggregation Phase 1: main parallel_reduce over aggSizes",
346 Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
347 KOKKOS_LAMBDA(
const size_t nodeIdx,
LO& lNumAggregatedNodes) {
348 if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) ==
READY || aggStat(nodeIdx) ==
NOTSEL)) {
351 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
352 for (
LO j = 0; j < neighbors.length; ++j) {
353 auto nei = neighbors.colidx(j);
354 if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) ==
AGGREGATED) {
357 LO agg = vertex2AggId(nei, 0);
358 const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg),
360 if (aggSize < maxAggSize) {
362 vertex2AggId(nodeIdx, 0) = agg;
363 procWinner(nodeIdx, 0) = myRank;
365 ++lNumAggregatedNodes;
369 Kokkos::atomic_decrement(&aggSizesView(agg));
376 if (aggStat(nodeIdx) ==
NOTSEL) {
377 aggStat(nodeIdx) =
READY;
381 tmpNumAggregatedNodes);
382 numAggregatedNodes += tmpNumAggregatedNodes;
383 numNonAggregatedNodes -= numAggregatedNodes;
389 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
395 LO& numNonAggregatedNodes)
const {
400 const int myRank = graph.
GetComm()->getRank();
402 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
403 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
406 auto lclLWGraph = graph;
409 Kokkos::View<LO, device_type> numLocalAggregatesView(
"Num aggregates");
411 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
412 h_nla() = numLocalAggregates;
413 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
416 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
417 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
418 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
421 Kokkos::parallel_for(
422 "Aggregation Phase 1: building list of new roots",
423 Kokkos::RangePolicy<execution_space>(0, numRows),
424 KOKKOS_LAMBDA(
const LO i) {
425 if (colors(i) == 1 && aggStat(i) ==
READY) {
427 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
430 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
432 Kokkos::sort(newRoots, 0, h_numNewRoots());
433 LO numAggregated = 0;
434 Kokkos::parallel_reduce(
435 "Aggregation Phase 1: aggregating nodes",
436 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
437 KOKKOS_LAMBDA(
const LO rootIndex,
LO& lnumAggregated) {
438 LO root = newRoots(rootIndex);
439 LO aggID = numLocalAggregatesView() + rootIndex;
441 vertex2AggId(root, 0) = aggID;
442 procWinner(root, 0) = myRank;
444 auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
445 for (
LO n = 0; n < neighOfRoot.length; n++) {
446 LO neigh = neighOfRoot(n);
447 if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) ==
READY) {
449 vertex2AggId(neigh, 0) = aggID;
450 procWinner(neigh, 0) = myRank;
453 if (aggSize == maxAggSize) {
459 lnumAggregated += aggSize;
462 numNonAggregatedNodes -= numAggregated;
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList ¶ms, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void SetIsRoot(LO i, bool value=true)
Set root node information.
typename device_type::execution_space execution_space
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
void RandomReorder(ArrayRCP< LO > list) const
Utility to take a list of integers and reorder them randomly (by using a local permutation).
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
void BuildAggregates(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
Lightweight MueLu representation of a compressed row storage graph.
Exception throws to report errors in the internal logical of the program.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
int RandomOrdinal(int min, int max) const
Generate a random number in the range [min, max].
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.