10 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
11 #define MUELU_AGGREGATIONPHASE1ALGORITHM_DEF_HPP_
15 #include <Teuchos_Comm.hpp>
16 #include <Teuchos_CommHelpers.hpp>
22 #include "MueLu_LWGraph.hpp"
23 #include "MueLu_Aggregates.hpp"
27 #include "Kokkos_Sort.hpp"
28 #include <Kokkos_ScatterView.hpp>
32 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
35 LO& numNonAggregatedNodes)
const {
36 Monitor m(*
this,
"BuildAggregatesNonKokkos");
38 std::string orderingStr = params.
get<std::string>(
"aggregation: ordering");
39 int maxNeighAlreadySelected = params.
get<
int>(
"aggregation: max selected neighbors");
40 int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
41 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
42 bool matchMLBehavior = params.
get<
bool>(
"aggregation: match ML phase1");
45 "MueLu::UncoupledAggregationAlgorithm::BuildAggregatesNonKokkos: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
53 if (orderingStr ==
"natural") ordering = O_NATURAL;
54 if (orderingStr ==
"random") ordering = O_RANDOM;
55 if (orderingStr ==
"graph") ordering = O_GRAPH;
58 const int myRank = graph.
GetComm()->getRank();
66 if (ordering == O_RANDOM) {
67 randomVector = arcp<LO>(numRows);
68 for (
LO i = 0; i < numRows; i++)
77 std::queue<LO> graphOrderQueue;
80 for (
LO i = 0; i < numRows; i++) {
83 if (ordering == O_NATURAL)
85 else if (ordering == O_RANDOM)
86 rootCandidate = randomVector[i];
87 else if (ordering == O_GRAPH) {
88 if (graphOrderQueue.size() == 0) {
90 for (
LO jnode = 0; jnode < numRows; jnode++)
91 if (aggStat[jnode] ==
READY) {
92 graphOrderQueue.push(jnode);
96 if (graphOrderQueue.size() == 0) {
100 rootCandidate = graphOrderQueue.front();
101 graphOrderQueue.pop();
104 if (aggStat[rootCandidate] !=
READY)
109 aggList[aggSize++] = rootCandidate;
117 if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
118 neighOfINode.length < minNodesPerAggregate) {
122 LO numAggregatedNeighbours = 0;
124 for (
int j = 0; j < neighOfINode.length; j++) {
125 LO neigh = neighOfINode(j);
128 if (aggStat[neigh] ==
READY || aggStat[neigh] ==
NOTSEL) {
137 if (aggSize < as<size_t>(maxNodesPerAggregate))
138 aggList[aggSize++] = neigh;
140 }
else if (!matchMLBehavior || aggStat[neigh] !=
IGNORED) {
143 numAggregatedNeighbours++;
149 if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&
150 (aggSize >= as<size_t>(minNodesPerAggregate))) {
154 aggIndex = numLocalAggregates++;
156 for (
size_t k = 0; k < aggSize; k++) {
158 vertex2AggId[aggList[k]] = aggIndex;
159 procWinner[aggList[k]] = myRank;
162 numNonAggregatedNodes -= aggSize;
166 aggStat[rootCandidate] =
NOTSEL;
173 if (ordering == O_GRAPH) {
178 for (
size_t k = 0; k < aggSize; k++) {
181 for (
int j = 0; j < neighOfJNode.length; j++) {
182 LO neigh = neighOfJNode(j);
185 graphOrderQueue.push(neigh);
193 for (
LO i = 0; i < numRows; i++)
201 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
205 for (
int i = 0; i < n - 1; i++)
209 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
211 return min + as<int>((max - min + 1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
214 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
220 LO& numNonAggregatedNodes)
const {
221 int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
222 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
226 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
233 if (params.
get<
bool>(
"aggregation: deterministic")) {
234 Monitor m(*
this,
"BuildAggregatesDeterministic");
235 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
236 aggregates, aggStat, numNonAggregatedNodes);
238 Monitor m(*
this,
"BuildAggregatesRandom");
239 BuildAggregatesRandom(maxNodesPerAggregate, graph,
240 aggregates, aggStat, numNonAggregatedNodes);
245 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
251 LO& numNonAggregatedNodes)
const {
256 const int myRank = graph.
GetComm()->getRank();
259 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
260 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
263 auto lclLWGraph = graph;
265 LO numAggregatedNodes = 0;
267 Kokkos::View<LO, device_type> aggCount(
"aggCount");
268 Kokkos::deep_copy(aggCount, numLocalAggregates);
269 Kokkos::parallel_for(
270 "Aggregation Phase 1: initial reduction over color == 1",
271 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
272 KOKKOS_LAMBDA(
const LO nodeIdx) {
273 if (colors(nodeIdx) == 1 && aggStat(nodeIdx) ==
READY) {
274 const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1);
275 vertex2AggId(nodeIdx, 0) = aggIdx;
277 procWinner(nodeIdx, 0) = myRank;
283 numAggregatedNodes -= numLocalAggregates;
284 Kokkos::deep_copy(numLocalAggregates, aggCount);
285 numAggregatedNodes += numLocalAggregates;
291 Kokkos::View<LO*, device_type> aggSizesView(
"aggSizes", numLocalAggregates);
295 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
296 Kokkos::parallel_for(
297 "Aggregation Phase 1: compute initial aggregates size",
298 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
299 KOKKOS_LAMBDA(
const LO nodeIdx) {
300 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
301 if (vertex2AggId(nodeIdx, 0) >= 0)
302 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
304 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
307 LO tmpNumAggregatedNodes = 0;
308 Kokkos::parallel_reduce(
309 "Aggregation Phase 1: main parallel_reduce over aggSizes",
310 Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
311 KOKKOS_LAMBDA(
const size_t nodeIdx,
LO& lNumAggregatedNodes) {
312 if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) ==
READY || aggStat(nodeIdx) ==
NOTSEL)) {
315 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
316 for (
LO j = 0; j < neighbors.length; ++j) {
317 auto nei = neighbors.colidx(j);
318 if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) ==
AGGREGATED) {
321 LO agg = vertex2AggId(nei, 0);
322 const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg),
324 if (aggSize < maxAggSize) {
326 vertex2AggId(nodeIdx, 0) = agg;
327 procWinner(nodeIdx, 0) = myRank;
329 ++lNumAggregatedNodes;
333 Kokkos::atomic_dec(&aggSizesView(agg));
340 if (aggStat(nodeIdx) ==
NOTSEL) {
341 aggStat(nodeIdx) =
READY;
345 tmpNumAggregatedNodes);
346 numAggregatedNodes += tmpNumAggregatedNodes;
347 numNonAggregatedNodes -= numAggregatedNodes;
353 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
359 LO& numNonAggregatedNodes)
const {
364 const int myRank = graph.
GetComm()->getRank();
366 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
367 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
370 auto lclLWGraph = graph;
373 Kokkos::View<LO, device_type> numLocalAggregatesView(
"Num aggregates");
375 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
376 h_nla() = numLocalAggregates;
377 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
380 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
381 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
382 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
385 Kokkos::parallel_for(
386 "Aggregation Phase 1: building list of new roots",
387 Kokkos::RangePolicy<execution_space>(0, numRows),
388 KOKKOS_LAMBDA(
const LO i) {
389 if (colors(i) == 1 && aggStat(i) ==
READY) {
391 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
394 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
396 Kokkos::sort(newRoots, 0, h_numNewRoots());
397 LO numAggregated = 0;
398 Kokkos::parallel_reduce(
399 "Aggregation Phase 1: aggregating nodes",
400 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
401 KOKKOS_LAMBDA(
const LO rootIndex,
LO& lnumAggregated) {
402 LO root = newRoots(rootIndex);
403 LO aggID = numLocalAggregatesView() + rootIndex;
405 vertex2AggId(root, 0) = aggID;
406 procWinner(root, 0) = myRank;
408 auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
409 for (
LO n = 0; n < neighOfRoot.length; n++) {
410 LO neigh = neighOfRoot(n);
411 if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) ==
READY) {
413 vertex2AggId(neigh, 0) = aggID;
414 procWinner(neigh, 0) = myRank;
417 if (aggSize == maxAggSize) {
423 lnumAggregated += aggSize;
426 numNonAggregatedNodes -= numAggregated;
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
void BuildAggregatesNonKokkos(const ParameterList ¶ms, const LWGraph &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatHostType &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
void SetIsRoot(LO i, bool value=true)
Set root node information.
typename device_type::execution_space execution_space
KOKKOS_INLINE_FUNCTION size_type getLocalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
KOKKOS_INLINE_FUNCTION bool isLocalNeighborVertex(LO i) const
Return true if vertex with local id 'v' is on current process.
void RandomReorder(ArrayRCP< LO > list) const
Utility to take a list of integers and reorder them randomly (by using a local permutation).
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION neighbor_vertices_type getNeighborVertices(LO i) const
Return the list of vertices adjacent to the vertex 'v'.
void BuildAggregates(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, typename AggregationAlgorithmBase< LocalOrdinal, GlobalOrdinal, Node >::AggStatType &aggStat, LO &numNonAggregatedNodes) const
Timer to be used in non-factories.
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
Lightweight MueLu representation of a compressed row storage graph.
Exception throws to report errors in the internal logical of the program.
typename std::conditional< OnHost, Kokkos::Device< Kokkos::Serial, Kokkos::HostSpace >, typename Node::device_type >::type device_type
int RandomOrdinal(int min, int max) const
Generate a random number in the range [min, max].
const RCP< const Teuchos::Comm< int > > GetComm() const
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.