46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
52 #include <Teuchos_Comm.hpp>
53 #include <Teuchos_CommHelpers.hpp>
59 #include "MueLu_Aggregates.hpp"
61 #include "MueLu_LWGraph_kokkos.hpp"
64 #include "Kokkos_Sort.hpp"
65 #include <Kokkos_ScatterView.hpp>
69 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
74 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
75 LO& numNonAggregatedNodes)
const {
76 int minNodesPerAggregate = params.
get<
int>(
"aggregation: min agg size");
77 int maxNodesPerAggregate = params.
get<
int>(
"aggregation: max agg size");
81 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
88 if (params.
get<
bool>(
"aggregation: deterministic")) {
89 Monitor m(*
this,
"BuildAggregatesDeterministic");
90 BuildAggregatesDeterministic(maxNodesPerAggregate, graph,
91 aggregates, aggStat, numNonAggregatedNodes);
93 Monitor m(*
this,
"BuildAggregatesRandom");
94 BuildAggregatesRandom(maxNodesPerAggregate, graph,
95 aggregates, aggStat, numNonAggregatedNodes);
100 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
105 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
106 LO& numNonAggregatedNodes)
const {
108 const int myRank = graph.
GetComm()->getRank();
111 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
112 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
115 auto lclLWGraph = graph;
117 LO numAggregatedNodes = 0;
119 Kokkos::View<LO, device_type> aggCount(
"aggCount");
120 Kokkos::deep_copy(aggCount, numLocalAggregates);
121 Kokkos::parallel_for(
122 "Aggregation Phase 1: initial reduction over color == 1",
123 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
124 KOKKOS_LAMBDA(
const LO nodeIdx) {
125 if (colors(nodeIdx) == 1 && aggStat(nodeIdx) ==
READY) {
126 const LO aggIdx = Kokkos::atomic_fetch_add(&aggCount(), 1);
127 vertex2AggId(nodeIdx, 0) = aggIdx;
129 procWinner(nodeIdx, 0) = myRank;
135 numAggregatedNodes -= numLocalAggregates;
136 Kokkos::deep_copy(numLocalAggregates, aggCount);
137 numAggregatedNodes += numLocalAggregates;
143 Kokkos::View<LO*, device_type> aggSizesView(
"aggSizes", numLocalAggregates);
147 auto aggSizesScatterView = Kokkos::Experimental::create_scatter_view(aggSizesView);
148 Kokkos::parallel_for(
149 "Aggregation Phase 1: compute initial aggregates size",
150 Kokkos::RangePolicy<LO, execution_space>(0, numRows),
151 KOKKOS_LAMBDA(
const LO nodeIdx) {
152 auto aggSizesScatterViewAccess = aggSizesScatterView.access();
153 if (vertex2AggId(nodeIdx, 0) >= 0)
154 aggSizesScatterViewAccess(vertex2AggId(nodeIdx, 0)) += 1;
156 Kokkos::Experimental::contribute(aggSizesView, aggSizesScatterView);
159 LO tmpNumAggregatedNodes = 0;
160 Kokkos::parallel_reduce(
161 "Aggregation Phase 1: main parallel_reduce over aggSizes",
162 Kokkos::RangePolicy<size_t, execution_space>(0, numRows),
163 KOKKOS_LAMBDA(
const size_t nodeIdx,
LO& lNumAggregatedNodes) {
164 if (colors(nodeIdx) != 1 && (aggStat(nodeIdx) ==
READY || aggStat(nodeIdx) ==
NOTSEL)) {
167 auto neighbors = lclLWGraph.getNeighborVertices(nodeIdx);
168 for (
LO j = 0; j < neighbors.length; ++j) {
169 auto nei = neighbors.colidx(j);
170 if (lclLWGraph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat(nei) ==
AGGREGATED) {
173 LO agg = vertex2AggId(nei, 0);
174 const LO aggSize = Kokkos::atomic_fetch_add(&aggSizesView(agg),
176 if (aggSize < maxAggSize) {
178 vertex2AggId(nodeIdx, 0) = agg;
179 procWinner(nodeIdx, 0) = myRank;
181 ++lNumAggregatedNodes;
185 Kokkos::atomic_decrement(&aggSizesView(agg));
192 if (aggStat(nodeIdx) ==
NOTSEL) {
193 aggStat(nodeIdx) =
READY;
197 tmpNumAggregatedNodes);
198 numAggregatedNodes += tmpNumAggregatedNodes;
199 numNonAggregatedNodes -= numAggregatedNodes;
205 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
210 Kokkos::View<unsigned*, typename LWGraph_kokkos::device_type>& aggStat,
211 LO& numNonAggregatedNodes)
const {
213 const int myRank = graph.
GetComm()->getRank();
215 auto vertex2AggId = aggregates.
GetVertex2AggId()->getDeviceLocalView(Xpetra::Access::ReadWrite);
216 auto procWinner = aggregates.
GetProcWinner()->getDeviceLocalView(Xpetra::Access::ReadWrite);
219 auto lclLWGraph = graph;
222 Kokkos::View<LO, device_type> numLocalAggregatesView(
"Num aggregates");
224 auto h_nla = Kokkos::create_mirror_view(numLocalAggregatesView);
225 h_nla() = numLocalAggregates;
226 Kokkos::deep_copy(numLocalAggregatesView, h_nla);
229 Kokkos::View<LO*, device_type> newRoots(
"New root LIDs", numNonAggregatedNodes);
230 Kokkos::View<LO, device_type> numNewRoots(
"Number of new aggregates of current color");
231 auto h_numNewRoots = Kokkos::create_mirror_view(numNewRoots);
234 Kokkos::parallel_for(
235 "Aggregation Phase 1: building list of new roots",
236 Kokkos::RangePolicy<execution_space>(0, numRows),
237 KOKKOS_LAMBDA(
const LO i) {
238 if (colors(i) == 1 && aggStat(i) ==
READY) {
240 newRoots(Kokkos::atomic_fetch_add(&numNewRoots(), 1)) = i;
243 Kokkos::deep_copy(h_numNewRoots, numNewRoots);
245 Kokkos::sort(newRoots, 0, h_numNewRoots());
246 LO numAggregated = 0;
247 Kokkos::parallel_reduce(
248 "Aggregation Phase 1: aggregating nodes",
249 Kokkos::RangePolicy<execution_space>(0, h_numNewRoots()),
250 KOKKOS_LAMBDA(
const LO rootIndex,
LO& lnumAggregated) {
251 LO root = newRoots(rootIndex);
252 LO aggID = numLocalAggregatesView() + rootIndex;
254 vertex2AggId(root, 0) = aggID;
255 procWinner(root, 0) = myRank;
257 auto neighOfRoot = lclLWGraph.getNeighborVertices(root);
258 for (
LO n = 0; n < neighOfRoot.length; n++) {
259 LO neigh = neighOfRoot(n);
260 if (lclLWGraph.isLocalNeighborVertex(neigh) && aggStat(neigh) ==
READY) {
262 vertex2AggId(neigh, 0) = aggID;
263 procWinner(neigh, 0) = myRank;
266 if (aggSize == maxAggSize) {
272 lnumAggregated += aggSize;
275 numNonAggregatedNodes -= numAggregated;
282 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
void BuildAggregatesRandom(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
void BuildAggregatesDeterministic(const LO maxAggSize, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Lightweight MueLu representation of a compressed row storage graph.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
T & get(const std::string &name, T def_value)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
colors_view_type & GetGraphColors()
Get a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
void BuildAggregates(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Local aggregation.
Timer to be used in non-factories.
Exception throws to report errors in the internal logical of the program.
const RCP< const Teuchos::Comm< int > > GetComm() const
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.