46 #ifndef MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
54 #include <Teuchos_Comm.hpp>
55 #include <Teuchos_CommHelpers.hpp>
61 #include "MueLu_Aggregates_kokkos.hpp"
63 #include "MueLu_LWGraph_kokkos.hpp"
66 #include "KokkosGraph_Distance2ColorHandle.hpp"
67 #include "KokkosGraph_Distance2Color.hpp"
71 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
72 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
73 BuildAggregates(
const ParameterList& params,
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates, std::vector<unsigned>& aggStat,
74 LO& numNonAggregatedNodes)
const {
75 Monitor m(*
this,
"BuildAggregates");
77 std::string orderingStr = params.get<std::string>(
"aggregation: ordering");
78 int maxNeighAlreadySelected = params.get<
int> (
"aggregation: max selected neighbors");
79 int minNodesPerAggregate = params.get<
int> (
"aggregation: min agg size");
80 int maxNodesPerAggregate = params.get<
int> (
"aggregation: max agg size");
82 Algorithm algorithm = Algorithm::Serial;
83 std::string algoParamName =
"aggregation: phase 1 algorithm";
84 if(params.isParameter(algoParamName))
86 algorithm = algorithmFromName(params.get<std::string>(
"aggregation: phase 1 algorithm"));
90 "MueLu::UncoupledAggregationAlgorithm::BuildAggregates: minNodesPerAggregate must be smaller or equal to MaxNodePerAggregate!");
95 if(algorithm == Algorithm::Distance2)
97 BuildAggregatesDistance2(graph, aggregates, aggStat, numNonAggregatedNodes, maxNodesPerAggregate);
101 BuildAggregatesSerial(graph, aggregates, aggStat, numNonAggregatedNodes,
102 minNodesPerAggregate, maxNodesPerAggregate, maxNeighAlreadySelected, orderingStr);
107 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
108 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
109 BuildAggregatesSerial(
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates,
110 std::vector<unsigned>& aggStat,
LO& numNonAggregatedNodes,
111 LO minNodesPerAggregate,
LO maxNodesPerAggregate,
112 LO maxNeighAlreadySelected, std::string& orderingStr)
const
120 ordering = O_NATURAL;
121 if (orderingStr ==
"natural") ordering = O_NATURAL;
122 if (orderingStr ==
"random" ) ordering = O_RANDOM;
123 if (orderingStr ==
"graph" ) ordering = O_GRAPH;
125 const LO numRows = graph.GetNodeNumVertices();
126 const int myRank = graph.GetComm()->getRank();
128 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
129 ArrayRCP<LO> procWinner = aggregates.GetProcWinner() ->getDataNonConst(0);
131 LO numLocalAggregates = aggregates.GetNumAggregates();
133 ArrayRCP<LO> randomVector;
134 if (ordering == O_RANDOM) {
135 randomVector = arcp<LO>(numRows);
136 for (
LO i = 0; i < numRows; i++)
138 RandomReorder(randomVector);
143 std::vector<int> aggList(graph.getNodeMaxNumRowEntries());
145 std::queue<LO> graphOrderQueue;
148 for (
LO i = 0; i < numRows; i++) {
150 LO rootCandidate = 0;
151 if (ordering == O_NATURAL) rootCandidate = i;
152 else if (ordering == O_RANDOM) rootCandidate = randomVector[i];
153 else if (ordering == O_GRAPH) {
155 if (graphOrderQueue.size() == 0) {
157 for (
LO jnode = 0; jnode < numRows; jnode++)
158 if (aggStat[jnode] ==
READY) {
159 graphOrderQueue.push(jnode);
163 if (graphOrderQueue.size() == 0) {
167 rootCandidate = graphOrderQueue.front();
168 graphOrderQueue.pop();
171 if (aggStat[rootCandidate] !=
READY)
176 aggList[aggSize++] = rootCandidate;
178 auto neighOfINode = graph.getNeighborVertices(rootCandidate);
184 if ((ordering == O_NATURAL || ordering == O_RANDOM) &&
185 as<int>(neighOfINode.length) < minNodesPerAggregate) {
189 LO numAggregatedNeighbours = 0;
191 for (
int j = 0; j < as<int>(neighOfINode.length); j++) {
192 LO neigh = neighOfINode(j);
194 if (neigh != rootCandidate && graph.isLocalNeighborVertex(neigh)) {
196 if (aggStat[neigh] ==
READY || aggStat[neigh] ==
NOTSEL) {
205 if (aggSize < as<size_t>(maxNodesPerAggregate))
206 aggList[aggSize++] = neigh;
209 numAggregatedNeighbours++;
215 if ((numAggregatedNeighbours <= maxNeighAlreadySelected) &&
216 (aggSize >= as<size_t>(minNodesPerAggregate))) {
219 aggregates.SetIsRoot(rootCandidate);
220 aggIndex = numLocalAggregates++;
222 for (
size_t k = 0; k < aggSize; k++) {
224 vertex2AggId[aggList[k]] = aggIndex;
225 procWinner [aggList[k]] = myRank;
228 numNonAggregatedNodes -= aggSize;
232 aggStat[rootCandidate] =
NOTSEL;
239 if (ordering == O_GRAPH) {
244 for (
size_t k = 0; k < aggSize; k++) {
245 auto neighOfJNode = graph.getNeighborVertices(aggList[k]);
247 for (
int j = 0; j < as<int>(neighOfJNode.length); j++) {
248 LO neigh = neighOfJNode(j);
250 if (graph.isLocalNeighborVertex(neigh) && aggStat[neigh] ==
READY)
251 graphOrderQueue.push(neigh);
259 for (
LO i = 0; i < numRows; i++)
264 aggregates.SetNumAggregates(numLocalAggregates);
267 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
268 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
269 BuildAggregatesDistance2(
const LWGraph_kokkos& graph, Aggregates_kokkos& aggregates,
270 std::vector<unsigned>& aggStat,
LO& numNonAggregatedNodes,
LO maxAggSize)
const
272 const LO numRows = graph.GetNodeNumVertices();
273 const int myRank = graph.GetComm()->getRank();
275 ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0);
276 ArrayRCP<LO> procWinner = aggregates.GetProcWinner() ->getDataNonConst(0);
278 LO numLocalAggregates = aggregates.GetNumAggregates();
281 std::vector<LocalOrdinal> rowptrs;
282 rowptrs.reserve(numRows + 1);
283 std::vector<LocalOrdinal> colinds;
284 colinds.reserve(graph.GetNodeNumEdges());
286 rowptrs.push_back(0);
287 for(LocalOrdinal row = 0; row < numRows; row++)
289 auto entries = graph.getNeighborVertices(row);
290 for(LocalOrdinal i = 0; i < entries.length; i++)
292 colinds.push_back(entries.colidx(i));
294 rowptrs.push_back(colinds.size());
298 typedef typename Tpetra::CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_graph_type graph_t;
299 typedef typename graph_t::device_type device_t;
300 typedef typename device_t::memory_space memory_space;
301 typedef typename device_t::execution_space execution_space;
302 typedef typename graph_t::row_map_type::non_const_type rowptrs_view;
303 typedef typename rowptrs_view::HostMirror host_rowptrs_view;
304 typedef typename graph_t::entries_type::non_const_type colinds_view;
305 typedef typename colinds_view::HostMirror host_colinds_view;
307 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
308 typename rowptrs_view::const_value_type,
typename colinds_view::const_value_type,
typename colinds_view::const_value_type,
309 execution_space, memory_space, memory_space> KernelHandle;
313 kh.create_distance2_graph_coloring_handle();
316 auto coloringHandle = kh.get_distance2_graph_coloring_handle();
327 coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
330 rowptrs_view aRowptrs(
"A device rowptrs", rowptrs.size());
331 colinds_view aColinds(
"A device colinds", colinds.size());
334 host_rowptrs_view aHostRowptrs = Kokkos::create_mirror_view(aRowptrs);
335 for(
size_t i = 0; i < rowptrs.size(); i++)
337 aHostRowptrs(i) = rowptrs[i];
340 host_colinds_view aHostColinds = Kokkos::create_mirror_view(aColinds);
341 for(
size_t i = 0; i < colinds.size(); i++)
343 aHostColinds(i) = colinds[i];
349 KokkosGraph::Experimental::graph_compute_distance2_color(&kh, numRows, numRows, aRowptrs, aColinds, aRowptrs, aColinds);
352 auto colorsDevice = coloringHandle->get_vertex_colors();
354 auto colors = Kokkos::create_mirror_view(colorsDevice);
358 kh.destroy_distance2_graph_coloring_handle();
361 LocalOrdinal aggCount = 0;
362 for(LocalOrdinal i = 0; i < numRows; i++)
364 if(colors(i) == 1 && aggStat[i] ==
READY)
366 vertex2AggId[i] = aggCount++;
368 numLocalAggregates++;
369 procWinner[i] = myRank;
372 numNonAggregatedNodes = 0;
373 std::vector<LocalOrdinal> aggSizes(numLocalAggregates, 0);
374 for(
int i = 0; i < numRows; i++)
376 if(vertex2AggId[i] >= 0)
377 aggSizes[vertex2AggId[i]]++;
380 for(LocalOrdinal i = 0; i < numRows; i++)
382 if(colors(i) != 1 && (aggStat[i] ==
READY || aggStat[i] ==
NOTSEL))
386 auto neighbors = graph.getNeighborVertices(i);
387 for(LocalOrdinal j = 0; j < neighbors.length; j++)
389 auto nei = neighbors.colidx(j);
390 LocalOrdinal agg = vertex2AggId[nei];
391 if(graph.isLocalNeighborVertex(nei) && colors(nei) == 1 && aggStat[nei] ==
AGGREGATED && aggSizes[agg] < maxAggSize)
394 vertex2AggId[i] = agg;
397 procWinner[i] = myRank;
404 numNonAggregatedNodes++;
410 aggregates.SetNumAggregates(numLocalAggregates);
413 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
414 void AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomReorder(ArrayRCP<LO> list)
const {
417 for(
int i = 0; i < n-1; i++)
418 std::swap(list[i], list[RandomOrdinal(i,n-1)]);
421 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
422 int AggregationPhase1Algorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::RandomOrdinal(
int min,
int max)
const {
423 return min + as<int>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
428 #endif // HAVE_MUELU_KOKKOS_REFACTOR
429 #endif // MUELU_AGGREGATIONPHASE1ALGORITHM_KOKKOS_DEF_HPP
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)