46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 
   47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 
   49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 
   51 #include <Teuchos_Comm.hpp> 
   52 #include <Teuchos_CommHelpers.hpp> 
   58 #include "MueLu_Aggregates_kokkos.hpp" 
   60 #include "MueLu_LWGraph_kokkos.hpp" 
   67   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
   68   void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
 
   69   BuildAggregates(
const ParameterList& params,
 
   70                   const LWGraph_kokkos& graph,
 
   71                   Aggregates_kokkos& aggregates,
 
   73                   LO& numNonAggregatedNodes)
 const {
 
   75     if(params.get<
bool>(
"aggregation: deterministic")) {
 
   76       Monitor m(*
this, 
"BuildAggregatesDeterministic");
 
   77       BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
 
   79       Monitor m(*
this, 
"BuildAggregatesRandom");
 
   80       BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
 
   85   template <
class LO, 
class GO, 
class Node>
 
   86   void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
 
   87   BuildAggregatesRandom(
const ParameterList& params,
 
   88                         const LWGraph_kokkos& graph,
 
   89                         Aggregates_kokkos& aggregates,
 
   91                         LO& numNonAggregatedNodes)
 const {
 
   92     using memory_space    = 
typename LWGraph_kokkos::memory_space;
 
   93     using execution_space = 
typename LWGraph_kokkos::execution_space;
 
   95     const LO  numRows = graph.GetNodeNumVertices();
 
   96     const int myRank  = graph.GetComm()->getRank();
 
   98     auto vertex2AggId           = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
 
   99     auto procWinner             = aggregates.GetProcWinner()  ->template getLocalView<memory_space>();
 
  100     auto colors                 = aggregates.GetGraphColors();
 
  101     const LO numColors          = aggregates.GetGraphNumColors();
 
  102     const LO numLocalAggregates = aggregates.GetNumAggregates();
 
  104     const LO defaultConnectWeight = 100;
 
  105     const LO penaltyConnectWeight = 10;
 
  121     int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
 
  122     if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
 
  123     for (
int iter = 0; 
iter < maxIters; ++
iter) {
 
  124       for(
LO color = 1; color <= numColors; ++color) {
 
  129         LO numAggregated = 0;
 
  132                                 KOKKOS_LAMBDA (
const LO i, 
LO& tmpNumAggregated) {
 
  133                                   if (aggStat(i) != 
READY || colors(i) != color)
 
  136                                   auto neighOfINode = graph.getNeighborVertices(i);
 
  137                                   for (
int j = 0; j < neighOfINode.length; j++) {
 
  138                                     LO neigh = neighOfINode(j);
 
  142                                     if (graph.isLocalNeighborVertex(neigh) &&
 
  144                                       Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
 
  145                                                          connectWeight(neigh));
 
  148                                   int bestScore   = -100000;
 
  150                                   int bestConnect = -1;
 
  152                                   for (
int j = 0; j < neighOfINode.length; j++) {
 
  153                                     LO neigh = neighOfINode(j);
 
  155                                     if (graph.isLocalNeighborVertex(neigh) &&
 
  157                                       auto aggId = vertex2AggId(neigh, 0);
 
  158                                       int score = aggWeight(aggId) - aggPenalties(aggId);
 
  160                                       if (score > bestScore) {
 
  163                                         bestConnect = connectWeight(neigh);
 
  165                                       } 
else if (aggId == bestAggId &&
 
  166                                                  connectWeight(neigh) > bestConnect) {
 
  167                                         bestConnect = connectWeight(neigh);
 
  171                                   if (bestScore >= 0) {
 
  173                                     vertex2AggId(i, 0) = bestAggId;
 
  174                                     procWinner(i, 0)   = myRank;
 
  176                                     Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
 
  177                                     connectWeight(i) = bestConnect - penaltyConnectWeight;
 
  181         numNonAggregatedNodes -= numAggregated;
 
  189   template <
class LO, 
class GO, 
class Node>
 
  190   void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
 
  191   BuildAggregatesDeterministic(
const ParameterList& params,
 
  192                                const LWGraph_kokkos& graph,
 
  193                                Aggregates_kokkos& aggregates,
 
  195                                LO& numNonAggregatedNodes)
 const {
 
  196     using memory_space    = 
typename LWGraph_kokkos::memory_space;
 
  197     using execution_space = 
typename LWGraph_kokkos::execution_space;
 
  199     const LO  numRows = graph.GetNodeNumVertices();
 
  200     const int myRank  = graph.GetComm()->getRank();
 
  202     auto vertex2AggId     = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
 
  203     auto procWinner       = aggregates.GetProcWinner()  ->template getLocalView<memory_space>();
 
  204     auto colors           = aggregates.GetGraphColors();
 
  205     const LO numColors    = aggregates.GetGraphNumColors();
 
  206     LO numLocalAggregates = aggregates.GetNumAggregates();
 
  208     const int defaultConnectWeight = 100;
 
  209     const int penaltyConnectWeight = 10;
 
  225     int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
 
  226     if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
 
  227     for (
int iter = 0; 
iter < maxIters; ++
iter) {
 
  228       for(
LO color = 1; color <= numColors; color++) {
 
  233         LO numAggregated = 0;
 
  236           KOKKOS_LAMBDA (
const LO i)
 
  238             if (aggStat(i) != 
READY || colors(i) != color)
 
  240             auto neighOfINode = graph.getNeighborVertices(i);
 
  241             for (
int j = 0; j < neighOfINode.length; j++) {
 
  242               LO neigh = neighOfINode(j);
 
  245               if (graph.isLocalNeighborVertex(neigh) &&
 
  247               Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
 
  248                   connectWeight(neigh));
 
  251         execution_space().fence();
 
  254           KOKKOS_LAMBDA (
const LO i, 
LO& tmpNumAggregated)
 
  256             if (aggStat(i) != 
READY || colors(i) != color)
 
  258             int bestScore   = -100000;
 
  260             int bestConnect = -1;
 
  262             auto neighOfINode = graph.getNeighborVertices(i);
 
  263             for (
int j = 0; j < neighOfINode.length; j++) {
 
  264               LO neigh = neighOfINode(j);
 
  266               if (graph.isLocalNeighborVertex(neigh) &&
 
  268                 auto aggId = vertex2AggId(neigh, 0);
 
  269                 int score = aggWeight(aggId) - aggPenalties(aggId);
 
  271                 if (score > bestScore) {
 
  274                   bestConnect = connectWeight(neigh);
 
  276                 } 
else if (aggId == bestAggId &&
 
  277                     connectWeight(neigh) > bestConnect) {
 
  278                   bestConnect = connectWeight(neigh);
 
  282             if (bestScore >= 0) {
 
  284               vertex2AggId(i, 0) = bestAggId;
 
  285               procWinner(i, 0)   = myRank;
 
  287               Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
 
  288               connectWeight(i) = bestConnect - penaltyConnectWeight;
 
  292         execution_space().fence();
 
  295           KOKKOS_LAMBDA (
const LO agg)
 
  297             aggPenalties(agg) += aggPenaltyUpdates(agg);
 
  298             aggPenaltyUpdates(agg) = 0;
 
  300         numNonAggregatedNodes -= numAggregated;
 
  306 #endif // HAVE_MUELU_KOKKOS_REFACTOR 
  307 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
 
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
 
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)