46 #ifndef MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 
   47 #define MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 
   49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 
   51 #include <Teuchos_Comm.hpp> 
   52 #include <Teuchos_CommHelpers.hpp> 
   54 #include <Xpetra_Vector.hpp> 
   58 #include "MueLu_Aggregates_kokkos.hpp" 
   60 #include "MueLu_LWGraph_kokkos.hpp" 
   // AggregationPhase2bAlgorithm_kokkos::BuildAggregates
   //
   // Entry point that selects one of the two implementations defined later in
   // this file, based on the boolean parameter "aggregation: deterministic":
   //   - when it is true, BuildAggregatesDeterministic is called under a Monitor
   //     labelled "BuildAggregatesDeterministic";
   //   - the other visible call goes to BuildAggregatesRandom under a Monitor
   //     labelled "BuildAggregatesRandom".
   // params, graph, aggregates, aggStat and numNonAggregatedNodes are forwarded
   // unchanged to whichever implementation is chosen.
   //
   // NOTE(review): this listing skips embedded source lines 72, 74, 78 and 81-83,
   // so the declaration of aggStat, the keyword separating the two branches
   // (presumably `else`) and the closing braces are not visible here -- confirm
   // against the full file.
   67   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class Node>
 
   68   void AggregationPhase2bAlgorithm_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
 
   69   BuildAggregates(
const ParameterList& params,
 
   70                   const LWGraph_kokkos& graph,
 
   71                   Aggregates_kokkos& aggregates,
 
   73                   LO& numNonAggregatedNodes)
 const {
 
          // Read the boolean switch that picks the implementation.
   75     if(params.get<
bool>(
"aggregation: deterministic")) {
 
   76       Monitor m(*
this, 
"BuildAggregatesDeterministic");
 
   77       BuildAggregatesDeterministic(params, graph, aggregates, aggStat, numNonAggregatedNodes);
 
            // Second branch (its introducing line is not shown in this listing).
   79       Monitor m(*
this, 
"BuildAggregatesRandom");
 
   80       BuildAggregatesRandom(params, graph, aggregates, aggStat, numNonAggregatedNodes);
 
   // AggregationPhase2bAlgorithm_kokkos::BuildAggregatesRandom
   //
   // For each iteration and each graph color, runs one kernel over node indices i.
   // A node is processed only if aggStat(i) is READY and colors(i) equals the
   // current color. In that single kernel body the node
   //   1. atomically adds connectWeight(neigh) into aggWeight of each qualifying
   //      neighbor's aggregate,
   //   2. scans its neighbors again and scores each neighbor aggregate as
   //      aggWeight(aggId) - aggPenalties(aggId), tracking the best score and the
   //      largest connectWeight seen for the best aggregate,
   //   3. if the best score is >= 0, writes bestAggId into vertex2AggId(i, 0),
   //      writes myRank into procWinner(i, 0), atomically increments
   //      aggPenalties(bestAggId) and sets connectWeight(i) to
   //      bestConnect - penaltyConnectWeight.
   // After each color, numAggregated is subtracted from numNonAggregatedNodes.
   //
   // NOTE(review): this listing omits many embedded source lines (e.g. 90, 92,
   // 104-118, 123-129, 132-133, 137-139, 141, 159-160, 173 and the closing
   // braces). The declarations of aggStat, aggWeight, aggPenalties,
   // connectWeight, bestAggId and maxIters, the kernel launch call, and the
   // second half of the neighbor conditions are therefore not visible here.
   85   template <
class LO, 
class GO, 
class Node>
 
   86   void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
 
   87   BuildAggregatesRandom(
const ParameterList& params,
 
   88                         const LWGraph_kokkos& graph,
 
   89                         Aggregates_kokkos& aggregates,
 
   91                         LO& numNonAggregatedNodes)
 const {
 
   93     const LO  numRows = graph.GetNodeNumVertices();
 
   94     const int myRank  = graph.GetComm()->getRank();
 
          // Local views and coloring data obtained from the aggregates object.
   96     auto vertex2AggId           = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
 
   97     auto procWinner             = aggregates.GetProcWinner()  ->template getLocalView<memory_space>();
 
   98     auto colors                 = aggregates.GetGraphColors();
 
   99     const LO numColors          = aggregates.GetGraphNumColors();
 
  100     const LO numLocalAggregates = aggregates.GetNumAggregates();
 
          // penaltyConnectWeight is subtracted from bestConnect when a node joins an
          // aggregate (end of the kernel). The use of defaultConnectWeight is not
          // visible in this listing.
  102     const LO defaultConnectWeight = 100;
 
  103     const LO penaltyConnectWeight = 10;
 
  119     int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
 
          // When the size parameter equals INT_MAX (presumably meaning "no cap" --
          // TODO confirm) only one iteration is run. maxIters is declared on a line
          // that is not shown here.
  120     if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
 
  121     for (
int iter = 0; 
iter < maxIters; ++
iter) {
 
            // Colors are visited from 1 through numColors inclusive.
  122       for(LO color = 1; color <= numColors; ++color) {
 
  127         LO numAggregated = 0;
 
                                      // Kernel body; tmpNumAggregated is the per-call reduction
                                      // accumulator (its update is on a line not shown here).
  130                                 KOKKOS_LAMBDA (
const LO i, LO& tmpNumAggregated) {
 
                                        // Nodes that are not READY or not of the current color take the
                                        // statement on the following omitted line (presumably `return;`).
  131                                   if (aggStat(i) != 
READY || colors(i) != color)
 
  134                                   auto neighOfINode = graph.getNeighborVertices(i);
 
                                        // First neighbor sweep: accumulate connection weights per aggregate.
  135                                   for (
int j = 0; j < neighOfINode.length; j++) {
 
  136                                     LO neigh = neighOfINode(j);
 
                                          // Only local neighbors are considered; the second half of this
                                          // condition is on an omitted line.
  140                                     if (graph.isLocalNeighborVertex(neigh) &&
 
  142                                       Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
 
  143                                                          connectWeight(neigh));
 
                                        // Second neighbor sweep: pick the aggregate with the highest score.
  146                                   int bestScore   = -100000;
 
  148                                   int bestConnect = -1;
 
  150                                   for (
int j = 0; j < neighOfINode.length; j++) {
 
  151                                     LO neigh = neighOfINode(j);
 
  153                                     if (graph.isLocalNeighborVertex(neigh) &&
 
  155                                       auto aggId = vertex2AggId(neigh, 0);
 
                                            // Score = accumulated weight minus the aggregate's penalty.
  156                                       int score = aggWeight(aggId) - aggPenalties(aggId);
 
  158                                       if (score > bestScore) {
 
  161                                         bestConnect = connectWeight(neigh);
 
                                            // Same aggregate as the current best: keep the largest
                                            // connectWeight among its neighbors.
  163                                       } 
else if (aggId == bestAggId &&
 
  164                                                  connectWeight(neigh) > bestConnect) {
 
  165                                         bestConnect = connectWeight(neigh);
 
                                        // Join the chosen aggregate only when its score is non-negative.
  169                                   if (bestScore >= 0) {
 
  171                                     vertex2AggId(i, 0) = bestAggId;
 
  172                                     procWinner(i, 0)   = myRank;
 
                                          // The penalty is applied immediately and atomically.
  174                                     Kokkos::atomic_add(&aggPenalties(bestAggId), 1);
 
  175                                     connectWeight(i) = bestConnect - penaltyConnectWeight;
 
              // Remove the nodes counted for this color from the running total.
  179         numNonAggregatedNodes -= numAggregated;
 
  // AggregationPhase2bAlgorithm_kokkos::BuildAggregatesDeterministic
  //
  // For each iteration and each graph color, three separate kernel bodies are
  // visible, with execution_space().fence() calls after the first and second:
  //   1. (const LO i): for nodes whose aggStat is READY and whose color equals
  //      the current color, atomically adds connectWeight(neigh) into aggWeight
  //      of each qualifying neighbor's aggregate.
  //   2. (const LO i, LO& tmpNumAggregated): for the same set of nodes, scores
  //      each neighbor aggregate as aggWeight(aggId) - aggPenalties(aggId); if
  //      the best score is >= 0, writes bestAggId into vertex2AggId(i, 0) and
  //      myRank into procWinner(i, 0), atomically increments
  //      aggPenaltyUpdates(bestAggId) and sets connectWeight(i) to
  //      bestConnect - penaltyConnectWeight.
  //   3. (const LO agg): adds aggPenaltyUpdates(agg) into aggPenalties(agg) and
  //      resets aggPenaltyUpdates(agg) to 0.
  // After each color, numAggregated is subtracted from numNonAggregatedNodes.
  //
  // NOTE(review): this listing omits many embedded source lines (e.g. 192, 194,
  // 206-220, 225-228, 230-231, 233, 235, 239-240, 242, 268-269, 282 and the
  // closing braces). The declarations of aggStat, aggWeight, aggPenalties,
  // aggPenaltyUpdates, connectWeight, bestAggId and maxIters, the kernel launch
  // calls, and the second half of the neighbor conditions are not visible here.
  187   template <
class LO, 
class GO, 
class Node>
 
  188   void AggregationPhase2bAlgorithm_kokkos<LO, GO, Node>::
 
  189   BuildAggregatesDeterministic(
const ParameterList& params,
 
  190                                const LWGraph_kokkos& graph,
 
  191                                Aggregates_kokkos& aggregates,
 
  193                                LO& numNonAggregatedNodes)
 const {
 
  195     const LO  numRows = graph.GetNodeNumVertices();
 
  196     const int myRank  = graph.GetComm()->getRank();
 
          // Local views and coloring data obtained from the aggregates object.
  198     auto vertex2AggId     = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
 
  199     auto procWinner       = aggregates.GetProcWinner()  ->template getLocalView<memory_space>();
 
  200     auto colors           = aggregates.GetGraphColors();
 
  201     const LO numColors    = aggregates.GetGraphNumColors();
 
  202     LO numLocalAggregates = aggregates.GetNumAggregates();
 
          // penaltyConnectWeight is subtracted from bestConnect when a node joins an
          // aggregate. The use of defaultConnectWeight is not visible in this listing.
  204     const int defaultConnectWeight = 100;
 
  205     const int penaltyConnectWeight = 10;
 
  221     int maxNodesPerAggregate = params.get<
int>(
"aggregation: max agg size");
 
          // When the size parameter equals INT_MAX (presumably meaning "no cap" --
          // TODO confirm) only one iteration is run. maxIters is declared on a line
          // that is not shown here.
  222     if(maxNodesPerAggregate == std::numeric_limits<int>::max()) {maxIters = 1;}
 
  223     for (
int iter = 0; 
iter < maxIters; ++
iter) {
 
            // Colors are visited from 1 through numColors inclusive.
  224       for(LO color = 1; color <= numColors; color++) {
 
  229         LO numAggregated = 0;
 
                // Kernel 1: accumulate connection weights per aggregate.
  232           KOKKOS_LAMBDA (
const LO i)
 
                  // Nodes that are not READY or not of the current color take the
                  // statement on the following omitted line (presumably `return;`).
  234             if (aggStat(i) != 
READY || colors(i) != color)
 
  236             auto neighOfINode = graph.getNeighborVertices(i);
 
  237             for (
int j = 0; j < neighOfINode.length; j++) {
 
  238               LO neigh = neighOfINode(j);
 
                    // Only local neighbors are considered; the second half of this
                    // condition is on an omitted line.
  241               if (graph.isLocalNeighborVertex(neigh) &&
 
  243               Kokkos::atomic_add(&aggWeight(vertex2AggId(neigh, 0)),
 
  244                   connectWeight(neigh));
 
              // Fence placed between the weight-accumulation kernel and the selection kernel.
  247         execution_space().fence();
 
                // Kernel 2: choose an aggregate per node; tmpNumAggregated is the
                // reduction accumulator (its update is on a line not shown here).
  250           KOKKOS_LAMBDA (
const LO i, LO& tmpNumAggregated)
 
  252             if (aggStat(i) != 
READY || colors(i) != color)
 
  254             int bestScore   = -100000;
 
  256             int bestConnect = -1;
 
  258             auto neighOfINode = graph.getNeighborVertices(i);
 
  259             for (
int j = 0; j < neighOfINode.length; j++) {
 
  260               LO neigh = neighOfINode(j);
 
  262               if (graph.isLocalNeighborVertex(neigh) &&
 
  264                 auto aggId = vertex2AggId(neigh, 0);
 
                      // Score = accumulated weight minus the aggregate's penalty.
  265                 int score = aggWeight(aggId) - aggPenalties(aggId);
 
  267                 if (score > bestScore) {
 
  270                   bestConnect = connectWeight(neigh);
 
                      // Same aggregate as the current best: keep the largest
                      // connectWeight among its neighbors.
  272                 } 
else if (aggId == bestAggId &&
 
  273                     connectWeight(neigh) > bestConnect) {
 
  274                   bestConnect = connectWeight(neigh);
 
                  // Join the chosen aggregate only when its score is non-negative.
  278             if (bestScore >= 0) {
 
  280               vertex2AggId(i, 0) = bestAggId;
 
  281               procWinner(i, 0)   = myRank;
 
                    // The penalty increment goes to aggPenaltyUpdates rather than
                    // aggPenalties; aggPenalties itself is only modified in kernel 3.
  283               Kokkos::atomic_add(&aggPenaltyUpdates(bestAggId), 1);
 
  284               connectWeight(i) = bestConnect - penaltyConnectWeight;
 
              // Fence placed between the selection kernel and the penalty-merge kernel.
  288         execution_space().fence();
 
                // Kernel 3: fold the buffered penalty increments into aggPenalties
                // and clear the buffer.
  291           KOKKOS_LAMBDA (
const LO agg)
 
  293             aggPenalties(agg) += aggPenaltyUpdates(agg);
 
  294             aggPenaltyUpdates(agg) = 0;
 
              // Remove the nodes counted for this color from the running total.
  296         numNonAggregatedNodes -= numAggregated;
 
  302 #endif // HAVE_MUELU_KOKKOS_REFACTOR 
  303 #endif // MUELU_AGGREGATIONPHASE2BALGORITHM_KOKKOS_DEF_HPP 
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)