MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UncoupledAggregationFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_
11 #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_
12 
13 #include <climits>
14 
15 #include <Xpetra_Map.hpp>
16 #include <Xpetra_Vector.hpp>
17 #include <Xpetra_MultiVectorFactory.hpp>
18 #include <Xpetra_VectorFactory.hpp>
19 #include <sstream>
20 
22 
23 #include "MueLu_InterfaceAggregationAlgorithm.hpp"
24 #include "MueLu_OnePtAggregationAlgorithm.hpp"
25 #include "MueLu_PreserveDirichletAggregationAlgorithm.hpp"
26 
27 #include "MueLu_AggregationPhase1Algorithm.hpp"
28 #include "MueLu_AggregationPhase2aAlgorithm.hpp"
29 #include "MueLu_AggregationPhase2bAlgorithm.hpp"
30 #include "MueLu_AggregationPhase3Algorithm.hpp"
31 
32 #include "MueLu_Level.hpp"
33 #include "MueLu_LWGraph.hpp"
34 #include "MueLu_Aggregates.hpp"
35 #include "MueLu_MasterList.hpp"
36 #include "MueLu_Monitor.hpp"
37 
38 #include "KokkosGraph_Distance2ColorHandle.hpp"
39 #include "KokkosGraph_Distance2Color.hpp"
40 #include "KokkosGraph_MIS2.hpp"
41 
42 namespace MueLu {
43 
// Constructor: the only visible effect is initialising bDefinitionPhase_ to true.
// NOTE(review): the line holding the constructor name/signature (listing line 45)
// is missing from this extracted listing.
44 template <class LocalOrdinal, class GlobalOrdinal, class Node>
46  : bDefinitionPhase_(true) {}
47 
48 template <class LocalOrdinal, class GlobalOrdinal, class Node>
50 
// GetValidParameterList(): creates a fresh ParameterList, fills it with every
// parameter this factory accepts (aggregation options copied from the MasterList,
// factory dependencies, and the OnePt / interface map settings) and returns it.
// NOTE(review): the line holding the function signature (listing line 52) is
// missing from this extracted listing.
51 template <class LocalOrdinal, class GlobalOrdinal, class Node>
53  RCP<ParameterList> validParamList = rcp(new ParameterList());
54 
55  // Aggregation parameters (used in aggregation algorithms)
56  // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
57 
// Helper macro: copy the MasterList entry called `name` into validParamList.
// It is #undef'd again right after the last use below.
58 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
59  SET_VALID_ENTRY("aggregation: max agg size");
60  SET_VALID_ENTRY("aggregation: min agg size");
61  SET_VALID_ENTRY("aggregation: max selected neighbors");
62  SET_VALID_ENTRY("aggregation: ordering");
// Restrict "aggregation: ordering" to the three strings listed in the validator.
63  validParamList->getEntry("aggregation: ordering").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("natural", "graph", "random"))));
64  SET_VALID_ENTRY("aggregation: deterministic");
65  SET_VALID_ENTRY("aggregation: coloring algorithm");
66  SET_VALID_ENTRY("aggregation: enable phase 1");
67  SET_VALID_ENTRY("aggregation: enable phase 2a");
68  SET_VALID_ENTRY("aggregation: enable phase 2b");
69  SET_VALID_ENTRY("aggregation: enable phase 3");
70  SET_VALID_ENTRY("aggregation: match ML phase1");
71  SET_VALID_ENTRY("aggregation: match ML phase2a");
72  SET_VALID_ENTRY("aggregation: match ML phase2b");
73  SET_VALID_ENTRY("aggregation: phase2a agg factor");
74  SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
75  SET_VALID_ENTRY("aggregation: allow user-specified singletons");
76  SET_VALID_ENTRY("aggregation: use interface aggregation");
77  SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors");
78  SET_VALID_ENTRY("aggregation: phase3 avoid singletons");
79  SET_VALID_ENTRY("aggregation: phase 1 algorithm");
80  SET_VALID_ENTRY("aggregation: backend");
// Restrict "aggregation: backend" to "default", "host" or "kokkos".
81  validParamList->getEntry("aggregation: backend").setValidator(rcp(new Teuchos::StringValidator(Teuchos::tuple<std::string>("default", "host", "kokkos"))));
82 #undef SET_VALID_ENTRY
83 
84  // general variables needed in AggregationFactory
85  validParamList->set<RCP<const FactoryBase>>("Graph", null, "Generating factory of the graph");
86  validParamList->set<RCP<const FactoryBase>>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
87 
88  // special variables necessary for OnePtAggregationAlgorithm
// Both entries are plain strings; an empty map name means "no OnePt map"
// (DeclareInput and Build only look the map up when the name is non-empty).
89  validParamList->set<std::string>("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
90  validParamList->set<std::string>("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
91  // validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
92 
93  // InterfaceAggregation parameters
94  // validParamList->set< bool > ("aggregation: use interface aggregation", "false", "Flag to trigger aggregation along an interface using specified aggregate seeds.");
95  validParamList->set<std::string>("Interface aggregate map name", "", "Name of input map for interface aggregates. (default='')");
96  validParamList->set<std::string>("Interface aggregate map factory", "", "Generating factory of (DOF) map for interface aggregates.");
97  validParamList->set<RCP<const FactoryBase>>("nodeOnInterface", Teuchos::null, "Array specifying whether or not a node is on the interface (1 or 0).");
98 
99  return validParamList;
100 }
101 
// DeclareInput(Level& currentLevel): declares the data this factory reads from
// currentLevel:
//  - "Graph" and "DofsPerNode" (always);
//  - the OnePt aggregate map, if "OnePt aggregate map name" is non-empty, either as
//    user data (NoFactory) or from the factory named in "OnePt aggregate map factory";
//  - "nodeOnInterface", if "aggregation: use interface aggregation" is true. On
//    level 0 it must already be available as user data, otherwise an exception is
//    thrown; on other levels it is requested through the regular factory mechanism.
// NOTE(review): the line holding the function signature (listing line 103) is
// missing from this extracted listing.
102 template <class LocalOrdinal, class GlobalOrdinal, class Node>
104  Input(currentLevel, "Graph");
105  Input(currentLevel, "DofsPerNode");
106 
107  const ParameterList& pL = GetParameterList();
108 
109  // request special data necessary for OnePtAggregationAlgorithm
110  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
111  if (mapOnePtName.length() > 0) {
112  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
113  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
114  currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
115  } else {
116  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
117  currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
118  }
119  }
120 
121  // request special data necessary for InterfaceAggregation
122  if (pL.get<bool>("aggregation: use interface aggregation") == true) {
123  if (currentLevel.GetLevelID() == 0) {
124  if (currentLevel.IsAvailable("nodeOnInterface", NoFactory::get())) {
125  currentLevel.DeclareInput("nodeOnInterface", NoFactory::get(), this);
126  } else {
// Fix: this branch is only reached when the data is NOT available, so the throw
// condition must be the negated availability test. The original passed the
// (false) availability itself, which meant the error could never be raised.
// NOTE(review): the exception-type argument (listing line 128) is missing from
// this extracted listing.
127  TEUCHOS_TEST_FOR_EXCEPTION(!currentLevel.IsAvailable("nodeOnInterface", NoFactory::get()),
129  "nodeOnInterface was not provided by the user on level0!");
130  }
131  } else {
132  Input(currentLevel, "nodeOnInterface");
133  }
134  }
135 }
136 
137 template <class LocalOrdinal, class GlobalOrdinal, class Node>
139  FactoryMonitor m(*this, "Build", currentLevel);
140 
141  ParameterList pL = GetParameterList();
142  bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
143 
144  if (pL.get<int>("aggregation: max agg size") == -1)
145  pL.set("aggregation: max agg size", INT_MAX);
146 
147  // define aggregation algorithms
148  RCP<const FactoryBase> graphFact = GetFactory("Graph");
149 
150  // TODO Can we keep different aggregation algorithms over more Build calls?
151  algos_.clear();
152  algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm(graphFact)));
153  if (pL.get<bool>("aggregation: use interface aggregation") == true) algos_.push_back(rcp(new InterfaceAggregationAlgorithm(graphFact)));
154  if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm(graphFact)));
155  if (pL.get<bool>("aggregation: enable phase 1") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm(graphFact)));
156  if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm(graphFact)));
157  if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm(graphFact)));
158  if (pL.get<bool>("aggregation: enable phase 3") == true) algos_.push_back(rcp(new AggregationPhase3Algorithm(graphFact)));
159 
160  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
161  RCP<Map> OnePtMap = Teuchos::null;
162  if (mapOnePtName.length()) {
163  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
164  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
165  OnePtMap = currentLevel.Get<RCP<Map>>(mapOnePtName, NoFactory::get());
166  } else {
167  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
168  OnePtMap = currentLevel.Get<RCP<Map>>(mapOnePtName, mapOnePtFact.get());
169  }
170  }
171 
172  // Set map for interface aggregates
173  std::string mapInterfaceName = pL.get<std::string>("Interface aggregate map name");
174  RCP<Map> InterfaceMap = Teuchos::null;
175 
176  RCP<const LWGraph> graph;
177  RCP<const LWGraph_kokkos> graph_kokkos;
178  RCP<Aggregates> aggregates;
180  LO numRows;
181 
182  const std::string aggregationBackend = pL.get<std::string>("aggregation: backend");
183 
184  // "Graph" can have type "LWGraph" or "LWGraph_kokkos".
185  // The aggregation phases can call either "BuildAggregatesNonKokkos" or "BuildAggregates".
186 
187  // "aggregation: backend" can take values "default", "non-Kokkos" or "Kokkos".
188  // "default": run depending on the type of "Graph"
189  // "non-Kokkos": run the non-Kokkos aggregation, moving "Graph" to host if necessary
190  // "Kokkos": run the Kokkos aggregation, potentially move "Graph", moving "Graph" to device if necessary
191 
192  bool runOnHost;
193  if (IsType<RCP<LWGraph>>(currentLevel, "Graph")) {
194  if ((aggregationBackend == "default") || (aggregationBackend == "non-Kokkos")) {
195  graph = Get<RCP<LWGraph>>(currentLevel, "Graph");
196  aggregates = rcp(new Aggregates(*graph));
197  comm = graph->GetComm();
198  numRows = graph->GetNodeNumVertices();
199  runOnHost = true;
200  } else {
201  RCP<LWGraph> tmp_graph = Get<RCP<LWGraph>>(currentLevel, "Graph");
202  graph_kokkos = tmp_graph->copyToDevice();
203  aggregates = rcp(new Aggregates(*graph_kokkos));
204  comm = graph_kokkos->GetComm();
205  numRows = graph_kokkos->GetNodeNumVertices();
206  runOnHost = false;
207  }
208  } else if (IsType<RCP<LWGraph_kokkos>>(currentLevel, "Graph")) {
209  if ((aggregationBackend == "default") || (aggregationBackend == "Kokkos")) {
210  graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel, "Graph");
211  aggregates = rcp(new Aggregates(*graph_kokkos));
212  comm = graph_kokkos->GetComm();
213  numRows = graph_kokkos->GetNodeNumVertices();
214  runOnHost = false;
215  } else {
216  RCP<LWGraph_kokkos> tmp_graph_kokkos = Get<RCP<LWGraph_kokkos>>(currentLevel, "Graph");
217  graph = tmp_graph_kokkos->copyToHost();
218  aggregates = rcp(new Aggregates(*graph));
219  comm = graph->GetComm();
220  numRows = graph->GetNodeNumVertices();
221  runOnHost = true;
222  }
223  } else {
224  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Graph has bad type.");
225  }
226 
227  if (!runOnHost) {
228  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("aggregation: use interface aggregation"), std::invalid_argument, "Option: 'aggregation: use interface aggregation' is not supported in the Kokkos version of uncoupled aggregation");
229  // Sanity Checking: match ML behavior is not supported in UncoupledAggregation_Kokkos in Phase 1 , but it is in 2a and 2b
230  TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("aggregation: match ML phase1"), std::invalid_argument, "Option: 'aggregation: match ML phase1' is not supported in the Kokkos version of uncoupled aggregation");
231  }
232 
233  // Build
234  aggregates->setObjectLabel("UC");
235 
236  // construct aggStat information
239  AggStatHostType aggStatHost;
240  AggStatType aggStat;
241 
242  if (runOnHost) {
243  aggStatHost = AggStatHostType(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), numRows);
244  Kokkos::deep_copy(aggStatHost, READY);
245  } else {
246  aggStat = AggStatType(Kokkos::ViewAllocateWithoutInitializing("aggregation status"), numRows);
247  Kokkos::deep_copy(aggStat, READY);
248  }
249 
250  // interface
251  if (pL.get<bool>("aggregation: use interface aggregation") == true) {
252  Teuchos::Array<LO> nodeOnInterface = Get<Array<LO>>(currentLevel, "nodeOnInterface");
253  for (LO i = 0; i < numRows; i++) {
254  if (nodeOnInterface[i])
255  aggStatHost[i] = INTERFACE;
256  }
257  }
258 
259  // Dirichlet nodes
260  {
261  if (runOnHost) {
262  auto dirichletBoundaryMap = graph->GetBoundaryNodeMap();
263  Kokkos::parallel_for(
264  "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
265  Kokkos::RangePolicy<LocalOrdinal, typename LWGraph::execution_space>(0, numRows),
266  KOKKOS_LAMBDA(const LocalOrdinal nodeIdx) {
267  if (dirichletBoundaryMap(nodeIdx) == true) {
268  aggStatHost(nodeIdx) = BOUNDARY;
269  }
270  });
271  } else {
272  auto dirichletBoundaryMap = graph_kokkos->GetBoundaryNodeMap();
273  Kokkos::parallel_for(
274  "MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
275  Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
276  KOKKOS_LAMBDA(const LocalOrdinal nodeIdx) {
277  if (dirichletBoundaryMap(nodeIdx) == true) {
278  aggStat(nodeIdx) = BOUNDARY;
279  }
280  });
281  }
282  }
283 
284  if (OnePtMap != Teuchos::null) {
285  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
286 
287  if (runOnHost) {
288  GO indexBase = graph->GetDomainMap()->getIndexBase();
289  for (LO i = 0; i < numRows; i++) {
290  // reconstruct global row id (FIXME only works for contiguous maps)
291  GO grid = (graph->GetDomainMap()->getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
292 
293  for (LO kr = 0; kr < nDofsPerNode; kr++)
294  if (OnePtMap->isNodeGlobalElement(grid + kr))
295  aggStatHost(i) = ONEPT;
296  }
297  } else {
298  GO indexBase = graph_kokkos->GetDomainMap()->getIndexBase();
299  auto lclDomainMap = graph_kokkos->GetDomainMap()->getLocalMap();
300  auto lclOnePtMap = OnePtMap->getLocalMap();
301  const LocalOrdinal INVALID = Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid();
302  Kokkos::parallel_for(
303  "MueLu - UncoupledAggregation: tagging OnePt map",
304  Kokkos::RangePolicy<LocalOrdinal, typename LWGraph_kokkos::execution_space>(0, numRows),
305  KOKKOS_LAMBDA(const LocalOrdinal i) {
306  // reconstruct global row id (FIXME only works for contiguous maps)
307  GO grid = (lclDomainMap.getGlobalElement(i) - indexBase) * nDofsPerNode + indexBase;
308 
309  for (LO kr = 0; kr < nDofsPerNode; kr++)
310  if (lclOnePtMap.getLocalElement(grid + kr) != INVALID)
311  aggStat(i) = ONEPT;
312  });
313  }
314  }
315 
316  LO numNonAggregatedNodes = numRows;
317  std::string aggAlgo = pL.get<std::string>("aggregation: coloring algorithm");
318  if (aggAlgo == "mis2 coarsening" || aggAlgo == "mis2 aggregation") {
319  TEUCHOS_ASSERT(!runOnHost);
320 
321  SubFactoryMonitor sfm(*this, "Algo \"MIS2\"", currentLevel);
322  using graph_t = typename LWGraph_kokkos::local_graph_type;
323  using device_t = typename graph_t::device_type;
324  using exec_space = typename device_t::execution_space;
325  using rowmap_t = typename graph_t::row_map_type;
326  using colinds_t = typename graph_t::entries_type;
327  using lno_t = typename colinds_t::non_const_value_type;
328  rowmap_t aRowptrs = graph_kokkos->getRowPtrs();
329  colinds_t aColinds = graph_kokkos->getEntries();
330  lno_t numAggs = 0;
331  typename colinds_t::non_const_type labels;
332 
333  if (aggAlgo == "mis2 coarsening") {
334  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 coarsening" << std::endl;
335  labels = KokkosGraph::graph_mis2_coarsen<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
336  } else if (aggAlgo == "mis2 aggregation") {
337  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: MIS-2 aggregation" << std::endl;
338  labels = KokkosGraph::graph_mis2_aggregate<device_t, rowmap_t, colinds_t>(aRowptrs, aColinds, numAggs);
339  }
340  {
341  {
342  // find aggregates that are not empty
343  Kokkos::View<bool*, typename device_t::memory_space> has_nodes("has_nodes", numAggs);
344  Kokkos::parallel_for(
345  Kokkos::RangePolicy<exec_space>(0, numRows),
346  KOKKOS_LAMBDA(lno_t i) {
347  if (aggStat(i) == READY)
348  Kokkos::atomic_assign(&has_nodes(labels(i)), true);
349  });
350 
351  // compute aggIds for non-empty aggs
352  Kokkos::View<LO*, typename device_t::memory_space> new_labels("new_labels", numAggs);
353  Kokkos::parallel_scan(
354  Kokkos::RangePolicy<exec_space>(0, numAggs),
355  KOKKOS_LAMBDA(lno_t i, lno_t & update, const bool is_final) {
356  if (is_final)
357  new_labels(i) = update;
358  if (has_nodes(i))
359  ++update;
360  },
361  numAggs);
362 
363  // reassign aggIds
364  Kokkos::parallel_for(
365  Kokkos::RangePolicy<exec_space>(0, numRows),
366  KOKKOS_LAMBDA(lno_t i) {
367  labels(i) = new_labels(labels(i));
368  });
369  }
370 
371  auto vertex2AggId = aggregates->GetVertex2AggId()->getLocalViewDevice(Xpetra::Access::ReadWrite);
372  auto procWinner = aggregates->GetProcWinner()->getLocalViewDevice(Xpetra::Access::OverwriteAll);
373  int rank = comm->getRank();
374  Kokkos::parallel_for(
375  Kokkos::RangePolicy<exec_space>(0, numRows),
376  KOKKOS_LAMBDA(lno_t i) {
377  if (aggStat(i) == READY) {
378 #ifdef HAVE_MUELU_DEBUG
379  KOKKOS_ASSERT(labels(i) >= 0);
380 #endif
381  procWinner(i, 0) = rank;
382  aggStat(i) = AGGREGATED;
383  vertex2AggId(i, 0) = labels(i);
384  } else {
385  procWinner(i, 0) = MUELU_UNASSIGNED;
386  aggStat(i) = IGNORED;
387  vertex2AggId(i, 0) = MUELU_UNAGGREGATED;
388  }
389  });
390  }
391  numNonAggregatedNodes = 0;
392  aggregates->SetNumAggregates(numAggs);
393  } else {
394  if (!runOnHost) {
395  DoGraphColoring(currentLevel, aggAlgo, pL.get<bool>("aggregation: deterministic"), graph_kokkos, aggregates);
396  if (IsPrint(Statistics1)) {
397  GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl;
398  }
399  }
400 
401  std::vector<GO> localStats;
402  if (IsPrint(Statistics1)) {
403  localStats = std::vector<GO>(1 + 2 * algos_.size());
404  localStats[0] = numRows;
405  }
406  for (size_t a = 0; a < algos_.size(); a++) {
407  std::string phase = algos_[a]->description();
408 
409  SubFactoryMonitor sfm2(*this, "Algo \"" + phase + "\"" + (numNonAggregatedNodes == 0 ? " [skipped since no nodes are left to aggregate]" : ""), currentLevel);
410  int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
411  if (numNonAggregatedNodes > 0) {
412  if (runOnHost)
413  algos_[a]->BuildAggregatesNonKokkos(pL, *graph, *aggregates, aggStatHost, numNonAggregatedNodes);
414  else
415  algos_[a]->BuildAggregates(pL, *graph_kokkos, *aggregates, aggStat, numNonAggregatedNodes);
416  }
417  algos_[a]->SetProcRankVerbose(oldRank);
418 
419  if (IsPrint(Statistics1)) {
420  localStats[2 * a + 1] = numRows - numNonAggregatedNodes; // num local aggregated nodes
421  localStats[2 * a + 2] = aggregates->GetNumAggregates(); // num local aggregates
422  }
423  }
424  if (IsPrint(Statistics1)) {
425  std::vector<GO> globalStats(1 + 2 * algos_.size());
426  Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, (int)localStats.size(), localStats.data(), globalStats.data());
427  GO numGlobalRows = globalStats[0];
428  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
429  std::stringstream ss;
430  for (size_t a = 0; a < algos_.size(); a++) {
431  std::string phase = algos_[a]->description();
432  GO numGlobalAggregated = globalStats[2 * a + 1];
433  GO numGlobalAggs = globalStats[2 * a + 2];
434  GO numGlobalNonAggregatedNodes = numGlobalRows - numGlobalAggregatedPrev;
435  double aggPercent = 100 * as<double>(numGlobalAggregated) / as<double>(numGlobalRows);
436  if (aggPercent > 99.99 && aggPercent < 100.00) {
437  // Due to round off (for instance, for 140465733/140466897), we could
438  // get 100.00% display even if there are some remaining nodes. This
439  // is bad from the users point of view. It is much better to change
440  // it to display 99.99%.
441  aggPercent = 99.99;
442  }
443 
444  ss << "Algo \"" + phase + "\"" + (numGlobalNonAggregatedNodes == 0 ? " [skipped since no nodes are left to aggregate]" : "") << std::endl
445  << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
446  << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
447  << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
448  << " aggregates : " << numGlobalAggs - numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
449  numGlobalAggregatedPrev = numGlobalAggregated;
450  numGlobalAggsPrev = numGlobalAggs;
451  }
452  GetOStream(Statistics1) << ss.str();
453  }
454  }
455 
456  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
457 
458  aggregates->AggregatesCrossProcessors(false);
459  aggregates->ComputeAggregateSizes(true /*forceRecompute*/);
460 
461  Set(currentLevel, "Aggregates", aggregates);
462 }
463 
464 template <class LocalOrdinal, class GlobalOrdinal, class Node>
466  DoGraphColoring(Level& currentLevel,
467  const std::string& aggAlgo,
468  const bool deterministic,
469  const RCP<const LWGraph_kokkos> graph,
470  RCP<Aggregates> aggregates) const {
471  SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel);
472 
473  // LBV on Sept 06 2019: the note below is a little worrisome,
474  // can we guarantee that MueLu is never used on a non-symmetric
475  // graph?
476  // note: just using colinds_view in place of scalar_view_t type
477  // (it won't be used at all by symbolic SPGEMM)
478  using graph_t = typename LWGraph_kokkos::local_graph_type;
479  using KernelHandle = KokkosKernels::Experimental::
480  KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
481  typename graph_t::entries_type::value_type,
482  typename graph_t::entries_type::value_type,
483  typename graph_t::device_type::execution_space,
484  typename graph_t::device_type::memory_space,
485  typename graph_t::device_type::memory_space>;
486  KernelHandle kh;
487  // leave gc algorithm choice as the default
488  kh.create_distance2_graph_coloring_handle();
489 
490  // get the distance-2 graph coloring handle
491  auto coloringHandle = kh.get_distance2_graph_coloring_handle();
492 
493  const LO numRows = graph->GetNodeNumVertices();
494 
495  // Set the distance-2 graph coloring algorithm to use.
496  // Options:
497  // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
498  // COLORING_D2_SERIAL - Use the legacy serial-only implementation
499  // COLORING_D2_VB - Use the parallel vertex based direct method
500  // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
501  // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
502  // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest)
503  if (deterministic) {
504  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
505  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
506  } else if (aggAlgo == "serial") {
507  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_SERIAL);
508  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
509  } else if (aggAlgo == "default") {
510  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_DEFAULT);
511  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl;
512  } else if (aggAlgo == "vertex based") {
513  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB);
514  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl;
515  } else if (aggAlgo == "vertex based bit set") {
516  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT);
517  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl;
518  } else if (aggAlgo == "edge filtering") {
519  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_VB_BIT_EF);
520  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl;
521  } else if (aggAlgo == "net based bit set") {
522  coloringHandle->set_algorithm(KokkosGraph::COLORING_D2_NB_BIT);
523  if (IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl;
524  } else {
525  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
526  }
527 
528  // Create device views for graph rowptrs/colinds
529  typename graph_t::row_map_type aRowptrs = graph->getRowPtrs();
530  typename graph_t::entries_type aColinds = graph->getEntries();
531 
532  // run d2 graph coloring
533  // graph is symmetric so row map/entries and col map/entries are the same
534  {
535  SubFactoryMonitor sfm2(*this, "Algo \"Graph Coloring\": KokkosGraph Call", currentLevel); // CMS HACK
536  KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
537  }
538 
539  // extract the colors and store them in the aggregates
540  aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
541  aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
542 
543  // clean up coloring handle
544  kh.destroy_distance2_graph_coloring_handle();
545 }
546 
547 } // namespace MueLu
548 
549 #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
Kokkos::View< unsigned *, typename LWGraphHostType::device_type > AggStatHostType
#define MUELU_UNASSIGNED
Algorithm for coarsening a graph with uncoupled aggregation. keep special marked nodes as singleton n...
RCP< MueLu::LWGraph< LocalOrdinal, GlobalOrdinal, Node > > copyToHost()
MueLu::DefaultLocalOrdinal LocalOrdinal
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory); if factory == NULL => use default factory.
void DoGraphColoring(Level &currentLevel, const std::string &aggAlgo, const bool deterministic, const RCP< const LWGraph_kokkos > graph, RCP< Aggregates > aggregates) const
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
KOKKOS_INLINE_FUNCTION row_type getRowPtrs() const
Return the row pointers of the local graph.
void SetGraphNumColors(const LO graphNumColors)
Set the number of colors needed by the distance 2 coloring.
Container class for aggregation information.
KOKKOS_INLINE_FUNCTION LO GetNumAggregates() const
typename std::conditional< OnHost, typename local_graph_device_type::HostMirror, local_graph_device_type >::type local_graph_type
void setValidator(RCP< const ParameterEntryValidator > const &validator)
virtual ~UncoupledAggregationFactory()
Destructor.
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
T * get() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
KOKKOS_INLINE_FUNCTION size_type GetNodeNumVertices() const
Return number of graph vertices.
KOKKOS_INLINE_FUNCTION const boundary_nodes_type GetBoundaryNodeMap() const
Returns map with global ids of boundary nodes.
void DeclareInput(Level &currentLevel) const
Input.
static const NoFactory * get()
Algorithm for coarsening a graph with uncoupled aggregation. creates aggregates along an interface us...
Builds one-to-one aggregates for all Dirichlet boundary nodes. For some applications this might be ne...
LO GetGraphNumColors()
Get the number of colors needed by the distance 2 coloring.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:63
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
#define MUELU_UNAGGREGATED
KOKKOS_INLINE_FUNCTION entries_type getEntries() const
Return the list entries in the local graph.
void SetGraphColors(colors_view_type graphColors)
Set a distance 2 coloring of the underlying graph. The coloring is computed and set during Phase1 of ...
virtual void setObjectLabel(const std::string &objectLabel)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
#define SET_VALID_ENTRY(name)
Among unaggregated points, see if we can make a reasonable size aggregate out of it. Idea: Among unaggregated points, see if we can make a reasonable size aggregate out of it. We do this by looking at neighbors and seeing how many are unaggregated and on my processor. Loosely, base the number of new aggregates created on the percentage of unaggregated nodes.
void Build(Level &currentLevel) const
Build aggregates.
Add leftovers to existing aggregates. Idea: In phase 2b non-aggregated nodes are added to existing aggreg...
RCP< MueLu::LWGraph_kokkos< LocalOrdinal, GlobalOrdinal, Node > > copyToDevice()
const RCP< const Map > GetDomainMap() const
KOKKOS_INLINE_FUNCTION void AggregatesCrossProcessors(const bool &flag)
Record whether aggregates include DOFs from other processes.
Algorithm for coarsening a graph with uncoupled aggregation.
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:51
Exception thrown to report errors in the internal logic of the program.
#define TEUCHOS_ASSERT(assertion_test)
Handle leftover nodes. Try to avoid singleton nodes. Idea: In phase 3 we try to stick unaggregated nodes ...
ParameterEntry & getEntry(const std::string &name)
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const Teuchos::Comm< int > > GetComm() const
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
Kokkos::View< unsigned *, typename LWGraphType::device_type > AggStatType
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.