MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UncoupledAggregationFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
47 #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <climits>
52 
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Vector.hpp>
55 #include <Xpetra_MultiVectorFactory.hpp>
56 #include <Xpetra_VectorFactory.hpp>
57 
59 
60 #include "MueLu_OnePtAggregationAlgorithm_kokkos.hpp"
61 #include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos.hpp"
62 #include "MueLu_IsolatedNodeAggregationAlgorithm_kokkos.hpp"
63 
64 #include "MueLu_AggregationPhase1Algorithm_kokkos.hpp"
65 #include "MueLu_AggregationPhase2aAlgorithm_kokkos.hpp"
66 #include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp"
67 #include "MueLu_AggregationPhase3Algorithm_kokkos.hpp"
68 
69 #include "MueLu_Level.hpp"
70 #include "MueLu_LWGraph_kokkos.hpp"
71 #include "MueLu_Aggregates_kokkos.hpp"
72 #include "MueLu_MasterList.hpp"
73 #include "MueLu_Monitor.hpp"
74 #include "MueLu_AmalgamationInfo.hpp"
75 #include "MueLu_Utilities.hpp" // for sum_all and similar stuff...
76 
77 #include "KokkosGraph_Distance2ColorHandle.hpp"
78 #include "KokkosGraph_Distance2Color.hpp"
79 
80 namespace MueLu {
81 
82  template <class LocalOrdinal, class GlobalOrdinal, class Node>
83  UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::UncoupledAggregationFactory_kokkos()
84  : bDefinitionPhase_(true)
85  { }
86 
87  template <class LocalOrdinal, class GlobalOrdinal, class Node>
88  RCP<const ParameterList> UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
89  RCP<ParameterList> validParamList = rcp(new ParameterList());
90 
91  // Aggregation parameters (used in aggregation algorithms)
92  // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
93 
95 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
96  SET_VALID_ENTRY("aggregation: max agg size");
97  SET_VALID_ENTRY("aggregation: min agg size");
98  SET_VALID_ENTRY("aggregation: max selected neighbors");
99  SET_VALID_ENTRY("aggregation: ordering");
100  validParamList->getEntry("aggregation: ordering").setValidator(
101  rcp(new validatorType(Teuchos::tuple<std::string>("natural", "graph", "random"), "aggregation: ordering")));
102  SET_VALID_ENTRY("aggregation: deterministic");
103  SET_VALID_ENTRY("aggregation: coloring algorithm");
104  SET_VALID_ENTRY("aggregation: enable phase 1");
105  SET_VALID_ENTRY("aggregation: enable phase 2a");
106  SET_VALID_ENTRY("aggregation: enable phase 2b");
107  SET_VALID_ENTRY("aggregation: enable phase 3");
108  SET_VALID_ENTRY("aggregation: phase2a include root");
109  SET_VALID_ENTRY("aggregation: phase3 avoid singletons");
110  SET_VALID_ENTRY("aggregation: error on nodes with no on-rank neighbors");
111  SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
112  SET_VALID_ENTRY("aggregation: allow user-specified singletons");
113 #undef SET_VALID_ENTRY
114 
115  // general variables needed in AggregationFactory
116  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
117  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
118 
119  // special variables necessary for OnePtAggregationAlgorithm
120  validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
121  validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
122  //validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
123 
124  return validParamList;
125  }
126 
127  template <class LocalOrdinal, class GlobalOrdinal, class Node>
128  void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
129  Input(currentLevel, "Graph");
130  Input(currentLevel, "DofsPerNode");
131 
132  const ParameterList& pL = GetParameterList();
133 
134  // request special data necessary for OnePtAggregationAlgorithm
135  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
136  if (mapOnePtName.length() > 0) {
137  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
138  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
139  currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
140  } else {
141  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
142  currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
143  }
144  }
145  }
146 
147  template <class LocalOrdinal, class GlobalOrdinal, class Node>
148  void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::
149  Build(Level &currentLevel) const {
150  using execution_space = typename LWGraph_kokkos::execution_space;
151  using memory_space = typename LWGraph_kokkos::memory_space;
152  using local_ordinal_type = typename LWGraph_kokkos::local_ordinal_type;
153  FactoryMonitor m(*this, "Build", currentLevel);
154 
155  ParameterList pL = GetParameterList();
156  bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
157 
158  if (pL.get<int>("aggregation: max agg size") == -1)
159  pL.set("aggregation: max agg size", INT_MAX);
160 
161  // define aggregation algorithms
162  RCP<const FactoryBase> graphFact = GetFactory("Graph");
163 
164  // TODO Can we keep different aggregation algorithms over more Build calls?
165  algos_.clear();
166  algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact)));
167  if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact)));
168  if (pL.get<bool>("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact)));
169  if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact)));
170  if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact)));
171  if (pL.get<bool>("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact)));
172 
173  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
174  RCP<Map> OnePtMap = Teuchos::null;
175  if (mapOnePtName.length()) {
176  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
177  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
178  OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, NoFactory::get());
179  } else {
180  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
181  OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, mapOnePtFact.get());
182  }
183  }
184 
185  RCP<const LWGraph_kokkos> graph = Get< RCP<LWGraph_kokkos> >(currentLevel, "Graph");
186 
187  // Build
188  RCP<Aggregates_kokkos> aggregates = rcp(new Aggregates_kokkos(*graph));
189  aggregates->setObjectLabel("UC");
190 
191  const LO numRows = graph->GetNodeNumVertices();
192 
193  // construct aggStat information
194  Kokkos::View<unsigned*, typename LWGraph_kokkos::memory_space> aggStat(Kokkos::ViewAllocateWithoutInitializing("aggregation status"),
195  numRows);
196  Kokkos::deep_copy(aggStat, READY);
197 
198  // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map
199  // even if the map is correctly extracted from the graph, aggStat is
200  // now a Kokkos::View<unsinged*, memory_space> and filling it will
201  // require a parallel_for or to copy it to the Host which is not really
202  // good from a performance point of view.
203  // If dirichletBoundaryMap was an actual Xpetra::Map, one could call
204  // getLocalMap to have a Kokkos::View on the appropriate memory_space
205  // instead of an ArrayRCP.
206  {
207  typename LWGraph_kokkos::boundary_nodes_type dirichletBoundaryMap = graph->GetBoundaryNodeMap();
208  Kokkos::parallel_for("MueLu - UncoupledAggregation: tagging boundary nodes in aggStat",
210  KOKKOS_LAMBDA(const local_ordinal_type nodeIdx) {
211  if (dirichletBoundaryMap(nodeIdx) == true) {
212  aggStat(nodeIdx) = BOUNDARY;
213  }
214  });
215  }
216 
217  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
218  GO indexBase = graph->GetDomainMap()->getIndexBase();
219  if (OnePtMap != Teuchos::null) {
221  = Kokkos::create_mirror_view(aggStat);
222  Kokkos::deep_copy(aggStatHost, aggStat);
223 
224  for (LO i = 0; i < numRows; i++) {
225  // reconstruct global row id (FIXME only works for contiguous maps)
226  GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;
227 
228  for (LO kr = 0; kr < nDofsPerNode; kr++)
229  if (OnePtMap->isNodeGlobalElement(grid + kr))
230  aggStatHost(i) = ONEPT;
231  }
232 
233  Kokkos::deep_copy(aggStat, aggStatHost);
234  }
235 
236 
237  const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
238  GO numGlobalRows = 0;
239  if (IsPrint(Statistics1))
240  MueLu_sumAll(comm, as<GO>(numRows), numGlobalRows);
241 
242  {
243  SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel);
244 
245  // LBV on Sept 06 2019: the note below is a little worrisome,
246  // can we guarantee that MueLu is never used on a non-symmetric
247  // graph?
248  // note: just using colinds_view in place of scalar_view_t type
249  // (it won't be used at all by symbolic SPGEMM)
250  using graph_t = typename LWGraph_kokkos::local_graph_type;
251  using KernelHandle = KokkosKernels::Experimental::
252  KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
253  typename graph_t::entries_type::value_type,
254  typename graph_t::entries_type::value_type,
255  typename graph_t::device_type::execution_space,
256  typename graph_t::device_type::memory_space,
257  typename graph_t::device_type::memory_space>;
258  KernelHandle kh;
259  //leave gc algorithm choice as the default
260  kh.create_distance2_graph_coloring_handle();
261 
262  // get the distance-2 graph coloring handle
263  auto coloringHandle = kh.get_distance2_graph_coloring_handle();
264 
265  // Set the distance-2 graph coloring algorithm to use.
266  // Options:
267  // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
268  // COLORING_D2_SERIAL - Use the legacy serial-only implementation
269  // COLORING_D2_VB - Use the parallel vertex based direct method
270  // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
271  // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
272  // COLORING_D2_NB_BIT - Net-based coloring (generally the fastest)
273  if(pL.get<bool>("aggregation: deterministic") == true) {
274  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
275  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
276  } else if(pL.get<std::string>("aggregation: coloring algorithm") == "serial") {
277  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
278  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: serial" << std::endl;
279  } else if(pL.get<std::string>("aggregation: coloring algorithm") == "default") {
280  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_DEFAULT );
281  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: default" << std::endl;
282  } else if(pL.get<std::string>("aggregation: coloring algorithm") == "vertex based") {
283  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB );
284  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based" << std::endl;
285  } else if(pL.get<std::string>("aggregation: coloring algorithm") == "vertex based bit set") {
286  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT );
287  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: vertex based bit set" << std::endl;
288  } else if(pL.get<std::string>("aggregation: coloring algorithm") == "edge filtering") {
289  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_VB_BIT_EF );
290  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: edge filtering" << std::endl;
291  } else if(pL.get<std::string>("aggregation: coloring algorithm") == "net based bit set") {
292  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_NB_BIT );
293  if(IsPrint(Statistics1)) GetOStream(Statistics1) << " algorithm: net based bit set" << std::endl;
294  } else {
295  TEUCHOS_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Unrecognized distance 2 coloring algorithm, valid options are: serial, default, matrix squared, vertex based, vertex based bit set, edge filtering")
296  }
297 
298  //Create device views for graph rowptrs/colinds
299  typename graph_t::row_map_type aRowptrs = graph->getRowPtrs();
300  typename graph_t::entries_type aColinds = graph->getEntries();
301 
302  //run d2 graph coloring
303  //graph is symmetric so row map/entries and col map/entries are the same
304  KokkosGraph::Experimental::graph_color_distance2(&kh, numRows, aRowptrs, aColinds);
305 
306  // extract the colors and store them in the aggregates
307  aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
308  aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
309 
310  //clean up coloring handle
311  kh.destroy_distance2_graph_coloring_handle();
312 
313  if (IsPrint(Statistics1)) {
314  GetOStream(Statistics1) << " num colors: " << aggregates->GetGraphNumColors() << std::endl;
315  }
316  }
317 
318  LO numNonAggregatedNodes = numRows;
319  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
320  for (size_t a = 0; a < algos_.size(); a++) {
321  std::string phase = algos_[a]->description();
322  SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);
323 
324  int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
325  algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
326  algos_[a]->SetProcRankVerbose(oldRank);
327 
328  if (IsPrint(Statistics1)) {
329  GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
330  GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
331  MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
332  MueLu_sumAll(comm, numLocalAggs, numGlobalAggs);
333 
334  double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
335  if (aggPercent > 99.99 && aggPercent < 100.00) {
336  // Due to round off (for instance, for 140465733/140466897), we could
337  // get 100.00% display even if there are some remaining nodes. This
338  // is bad from the users point of view. It is much better to change
339  // it to display 99.99%.
340  aggPercent = 99.99;
341  }
342  GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
343  << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
344  << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
345  << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
346  numGlobalAggregatedPrev = numGlobalAggregated;
347  numGlobalAggsPrev = numGlobalAggs;
348  }
349  }
350 
351  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
352 
353  aggregates->AggregatesCrossProcessors(false);
354  aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
355 
356  Set(currentLevel, "Aggregates", aggregates);
357 
358  GetOStream(Statistics1) << aggregates->description() << std::endl;
359  }
360 
361 } //namespace MueLu
362 
363 #endif // HAVE_MUELU_KOKKOS_REFACTOR
364 #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=nullptr)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
#define SET_VALID_ENTRY(name)