MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_UncoupledAggregationFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
47 #define MUELU_UNCOUPLEDAGGREGATIONFACTORY_KOKKOS_DEF_HPP_
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
51 #include <climits>
52 
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Vector.hpp>
55 #include <Xpetra_MultiVectorFactory.hpp>
56 #include <Xpetra_VectorFactory.hpp>
57 
59 
60 #include "MueLu_OnePtAggregationAlgorithm_kokkos.hpp"
61 #include "MueLu_PreserveDirichletAggregationAlgorithm_kokkos.hpp"
62 #include "MueLu_IsolatedNodeAggregationAlgorithm_kokkos.hpp"
63 
64 #include "MueLu_AggregationPhase1Algorithm_kokkos.hpp"
65 #include "MueLu_AggregationPhase2aAlgorithm_kokkos.hpp"
66 #include "MueLu_AggregationPhase2bAlgorithm_kokkos.hpp"
67 #include "MueLu_AggregationPhase3Algorithm_kokkos.hpp"
68 
69 #include "MueLu_Level.hpp"
70 #include "MueLu_LWGraph_kokkos.hpp"
71 #include "MueLu_Aggregates_kokkos.hpp"
72 #include "MueLu_MasterList.hpp"
73 #include "MueLu_Monitor.hpp"
74 #include "MueLu_AmalgamationInfo.hpp"
75 #include "MueLu_Utilities.hpp" // for sum_all and similar stuff...
76 
77 #include "KokkosGraph_Distance2ColorHandle.hpp"
78 #include "KokkosGraph_Distance2Color.hpp"
79 
80 namespace MueLu {
81 
82  template <class LocalOrdinal, class GlobalOrdinal, class Node>
83  UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::UncoupledAggregationFactory_kokkos()
84  : bDefinitionPhase_(true)
85  { }
86 
87  template <class LocalOrdinal, class GlobalOrdinal, class Node>
88  RCP<const ParameterList> UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
89  RCP<ParameterList> validParamList = rcp(new ParameterList());
90 
91  // Aggregation parameters (used in aggregation algorithms)
92  // TODO introduce local member function for each aggregation algorithm such that each aggregation algorithm can define its own parameters
93 
95 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
96  SET_VALID_ENTRY("aggregation: max agg size");
97  SET_VALID_ENTRY("aggregation: min agg size");
98  SET_VALID_ENTRY("aggregation: max selected neighbors");
99  SET_VALID_ENTRY("aggregation: ordering");
100  validParamList->getEntry("aggregation: ordering").setValidator(
101  rcp(new validatorType(Teuchos::tuple<std::string>("natural", "graph", "random"), "aggregation: ordering")));
102  SET_VALID_ENTRY("aggregation: enable phase 1");
103  SET_VALID_ENTRY("aggregation: phase 1 algorithm");
104  SET_VALID_ENTRY("aggregation: deterministic");
105  SET_VALID_ENTRY("aggregation: enable phase 2a");
106  SET_VALID_ENTRY("aggregation: enable phase 2b");
107  SET_VALID_ENTRY("aggregation: enable phase 3");
108  SET_VALID_ENTRY("aggregation: preserve Dirichlet points");
109  SET_VALID_ENTRY("aggregation: allow user-specified singletons");
110 #undef SET_VALID_ENTRY
111 
112  // general variables needed in AggregationFactory
113  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
114  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
115 
116  // special variables necessary for OnePtAggregationAlgorithm
117  validParamList->set< std::string > ("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')");
118  validParamList->set< std::string > ("OnePt aggregate map factory", "", "Generating factory of (DOF) map for single node aggregates.");
119  //validParamList->set< RCP<const FactoryBase> >("OnePt aggregate map factory", NoFactory::getRCP(), "Generating factory of (DOF) map for single node aggregates.");
120 
121  return validParamList;
122  }
123 
124  template <class LocalOrdinal, class GlobalOrdinal, class Node>
125  void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
126  Input(currentLevel, "Graph");
127  Input(currentLevel, "DofsPerNode");
128 
129  const ParameterList& pL = GetParameterList();
130 
131  // request special data necessary for OnePtAggregationAlgorithm
132  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
133  if (mapOnePtName.length() > 0) {
134  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
135  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
136  currentLevel.DeclareInput(mapOnePtName, NoFactory::get());
137  } else {
138  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
139  currentLevel.DeclareInput(mapOnePtName, mapOnePtFact.get());
140  }
141  }
142  }
143 
144  template <class LocalOrdinal, class GlobalOrdinal, class Node>
145  void UncoupledAggregationFactory_kokkos<LocalOrdinal, GlobalOrdinal, Node>::Build(Level &currentLevel) const {
146  FactoryMonitor m(*this, "Build", currentLevel);
147 
148  ParameterList pL = GetParameterList();
149  bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed
150 
151  if (pL.get<int>("aggregation: max agg size") == -1)
152  pL.set("aggregation: max agg size", INT_MAX);
153 
154  // define aggregation algorithms
155  RCP<const FactoryBase> graphFact = GetFactory("Graph");
156 
157  // TODO Can we keep different aggregation algorithms over more Build calls?
158  algos_.clear();
159  algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm_kokkos(graphFact)));
160  if (pL.get<bool>("aggregation: allow user-specified singletons") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm_kokkos (graphFact)));
161  if (pL.get<bool>("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm_kokkos (graphFact)));
162  if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm_kokkos (graphFact)));
163  if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm_kokkos (graphFact)));
164  if (pL.get<bool>("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm_kokkos (graphFact)));
165 
166  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
167  RCP<Map> OnePtMap = Teuchos::null;
168  if (mapOnePtName.length()) {
169  std::string mapOnePtFactName = pL.get<std::string>("OnePt aggregate map factory");
170  if (mapOnePtFactName == "" || mapOnePtFactName == "NoFactory") {
171  OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, NoFactory::get());
172  } else {
173  RCP<const FactoryBase> mapOnePtFact = GetFactory(mapOnePtFactName);
174  OnePtMap = currentLevel.Get<RCP<Map> >(mapOnePtName, mapOnePtFact.get());
175  }
176  }
177 
178  RCP<const LWGraph_kokkos> graph = Get< RCP<LWGraph_kokkos> >(currentLevel, "Graph");
179 
180  // Build
181  RCP<Aggregates_kokkos> aggregates = rcp(new Aggregates_kokkos(*graph));
182  aggregates->setObjectLabel("UC");
183 
184  const LO numRows = graph->GetNodeNumVertices();
185 
186  // construct aggStat information
188  numRows);
189  Kokkos::deep_copy(aggStat, READY);
190 
191  // LBV on Sept 06 2019: re-commenting out the dirichlet boundary map
192  // even if the map is correctly extracted from the graph, aggStat is
193  // now a Kokkos::View<unsinged*, memory_space> and filling it will
194  // require a parallel_for or to copy it to the Host which is not really
195  // good from a performance point of view.
196  // If dirichletBoundaryMap was an actual Xpetra::Map, one could call
197  // getLocalMap to have a Kokkos::View on the appropriate memory_space
198  // instead of an ArrayRCP.
199  // // TODO
200  // //ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap();
201  // ArrayRCP<const bool> dirichletBoundaryMap;
202 
203  // if (dirichletBoundaryMap != Teuchos::null)
204  // for (LO i = 0; i < numRows; i++)
205  // if (dirichletBoundaryMap[i] == true)
206  // aggStat[i] = BOUNDARY;
207 
208  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
209  GO indexBase = graph->GetDomainMap()->getIndexBase();
210  if (OnePtMap != Teuchos::null) {
212  = Kokkos::create_mirror_view(aggStat);
213  Kokkos::deep_copy(aggStatHost, aggStat);
214 
215  for (LO i = 0; i < numRows; i++) {
216  // reconstruct global row id (FIXME only works for contiguous maps)
217  GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;
218 
219  for (LO kr = 0; kr < nDofsPerNode; kr++)
220  if (OnePtMap->isNodeGlobalElement(grid + kr))
221  aggStatHost(i) = ONEPT;
222  }
223 
224  Kokkos::deep_copy(aggStat, aggStatHost);
225  }
226 
227 
228  const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
229  GO numGlobalRows = 0;
230  if (IsPrint(Statistics1))
231  MueLu_sumAll(comm, as<GO>(numRows), numGlobalRows);
232 
233  {
234  SubFactoryMonitor sfm(*this, "Algo \"Graph Coloring\"", currentLevel);
235 
236  // LBV on Sept 06 2019: the note below is a little worrisome,
237  // can we guarantee that MueLu is never used on a non-symmetric
238  // graph?
239  // note: just using colinds_view in place of scalar_view_t type
240  // (it won't be used at all by symbolic SPGEMM)
241  using graph_t = typename LWGraph_kokkos::local_graph_type;
242  using KernelHandle = KokkosKernels::Experimental::
243  KokkosKernelsHandle<typename graph_t::row_map_type::value_type,
244  typename graph_t::entries_type::value_type,
245  typename graph_t::entries_type::value_type,
246  typename graph_t::device_type::execution_space,
247  typename graph_t::device_type::memory_space,
248  typename graph_t::device_type::memory_space>;
249  KernelHandle kh;
250  //leave gc algorithm choice as the default
251  kh.create_distance2_graph_coloring_handle();
252 
253  // get the distance-2 graph coloring handle
254  auto coloringHandle = kh.get_distance2_graph_coloring_handle();
255 
256  // Set the distance-2 graph coloring algorithm to use.
257  // Options:
258  // COLORING_D2_DEFAULT - Let the kernel handle pick the variation
259  // COLORING_D2_SERIAL - Use the legacy serial-only implementation
260  // COLORING_D2_MATRIX_SQUARED - Use the SPGEMM + D1GC method
261  // COLORING_D2_SPGEMM - Same as MATRIX_SQUARED
262  // COLORING_D2_VB - Use the parallel vertex based direct method
263  // COLORING_D2_VB_BIT - Same as VB but using the bitvector forbidden array
264  // COLORING_D2_VB_BIT_EF - Add experimental edge-filtering to VB_BIT
265  coloringHandle->set_algorithm( KokkosGraph::COLORING_D2_SERIAL );
266 
267  //Create device views for graph rowptrs/colinds
268  typename graph_t::row_map_type aRowptrs = graph->getRowPtrs();
269  typename graph_t::entries_type aColinds = graph->getEntries();
270 
271  //run d2 graph coloring
272  //graph is symmetric so row map/entries and col map/entries are the same
273  KokkosGraph::Experimental::graph_compute_distance2_color(&kh, numRows, numRows,
274  aRowptrs, aColinds,
275  aRowptrs, aColinds);
276 
277  // extract the colors and store them in the aggregates
278  aggregates->SetGraphColors(coloringHandle->get_vertex_colors());
279  aggregates->SetGraphNumColors(static_cast<LO>(coloringHandle->get_num_colors()));
280 
281  //clean up coloring handle
282  kh.destroy_distance2_graph_coloring_handle();
283  }
284 
285  LO numNonAggregatedNodes = numRows;
286  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
287  for (size_t a = 0; a < algos_.size(); a++) {
288  std::string phase = algos_[a]->description();
289  SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);
290 
291  int oldRank = algos_[a]->SetProcRankVerbose(this->GetProcRankVerbose());
292  algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);
293  algos_[a]->SetProcRankVerbose(oldRank);
294 
295  if (IsPrint(Statistics1)) {
296  GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
297  GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0;
298  MueLu_sumAll(comm, numLocalAggregated, numGlobalAggregated);
299  MueLu_sumAll(comm, numLocalAggs, numGlobalAggs);
300 
301  double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
302  if (aggPercent > 99.99 && aggPercent < 100.00) {
303  // Due to round off (for instance, for 140465733/140466897), we could
304  // get 100.00% display even if there are some remaining nodes. This
305  // is bad from the users point of view. It is much better to change
306  // it to display 99.99%.
307  aggPercent = 99.99;
308  }
309  GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
310  << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
311  << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
312  << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
313  numGlobalAggregatedPrev = numGlobalAggregated;
314  numGlobalAggsPrev = numGlobalAggs;
315  }
316  }
317 
318  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");
319 
320  aggregates->AggregatesCrossProcessors(false);
321  aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
322 
323  Set(currentLevel, "Aggregates", aggregates);
324 
325  GetOStream(Statistics1) << aggregates->description() << std::endl;
326  }
327 
328 } //namespace MueLu
329 
330 #endif // HAVE_MUELU_KOKKOS_REFACTOR
331 #endif /* MUELU_UNCOUPLEDAGGREGATIONFACTORY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
GlobalOrdinal GO
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define SET_VALID_ENTRY(name)