MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
48 
49 
50 #include <Teuchos_Comm.hpp>
51 #include <Teuchos_CommHelpers.hpp>
52 
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
56 #include <Xpetra_CrsGraph.hpp>
57 
58 #include "MueLu_Exceptions.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 #include "MueLu_LWGraph_kokkos.hpp"
62 #include "MueLu_Aggregates_kokkos.hpp"
63 #include "MueLu_IndexManager_kokkos.hpp"
65 
66 namespace MueLu {
67 
68  template <class LocalOrdinal, class GlobalOrdinal, class Node>
70  BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph,
71  Aggregates_kokkos& aggregates,
73  LO& numNonAggregatedNodes) const {
74  Monitor m(*this, "BuildAggregates");
75 
77  if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) {
78  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
79  out->setShowAllFrontMatter(false).setShowProcRank(true);
80  } else {
81  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
82  }
83 
84  RCP<IndexManager_kokkos> geoData = aggregates.GetIndexManager();
85  const LO numLocalFineNodes= geoData->getNumLocalFineNodes();
86  const LO numCoarseNodes = geoData->getNumCoarseNodes();
87  LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
88  LOVectorView procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
89 
90  *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
91  LO numAggregatedNodes;
92  fillAggregatesFunctor fillAggregates(geoData,
93  graph.GetComm()->getRank(),
94  aggStat,
95  vertex2AggId,
96  procWinner);
97  Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data",
98  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
99  fillAggregates,
100  numAggregatedNodes);
101 
102  *out << "numCoarseNodes= " << numCoarseNodes
103  << ", numAggregatedNodes= " << numAggregatedNodes << std::endl;
104  numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
105 
106  } // BuildAggregates()
107 
108 
109  template <class LocalOrdinal, class GlobalOrdinal, class Node>
111  BuildGraph(const LWGraph_kokkos& graph, RCP<IndexManager_kokkos>& geoData, const LO dofsPerNode,
112  RCP<CrsGraph>& myGraph) const {
113  Monitor m(*this, "BuildGraphP");
114 
116  if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) {
117  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
118  out->setShowAllFrontMatter(false).setShowProcRank(true);
119  } else {
120  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
121  }
122 
123  typedef typename CrsGraph::local_graph_type local_graph_type;
124 
125  // Compute the number of coarse points needed to interpolate quantities to a fine point
126  int numInterpolationPoints = 0;
127  if(geoData->getInterpolationOrder() == 0) {
128  numInterpolationPoints = 1;
129  } else if(geoData->getInterpolationOrder() == 1) {
130  // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow()
131  numInterpolationPoints = 1 << geoData->getNumDimensions();
132  }
133  *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl;
134 
135  const LO numLocalFineNodes = geoData->getNumLocalFineNodes();
136  const LO numCoarseNodes = geoData->getNumCoarseNodes();
137  const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints
138  *(numLocalFineNodes - numCoarseNodes));
139 
140  non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode*(numLocalFineNodes + 1));
141  entries_type colIndex("Prolongator graph, colIndices", numNnzEntries);
142 
143  *out << "Compute prolongatorGraph data" << std::endl;
144  if(geoData->getInterpolationOrder() == 0) {
145  computeGraphDataConstantFunctor computeGraphData(geoData,
146  numCoarseNodes,
147  dofsPerNode,
148  geoData->getCoarseningRates(),
149  geoData->getCoarseningEndRates(),
150  geoData->getLocalFineNodesPerDir(),
151  rowPtr,
152  colIndex);
153  Kokkos::parallel_for("Structured Aggregation: compute loca graph data",
154  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
155  computeGraphData);
156  } else if(geoData->getInterpolationOrder() == 1) {
157  // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr
158  // using a parallel scan, it might be possible to do something faster than that
159  // by including this calculation in computeGraphDataLinearFunctor but at the moment
160  // all the ideas I have include a bunch of if statements which I would like to avoid.
161  computeGraphRowPtrFunctor computeGraphRowPtr(geoData,
162  dofsPerNode,
163  numInterpolationPoints,
164  numLocalFineNodes,
165  geoData->getCoarseningRates(),
166  geoData->getLocalFineNodesPerDir(),
167  rowPtr);
168  Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph",
169  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes + 1),
170  computeGraphRowPtr);
171 
172  computeGraphDataLinearFunctor computeGraphData(geoData,
173  geoData->getNumDimensions(),
174  numCoarseNodes,
175  dofsPerNode,
176  numInterpolationPoints,
177  geoData->getCoarseningRates(),
178  geoData->getCoarseningEndRates(),
179  geoData->getLocalFineNodesPerDir(),
180  geoData->getCoarseNodesPerDir(),
181  rowPtr,
182  colIndex);
183  Kokkos::parallel_for("Structured Aggregation: compute loca graph data",
184  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
185  computeGraphData);
186  }
187 
188  local_graph_type myLocalGraph(colIndex, rowPtr);
189 
190  // Compute graph's colMap and domainMap
191  RCP<Map> colMap, domainMap;
192  *out << "Compute domain and column maps of the CrsGraph" << std::endl;
193  colMap = MapFactory::Build(graph.GetDomainMap()->lib(),
195  numCoarseNodes,
196  graph.GetDomainMap()->getIndexBase(),
197  graph.GetDomainMap()->getComm());
198  domainMap = colMap;
199 
200  myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap,
201  colMap, graph.GetDomainMap());
202 
203  } // BuildGraph()
204 
205 
206  template <class LocalOrdinal, class GlobalOrdinal, class Node>
209  const int myRank,
211  LOVectorView vertex2AggID,
212  LOVectorView procWinner) :
213  geoData_(*geoData), myRank_(myRank), aggStat_(aggStat),
214  vertex2AggID_(vertex2AggID), procWinner_(procWinner) {}
215 
216  template <class LocalOrdinal, class GlobalOrdinal, class Node>
217  KOKKOS_INLINE_FUNCTION
219  fillAggregatesFunctor::operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const {
220  // Compute coarse ID associated with fine LID
221  LO rem, rate;
222  LO coarseNodeCoarseLID;
223  LO nodeFineTuple[3], coarseIdx[3];
224  auto coarseRate = geoData_.getCoarseningRates();
225  auto endRate = geoData_.getCoarseningEndRates();
226  auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
227  // Compute coarse ID associated with fine LID
228  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
229 
230  for(int dim = 0; dim < 3; ++dim) {
231  coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
232  rem = nodeFineTuple[dim] % coarseRate(dim);
233  rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
234  if(rem > (rate / 2)) {++coarseIdx[dim];}
235  }
236 
237  geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
238  coarseNodeCoarseLID);
239 
240  vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
241  procWinner_(nodeIdx, 0) = myRank_;
242  aggStat_(nodeIdx) = AGGREGATED;
243  ++lNumAggregatedNodes;
244 
245  } // fillAggregatesFunctor::operator()
246 
247  template <class LocalOrdinal, class GlobalOrdinal, class Node>
251  const LO NumGhostedNodes,
252  const LO dofsPerNode,
253  constIntTupleView coarseRate,
254  constIntTupleView endRate,
255  constLOTupleView lFineNodesPerDir,
256  non_const_row_map_type rowPtr,
257  entries_type colIndex) : geoData_(*geoData),
258  numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode),
259  coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
260  rowPtr_(rowPtr), colIndex_(colIndex) {
261 
262  } // computeGraphDataConstantFunctor()
263 
264  template <class LocalOrdinal, class GlobalOrdinal, class Node>
265  KOKKOS_INLINE_FUNCTION
268  LO nodeFineTuple[3] = {0, 0, 0};
269  LO nodeCoarseTuple[3] = {0, 0, 0};
270 
271  // Compute ghosted tuple associated with fine LID
272  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
273 
274  // Compute coarse tuple associated with fine point
275  // then overwrite it with tuple associated with aggregate
276  LO rem, rate, coarseNodeCoarseLID;
277  for(int dim = 0; dim < 3; ++dim) {
278  nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
279  rem = nodeFineTuple[dim] % coarseRate_(dim);
280  if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) {
281  rate = coarseRate_(dim);
282  } else {
283  rate = endRate_(dim);
284  }
285  if(rem > (rate / 2)) {++nodeCoarseTuple[dim];}
286  }
287 
288  // get LID associted with aggregate
289  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
290  coarseNodeCoarseLID);
291 
292  // store data into CrsGraph taking care of multiple dofs case
293  for(LO dof = 0; dof < dofsPerNode_; ++dof) {
294  rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1;
295  colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof;
296  }
297 
298  } // computeGraphDataConstantFunctor::operator()
299 
300  template <class LocalOrdinal, class GlobalOrdinal, class Node>
303  const LO dofsPerNode,
304  const int numInterpolationPoints,
305  const LO numLocalRows,
306  constIntTupleView coarseRate,
307  constLOTupleView lFineNodesPerDir,
308  non_const_row_map_type rowPtr) :
309  geoData_(*geoData), dofsPerNode_(dofsPerNode),
310  numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows),
311  coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {}
312 
313  template <class LocalOrdinal, class GlobalOrdinal, class Node>
314  KOKKOS_INLINE_FUNCTION
316  computeGraphRowPtrFunctor::operator() (const LO rowIdx, GO& update, const bool final) const {
317  if (final) {
318  // Kokkos uses a multipass algorithm to implement scan.
319  // Only update the array on the final pass. Updating the
320  // array before changing 'update' means that we do an
321  // exclusive scan. Update the array after for an inclusive
322  // scan.
323  rowPtr_(rowIdx) = update;
324  }
325  if (rowIdx < numLocalRows_) {
326  LO nodeIdx = rowIdx / dofsPerNode_;
327  bool allCoarse = true;
328  LO nodeFineTuple[3] = {0, 0, 0};
329  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
330  for(int dim = 0; dim < 3; ++dim) {
331  const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
332 
333  // Check if Fine node lies on Coarse Node
334  allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
335  }
336  update += (allCoarse ? 1 : numInterpolationPoints_);
337  }
338  } // computeGraphRowPtrFunctor::operator()
339 
340  template <class LocalOrdinal, class GlobalOrdinal, class Node>
343  const int numDimensions,
344  const LO numGhostedNodes,
345  const LO dofsPerNode,
346  const int numInterpolationPoints,
347  constIntTupleView coarseRate,
348  constIntTupleView endRate,
349  constLOTupleView lFineNodesPerDir,
350  constLOTupleView ghostedNodesPerDir,
351  non_const_row_map_type rowPtr,
352  entries_type colIndex) :
353  geoData_(*geoData), numDimensions_(numDimensions),
354  numGhostedNodes_(numGhostedNodes),
355  dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints),
356  coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
357  ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) {
358 
359  } // computeGraphDataLinearFunctor()
360 
361  template <class LocalOrdinal, class GlobalOrdinal, class Node>
362  KOKKOS_INLINE_FUNCTION
365  LO nodeFineTuple[3] = {0, 0, 0};
366  LO nodeCoarseTuple[3] = {0, 0, 0};
367 
368  // Compute coarse ID associated with fine LID
369  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
370 
371  LO coarseNodeCoarseLID;
372  bool allCoarse = false;
373  for(int dim = 0; dim < 3; ++dim) {
374  nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
375  }
376  if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse = true;}
377 
378  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
379  coarseNodeCoarseLID);
380 
381  if(allCoarse) {
382  // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node.
383  for(LO dof = 0; dof < dofsPerNode_; ++dof) {
384  colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof;
385  }
386  } else {
387 
388  for(int dim = 0; dim < numDimensions_; ++dim) {
389  if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; }
390  }
391  // Compute Coarse Node LID
392  // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow
393  // find out the number of dimensions before calling the opertor() of the functor.
394  for(LO dof = 0; dof < dofsPerNode_; ++dof) {
395  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0));
396  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1));
397  if(numDimensions_ > 1) {
398  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2));
399  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3));
400  if(numDimensions_ > 2) {
401  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4));
402  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5));
403  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6));
404  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7));
405  }
406  }
407  }
408  }
409  } // computeGraphDataLinearFunctor::operator()
410 
411 } // end namespace
412 
413 
414 #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
GlobalOrdinal GO
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
LocalOrdinal LO
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, memory_space > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)
Timer to be used in non-factories.
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList &, const LWGraph_kokkos &, Aggregates_kokkos &, std::vector< unsigned > &, LO &) const
Local aggregation.
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Local aggregation.