MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_AggregationStructuredAlgorithm_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
48 
49 
50 #include <Teuchos_Comm.hpp>
51 #include <Teuchos_CommHelpers.hpp>
52 
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_CrsGraphFactory.hpp>
56 #include <Xpetra_CrsGraph.hpp>
57 
58 #include "MueLu_Exceptions.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 #include "MueLu_LWGraph_kokkos.hpp"
62 #include "MueLu_Aggregates_kokkos.hpp"
63 #include "MueLu_IndexManager_kokkos.hpp"
65 
66 namespace MueLu {
67 
68  template <class LocalOrdinal, class GlobalOrdinal, class Node>
70  BuildAggregates(const Teuchos::ParameterList& /* params */, const LWGraph_kokkos& graph,
71  Aggregates_kokkos& aggregates,
73  LO& numNonAggregatedNodes) const {
74  Monitor m(*this, "BuildAggregates");
75 
77  if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) {
78  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
79  out->setShowAllFrontMatter(false).setShowProcRank(true);
80  } else {
81  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
82  }
83 
84  RCP<IndexManager_kokkos> geoData = aggregates.GetIndexManager();
85  const LO numLocalFineNodes= geoData->getNumLocalFineNodes();
86  const LO numCoarseNodes = geoData->getNumCoarseNodes();
87  LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
88  LOVectorView procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
89 
90  *out << "Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
91  LO numAggregatedNodes;
92  fillAggregatesFunctor fillAggregates(geoData,
93  graph.GetComm()->getRank(),
94  aggStat,
95  vertex2AggId,
96  procWinner);
97  Kokkos::parallel_reduce("StructuredAggregation: fill aggregates data",
98  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
99  fillAggregates,
100  numAggregatedNodes);
101 
102  *out << "numCoarseNodes= " << numCoarseNodes
103  << ", numAggregatedNodes= " << numAggregatedNodes << std::endl;
104  numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
105 
106  } // BuildAggregates()
107 
108 
109  template <class LocalOrdinal, class GlobalOrdinal, class Node>
111  BuildGraph(const LWGraph_kokkos& graph, RCP<IndexManager_kokkos>& geoData, const LO dofsPerNode,
112  RCP<CrsGraph>& myGraph) const {
113  Monitor m(*this, "BuildGraphP");
114 
116  if(const char* dbg = std::getenv("MUELU_STRUCTUREDALGORITHM_DEBUG")) {
117  out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
118  out->setShowAllFrontMatter(false).setShowProcRank(true);
119  } else {
120  out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
121  }
122 
123  // Compute the number of coarse points needed to interpolate quantities to a fine point
124  int numInterpolationPoints = 0;
125  if(geoData->getInterpolationOrder() == 0) {
126  numInterpolationPoints = 1;
127  } else if(geoData->getInterpolationOrder() == 1) {
128  // Compute 2^numDimensions using bit logic to avoid round-off errors from std::pow()
129  numInterpolationPoints = 1 << geoData->getNumDimensions();
130  }
131  *out << "numInterpolationPoints=" << numInterpolationPoints << std::endl;
132 
133  const LO numLocalFineNodes = geoData->getNumLocalFineNodes();
134  const LO numCoarseNodes = geoData->getNumCoarseNodes();
135  const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints
136  *(numLocalFineNodes - numCoarseNodes));
137 
138  non_const_row_map_type rowPtr("Prolongator graph, rowPtr", dofsPerNode*(numLocalFineNodes + 1));
139  entries_type colIndex("Prolongator graph, colIndices", numNnzEntries);
140 
141  *out << "Compute prolongatorGraph data" << std::endl;
142  if(geoData->getInterpolationOrder() == 0) {
143  computeGraphDataConstantFunctor computeGraphData(geoData,
144  numCoarseNodes,
145  dofsPerNode,
146  geoData->getCoarseningRates(),
147  geoData->getCoarseningEndRates(),
148  geoData->getLocalFineNodesPerDir(),
149  rowPtr,
150  colIndex);
151  Kokkos::parallel_for("Structured Aggregation: compute loca graph data",
152  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
153  computeGraphData);
154  } else if(geoData->getInterpolationOrder() == 1) {
155  // Note, lbv 2018-11-08: in the piece-wise linear case I am computing the rowPtr
156  // using a parallel scan, it might be possible to do something faster than that
157  // by including this calculation in computeGraphDataLinearFunctor but at the moment
158  // all the ideas I have include a bunch of if statements which I would like to avoid.
159  computeGraphRowPtrFunctor computeGraphRowPtr(geoData,
160  dofsPerNode,
161  numInterpolationPoints,
162  numLocalFineNodes,
163  geoData->getCoarseningRates(),
164  geoData->getLocalFineNodesPerDir(),
165  rowPtr);
166  Kokkos::parallel_scan("Structured Aggregation: compute rowPtr for prolongator graph",
167  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes + 1),
168  computeGraphRowPtr);
169 
170  computeGraphDataLinearFunctor computeGraphData(geoData,
171  geoData->getNumDimensions(),
172  numCoarseNodes,
173  dofsPerNode,
174  numInterpolationPoints,
175  geoData->getCoarseningRates(),
176  geoData->getCoarseningEndRates(),
177  geoData->getLocalFineNodesPerDir(),
178  geoData->getCoarseNodesPerDir(),
179  rowPtr,
180  colIndex);
181  Kokkos::parallel_for("Structured Aggregation: compute loca graph data",
182  Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
183  computeGraphData);
184  }
185 
186  local_graph_type myLocalGraph(colIndex, rowPtr);
187 
188  // Compute graph's colMap and domainMap
189  RCP<Map> colMap, domainMap;
190  *out << "Compute domain and column maps of the CrsGraph" << std::endl;
191  colMap = MapFactory::Build(graph.GetDomainMap()->lib(),
193  numCoarseNodes,
194  graph.GetDomainMap()->getIndexBase(),
195  graph.GetDomainMap()->getComm());
196  domainMap = colMap;
197 
198  myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap,
199  colMap, graph.GetDomainMap());
200 
201  } // BuildGraph()
202 
203 
204  template <class LocalOrdinal, class GlobalOrdinal, class Node>
207  const int myRank,
209  LOVectorView vertex2AggID,
210  LOVectorView procWinner) :
211  geoData_(*geoData), myRank_(myRank), aggStat_(aggStat),
212  vertex2AggID_(vertex2AggID), procWinner_(procWinner) {}
213 
214  template <class LocalOrdinal, class GlobalOrdinal, class Node>
215  KOKKOS_INLINE_FUNCTION
217  fillAggregatesFunctor::operator() (const LO nodeIdx, LO& lNumAggregatedNodes) const {
218  // Compute coarse ID associated with fine LID
219  LO rem, rate;
220  LO coarseNodeCoarseLID;
221  LO nodeFineTuple[3], coarseIdx[3];
222  auto coarseRate = geoData_.getCoarseningRates();
223  auto endRate = geoData_.getCoarseningEndRates();
224  auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
225  // Compute coarse ID associated with fine LID
226  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
227 
228  for(int dim = 0; dim < 3; ++dim) {
229  coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
230  rem = nodeFineTuple[dim] % coarseRate(dim);
231  rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
232  if(rem > (rate / 2)) {++coarseIdx[dim];}
233  }
234 
235  geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
236  coarseNodeCoarseLID);
237 
238  vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
239  procWinner_(nodeIdx, 0) = myRank_;
240  aggStat_(nodeIdx) = AGGREGATED;
241  ++lNumAggregatedNodes;
242 
243  } // fillAggregatesFunctor::operator()
244 
245  template <class LocalOrdinal, class GlobalOrdinal, class Node>
249  const LO NumGhostedNodes,
250  const LO dofsPerNode,
251  constIntTupleView coarseRate,
252  constIntTupleView endRate,
253  constLOTupleView lFineNodesPerDir,
254  non_const_row_map_type rowPtr,
255  entries_type colIndex) : geoData_(*geoData),
256  numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode),
257  coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
258  rowPtr_(rowPtr), colIndex_(colIndex) {
259 
260  } // computeGraphDataConstantFunctor()
261 
262  template <class LocalOrdinal, class GlobalOrdinal, class Node>
263  KOKKOS_INLINE_FUNCTION
266  LO nodeFineTuple[3] = {0, 0, 0};
267  LO nodeCoarseTuple[3] = {0, 0, 0};
268 
269  // Compute ghosted tuple associated with fine LID
270  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
271 
272  // Compute coarse tuple associated with fine point
273  // then overwrite it with tuple associated with aggregate
274  LO rem, rate, coarseNodeCoarseLID;
275  for(int dim = 0; dim < 3; ++dim) {
276  nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
277  rem = nodeFineTuple[dim] % coarseRate_(dim);
278  if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) {
279  rate = coarseRate_(dim);
280  } else {
281  rate = endRate_(dim);
282  }
283  if(rem > (rate / 2)) {++nodeCoarseTuple[dim];}
284  }
285 
286  // get LID associted with aggregate
287  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
288  coarseNodeCoarseLID);
289 
290  // store data into CrsGraph taking care of multiple dofs case
291  for(LO dof = 0; dof < dofsPerNode_; ++dof) {
292  rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1;
293  colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof;
294  }
295 
296  } // computeGraphDataConstantFunctor::operator()
297 
298  template <class LocalOrdinal, class GlobalOrdinal, class Node>
301  const LO dofsPerNode,
302  const int numInterpolationPoints,
303  const LO numLocalRows,
304  constIntTupleView coarseRate,
305  constLOTupleView lFineNodesPerDir,
306  non_const_row_map_type rowPtr) :
307  geoData_(*geoData), dofsPerNode_(dofsPerNode),
308  numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows),
309  coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {}
310 
311  template <class LocalOrdinal, class GlobalOrdinal, class Node>
312  KOKKOS_INLINE_FUNCTION
314  computeGraphRowPtrFunctor::operator() (const LO rowIdx, GO& update, const bool final) const {
315  if (final) {
316  // Kokkos uses a multipass algorithm to implement scan.
317  // Only update the array on the final pass. Updating the
318  // array before changing 'update' means that we do an
319  // exclusive scan. Update the array after for an inclusive
320  // scan.
321  rowPtr_(rowIdx) = update;
322  }
323  if (rowIdx < numLocalRows_) {
324  LO nodeIdx = rowIdx / dofsPerNode_;
325  bool allCoarse = true;
326  LO nodeFineTuple[3] = {0, 0, 0};
327  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
328  for(int dim = 0; dim < 3; ++dim) {
329  const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
330 
331  // Check if Fine node lies on Coarse Node
332  allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
333  }
334  update += (allCoarse ? 1 : numInterpolationPoints_);
335  }
336  } // computeGraphRowPtrFunctor::operator()
337 
338  template <class LocalOrdinal, class GlobalOrdinal, class Node>
341  const int numDimensions,
342  const LO numGhostedNodes,
343  const LO dofsPerNode,
344  const int numInterpolationPoints,
345  constIntTupleView coarseRate,
346  constIntTupleView endRate,
347  constLOTupleView lFineNodesPerDir,
348  constLOTupleView ghostedNodesPerDir,
349  non_const_row_map_type rowPtr,
350  entries_type colIndex) :
351  geoData_(*geoData), numDimensions_(numDimensions),
352  numGhostedNodes_(numGhostedNodes),
353  dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints),
354  coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
355  ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) {
356 
357  } // computeGraphDataLinearFunctor()
358 
359  template <class LocalOrdinal, class GlobalOrdinal, class Node>
360  KOKKOS_INLINE_FUNCTION
363  LO nodeFineTuple[3] = {0, 0, 0};
364  LO nodeCoarseTuple[3] = {0, 0, 0};
365 
366  // Compute coarse ID associated with fine LID
367  geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
368 
369  LO coarseNodeCoarseLID;
370  bool allCoarse = false;
371  for(int dim = 0; dim < 3; ++dim) {
372  nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
373  }
374  if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse = true;}
375 
376  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
377  coarseNodeCoarseLID);
378 
379  if(allCoarse) {
380  // Fine node lies on Coarse node, easy case, we only need the LID of the coarse node.
381  for(LO dof = 0; dof < dofsPerNode_; ++dof) {
382  colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof;
383  }
384  } else {
385 
386  for(int dim = 0; dim < numDimensions_; ++dim) {
387  if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; }
388  }
389  // Compute Coarse Node LID
390  // Note lbv 10-06-2018: it is likely benefitial to remove the two if statments and somehow
391  // find out the number of dimensions before calling the opertor() of the functor.
392  for(LO dof = 0; dof < dofsPerNode_; ++dof) {
393  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0));
394  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1));
395  if(numDimensions_ > 1) {
396  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2));
397  geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3));
398  if(numDimensions_ > 2) {
399  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4));
400  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5));
401  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6));
402  geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7));
403  }
404  }
405  }
406  }
407  } // computeGraphDataLinearFunctor::operator()
408 
409 } // end namespace
410 
411 
412 #endif /* MUELU_AGGREGATIONSTRUCTUREDALGORITHM_DEF_HPP_ */
typename local_graph_type::row_map_type::non_const_type non_const_row_map_type
typename Kokkos::View< const int[3], memory_space > constIntTupleView
basic_FancyOStream & setShowProcRank(const bool showProcRank)
decltype(std::declval< LOVector >().template getLocalView< memory_space >()) LOVectorView
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, memory_space > &aggStat, LO &numNonAggregatedNodes) const
Build aggregates object.
typename Kokkos::View< const LO[3], memory_space > constLOTupleView
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, memory_space > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Timer to be used in non-factories.
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Build a CrsGraph instead of aggregates.