46 #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
50 #include <Teuchos_Comm.hpp>
51 #include <Teuchos_CommHelpers.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_CrsGraphFactory.hpp>
56 #include <Xpetra_CrsGraph.hpp>
61 #include "MueLu_LWGraph_kokkos.hpp"
62 #include "MueLu_Aggregates_kokkos.hpp"
63 #include "MueLu_IndexManager_kokkos.hpp"
// BuildAggregates (name and signature per the reference entry near the end of
// this listing): loops over fine nodes and assigns them to an aggregate and a
// rank, then updates the caller's count of non-aggregated nodes.
// NOTE(review): this listing omits several original lines (the head of the
// signature, the setup of `out` and `geoData`, the reduction launch and the
// closing braces), so only the visible statements are described here.
68 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
74 Monitor m(*
this,
"BuildAggregates");
// Debug output is routed to std::cout only when the environment variable
// MUELU_STRUCTUREDALGORITHM_DEBUG is set; the default stream is not visible here.
77 if(
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
78 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
// Node counts come from geoData (an IndexManager_kokkos handle per the
// reference entries; where it is obtained is not visible in this listing).
85 const LO numLocalFineNodes= geoData->getNumLocalFineNodes();
86 const LO numCoarseNodes = geoData->getNumCoarseNodes();
// Local views of the aggregate-id and proc-winner vectors owned by `aggregates`.
87 LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
88 LOVectorView procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
90 *out <<
"Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
// Declared without an initial value; presumably it receives the result of a
// Kokkos::parallel_reduce whose launch lines are missing from this listing
// (the rank argument below looks like part of the functor construction) —
// TODO confirm against the full source.
91 LO numAggregatedNodes;
93 graph.GetComm()->getRank(),
102 *out <<
"numCoarseNodes= " << numCoarseNodes
103 <<
", numAggregatedNodes= " << numAggregatedNodes << std::endl;
// Remove the nodes aggregated here from the caller-supplied running count.
104 numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
// BuildGraph (name and signature per the reference entry near the end of this
// listing): builds the prolongator CrsGraph from the structured-grid data
// instead of building aggregates.
// NOTE(review): the signature, the functor launches and several argument
// lines are missing from this listing; comments cover only visible statements.
// The Monitor label below reads "BuildGraphP" in the source.
109 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 Monitor m(*
this,
"BuildGraphP");
// Debug output is routed to std::cout only when the environment variable
// MUELU_STRUCTUREDALGORITHM_DEBUG is set.
116 if(
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
117 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
// Entries per interpolated row: 1 for interpolation order 0, 2^numDimensions
// for order 1. Any other order leaves the value at 0 in the visible code.
124 int numInterpolationPoints = 0;
125 if(geoData->getInterpolationOrder() == 0) {
126 numInterpolationPoints = 1;
127 }
else if(geoData->getInterpolationOrder() == 1) {
129 numInterpolationPoints = 1 << geoData->getNumDimensions();
131 *out <<
"numInterpolationPoints=" << numInterpolationPoints << std::endl;
133 const LO numLocalFineNodes = geoData->getNumLocalFineNodes();
134 const LO numCoarseNodes = geoData->getNumCoarseNodes();
// Entry count: one per coarse node plus numInterpolationPoints per remaining
// fine node, all multiplied by dofsPerNode.
135 const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints
136 *(numLocalFineNodes - numCoarseNodes));
// Column-index storage sized to the entry count computed above.
139 entries_type colIndex(
"Prolongator graph, colIndices", numNnzEntries);
141 *out <<
"Compute prolongatorGraph data" << std::endl;
// Order 0: the arguments below presumably feed computeGraphDataConstantFunctor
// (launch lines not visible) — TODO confirm.
142 if(geoData->getInterpolationOrder() == 0) {
146 geoData->getCoarseningRates(),
147 geoData->getCoarseningEndRates(),
148 geoData->getLocalFineNodesPerDir(),
154 }
else if(geoData->getInterpolationOrder() == 1) {
// Order 1: row pointers are computed first with a parallel scan (see the
// kernel label), then the remaining arguments presumably feed
// computeGraphDataLinearFunctor (launch lines not visible) — TODO confirm.
161 numInterpolationPoints,
163 geoData->getCoarseningRates(),
164 geoData->getLocalFineNodesPerDir(),
166 Kokkos::parallel_scan(
"Structured Aggregation: compute rowPtr for prolongator graph",
171 geoData->getNumDimensions(),
174 numInterpolationPoints,
175 geoData->getCoarseningRates(),
176 geoData->getCoarseningEndRates(),
177 geoData->getLocalFineNodesPerDir(),
178 geoData->getCoarseNodesPerDir(),
190 *out <<
"Compute domain and column maps of the CrsGraph" << std::endl;
// The column map reuses the library, index base and communicator of the fine
// graph's domain map; its size arguments are not visible in this listing.
191 colMap = MapFactory::Build(graph.GetDomainMap()->lib(),
194 graph.GetDomainMap()->getIndexBase(),
195 graph.GetDomainMap()->getComm());
// NOTE(review): argument order is presumably (localGraph, rowMap, colMap,
// domainMap, rangeMap) — confirm against Xpetra::CrsGraphFactory::Build.
198 myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap,
199 colMap, graph.GetDomainMap());
// fillAggregatesFunctor constructor (signature per the reference entry near
// the end of this listing; its head is missing here). Stores the rank, the
// aggregation-status view and the two output views, and initializes geoData_
// from the dereferenced geoData handle. The body is empty.
204 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
211 geoData_(*geoData), myRank_(myRank), aggStat_(aggStat),
212 vertex2AggID_(vertex2AggID), procWinner_(procWinner) {}
// fillAggregatesFunctor::operator()(nodeIdx, lNumAggregatedNodes) (signature
// per the reference entry near the end of this listing; its head, the
// declarations of `rem`/`rate` and the closing braces are missing here).
// Maps one fine node to a coarse node, records that coarse node as its
// aggregate and this rank as its owner, and increments the reduction counter.
214 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
215 KOKKOS_INLINE_FUNCTION
220 LO coarseNodeCoarseLID;
221 LO nodeFineTuple[3], coarseIdx[3];
222 auto coarseRate = geoData_.getCoarseningRates();
223 auto endRate = geoData_.getCoarseningEndRates();
224 auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
// Fine local id -> per-direction fine indices.
226 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// Per direction: integer-divide by the coarsening rate, then move up to the
// next coarse index when the remainder exceeds half of the applicable rate.
// The applicable rate is endRate for the last endRate(dim) fine indices of the
// direction and coarseRate otherwise.
228 for(
int dim = 0; dim < 3; ++dim) {
229 coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
230 rem = nodeFineTuple[dim] % coarseRate(dim);
231 rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
232 if(rem > (rate / 2)) {++coarseIdx[dim];}
// Coarse per-direction indices -> coarse local id (written to the last argument).
235 geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
236 coarseNodeCoarseLID);
// Record the aggregate id and the owning rank for this fine node.
238 vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
239 procWinner_(nodeIdx, 0) = myRank_;
// Contribution of this node to the reduction total.
241 ++lNumAggregatedNodes;
// computeGraphDataConstantFunctor constructor (signature per the reference
// entry near the end of this listing; only part of the parameter list and the
// member-initializer list are visible). Stores the node counts, rate and size
// views, and the rowPtr/colIndex output views.
// NOTE(review): whatever follows the opening brace is not visible here.
245 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
249 const LO NumGhostedNodes,
250 const LO dofsPerNode,
256 numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode),
257 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
258 rowPtr_(rowPtr), colIndex_(colIndex) {
// computeGraphDataConstantFunctor::operator()(nodeIdx) (signature per the
// reference entries near the end of this listing; its head, the `else` line
// and the closing braces are missing here). Writes one graph entry per dof
// row of the node: the matching dof of the coarse node the fine node maps to.
262 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
263 KOKKOS_INLINE_FUNCTION
266 LO nodeFineTuple[3] = {0, 0, 0};
267 LO nodeCoarseTuple[3] = {0, 0, 0};
// Fine local id -> per-direction fine indices.
270 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
274 LO rem, rate, coarseNodeCoarseLID;
// Per direction: integer-divide by the coarsening rate, then move up to the
// next coarse index when the remainder exceeds half of the applicable rate.
// The applicable rate is endRate_ for the last endRate_(dim) fine indices of
// the direction and coarseRate_ otherwise.
275 for(
int dim = 0; dim < 3; ++dim) {
276 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
277 rem = nodeFineTuple[dim] % coarseRate_(dim);
278 if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) {
279 rate = coarseRate_(dim);
281 rate = endRate_(dim);
283 if(rem > (rate / 2)) {++nodeCoarseTuple[dim];}
// Coarse per-direction indices -> coarse local id (written to the last argument).
287 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
288 coarseNodeCoarseLID);
// Each row holds exactly one entry, so the row pointer at position row+1 is
// row+1 and the row's single column is the same dof on the coarse node.
291 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
292 rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1;
293 colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof;
// computeGraphRowPtrFunctor constructor (signature per the reference entry
// near the end of this listing; only part of the parameter list is visible).
// Stores the dof count, interpolation-point count, local row count, the
// coarsening-rate and fine-size views and the rowPtr output view, and
// initializes geoData_ from the dereferenced geoData handle. The body is empty.
298 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
301 const LO dofsPerNode,
302 const int numInterpolationPoints,
303 const LO numLocalRows,
307 geoData_(*geoData), dofsPerNode_(dofsPerNode),
308 numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows),
309 coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {}
// computeGraphRowPtrFunctor::operator()(rowIdx, update, final) (signature per
// the reference entry near the end of this listing; its head and the closing
// braces are missing here). Scan body: stores the running entry total as the
// row's start offset, then adds the number of entries in this row.
311 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
312 KOKKOS_INLINE_FUNCTION
// Written before this row's count is added, so rowPtr_(rowIdx) holds the sum
// over preceding rows. NOTE(review): presumably guarded by `final` on a line
// missing from this listing — confirm against the full source.
321 rowPtr_(rowIdx) = update;
// Indices at or past numLocalRows_ contribute nothing to the total.
323 if (rowIdx < numLocalRows_) {
324 LO nodeIdx = rowIdx / dofsPerNode_;
325 bool allCoarse =
true;
326 LO nodeFineTuple[3] = {0, 0, 0};
327 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// The node counts as "all coarse" when, in every direction, its fine index is
// a multiple of the coarsening rate or is the last fine index of the direction.
328 for(
int dim = 0; dim < 3; ++dim) {
329 const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
332 allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
// Such rows get a single entry; every other row gets numInterpolationPoints_.
334 update += (allCoarse ? 1 : numInterpolationPoints_);
// computeGraphDataLinearFunctor constructor (signature per the reference
// entry near the end of this listing; only part of the parameter list is
// visible). Stores the dimension count, node/dof/interpolation-point counts,
// the rate and size views, and the rowPtr/colIndex views, and initializes
// geoData_ from the dereferenced geoData handle.
// NOTE(review): whatever follows the opening brace is not visible here.
338 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 const int numDimensions,
342 const LO numGhostedNodes,
343 const LO dofsPerNode,
344 const int numInterpolationPoints,
351 geoData_(*geoData), numDimensions_(numDimensions),
352 numGhostedNodes_(numGhostedNodes),
353 dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints),
354 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
355 ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) {
// computeGraphDataLinearFunctor::operator()(nodeIdx) (signature per the
// reference entries near the end of this listing; its head, the branch lines
// around the two fill loops and the closing braces are missing here).
// Fills the column indices of every dof row of one fine node, using the row
// offsets already stored in rowPtr_.
359 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
360 KOKKOS_INLINE_FUNCTION
363 LO nodeFineTuple[3] = {0, 0, 0};
364 LO nodeCoarseTuple[3] = {0, 0, 0};
// Fine local id -> per-direction fine indices.
367 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
369 LO coarseNodeCoarseLID;
370 bool allCoarse =
false;
// Per-direction coarse index by integer division (no rounding up here).
371 for(
int dim = 0; dim < 3; ++dim) {
372 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
// A row with exactly one entry marks the node as coinciding with a coarse node.
// NOTE(review): rowPtr_ is indexed by nodeIdx here but by
// nodeIdx*dofsPerNode_ + dof in the loops below; the two agree only when
// dofsPerNode_ == 1 — confirm the intended index.
374 if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse =
true;}
376 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
377 coarseNodeCoarseLID);
// Single-entry fill: the row's only column is the same dof on the coarse node.
// Presumably the allCoarse branch (the branch line is not visible) — TODO confirm.
381 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
382 colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof;
// Step back one coarse index when sitting on the last ghosted coarse index of
// a direction; presumably so the "+1" neighbours used below stay in range.
386 for(
int dim = 0; dim < numDimensions_; ++dim) {
387 if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; }
// Multi-entry fill: the coarse local ids of the surrounding coarse nodes are
// written straight into consecutive colIndex_ slots of each dof row: 2 ids in
// 1D, 4 in 2D, 8 in 3D, with the first direction varying fastest.
// NOTE(review): unlike the single-entry fill above, no "*dofsPerNode_ + dof"
// offset is applied to these ids in the visible code — confirm intended.
392 for(LO dof = 0; dof < dofsPerNode_; ++dof) {
393 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0));
394 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1));
395 if(numDimensions_ > 1) {
396 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2));
397 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3));
398 if(numDimensions_ > 2) {
399 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4));
400 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5));
401 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6));
402 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7));
typename local_graph_type::row_map_type::non_const_type non_const_row_map_type
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
typename Kokkos::View< const int[3], memory_space > constIntTupleView
typename LWGraph_kokkos::local_graph_type local_graph_type
basic_FancyOStream & setShowProcRank(const bool showProcRank)
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
decltype(std::declval< LOVector >().template getLocalView< memory_space >()) LOVectorView
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
void BuildAggregates(const Teuchos::ParameterList &params, const LWGraph_kokkos &graph, Aggregates_kokkos &aggregates, Kokkos::View< unsigned *, memory_space > &aggStat, LO &numNonAggregatedNodes) const
Build aggregates object.
typename Kokkos::View< const LO[3], memory_space > constLOTupleView
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename std::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=nullptr)
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, memory_space > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename std::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=nullptr)
Timer to be used in non-factories.
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Build a CrsGraph instead of aggregates.
typename local_graph_type::entries_type entries_type