46 #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
50 #include <Teuchos_Comm.hpp>
51 #include <Teuchos_CommHelpers.hpp>
61 #include "MueLu_LWGraph_kokkos.hpp"
62 #include "MueLu_Aggregates_kokkos.hpp"
63 #include "MueLu_IndexManager_kokkos.hpp"
// BuildAggregates: assigns local fine nodes to aggregates and owning ranks,
// then lowers the caller's count of non-aggregated nodes by the number of
// nodes aggregated here.
// NOTE(review): the declarator line carrying the function name is not present
// between the template header and the parameter list below; the name used
// here is taken from the Monitor label -- confirm against the class header.
68 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
71 Aggregates_kokkos& aggregates,
73 LO& numNonAggregatedNodes)
const {
// Monitor object labelled with this routine's name.
74 Monitor m(*
this,
"BuildAggregates");
// When the MUELU_STRUCTUREDALGORITHM_DEBUG environment variable is set,
// diagnostics are routed to std::cout through a Teuchos fancy ostream.
77 if(
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
78 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
// Node counts come from geoData; the two views are the local views of the
// vertex-to-aggregate and proc-winner vectors held by `aggregates`.
85 const LO numLocalFineNodes= geoData->getNumLocalFineNodes();
86 const LO numCoarseNodes = geoData->getNumCoarseNodes();
87 LOVectorView vertex2AggId = aggregates.GetVertex2AggId()->template getLocalView<memory_space>();
88 LOVectorView procWinner = aggregates.GetProcWinner() ->template getLocalView<memory_space>();
90 *out <<
"Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
// Declared without an initializer.
// NOTE(review): presumably filled by a reduction whose launch is not visible
// here (only its rank argument on the next line is) -- confirm.
91 LO numAggregatedNodes;
93 graph.GetComm()->getRank(),
102 *out <<
"numCoarseNodes= " << numCoarseNodes
103 <<
", numAggregatedNodes= " << numAggregatedNodes << std::endl;
// In/out reference parameter: remove the nodes aggregated by this call from
// the caller's running total.
104 numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
// BuildGraph: sizes and fills the prolongator graph arrays (rowPtr/colIndex),
// wraps them in a local graph, builds a column map and finally creates the
// CrsGraph stored in myGraph.
// NOTE(review): the declarator line is not present below; the Monitor label
// reads "BuildGraphP" -- confirm that this is the intended label.
109 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 Monitor m(*
this,
"BuildGraphP");
// When the MUELU_STRUCTUREDALGORITHM_DEBUG environment variable is set,
// diagnostics are routed to std::cout through a Teuchos fancy ostream.
116 if(
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
117 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
// Interpolation points per fine node: 1 for order 0, 2^numDimensions for
// order 1. For any other order the value stays at its initial 0.
126 int numInterpolationPoints = 0;
127 if(geoData->getInterpolationOrder() == 0) {
128 numInterpolationPoints = 1;
129 }
else if(geoData->getInterpolationOrder() == 1) {
131 numInterpolationPoints = 1 << geoData->getNumDimensions();
133 *out <<
"numInterpolationPoints=" << numInterpolationPoints << std::endl;
// Entry count used to size colIndex: one entry per coarse node plus
// numInterpolationPoints entries for each remaining fine node, all
// multiplied by dofsPerNode.
135 const LO numLocalFineNodes = geoData->getNumLocalFineNodes();
136 const LO numCoarseNodes = geoData->getNumCoarseNodes();
137 const LO numNnzEntries = dofsPerNode*(numCoarseNodes + numInterpolationPoints
138 *(numLocalFineNodes - numCoarseNodes));
141 entries_type colIndex(
"Prolongator graph, colIndices", numNnzEntries);
143 *out <<
"Compute prolongatorGraph data" << std::endl;
// The fill path is selected by interpolation order (0 or 1).
// NOTE(review): only argument fragments of the functor constructions and
// kernel launches are visible in the lines below; the parallel_scan label
// says it computes rowPtr for the prolongator graph.
144 if(geoData->getInterpolationOrder() == 0) {
148 geoData->getCoarseningRates(),
149 geoData->getCoarseningEndRates(),
150 geoData->getLocalFineNodesPerDir(),
156 }
else if(geoData->getInterpolationOrder() == 1) {
163 numInterpolationPoints,
165 geoData->getCoarseningRates(),
166 geoData->getLocalFineNodesPerDir(),
168 Kokkos::parallel_scan(
"Structured Aggregation: compute rowPtr for prolongator graph",
173 geoData->getNumDimensions(),
176 numInterpolationPoints,
177 geoData->getCoarseningRates(),
178 geoData->getCoarseningEndRates(),
179 geoData->getLocalFineNodesPerDir(),
180 geoData->getCoarseNodesPerDir(),
// Local graph object constructed from the column-index and row-pointer arrays.
188 local_graph_type myLocalGraph(colIndex, rowPtr);
192 *out <<
"Compute domain and column maps of the CrsGraph" << std::endl;
// colMap reuses the library, index base and communicator of the input
// graph's domain map (the remaining Build arguments are not visible here).
193 colMap = MapFactory::Build(graph.GetDomainMap()->lib(),
196 graph.GetDomainMap()->getIndexBase(),
197 graph.GetDomainMap()->getComm());
// Final CrsGraph: built from the local graph, the input graph's domain map
// (passed second and last) and colMap (passed third and fourth).
200 myGraph = CrsGraphFactory::Build(myLocalGraph, graph.GetDomainMap(), colMap,
201 colMap, graph.GetDomainMap());
// fillAggregatesFunctor constructor: stores the dereferenced geoData object,
// the rank, the aggStat argument and the two views (vertex2AggID, procWinner)
// as members. The body is empty.
// NOTE(review): the declarator line and the leading parameters are not
// present below; only the last two parameters are visible.
206 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
211 LOVectorView vertex2AggID,
212 LOVectorView procWinner) :
213 geoData_(*geoData), myRank_(myRank), aggStat_(aggStat),
214 vertex2AggID_(vertex2AggID), procWinner_(procWinner) {}
// fillAggregatesFunctor call operator: maps one fine node (nodeIdx) to a
// coarse node, records that coarse LID and this rank for the node, and
// increments the lNumAggregatedNodes counter.
// NOTE(review): the declarator line and the declarations of `rem` and `rate`
// are not present below.
216 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
217 KOKKOS_INLINE_FUNCTION
222 LO coarseNodeCoarseLID;
223 LO nodeFineTuple[3], coarseIdx[3];
224 auto coarseRate = geoData_.getCoarseningRates();
225 auto endRate = geoData_.getCoarseningEndRates();
226 auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
// Convert the fine local ID into a per-direction index tuple.
228 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// Per direction: integer-divide by the coarsening rate, then move up to the
// next coarse index when the remainder exceeds half of the applicable rate.
// The applicable rate is coarseRate while the fine index is below
// lFineNodesPerDir - endRate, and endRate otherwise.
230 for(
int dim = 0; dim < 3; ++dim) {
231 coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
232 rem = nodeFineTuple[dim] % coarseRate(dim);
233 rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
234 if(rem > (rate / 2)) {++coarseIdx[dim];}
// Convert the coarse tuple to a coarse local ID; coarseNodeCoarseLID is
// passed uninitialized and read afterwards, so the call is expected to set it.
237 geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
238 coarseNodeCoarseLID);
// Record the aggregate (coarse LID) and the owning rank in column 0.
240 vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
241 procWinner_(nodeIdx, 0) = myRank_;
// Contribution of this node to the aggregated-node count.
243 ++lNumAggregatedNodes;
// computeGraphDataConstantFunctor constructor: stores the ghosted node count,
// dofs per node, coarsening rates, end rates, local fine nodes per direction
// and the rowPtr/colIndex arrays as members.
// NOTE(review): the declarator line, some parameters and the constructor
// body are not present below.
247 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
251 const LO NumGhostedNodes,
252 const LO dofsPerNode,
258 numGhostedNodes_(NumGhostedNodes), dofsPerNode_(dofsPerNode),
259 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
260 rowPtr_(rowPtr), colIndex_(colIndex) {
// computeGraphDataConstantFunctor call operator: for one fine node, finds the
// coarse node it rounds to and writes one graph entry per degree of freedom.
// NOTE(review): the declarator line is not present below.
264 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
265 KOKKOS_INLINE_FUNCTION
268 LO nodeFineTuple[3] = {0, 0, 0};
269 LO nodeCoarseTuple[3] = {0, 0, 0};
// Convert the fine local ID into a per-direction index tuple.
272 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// Per direction: integer-divide by the coarsening rate, then move up to the
// next coarse index when the remainder exceeds half of `rate`.
// `rate` is assigned coarseRate_ and endRate_ below.
// NOTE(review): the line separating the two assignments (presumably the
// else branch) is not present -- confirm.
276 LO rem, rate, coarseNodeCoarseLID;
277 for(
int dim = 0; dim < 3; ++dim) {
278 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
279 rem = nodeFineTuple[dim] % coarseRate_(dim);
280 if( nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim)) ) {
281 rate = coarseRate_(dim);
283 rate = endRate_(dim);
285 if(rem > (rate / 2)) {++nodeCoarseTuple[dim];}
// Convert the coarse tuple to a coarse local ID (written to coarseNodeCoarseLID).
289 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
290 coarseNodeCoarseLID);
// One entry per row: row r = nodeIdx*dofsPerNode_ + dof gets rowPtr_(r+1) = r+1
// and a single column index coarseNodeCoarseLID*dofsPerNode_ + dof.
293 for(
LO dof = 0; dof < dofsPerNode_; ++dof) {
294 rowPtr_(nodeIdx*dofsPerNode_ + dof + 1) = nodeIdx*dofsPerNode_ + dof + 1;
295 colIndex_(nodeIdx*dofsPerNode_ + dof) = coarseNodeCoarseLID*dofsPerNode_ + dof;
// computeGraphRowPtrFunctor constructor: stores the dereferenced geoData
// object, dofs per node, interpolation point count, local row count,
// coarsening rates, local fine nodes per direction and rowPtr as members.
// The body is empty.
// NOTE(review): the declarator line and some parameters are not present below.
300 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
303 const LO dofsPerNode,
304 const int numInterpolationPoints,
305 const LO numLocalRows,
309 geoData_(*geoData), dofsPerNode_(dofsPerNode),
310 numInterpolationPoints_(numInterpolationPoints), numLocalRows_(numLocalRows),
311 coarseRate_(coarseRate), lFineNodesPerDir_(lFineNodesPerDir), rowPtr_(rowPtr) {}
// computeGraphRowPtrFunctor call operator: stores the running value `update`
// in rowPtr_(rowIdx) and then adds this row's entry count to `update`.
// NOTE(review): the declarator line is not present below, and any guard
// around the rowPtr_ write is not visible -- confirm.
313 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
314 KOKKOS_INLINE_FUNCTION
323 rowPtr_(rowIdx) = update;
// Rows at or beyond numLocalRows_ contribute nothing to the running total.
325 if (rowIdx < numLocalRows_) {
// Rows are grouped by node: dofsPerNode_ consecutive rows share one node.
326 LO nodeIdx = rowIdx / dofsPerNode_;
327 bool allCoarse =
true;
328 LO nodeFineTuple[3] = {0, 0, 0};
329 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
// allCoarse stays true only if, in every direction, the fine index is a
// multiple of the coarsening rate or is the last local fine index.
330 for(
int dim = 0; dim < 3; ++dim) {
331 const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
334 allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
// Such a row holds 1 entry; any other row holds numInterpolationPoints_ entries.
336 update += (allCoarse ? 1 : numInterpolationPoints_);
// computeGraphDataLinearFunctor constructor: stores the dereferenced geoData
// object, dimension count, ghosted node count, dofs per node, interpolation
// point count, coarsening and end rates, local fine / ghosted nodes per
// direction and the rowPtr/colIndex arrays as members.
// NOTE(review): the declarator line, some parameters and the constructor
// body are not present below.
340 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
343 const int numDimensions,
344 const LO numGhostedNodes,
345 const LO dofsPerNode,
346 const int numInterpolationPoints,
353 geoData_(*geoData), numDimensions_(numDimensions),
354 numGhostedNodes_(numGhostedNodes),
355 dofsPerNode_(dofsPerNode), numInterpolationPoints_(numInterpolationPoints),
356 coarseRate_(coarseRate), endRate_(endRate), lFineNodesPerDir_(lFineNodesPerDir),
357 ghostedNodesPerDir_(ghostedNodesPerDir), rowPtr_(rowPtr), colIndex_(colIndex) {
// computeGraphDataLinearFunctor call operator: fills the column indices of
// the rows belonging to one fine node, writing either a single coarse column
// or the 2/4/8 corner coarse nodes of the enclosing coarse cell.
// NOTE(review): the declarator line and the branch lines that select between
// the single-column and multi-column paths are not present below.
361 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
362 KOKKOS_INLINE_FUNCTION
365 LO nodeFineTuple[3] = {0, 0, 0};
366 LO nodeCoarseTuple[3] = {0, 0, 0};
// Convert the fine local ID into a per-direction index tuple.
369 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
371 LO coarseNodeCoarseLID;
372 bool allCoarse =
false;
// Lower coarse index in each direction (integer division, no rounding).
373 for(
int dim = 0; dim < 3; ++dim) {
374 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
// A row span of exactly one entry marks the node as coarse.
// NOTE(review): rowPtr_ is indexed with nodeIdx here, whereas the writes
// below index it with nodeIdx*dofsPerNode_ + dof -- confirm this is intended
// when dofsPerNode_ > 1.
376 if(rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {allCoarse =
true;}
378 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
379 coarseNodeCoarseLID);
// Single-column write: each dof row gets column coarseNodeCoarseLID*dofsPerNode_ + dof.
383 for(
LO dof = 0; dof < dofsPerNode_; ++dof) {
384 colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)) = coarseNodeCoarseLID*dofsPerNode_ + dof;
// Step back one coarse index in any direction where the tuple sits on the
// last ghosted node, so the "+1" neighbours used below stay in range.
388 for(
int dim = 0; dim < numDimensions_; ++dim) {
389 if(nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) { --nodeCoarseTuple[dim]; }
// Corner writes: slots +0/+1 always, +2/+3 when numDimensions_ > 1 and
// +4..+7 when numDimensions_ > 2. The colIndex_ slot is passed as the call's
// last argument (presumably its output parameter).
// NOTE(review): unlike the single-column write above, the values stored here
// are not scaled by dofsPerNode_ or offset by dof -- confirm.
394 for(
LO dof = 0; dof < dofsPerNode_; ++dof) {
395 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+0));
396 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+1));
397 if(numDimensions_ > 1) {
398 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+2));
399 geoData_.getCoarseTuple2CoarseLID( nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+3));
400 if(numDimensions_ > 2) {
401 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+4));
402 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1], nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+5));
403 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+6));
404 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0]+1, nodeCoarseTuple[1]+1, nodeCoarseTuple[2]+1, colIndex_(rowPtr_(nodeIdx*dofsPerNode_ + dof)+7));
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
basic_FancyOStream & setShowProcRank(const bool showProcRank)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
local_graph_type::entries_type entries_type
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
local_graph_type::row_map_type::non_const_type non_const_row_map_type
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, memory_space > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)
Timer to be used in non-factories.
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList &, const LWGraph_kokkos &, Aggregates_kokkos &, std::vector< unsigned > &, LO &) const
Local aggregation.
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
LWGraph_kokkos::local_graph_type local_graph_type
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Local aggregation.