46 #ifndef MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
47 #define MUELU_AGGREGATIONSTRUCTUREDALGORITHM_KOKKOS_DEF_HPP
49 #include <Teuchos_Comm.hpp>
50 #include <Teuchos_CommHelpers.hpp>
52 #include <Xpetra_MapFactory.hpp>
53 #include <Xpetra_Map.hpp>
60 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "MueLu_Aggregates.hpp"
62 #include "MueLu_IndexManager_kokkos.hpp"
67 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
71 Kokkos::View<unsigned*, device_type>& aggStat,
72 LO& numNonAggregatedNodes)
const {
73 Monitor m(*
this,
"BuildAggregates");
76 if (
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
77 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
89 *out <<
"Loop over fine nodes and assign them to an aggregate and a rank" << std::endl;
90 LO numAggregatedNodes;
96 Kokkos::parallel_reduce(
"StructuredAggregation: fill aggregates data",
97 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
101 *out <<
"numCoarseNodes= " << numCoarseNodes
102 <<
", numAggregatedNodes= " << numAggregatedNodes << std::endl;
103 numNonAggregatedNodes = numNonAggregatedNodes - numAggregatedNodes;
107 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
111 Monitor m(*
this,
"BuildGraphP");
114 if (
const char* dbg = std::getenv(
"MUELU_STRUCTUREDALGORITHM_DEBUG")) {
115 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
122 int numInterpolationPoints = 0;
124 numInterpolationPoints = 1;
129 *out <<
"numInterpolationPoints=" << numInterpolationPoints << std::endl;
133 const LO numNnzEntries = dofsPerNode * (numCoarseNodes + numInterpolationPoints * (numLocalFineNodes - numCoarseNodes));
136 entries_type colIndex(
"Prolongator graph, colIndices", numNnzEntries);
138 *out <<
"Compute prolongatorGraph data" << std::endl;
148 Kokkos::parallel_for(
"Structured Aggregation: compute loca graph data",
149 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
158 numInterpolationPoints,
163 Kokkos::parallel_scan(
"Structured Aggregation: compute rowPtr for prolongator graph",
164 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes + 1),
171 numInterpolationPoints,
178 Kokkos::parallel_for(
"Structured Aggregation: compute loca graph data",
179 Kokkos::RangePolicy<execution_space>(0, numLocalFineNodes),
187 *out <<
"Compute domain and column maps of the CrsGraph" << std::endl;
195 myGraph = CrsGraphFactory::Build(myLocalGraph, graph.
GetDomainMap(), colMap,
200 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
204 Kokkos::View<unsigned*, device_type> aggStat,
210 , vertex2AggID_(vertex2AggID)
211 , procWinner_(procWinner) {}
213 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
218 LO coarseNodeCoarseLID;
219 LO nodeFineTuple[3], coarseIdx[3];
220 auto coarseRate = geoData_.getCoarseningRates();
221 auto endRate = geoData_.getCoarseningEndRates();
222 auto lFineNodesPerDir = geoData_.getLocalFineNodesPerDir();
224 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
226 for (
int dim = 0; dim < 3; ++dim) {
227 coarseIdx[dim] = nodeFineTuple[dim] / coarseRate(dim);
228 rem = nodeFineTuple[dim] % coarseRate(dim);
229 rate = (nodeFineTuple[dim] < lFineNodesPerDir(dim) - endRate(dim)) ? coarseRate(dim) : endRate(dim);
230 if (rem > (rate / 2)) {
235 geoData_.getCoarseTuple2CoarseLID(coarseIdx[0], coarseIdx[1], coarseIdx[2],
236 coarseNodeCoarseLID);
238 vertex2AggID_(nodeIdx, 0) = coarseNodeCoarseLID;
239 procWinner_(nodeIdx, 0) = myRank_;
241 ++lNumAggregatedNodes;
245 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
249 const LO NumGhostedNodes,
250 const LO dofsPerNode,
257 , numGhostedNodes_(NumGhostedNodes)
258 , dofsPerNode_(dofsPerNode)
259 , coarseRate_(coarseRate)
261 , lFineNodesPerDir_(lFineNodesPerDir)
263 , colIndex_(colIndex) {
266 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
269 LO nodeFineTuple[3] = {0, 0, 0};
270 LO nodeCoarseTuple[3] = {0, 0, 0};
273 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
277 LO rem, rate, coarseNodeCoarseLID;
278 for (
int dim = 0; dim < 3; ++dim) {
279 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
280 rem = nodeFineTuple[dim] % coarseRate_(dim);
281 if (nodeFineTuple[dim] < (lFineNodesPerDir_(dim) - endRate_(dim))) {
282 rate = coarseRate_(dim);
284 rate = endRate_(dim);
286 if (rem > (rate / 2)) {
287 ++nodeCoarseTuple[dim];
292 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
293 coarseNodeCoarseLID);
296 for (
LO dof = 0; dof < dofsPerNode_; ++dof) {
297 rowPtr_(nodeIdx * dofsPerNode_ + dof + 1) = nodeIdx * dofsPerNode_ + dof + 1;
298 colIndex_(nodeIdx * dofsPerNode_ + dof) = coarseNodeCoarseLID * dofsPerNode_ + dof;
303 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
306 const LO dofsPerNode,
307 const int numInterpolationPoints,
308 const LO numLocalRows,
313 , dofsPerNode_(dofsPerNode)
314 , numInterpolationPoints_(numInterpolationPoints)
315 , numLocalRows_(numLocalRows)
316 , coarseRate_(coarseRate)
317 , lFineNodesPerDir_(lFineNodesPerDir)
320 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
329 rowPtr_(rowIdx) = update;
331 if (rowIdx < numLocalRows_) {
332 LO nodeIdx = rowIdx / dofsPerNode_;
333 bool allCoarse =
true;
334 LO nodeFineTuple[3] = {0, 0, 0};
335 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
336 for (
int dim = 0; dim < 3; ++dim) {
337 const LO rem = nodeFineTuple[dim] % coarseRate_(dim);
340 allCoarse = (allCoarse && ((rem == 0) || (nodeFineTuple[dim] == lFineNodesPerDir_(dim) - 1)));
342 update += (allCoarse ? 1 : numInterpolationPoints_);
346 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
349 const int numDimensions,
350 const LO numGhostedNodes,
351 const LO dofsPerNode,
352 const int numInterpolationPoints,
360 , numDimensions_(numDimensions)
361 , numGhostedNodes_(numGhostedNodes)
362 , dofsPerNode_(dofsPerNode)
363 , numInterpolationPoints_(numInterpolationPoints)
364 , coarseRate_(coarseRate)
366 , lFineNodesPerDir_(lFineNodesPerDir)
367 , ghostedNodesPerDir_(ghostedNodesPerDir)
369 , colIndex_(colIndex) {
372 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
375 LO nodeFineTuple[3] = {0, 0, 0};
376 LO nodeCoarseTuple[3] = {0, 0, 0};
379 geoData_.getFineLID2FineTuple(nodeIdx, nodeFineTuple);
381 LO coarseNodeCoarseLID;
382 bool allCoarse =
false;
383 for (
int dim = 0; dim < 3; ++dim) {
384 nodeCoarseTuple[dim] = nodeFineTuple[dim] / coarseRate_(dim);
386 if (rowPtr_(nodeIdx + 1) == rowPtr_(nodeIdx) + 1) {
390 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2],
391 coarseNodeCoarseLID);
395 for (
LO dof = 0; dof < dofsPerNode_; ++dof) {
396 colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof)) = coarseNodeCoarseLID * dofsPerNode_ + dof;
399 for (
int dim = 0; dim < numDimensions_; ++dim) {
400 if (nodeCoarseTuple[dim] == ghostedNodesPerDir_(dim) - 1) {
401 --nodeCoarseTuple[dim];
407 for (
LO dof = 0; dof < dofsPerNode_; ++dof) {
408 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 0));
409 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 1));
410 if (numDimensions_ > 1) {
411 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 2));
412 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2], colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 3));
413 if (numDimensions_ > 2) {
414 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 4));
415 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1], nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 5));
416 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0], nodeCoarseTuple[1] + 1, nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 6));
417 geoData_.getCoarseTuple2CoarseLID(nodeCoarseTuple[0] + 1, nodeCoarseTuple[1] + 1, nodeCoarseTuple[2] + 1, colIndex_(rowPtr_(nodeIdx * dofsPerNode_ + dof) + 7));
fillAggregatesFunctor(RCP< IndexManager_kokkos > geoData, const int myRank, Kokkos::View< unsigned *, device_type > aggStat, LOVectorView vertex2AggID, LOVectorView procWinner)
typename local_graph_type::row_map_type::non_const_type non_const_row_map_type
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
Lightweight MueLu representation of a compressed row storage graph.
typename LWGraph_kokkos::local_graph_type local_graph_type
decltype(std::declval< LOVector >().getDeviceLocalView(Xpetra::Access::ReadWrite)) LOVectorView
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
Container class for aggregation information.
int getInterpolationOrder() const
basic_FancyOStream & setShowProcRank(const bool showProcRank)
KOKKOS_INLINE_FUNCTION intTupleView getCoarseningEndRates() const
KOKKOS_INLINE_FUNCTION intTupleView getCoarseningRates() const
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx) const
LO getNumCoarseNodes() const
int getNumDimensions() const
LO getNumLocalFineNodes() const
computeGraphRowPtrFunctor(RCP< IndexManager_kokkos > geoData, const LO dofsPerNode, const int numInterpolationPoints, const LO numLocalRows, constIntTupleView coarseRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
typename Kokkos::View< const LO[3], device_type > constLOTupleView
basic_FancyOStream & setShowAllFrontMatter(const bool showAllFrontMatter)
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
KOKKOS_INLINE_FUNCTION void operator()(const LO rowIdx, GO &update, const bool final) const
computeGraphDataLinearFunctor(RCP< IndexManager_kokkos > geoData, const int numDimensions, const LO numGhostedNodes, const LO dofsPerNode, const int numInterpolationPoints, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, constLOTupleView ghostedNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
Timer to be used in non-factories.
const RCP< const Map > GetDomainMap() const
KOKKOS_INLINE_FUNCTION void operator()(const LO nodeIdx, LO &lNumAggregatedNodes) const
void BuildAggregates(const Teuchos::ParameterList ¶ms, const LWGraph_kokkos &graph, Aggregates &aggregates, Kokkos::View< unsigned *, device_type > &aggStat, LO &numNonAggregatedNodes) const
Build aggregates object.
KOKKOS_INLINE_FUNCTION LOTupleView getCoarseNodesPerDir() const
KOKKOS_INLINE_FUNCTION LOTupleView getLocalFineNodesPerDir() const
computeGraphDataConstantFunctor(RCP< IndexManager_kokkos > geoData, const LO numGhostedNodes, const LO dofsPerNode, constIntTupleView coarseRate, constIntTupleView endRate, constLOTupleView lFineNodesPerDir, non_const_row_map_type rowPtr, entries_type colIndex)
typename Kokkos::View< const int[3], device_type > constIntTupleView
const RCP< const Teuchos::Comm< int > > GetComm() const
void BuildGraph(const LWGraph_kokkos &graph, RCP< IndexManager_kokkos > &geoData, const LO dofsPerNode, RCP< CrsGraph > &myGraph) const
Build a CrsGraph instead of aggregates.
typename local_graph_type::entries_type entries_type
RCP< IndexManager_kokkos > & GetIndexManagerKokkos()
Get the index manager used by structured aggregation algorithms. This has to be done by the aggregati...