46 #ifndef MUELU_AGGREGATES_DEF_HPP
47 #define MUELU_AGGREGATES_DEF_HPP
49 #include <Xpetra_Map.hpp>
51 #include <Xpetra_MultiVectorFactory.hpp>
54 #include "MueLu_LWGraph_kokkos.hpp"
56 #include "MueLu_LWGraph.hpp"
62 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
65 numGlobalAggregates_ = 0;
67 vertex2AggId_ = LOMultiVectorFactory::Build(graph.
GetImportMap(), 1);
70 procWinner_ = LOVectorFactory::Build(graph.
GetImportMap());
76 aggregatesIncludeGhosts_ =
true;
79 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
83 numGlobalAggregates_ = 0;
85 vertex2AggId_ = LOMultiVectorFactory::Build(graph.
GetImportMap(), 1);
88 procWinner_ = LOVectorFactory::Build(graph.
GetImportMap());
94 aggregatesIncludeGhosts_ =
true;
97 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
101 numGlobalAggregates_ = 0;
103 vertex2AggId_ = LOMultiVectorFactory::Build(map, 1);
106 procWinner_ = LOVectorFactory::Build(map);
112 aggregatesIncludeGhosts_ =
true;
115 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
118 if (aggregateSizes_.size() && !forceRecompute) {
119 return aggregateSizes_;
125 int myPID = GetMap()->getComm()->getRank();
127 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
128 auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly);
131 Kokkos::parallel_for(
132 "MueLu:Aggregates:ComputeAggregateSizes:for",
range_type(0, procWinner.size()),
133 KOKKOS_LAMBDA(
const LO i) {
134 if (procWinner(i, 0) == myPID)
135 aggregateSizesAtomic(vertex2AggId(i, 0))++;
138 aggregateSizes_ = aggregateSizes;
140 return aggregateSizes;
144 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
148 auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute);
151 if (!aggregateSizesHost_.is_allocated()) {
152 aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes);
153 Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
157 Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
163 return aggregateSizesArrayRCP;
166 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
169 using row_map_type =
typename local_graph_type::row_map_type;
170 using entries_type =
typename local_graph_type::entries_type;
171 using size_type =
typename local_graph_type::size_type;
173 auto numAggregates = numAggregates_;
175 if (static_cast<LO>(graph_.numRows()) == numAggregates)
178 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
179 auto procWinner = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly);
180 auto sizes = ComputeAggregateSizes();
183 typename row_map_type::non_const_type
rows(
"Agg_rows", numAggregates + 1);
186 Kokkos::parallel_scan(
187 "MueLu:Aggregates:GetGraph:compute_rows",
range_type(0, numAggregates),
188 KOKKOS_LAMBDA(
const LO i,
LO& update,
const bool& final_pass) {
191 rows(i + 1) = update;
194 decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing(
"Agg_offsets"), numAggregates + 1);
195 Kokkos::deep_copy(offsets, rows);
197 int myPID = GetMap()->getComm()->getRank();
201 Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
202 typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
203 Kokkos::deep_copy(numNNZ_host, numNNZ_device);
204 numNNZ = numNNZ_host();
206 typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing(
"Agg_cols"), numNNZ);
208 Kokkos::parallel_reduce(
209 "MueLu:Aggregates:GetGraph:compute_cols",
range_type(0, procWinner.size()),
210 KOKKOS_LAMBDA(
const LO i,
size_t& nnz) {
211 if (procWinner(i, 0) == myPID) {
212 typedef typename std::remove_reference<decltype(offsets(0))>::type atomic_incr_type;
213 auto idx = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), atomic_incr_type(1));
220 "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ <<
" != " << realnnz <<
" = realnnz");
227 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
229 LO numAggs = GetNumAggregates();
230 LO numNodes = vertex2AggId_->getLocalLength();
231 auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
232 typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(
true);
235 aggPtr =
LO_view(
"aggPtr", numAggs + 1);
236 aggNodes =
LO_view(
"aggNodes", numNodes);
237 LO_view aggCurr(
"agg curr", numAggs + 1);
240 Kokkos::parallel_scan(
241 "MueLu:Aggregates:ComputeNodesInAggregate:scan",
range_type(0, numAggs + 1),
242 KOKKOS_LAMBDA(
const LO aggIdx,
LO& aggOffset,
bool final_pass) {
244 if (aggIdx < numAggs)
245 count = aggSizes(aggIdx);
247 aggPtr(aggIdx) = aggOffset;
248 aggCurr(aggIdx) = aggOffset;
249 if (aggIdx == numAggs)
250 aggCurr(numAggs) = 0;
256 LO numUnaggregated = 0;
257 Kokkos::parallel_reduce(
258 "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize",
range_type(0, numNodes),
259 KOKKOS_LAMBDA(
const LO nodeIdx,
LO& count) {
260 if (vertex2AggId(nodeIdx, 0) == INVALID)
264 unaggregated =
LO_view(
"unaggregated", numUnaggregated);
267 Kokkos::parallel_for(
268 "MueLu:Aggregates:ComputeNodesInAggregate:for",
range_type(0, numNodes),
269 KOKKOS_LAMBDA(
const LO nodeIdx) {
270 LO aggIdx = vertex2AggId(nodeIdx, 0);
271 if (aggIdx != INVALID) {
273 aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx;
276 unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = nodeIdx;
281 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
283 if (numGlobalAggregates_ == -1)
289 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
294 if (numGlobalAggregates_ == -1)
295 out0 <<
"Global number of aggregates: not computed " << std::endl;
297 out0 <<
"Global number of aggregates: " << numGlobalAggregates_ << std::endl;
301 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
303 if (numGlobalAggregates_ != -1) {
304 LO nAggregates = GetNumAggregates();
305 GO nGlobalAggregates;
306 MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (
GO)nAggregates, nGlobalAggregates);
307 SetNumGlobalAggregates(nGlobalAggregates);
309 return numGlobalAggregates_;
312 template <
class LocalOrdinal,
class GlobalOrdinal,
class Node>
315 return vertex2AggId_->getMap();
320 #endif // MUELU_AGGREGATES_DEF_HPP
Kokkos::View< LocalOrdinal *, device_type > aggregates_sizes_type
#define MueLu_sumAll(rcpComm, in, out)
Lightweight MueLu representation of a compressed row storage graph.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
Container class for aggregation information.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
const RCP< const Map > GetMap() const
returns (overlapping) map of aggregate/node distribution
MueLu::DefaultGlobalOrdinal GlobalOrdinal
typename LWGraph_kokkos::local_graph_type local_graph_type
#define MUELU_UNAGGREGATED
void ComputeNodesInAggregate(LO_view &aggPtr, LO_view &aggNodes, LO_view &unaggregated) const
Generates a compressed list of nodes in each aggregate, where the entries in aggNodes[aggPtr[i]] up t...
GO GetNumGlobalAggregatesComputeIfNeeded()
Get global number of aggregates.
Teuchos::ArrayRCP< LocalOrdinal > ComputeAggregateSizesArrayRCP(bool forceRecompute=false) const
Compute sizes of aggregates.
Kokkos::RangePolicy< local_ordinal_type, execution_space > range_type
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
const RCP< const Map > GetImportMap() const
Return overlapping import map (nodes).
Kokkos::View< local_ordinal_type *, device_type > LO_view
Lightweight MueLu representation of a compressed row storage graph.
Aggregates(const LWGraph &graph)
Standard constructor for Aggregates structure.
Exception throws to report errors in the internal logical of the program.
void print(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
std::string description() const
Return a simple one-line description of this object.
local_graph_type GetGraph() const
aggregates_sizes_type::const_type ComputeAggregateSizes(bool forceRecompute=false) const
Compute sizes of aggregates.
virtual std::string description() const
Return a simple one-line description of this object.