MueLu_Aggregates_def.hpp
// @HEADER
//
// ***********************************************************************
//
// MueLu: A package for multigrid based preconditioning
// Copyright 2012 Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact
// Jonathan Hu (jhu@sandia.gov)
// Andrey Prokopenko (aprokop@sandia.gov)
// Ray Tuminaro (rstumin@sandia.gov)
//
// ***********************************************************************
//
// @HEADER
#ifndef MUELU_AGGREGATES_DEF_HPP
#define MUELU_AGGREGATES_DEF_HPP

#include <Xpetra_Map.hpp>
#include <Xpetra_Vector.hpp>
#include <Xpetra_MultiVectorFactory.hpp>
#include <Xpetra_VectorFactory.hpp>

#include "MueLu_LWGraph_kokkos.hpp"

#include "MueLu_LWGraph.hpp"
#include "MueLu_Utilities_decl.hpp"
#include "MueLu_Aggregates_decl.hpp"

namespace MueLu {

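// Construct from a graph: allocate the per-node bookkeeping over the graph's
// overlapping import map, and mark every node as unaggregated and not yet
// assigned to (won by) any process.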
template <class LocalOrdinal, class GlobalOrdinal, class Node>
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::Aggregates(const LWGraph_kokkos& graph) {
  numAggregates_       = 0;
  numGlobalAggregates_ = 0;

  vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);

  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
  procWinner_->putScalar(MUELU_UNASSIGNED);

  isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);

  // slow but safe, force TentativePFactory to build column map for P itself
  aggregatesIncludeGhosts_ = true;
}

template <class LocalOrdinal, class GlobalOrdinal, class Node>
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::Aggregates(const LWGraph& graph) {
  numAggregates_       = 0;
  numGlobalAggregates_ = 0;

  vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);

  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
  procWinner_->putScalar(MUELU_UNASSIGNED);

  isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getLocalNumElements(), false);

  // slow but safe, force TentativePFactory to build column map for P itself
  aggregatesIncludeGhosts_ = true;
}

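// Construct directly from a map when no graph is available; the initialization
// is the same as for the graph-based constructors, but laid out over the given map.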
template <class LocalOrdinal, class GlobalOrdinal, class Node>
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::Aggregates(const RCP<const Map>& map) {
  numAggregates_       = 0;
  numGlobalAggregates_ = 0;

  vertex2AggId_ = LOMultiVectorFactory::Build(map, 1);
  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);

  procWinner_ = LOVectorFactory::Build(map);
  procWinner_->putScalar(MUELU_UNASSIGNED);

  isRoot_ = Teuchos::ArrayRCP<bool>(map->getLocalNumElements(), false);

  // slow but safe, force TentativePFactory to build column map for P itself
  aggregatesIncludeGhosts_ = true;
}

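// Count the locally owned nodes in each aggregate: every node won by this rank
// atomically increments the size of the aggregate it belongs to. The result is
// cached in aggregateSizes_ and only recomputed on request.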
template <class LocalOrdinal, class GlobalOrdinal, class Node>
typename Aggregates<LocalOrdinal, GlobalOrdinal, Node>::aggregates_sizes_type::const_type
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::ComputeAggregateSizes(bool forceRecompute) const {
  if (aggregateSizes_.size() && !forceRecompute) {
    return aggregateSizes_;

  } else {
    // It is necessary to initialize this to 0
    aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);

    int myPID = GetMap()->getComm()->getRank();

    auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
    auto procWinner   = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly);

    typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
    Kokkos::parallel_for(
        "MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0, procWinner.size()),
        KOKKOS_LAMBDA(const LO i) {
          if (procWinner(i, 0) == myPID)
            aggregateSizesAtomic(vertex2AggId(i, 0))++;
        });

    aggregateSizes_ = aggregateSizes;

    return aggregateSizes;
  }
}

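// Host-side accessor for the aggregate sizes: maintains a host mirror of the
// device view and returns it wrapped in a non-owning Teuchos::ArrayRCP backed
// by that cached mirror.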
template <class LocalOrdinal, class GlobalOrdinal, class Node>
Teuchos::ArrayRCP<LocalOrdinal>
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::
    ComputeAggregateSizesArrayRCP(bool forceRecompute) const {
  auto aggregateSizes = this->ComputeAggregateSizes(forceRecompute);

  // if this is the first time this is called, setup the host mirror and fill it
  if (!aggregateSizesHost_.is_allocated()) {
    aggregateSizesHost_ = Kokkos::create_mirror_view(aggregateSizes);
    Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
  } else {
    // otherwise, only update if we forced a recompute
    if (forceRecompute)
      Kokkos::deep_copy(aggregateSizesHost_, aggregateSizes);
  }

  // put the data in an ArrayRCP, but do not give it ownership of the data
  Teuchos::ArrayRCP<LocalOrdinal> aggregateSizesArrayRCP(aggregateSizesHost_.data(), 0, aggregateSizesHost_.extent(0), false);

  return aggregateSizesArrayRCP;
}

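// Build (and cache) a CRS-style local graph mapping each aggregate to the nodes
// it owns: an exclusive parallel_scan over the aggregate sizes yields the row
// offsets (e.g. sizes [2, 3, 1] give rows [0, 2, 5, 6]), and a per-aggregate
// atomic counter then scatters the node ids into the column array.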
template <class LocalOrdinal, class GlobalOrdinal, class Node>
typename Aggregates<LocalOrdinal, GlobalOrdinal, Node>::local_graph_type
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::GetGraph() const {
  using row_map_type = typename local_graph_type::row_map_type;
  using entries_type = typename local_graph_type::entries_type;
  using size_type    = typename local_graph_type::size_type;

  auto numAggregates = numAggregates_;

  if (static_cast<LO>(graph_.numRows()) == numAggregates)
    return graph_;

  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
  auto procWinner   = procWinner_->getDeviceLocalView(Xpetra::Access::ReadOnly);
  auto sizes        = ComputeAggregateSizes();

  // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
  typename row_map_type::non_const_type rows("Agg_rows", numAggregates + 1);  // rows(0) = 0 automatically

  // parallel_scan (exclusive)
  Kokkos::parallel_scan(
      "MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
      KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
        update += sizes(i);
        if (final_pass)
          rows(i + 1) = update;
      });

  decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates + 1);  // +1 is just for ease
  Kokkos::deep_copy(offsets, rows);

  int myPID = GetMap()->getComm()->getRank();

  size_type numNNZ;
  {
    Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
    typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
    Kokkos::deep_copy(numNNZ_host, numNNZ_device);
    numNNZ = numNNZ_host();
  }
  typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ);
  size_t realnnz = 0;
  Kokkos::parallel_reduce(
      "MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
      KOKKOS_LAMBDA(const LO i, size_t& nnz) {
        if (procWinner(i, 0) == myPID) {
          typedef typename std::remove_reference<decltype(offsets(0))>::type atomic_incr_type;
          auto idx  = Kokkos::atomic_fetch_add(&offsets(vertex2AggId(i, 0)), atomic_incr_type(1));
          cols(idx) = i;
          nnz++;
        }
      },
      realnnz);
  TEUCHOS_TEST_FOR_EXCEPTION(numNNZ != realnnz, Exceptions::RuntimeError,
                             "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz");

  graph_ = local_graph_type(cols, rows);

  return graph_;
}

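// Produce a compressed list of the nodes in each aggregate: aggPtr holds the
// offsets, aggNodes holds the node ids grouped by aggregate, and nodes whose
// aggregate id is invalid are collected separately in unaggregated.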
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void Aggregates<LocalOrdinal, GlobalOrdinal, Node>::ComputeNodesInAggregate(LO_view& aggPtr, LO_view& aggNodes, LO_view& unaggregated) const {
  LO numAggs        = GetNumAggregates();
  LO numNodes       = vertex2AggId_->getLocalLength();
  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
  typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true);
  LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();

  aggPtr   = LO_view("aggPtr", numAggs + 1);
  aggNodes = LO_view("aggNodes", numNodes);
  LO_view aggCurr("agg curr", numAggs + 1);

  // Construct the "rowptr" and the counter
  Kokkos::parallel_scan(
      "MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0, numAggs + 1),
      KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) {
        LO count = 0;
        if (aggIdx < numAggs)
          count = aggSizes(aggIdx);
        if (final_pass) {
          aggPtr(aggIdx)  = aggOffset;
          aggCurr(aggIdx) = aggOffset;
          if (aggIdx == numAggs)
            aggCurr(numAggs) = 0;  // use this for counting unaggregated nodes
        }
        aggOffset += count;
      });

  // Preallocate unaggregated to the correct size
  LO numUnaggregated = 0;
  Kokkos::parallel_reduce(
      "MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0, numNodes),
      KOKKOS_LAMBDA(const LO nodeIdx, LO& count) {
        if (vertex2AggId(nodeIdx, 0) == INVALID)
          count++;
      },
      numUnaggregated);
  unaggregated = LO_view("unaggregated", numUnaggregated);

  // Stick the nodes in each aggregate's spot
  Kokkos::parallel_for(
      "MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0, numNodes),
      KOKKOS_LAMBDA(const LO nodeIdx) {
        LO aggIdx = vertex2AggId(nodeIdx, 0);
        if (aggIdx != INVALID) {
          // atomic postincrement aggCurr(aggIdx) each time
          aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx), 1)) = nodeIdx;
        } else {
          // same, but using last entry of aggCurr for unaggregated nodes
          unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs), 1)) = nodeIdx;
        }
      });
}

template <class LocalOrdinal, class GlobalOrdinal, class Node>
std::string Aggregates<LocalOrdinal, GlobalOrdinal, Node>::description() const {
  if (numGlobalAggregates_ == -1)
    return BaseClass::description() + "{nGlobalAggregates = not computed}";
  else
    return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}";
}

template <class LocalOrdinal, class GlobalOrdinal, class Node>
void Aggregates<LocalOrdinal, GlobalOrdinal, Node>::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const {
  MUELU_DESCRIBE;

  if (verbLevel & Statistics1) {
    if (numGlobalAggregates_ == -1)
      out0 << "Global number of aggregates: not computed " << std::endl;
    else
      out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl;
  }
}

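// Lazily compute the global number of aggregates: if it has not been computed
// yet (flagged by -1), sum the local aggregate counts over all ranks and cache
// the result.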
template <class LocalOrdinal, class GlobalOrdinal, class Node>
GlobalOrdinal Aggregates<LocalOrdinal, GlobalOrdinal, Node>::GetNumGlobalAggregatesComputeIfNeeded() {
  if (numGlobalAggregates_ == -1) {
    LO nAggregates = GetNumAggregates();
    GO nGlobalAggregates;
    MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
    SetNumGlobalAggregates(nGlobalAggregates);
  }
  return numGlobalAggregates_;
}

template <class LocalOrdinal, class GlobalOrdinal, class Node>
const RCP<const Xpetra::Map<LocalOrdinal, GlobalOrdinal, Node>>
Aggregates<LocalOrdinal, GlobalOrdinal, Node>::GetMap() const {
  return vertex2AggId_->getMap();
}

} // namespace MueLu

#endif // MUELU_AGGREGATES_DEF_HPP