MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_Aggregates_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 #ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP
48 #define MUELU_AGGREGATES_KOKKOS_DEF_HPP
49 
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_Vector.hpp>
52 #include <Xpetra_MultiVectorFactory.hpp>
53 #include <Xpetra_VectorFactory.hpp>
54 
55 #include "MueLu_LWGraph_kokkos.hpp"
56 #include "MueLu_Utilities_decl.hpp"
58 
59 namespace MueLu {
60 
// Constructs an aggregate container sized by the import map of `graph`.
// The object starts with zero local and global aggregates, every vertex
// marked MUELU_UNAGGREGATED, every procWinner entry marked MUELU_UNASSIGNED,
// and every root flag cleared.
61  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
62  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Tpetra::KokkosCompat::KokkosDeviceWrapperNode<DeviceType> >::
63  Aggregates_kokkos(LWGraph_kokkos graph) {
64  numAggregates_ = 0;
65  numGlobalAggregates_ = 0;
66 
// Single-column multivector over the import map holding the aggregate id of each vertex.
67  vertex2AggId_ = LOMultiVectorFactory::Build(graph.GetImportMap(), 1);
68  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
69 
// Vector over the same map; other routines in this file compare its entries
// against the communicator rank.
70  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
71  procWinner_->putScalar(MUELU_UNASSIGNED);
72 
// The view is allocated without initialization, so it is filled with `false` explicitly.
73  isRoot_ = Kokkos::View<bool*, device_type>(Kokkos::ViewAllocateWithoutInitializing("roots"), graph.GetImportMap()->getLocalNumElements());
74  Kokkos::deep_copy(isRoot_, false);
75 
76  // slow but safe, force TentativePFactory to build column map for P itself
77  aggregatesIncludeGhosts_ = true;
78  }
79 
// Constructor body that sizes all per-vertex storage from `map`.
// NOTE(review): the signature lines are not present in this listing;
// presumably this is the constructor overload taking a map -- confirm.
// The object starts with zero local and global aggregates, every vertex
// marked MUELU_UNAGGREGATED, every procWinner entry marked MUELU_UNASSIGNED,
// and every root flag cleared.
80  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
83  numAggregates_ = 0;
84  numGlobalAggregates_ = 0;
85 
// Single-column multivector over `map` holding the aggregate id of each vertex.
86  vertex2AggId_ = LOMultiVectorFactory::Build(map, 1);
87  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
88 
89  procWinner_ = LOVectorFactory::Build(map);
90  procWinner_->putScalar(MUELU_UNASSIGNED);
91 
// The view is allocated without initialization, so it is filled with `false` explicitly.
92  isRoot_ = Kokkos::View<bool*,device_type>(Kokkos::ViewAllocateWithoutInitializing("roots"), map->getLocalNumElements());
93  Kokkos::deep_copy(isRoot_, false);
94 
95  // slow but safe, force TentativePFactory to build column map for P itself
96  aggregatesIncludeGhosts_ = true;
97  }
98 
// Returns the number of nodes in each local aggregate.
// NOTE(review): the signature lines are not present in this listing; the
// name ComputeAggregateSizes is taken from the kernel label below -- confirm.
// The cached aggregateSizes_ is returned when it is non-empty and
// `forceRecompute` is false. Otherwise the sizes are recounted, stored in
// aggregateSizes_, and returned.
99  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
102  if (aggregateSizes_.size() && !forceRecompute) {
103  return aggregateSizes_;
104 
105  } else {
106  // It is necessary to initialize this to 0
107  aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);
108 
109  int myPID = GetMap()->getComm()->getRank();
110 
111  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
112  auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
113 
// View with the Kokkos::Atomic trait assigned from aggregateSizes; the
// increments made through it are what is cached and returned below.
114  typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
115  Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()),
116  KOKKOS_LAMBDA(const LO i) {
// Only entries whose procWinner equals this rank are counted.
117  if (procWinner(i, 0) == myPID)
118  aggregateSizesAtomic(vertex2AggId(i, 0))++;
119  });
120 
121  aggregateSizes_ = aggregateSizes;
122 
123  return aggregateSizes;
124  }
125 
126  }
127 
// Builds (and caches in graph_) a compressed-row graph with one row per
// local aggregate; the column entries of a row are the local indices of the
// vertices assigned to that aggregate whose procWinner equals this rank.
// NOTE(review): the signature lines are not present in this listing; the
// name GetGraph is taken from the kernel labels below -- confirm.
128  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
131  using row_map_type = typename local_graph_type::row_map_type;
132  using entries_type = typename local_graph_type::entries_type;
133  using size_type = typename local_graph_type::size_type;
134 
135  auto numAggregates = numAggregates_;
136 
// Reuse the stored graph when its row count already matches the aggregate count.
137  if (static_cast<LO>(graph_.numRows()) == numAggregates)
138  return graph_;
139 
140  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
141  auto procWinner = procWinner_ ->getDeviceLocalView(Xpetra::Access::ReadOnly);
142  auto sizes = ComputeAggregateSizes();
143 
144  // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
145  typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically
146 
147  // parallel_scan (exclusive)
// rows(i+1) receives the running sum of sizes(0..i), i.e. the end offset of row i.
148  Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
149  KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
150  update += sizes(i);
151  if (final_pass)
152  rows(i+1) = update;
153  });
154 
// Copy of the row offsets, used below as per-row insertion cursors so that
// `rows` itself stays unchanged.
155  decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease
156  Kokkos::deep_copy(offsets, rows);
157 
158  int myPID = GetMap()->getComm()->getRank();
159 
// Read the last row offset (total number of entries) through a host mirror.
160  size_type numNNZ;
161  {
162  Kokkos::View<size_type, device_type> numNNZ_device = Kokkos::subview(rows, numAggregates);
163  typename Kokkos::View<size_type, device_type>::HostMirror numNNZ_host = Kokkos::create_mirror_view(numNNZ_device);
164  Kokkos::deep_copy(numNNZ_host, numNNZ_device);
165  numNNZ = numNNZ_host();
166  }
167  typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), numNNZ);
// Fill the column entries and count how many were written (realnnz).
168  size_t realnnz = 0;
169  Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
170  KOKKOS_LAMBDA(const LO i, size_t& nnz) {
171  if (procWinner(i, 0) == myPID) {
172  typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type;
// Atomically claim the next free slot in the row of this vertex's aggregate.
173  auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
174  cols(idx) = i;
175  nnz++;
176  }
177  }, realnnz);
// NOTE(review): the first line of the statement that emits the message below
// is not present in this listing; the message compares numNNZ with realnnz.
179  "MueLu: Internal error: Something is wrong with aggregates graph construction: numNNZ = " << numNNZ << " != " << realnnz << " = realnnz");
180 
181  graph_ = local_graph_type(cols, rows);
182 
183  return graph_;
184  }
185 
// Fills three views: aggPtr (numAggs+1 offsets into aggNodes), aggNodes
// (node indices grouped by aggregate), and unaggregated (node indices whose
// aggregate id equals INVALID).
// NOTE(review): the signature line and the definition of INVALID are not
// present in this listing; the name ComputeNodesInAggregate is taken from the
// kernel labels below -- confirm.
186  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
187  void
189  LO numAggs = GetNumAggregates();
190  LO numNodes = vertex2AggId_->getLocalLength();
191  auto vertex2AggId = vertex2AggId_->getDeviceLocalView(Xpetra::Access::ReadOnly);
// `true` forces the sizes to be recomputed rather than taken from the cache.
192  typename aggregates_sizes_type::const_type aggSizes = ComputeAggregateSizes(true);
194 
195  aggPtr = LO_view("aggPtr",numAggs+1);
196  aggNodes = LO_view("aggNodes",numNodes);
197  LO_view aggCurr("agg curr",numAggs+1);
198 
199  // Construct the "rowptr" and the counter
// Each index stores the running offset before its own count is added; the
// extra index numAggs contributes a count of 0, so aggPtr(numAggs) holds the total.
200  Kokkos::parallel_scan("MueLu:Aggregates:ComputeNodesInAggregate:scan", range_type(0,numAggs+1),
201  KOKKOS_LAMBDA(const LO aggIdx, LO& aggOffset, bool final_pass) {
202  LO count = 0;
203  if(aggIdx < numAggs)
204  count = aggSizes(aggIdx);
205  if(final_pass) {
206  aggPtr(aggIdx) = aggOffset;
207  aggCurr(aggIdx) = aggOffset;
208  if(aggIdx==numAggs)
209  aggCurr(numAggs) = 0; // use this for counting unaggregated nodes
210  }
211  aggOffset += count;
212  });
213 
214  // Preallocate unaggregated to the correct size
215  LO numUnaggregated = 0;
216  Kokkos::parallel_reduce("MueLu:Aggregates:ComputeNodesInAggregate:unaggregatedSize", range_type(0,numNodes),
217  KOKKOS_LAMBDA(const LO nodeIdx, LO & count) {
218  if(vertex2AggId(nodeIdx,0)==INVALID)
219  count++;
220  }, numUnaggregated);
221  unaggregated = LO_view("unaggregated",numUnaggregated);
222 
223  // Stick the nodes in each aggregate's spot
// NOTE(review): the sizes used for the offsets count only entries whose
// procWinner equals this rank (see the ComputeAggregateSizes kernel), while
// this loop places every node with a non-INVALID id -- confirm the two agree.
224  Kokkos::parallel_for("MueLu:Aggregates:ComputeNodesInAggregate:for", range_type(0,numNodes),
225  KOKKOS_LAMBDA(const LO nodeIdx) {
226  LO aggIdx = vertex2AggId(nodeIdx,0);
227  if(aggIdx != INVALID) {
228  // atomic postincrement aggCurr(aggIdx) each time
229  aggNodes(Kokkos::atomic_fetch_add(&aggCurr(aggIdx),1)) = nodeIdx;
230  } else {
231  // same, but using last entry of aggCurr for unaggregated nodes
232  unaggregated(Kokkos::atomic_fetch_add(&aggCurr(numAggs),1)) = nodeIdx;
233  }
234  });
235 
236  }
237 
// Returns BaseClass::description() followed by the global aggregate count in
// braces; a stored value of -1 is reported as "not computed".
// NOTE(review): the signature line is not present in this listing.
238  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
240  if (numGlobalAggregates_ == -1) return BaseClass::description() + "{nGlobalAggregates = not computed}";
241  else return BaseClass::description() + "{nGlobalAggregates = " + toString(numGlobalAggregates_) + "}";
242  }
243 
// When `verbLevel` includes Statistics1, writes the global number of
// aggregates to `out0`; a stored value of -1 is reported as "not computed".
// NOTE(review): the signature line and the line that introduces `out0` are
// not present in this listing.
244  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
247 
248  if (verbLevel & Statistics1) {
249  if (numGlobalAggregates_ == -1) out0 << "Global number of aggregates: not computed " << std::endl;
250  else out0 << "Global number of aggregates: " << numGlobalAggregates_ << std::endl;
251  }
252  }
253 
// Returns numGlobalAggregates_. When the stored value is not -1, the local
// aggregate counts are first summed over the communicator of vertex2AggId_'s
// map and the result is stored via SetNumGlobalAggregates.
// NOTE(review): the signature line is not present in this listing.
// NOTE(review): description()/print() in this file treat -1 as "not
// computed", yet the sum here is skipped exactly when the value is -1 --
// confirm the comparison direction is intended (the constructors visible in
// this file initialize the value to 0).
254  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
256 
257  if (numGlobalAggregates_ != -1) {
258  LO nAggregates = GetNumAggregates();
259  GO nGlobalAggregates;
260  MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
261  SetNumGlobalAggregates(nGlobalAggregates);
262  }
263  return numGlobalAggregates_;
264  }
265 
// Returns the map on which vertex2AggId_ is defined.
// NOTE(review): the signature line is not present in this listing.
266  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
269  return vertex2AggId_->getMap();
270  }
271 
272 } //namespace MueLu
273 
274 #endif // MUELU_AGGREGATES_KOKKOS_DEF_HPP
#define MUELU_UNASSIGNED
#define MueLu_sumAll(rcpComm, in, out)
Lightweight MueLu representation of a compressed row storage graph.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
GlobalOrdinal GO
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
MueLu::DefaultGlobalOrdinal GlobalOrdinal
#define MUELU_UNAGGREGATED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
Exception thrown to report errors in the internal logic of the program.
virtual std::string description() const
Return a simple one-line description of this object.