MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_Aggregates_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 #ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP
48 #define MUELU_AGGREGATES_KOKKOS_DEF_HPP
49 
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_Vector.hpp>
53 #include <Xpetra_VectorFactory.hpp>
54 
55 #include "MueLu_LWGraph_kokkos.hpp"
58 
59 namespace MueLu {
60 
61  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
62  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
63  Aggregates_kokkos(LWGraph_kokkos graph) {
64  numAggregates_ = 0;
65 
66  vertex2AggId_ = LOVectorFactory::Build(graph.GetImportMap());
67  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
68 
69  procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
70  procWinner_->putScalar(MUELU_UNASSIGNED);
71 
72  isRoot_ = Kokkos::View<bool*,DeviceType>(Kokkos::ViewAllocateWithoutInitializing("roots"), graph.GetImportMap()->getNodeNumElements());
73  Kokkos::deep_copy(isRoot_, false);
74 
75  // slow but safe, force TentativePFactory to build column map for P itself
76  aggregatesIncludeGhosts_ = true;
77  }
78 
79  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
80  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
81  Aggregates_kokkos(const RCP<const Map>& map) {
82  numAggregates_ = 0;
83 
84  vertex2AggId_ = LOVectorFactory::Build(map);
85  vertex2AggId_->putScalar(MUELU_UNAGGREGATED);
86 
87  procWinner_ = LOVectorFactory::Build(map);
88  procWinner_->putScalar(MUELU_UNASSIGNED);
89 
90  isRoot_ = Kokkos::View<bool*,DeviceType>(Kokkos::ViewAllocateWithoutInitializing("roots"), map->getNodeNumElements());
91  Kokkos::deep_copy(isRoot_, false);
92 
93  // slow but safe, force TentativePFactory to build column map for P itself
94  aggregatesIncludeGhosts_ = true;
95  }
96 
97  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
98  typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::aggregates_sizes_type::const_type
99  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::ComputeAggregateSizes(bool forceRecompute) const {
100  if (aggregateSizes_.size() && !forceRecompute) {
101  return aggregateSizes_;
102 
103  } else {
104  // It is necessary to initialize this to 0
105  aggregates_sizes_type aggregateSizes("aggregates", numAggregates_);
106 
107  int myPID = GetMap()->getComm()->getRank();
108 
109  auto vertex2AggId = vertex2AggId_->template getLocalView<DeviceType>();
110  auto procWinner = procWinner_ ->template getLocalView<DeviceType>();
111 
112  typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
113  Kokkos::parallel_for("MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()),
114  KOKKOS_LAMBDA(const LO i) {
115  if (procWinner(i, 0) == myPID)
116  aggregateSizesAtomic(vertex2AggId(i, 0))++;
117  });
118 
119  aggregateSizes_ = aggregateSizes;
120 
121  return aggregateSizes;
122  }
123 
124  }
125 
126  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
127  typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::local_graph_type
128  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetGraph() const {
129  typedef typename local_graph_type::row_map_type row_map_type;
130  typedef typename local_graph_type::entries_type entries_type;
131 
132  auto numAggregates = numAggregates_;
133 
134  if (static_cast<LO>(graph_.numRows()) == numAggregates)
135  return graph_;
136 
137  auto vertex2AggId = vertex2AggId_->template getLocalView<DeviceType>();
138  auto procWinner = procWinner_ ->template getLocalView<DeviceType>();
139  auto sizes = ComputeAggregateSizes();
140 
141  // FIXME_KOKKOS: replace by ViewAllocateWithoutInitializing + rows(0) = 0.
142  typename row_map_type::non_const_type rows("Agg_rows", numAggregates+1); // rows(0) = 0 automatically
143 
144  // parallel_scan (exclusive)
145  Kokkos::parallel_scan("MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
146  KOKKOS_LAMBDA(const LO i, LO& update, const bool& final_pass) {
147  update += sizes(i);
148  if (final_pass)
149  rows(i+1) = update;
150  });
151 
152  decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); // +1 is just for ease
153  Kokkos::deep_copy(offsets, rows);
154 
155  int myPID = GetMap()->getComm()->getRank();
156 
157  typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), rows(numAggregates));
158  size_t realnnz = 0;
159  Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
160  KOKKOS_LAMBDA(const LO i, size_t& nnz) {
161  if (procWinner(i, 0) == myPID) {
162  typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type;
163  auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
164  cols(idx) = i;
165  nnz++;
166  }
167  }, realnnz);
168  TEUCHOS_TEST_FOR_EXCEPTION(realnnz != rows(numAggregates), Exceptions::RuntimeError,
169  "MueLu: Internal error: Something is wrong with aggregates graph construction");
170 
171  graph_ = local_graph_type(cols, rows);
172 
173  return graph_;
174  }
175 
176  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
177  std::string Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::description() const {
178  return BaseClass::description() + "{nGlobalAggregates = " + toString(GetNumGlobalAggregates()) + "}";
179  }
180 
181  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
182  void Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::print(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const {
184 
185  if (verbLevel & Statistics1)
186  out0 << "Global number of aggregates: " << GetNumGlobalAggregates() << std::endl;
187  }
188 
189  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
190  GlobalOrdinal Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetNumGlobalAggregates() const {
191  LO nAggregates = GetNumAggregates();
192  GO nGlobalAggregates;
193  MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
194  return nGlobalAggregates;
195  }
196 
197  template <class LocalOrdinal, class GlobalOrdinal, class DeviceType>
198  const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>> >
199  Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetMap() const {
200  return vertex2AggId_->getMap();
201  }
202 
203 } //namespace MueLu
204 
205 #endif // MUELU_AGGREGATES_KOKKOS_DEF_HPP
#define MUELU_UNASSIGNED
#define MueLu_sumAll(rcpComm, in, out)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
GlobalOrdinal GO
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
#define MUELU_UNAGGREGATED
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
virtual std::string description() const
Return a simple one-line description of this object.