47 #ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP 
   48 #define MUELU_AGGREGATES_KOKKOS_DEF_HPP 
   50 #include <Xpetra_Map.hpp> 
   52 #include <Xpetra_MultiVectorFactory.hpp> 
   55 #include "MueLu_LWGraph_kokkos.hpp" 
   61   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
   62   Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
 
   63   Aggregates_kokkos(LWGraph_kokkos graph) {
 
   66     vertex2AggId_ = LOVectorFactory::Build(graph.GetImportMap());
 
   69     procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
 
   76     aggregatesIncludeGhosts_ = 
true;
 
   79   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
   80   Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
 
   81   Aggregates_kokkos(
const RCP<const Map>& map) {
 
   84     vertex2AggId_ = LOVectorFactory::Build(map);
 
   87     procWinner_ = LOVectorFactory::Build(map);
 
   94     aggregatesIncludeGhosts_ = 
true;
 
   97   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
   98   typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::aggregates_sizes_type::const_type
 
   99   Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::ComputeAggregateSizes(
bool forceRecompute)
 const {
 
  100     if (aggregateSizes_.size() && !forceRecompute) {
 
  101       return aggregateSizes_;
 
  105       aggregates_sizes_type aggregateSizes(
"aggregates", numAggregates_);
 
  107       int myPID = GetMap()->getComm()->getRank();
 
  109       auto vertex2AggId = vertex2AggId_->template getLocalView<DeviceType>();
 
  110       auto procWinner   = procWinner_  ->template getLocalView<DeviceType>();
 
  112       typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic = aggregateSizes;
 
  113       Kokkos::parallel_for(
"MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()),
 
  114         KOKKOS_LAMBDA(
const LO i) {
 
  115           if (procWinner(i, 0) == myPID)
 
  116             aggregateSizesAtomic(vertex2AggId(i, 0))++;
 
  119       aggregateSizes_ = aggregateSizes;
 
  121       return aggregateSizes;
 
  126   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  127   typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::local_graph_type
 
  128   Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetGraph()
 const {
 
  129     typedef typename local_graph_type::row_map_type row_map_type;
 
  130     typedef typename local_graph_type::entries_type entries_type;
 
  132     auto numAggregates = numAggregates_;
 
  134     if (static_cast<LO>(graph_.numRows()) == numAggregates)
 
  137     auto vertex2AggId = vertex2AggId_->template getLocalView<DeviceType>();
 
  138     auto procWinner   = procWinner_  ->template getLocalView<DeviceType>();
 
  139     auto sizes        = ComputeAggregateSizes();
 
  142     typename row_map_type::non_const_type rows(
"Agg_rows", numAggregates+1);  
 
  145     Kokkos::parallel_scan(
"MueLu:Aggregates:GetGraph:compute_rows", range_type(0, numAggregates),
 
  146       KOKKOS_LAMBDA(
const LO i, 
LO& update, 
const bool& final_pass) {
 
  152     decltype(rows) offsets(Kokkos::ViewAllocateWithoutInitializing("Agg_offsets"), numAggregates+1); 
 
  153     Kokkos::deep_copy(offsets, rows);
 
  155     int myPID = GetMap()->getComm()->getRank();
 
  157     typename entries_type::non_const_type cols(Kokkos::ViewAllocateWithoutInitializing("Agg_cols"), rows(numAggregates));
 
  159     Kokkos::parallel_reduce("MueLu:Aggregates:GetGraph:compute_cols", range_type(0, procWinner.size()),
 
  160       KOKKOS_LAMBDA(const 
LO i, 
size_t& nnz) {
 
  161         if (procWinner(i, 0) == myPID) {
 
  162           typedef typename std::remove_reference< decltype( offsets(0) ) >::type atomic_incr_type;
 
  163           auto idx = Kokkos::atomic_fetch_add( &offsets(vertex2AggId(i,0)), atomic_incr_type(1));
 
  169         "MueLu: Internal error: Something is wrong with aggregates graph construction");
 
  171     graph_ = local_graph_type(cols, rows);
 
  176   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  177   std::string Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::description()
 const {
 
  181   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  186       out0 << 
"Global number of aggregates: " << GetNumGlobalAggregates() << std::endl;
 
  189   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  190   GlobalOrdinal Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetNumGlobalAggregates()
 const {
 
  191     LO nAggregates = GetNumAggregates();
 
  192     GO nGlobalAggregates;
 
  193     MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (
GO)nAggregates, nGlobalAggregates);
 
  194     return nGlobalAggregates;
 
  197   template <
class LocalOrdinal, 
class GlobalOrdinal, 
class DeviceType>
 
  198   const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>> >
 
  199   Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetMap()
 const {
 
  200     return vertex2AggId_->getMap();
 
  205 #endif // MUELU_AGGREGATES_KOKKOS_DEF_HPP 
#define MueLu_sumAll(rcpComm, in, out)
 
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< Kokkos::Impl::is_execution_policy< ExecPolicy >::value >::type *=0)
 
std::string toString(const T &what)
Little helper function to convert non-string types to strings. 
 
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
 
void deep_copy(const View< DT, DP...> &dst, typename ViewTraits< DT, DP...>::const_value_type &value, typename std::enable_if< std::is_same< typename ViewTraits< DT, DP...>::specialize, void >::value >::type *=0)
 
MueLu::DefaultGlobalOrdinal GlobalOrdinal
 
#define MUELU_UNAGGREGATED
 
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects. 
 
virtual std::string description() const 
Return a simple one-line description of this object.