Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_allReduceView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
11 #define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
12 
15 #include "Kokkos_Core.hpp"
16 #include "Teuchos_CommHelpers.hpp"
17 #include "Tpetra_Details_temporaryViewUtils.hpp"
18 #include <limits>
19 #include <type_traits>
20 
23 
24 namespace Tpetra {
25 namespace Details {
26 
27 template<typename InputViewType, typename OutputViewType>
28 static void
29 allReduceRawContiguous (const OutputViewType& output,
30  const InputViewType& input,
31  const Teuchos::Comm<int>& comm)
32 {
33  using Teuchos::outArg;
34  using Teuchos::REDUCE_SUM;
35  using Teuchos::reduceAll;
36  using ValueType = typename InputViewType::non_const_value_type;
37  size_t count = input.span();
38  TEUCHOS_ASSERT( count <= size_t (INT_MAX) );
39  if(isInterComm(comm) && input.data() == output.data())
40  {
41  //Can't do in-place collective on an intercomm,
42  //so use a separate copy as the input.
43  typename InputViewType::array_layout layout(input.extent(0), input.extent(1), input.extent(2), input.extent(3), input.extent(4), input.extent(5), input.extent(6), input.extent(7));
44  Kokkos::View<typename InputViewType::non_const_data_type, typename InputViewType::array_layout, typename InputViewType::device_type>
45  tempInput(Kokkos::ViewAllocateWithoutInitializing("tempInput"), layout);
46  // DEEP_COPY REVIEW - This could be either DEVICE-TO-DEVICE or HOST-TO-HOST
47  // Either way, MPI is called right afterwards, meaning we'd need a sync on device
48  Kokkos::deep_copy(tempInput, input);
49  reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
50  tempInput.data(), output.data());
51  }
52  else
53  reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
54  input.data(), output.data());
55 }
56 
60 template<class InputViewType, class OutputViewType>
61 static void
62 allReduceView (const OutputViewType& output,
63  const InputViewType& input,
64  const Teuchos::Comm<int>& comm)
65 {
66 
67  // using execution_space = typename OutputViewType::execution_space;
68  const bool viewsAlias = output.data () == input.data ();
69  if (comm.getSize () == 1) {
70  if (! viewsAlias) {
71  // InputViewType and OutputViewType can't be AnonymousSpace
72  // Views, because deep_copy needs to know their memory spaces.
73  // DEEP_COPY REVIEW - NOT TESTED
74  Kokkos::deep_copy (output, input);
75  }
76  return;
77  }
78 
79  // we must ensure MPI can handle the pointers we pass it
80  // if GPUAware, we are done
81  // otherwise, if the views use GPUs, then we should copy them
82  using Layout = typename TempView::UnifiedContiguousLayout<InputViewType, OutputViewType>::type;
83  //if one or both is already in the correct layout, toLayout returns the same view
84  auto inputContig = TempView::toLayout<InputViewType, Layout>(input);
85  auto outputContig = TempView::toLayout<InputViewType, Layout>(output);
87  {
88  allReduceRawContiguous(outputContig, inputContig, comm);
89 
90  }
91  else
92  {
93  auto inputMPI = TempView::toMPISafe<decltype(inputContig), false>(inputContig);
94  auto outputMPI = TempView::toMPISafe<decltype(outputContig), false>(outputContig);
95  allReduceRawContiguous(outputMPI, inputMPI, comm);
96  // DEEP_COPY REVIEW - Could be either
97  Kokkos::deep_copy(outputContig, outputMPI);
98  }
99  // DEEP_COPY REVIEW - Could be either
100  Kokkos::deep_copy(output, outputContig);
101 }
102 
103 } // namespace Details
104 } // namespace Tpetra
105 
106 #endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator. ...
Declaration of Tpetra::Details::isInterComm.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.