42 #ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
43 #define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
47 #include "Kokkos_Core.hpp"
48 #include "Teuchos_CommHelpers.hpp"
50 #include <type_traits>
/// Compile-time trait: `value` tells whether ViewType's memory space is
/// one of the CUDA memory spaces tested below.
///
/// \tparam ViewType Type exposing a nested `memory_space` typedef
///   (presumably a Kokkos::View specialization -- confirm).
//
// NOTE(review): the leading numbers on these lines are listing artifacts,
// and they skip 68-69 and 71-72, so part of this struct (including its
// closing brace) is not visible here.
62 template<
class ViewType>
63 struct view_uses_cuda_spaces {
64 static constexpr
bool value =
65 #ifdef KOKKOS_ENABLE_CUDA
// CUDA-enabled build: true when memory_space is exactly
// Kokkos::CudaSpace or Kokkos::CudaUVMSpace.
66 std::is_same<typename ViewType::memory_space, Kokkos::CudaSpace>::value
67 || std::is_same<typename ViewType::memory_space, Kokkos::CudaUVMSpace>::value;
// NOTE(review): the branch taken when KOKKOS_ENABLE_CUDA is undefined is
// missing from this listing -- presumably `#else` / `false;` -- confirm
// against the real header.
70 #endif // KOKKOS_ENABLE_CUDA
/// Helper that names the type of, and allocates, a temporary View with a
/// contiguous layout and the same extents as a given View.
///
/// \tparam ViewType Type exposing `array_layout`, `memory_space`,
///   `device_type`, and `non_const_data_type` typedefs, plus `extent(i)`
///   and `label()` members.
//
// NOTE(review): the leading numbers are listing artifacts; gaps in them
// (76, 79, 88, 100-102, 107-108, 111, 116-119) mean several lines of this
// struct, including all brace-only lines, are not visible here.
73 template<
class ViewType>
74 struct MakeContiguousBuffer {
// Whether ViewType's layout is LayoutLeft or LayoutRight.
// NOTE(review): the opening of each comparison (presumably
// `std::is_same<`) is missing from this listing -- confirm.
75 static constexpr
bool is_contiguous_layout =
77 typename ViewType::array_layout,
78 Kokkos::LayoutLeft>::value ||
80 typename ViewType::array_layout,
81 Kokkos::LayoutRight>::value;
// Layout of the temporary buffer: ViewType's own layout when
// is_contiguous_layout is true, otherwise Kokkos::LayoutLeft.
82 using contiguous_array_layout =
83 typename std::conditional<is_contiguous_layout,
84 typename ViewType::array_layout,
85 Kokkos::LayoutLeft>::type;
// Device of the temporary buffer: either ViewType::device_type or
// Kokkos::HostSpace::device_type, selected by a predicate involving
// ViewType::memory_space and Kokkos::HostSpace.
// NOTE(review): the predicate's opening line is missing; presumably
// "memory_space is HostSpace" keeps the View's own device -- confirm.
86 using contiguous_device_type =
87 typename std::conditional<
89 typename ViewType::memory_space,
90 Kokkos::HostSpace>::value,
91 typename ViewType::device_type,
92 Kokkos::HostSpace::device_type>::type;
// Type of the temporary buffer: non-const data type of ViewType, with the
// layout and device chosen above.
93 using contiguous_buffer_type =
94 Kokkos::View<
typename ViewType::non_const_data_type,
95 contiguous_array_layout,
96 contiguous_device_type>;
/// Build a contiguous_array_layout from the extents of `view`.
///
/// \param view View whose extents 0 through 7 are copied.
/// \return Layout object constructed from those eight extents.
98 static contiguous_array_layout
99 makeLayout (
const ViewType& view)
// All eight extents are passed regardless of the View's rank.
103 return contiguous_array_layout (view.extent (0), view.extent (1),
104 view.extent (2), view.extent (3),
105 view.extent (4), view.extent (5),
106 view.extent (6), view.extent (7));
/// Allocate a contiguous_buffer_type for `view`.
///
/// \param view View whose label is reused for the new allocation.
/// \return Newly constructed contiguous_buffer_type.
109 static contiguous_buffer_type
110 make (
const ViewType& view)
112 using Kokkos::view_alloc;
113 using Kokkos::WithoutInitializing;
// Reuse the input View's label and request an allocation that is not
// initialized (Kokkos::WithoutInitializing).
// NOTE(review): the second constructor argument is missing from this
// listing -- presumably `makeLayout (view)` -- confirm.
114 return contiguous_buffer_type
115 (view_alloc (view.label (), WithoutInitializing),
/// Free-function convenience wrapper: forwards to
/// MakeContiguousBuffer<ViewType>::make and returns its result.
///
/// \tparam ViewType Type of the input View.
/// \param view View passed through unchanged to
///   MakeContiguousBuffer<ViewType>::make.
/// \return Whatever MakeContiguousBuffer<ViewType>::make (view) returns,
///   of type MakeContiguousBuffer<ViewType>::contiguous_buffer_type.
//
// NOTE(review): the leading numbers are listing artifacts; 123 and 125
// (presumably the function's braces) are not visible here.
120 template<
class ViewType>
121 typename MakeContiguousBuffer<ViewType>::contiguous_buffer_type
122 makeContiguousBuffer (
const ViewType& view)
124 return MakeContiguousBuffer<ViewType>::make (view);
/// Sum-reduce raw arrays across `comm` via Teuchos::reduceAll with
/// Teuchos::REDUCE_SUM.
///
/// \tparam ValueType Element type of both arrays.
/// \param output Array that is handed to the reduction as the result
///   buffer (presumably -- the trailing reduceAll arguments are not
///   visible here; confirm).
/// \param input Array of local contributions (same caveat).
/// \param comm Communicator over which to reduce.
//
// NOTE(review): the leading numbers are listing artifacts; 128, 131, 133,
// and 140 onward are missing, so the return type, the `count` parameter
// declaration (used below), the braces, and the last reduceAll arguments
// are not visible here.
127 template<
class ValueType>
129 allReduceRawContiguous (ValueType output[],
130 const ValueType input[],
132 const Teuchos::Comm<int>& comm)
134 using Teuchos::outArg;
135 using Teuchos::REDUCE_SUM;
136 using Teuchos::reduceAll;
// reduceAll is instantiated with an int ordinal, so assert that `count`
// fits in an int before the static_cast<int> below.
137 constexpr
size_t max_int = size_t (std::numeric_limits<int>::max ());
138 TEUCHOS_ASSERT( count <=
size_t (max_int) );
139 reduceAll<int, ValueType> (comm, REDUCE_SUM,
static_cast<int> (count),
/// All-reduce between an input View and an output View over `comm`,
/// using contiguous temporary buffers when the Views cannot be handed to
/// the reduction directly.
///
/// \tparam InputViewType Type of `input`.
/// \tparam OutputViewType Type of `output`.
/// \param input View of local contributions.
/// \param comm Communicator over which to reduce.
//
// NOTE(review): the leading numbers are listing artifacts with large gaps
// (152-153, 156-178, 181-191, 193-195, ...). The function name line, the
// `output` parameter declaration, the body of the single-process branch,
// several boolean connectives, and any copy steps around the temporary
// buffers are not visible here; comments below describe only what is.
151 template<
class InputViewType,
class OutputViewType>
154 const InputViewType& input,
155 const Teuchos::Comm<int>& comm)
// True when both Views start at the same address.
179 const bool viewsAlias = output.data () == input.data ();
// Single-process communicator is special-cased (body not visible here).
180 if (comm.getSize () == 1) {
// Depends on whether either View type uses a CUDA memory space.
// NOTE(review): the operators joining these terms, and any additional
// term, are missing from this listing -- confirm against the real header.
192 const bool mpiCannotAccessBuffers =
196 view_uses_cuda_spaces<OutputViewType>::value
198 view_uses_cuda_spaces<InputViewType>::value
// Temporaries are needed if MPI cannot access the buffers, if the
// intercommunicator condition holds, or if either View's span is not
// contiguous.
// NOTE(review): the remainder of the isInterComm term (after line 205)
// is missing -- presumably it is combined with viewsAlias; confirm.
201 const bool needContiguousTemporaryBuffers =
203 mpiCannotAccessBuffers ||
205 (::Tpetra::Details::isInterComm (comm)
209 ! output.span_is_contiguous () ||
210 ! input.span_is_contiguous ();
212 if (needContiguousTemporaryBuffers) {
// Allocate contiguous stand-ins for both Views and reduce on those; the
// element count passed is the temporary output's span.
213 auto output_tmp = makeContiguousBuffer (output);
214 auto input_tmp = makeContiguousBuffer (input);
221 allReduceRawContiguous (output_tmp.data (), input_tmp.data (),
222 output_tmp.span (), comm);
// Otherwise reduce directly on the Views' own storage, with
// output.span () as the element count.
226 allReduceRawContiguous (output.data (), input.data (),
227 output.span (), comm);
234 #endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
static bool assumeMpiIsCudaAware()
Whether to assume that MPI is CUDA aware.
Declaration of Tpetra::Details::isInterComm.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.