Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_allReduceView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
11 #define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
12 
15 #include "Kokkos_Core.hpp"
16 #include "Teuchos_CommHelpers.hpp"
17 #include "Tpetra_Details_temporaryViewUtils.hpp"
18 #include <limits>
19 #include <type_traits>
20 
23 
24 namespace Tpetra {
25 namespace Details {
26 
27 template <typename InputViewType, typename OutputViewType>
28 static void
29 allReduceRawContiguous(const OutputViewType& output,
30  const InputViewType& input,
31  const Teuchos::Comm<int>& comm) {
32  using Teuchos::outArg;
33  using Teuchos::REDUCE_SUM;
34  using Teuchos::reduceAll;
35  using ValueType = typename InputViewType::non_const_value_type;
36  size_t count = input.span();
37  TEUCHOS_ASSERT(count <= size_t(INT_MAX));
38  if (isInterComm(comm) && input.data() == output.data()) {
39  // Can't do in-place collective on an intercomm,
40  // so use a separate copy as the input.
41  typename InputViewType::array_layout layout(input.extent(0), input.extent(1), input.extent(2), input.extent(3), input.extent(4), input.extent(5), input.extent(6), input.extent(7));
42  Kokkos::View<typename InputViewType::non_const_data_type, typename InputViewType::array_layout, typename InputViewType::device_type>
43  tempInput(Kokkos::ViewAllocateWithoutInitializing("tempInput"), layout);
44  // DEEP_COPY REVIEW - This could be either DEVICE-TO-DEVICE or HOST-TO-HOST
45  // Either way, MPI is called right afterwards, meaning we'd need a sync on device
46  Kokkos::deep_copy(tempInput, input);
47  reduceAll<int, ValueType>(comm, REDUCE_SUM, static_cast<int>(count),
48  tempInput.data(), output.data());
49  } else
50  reduceAll<int, ValueType>(comm, REDUCE_SUM, static_cast<int>(count),
51  input.data(), output.data());
52 }
53 
57 template <class InputViewType, class OutputViewType>
58 static void
59 allReduceView(const OutputViewType& output,
60  const InputViewType& input,
61  const Teuchos::Comm<int>& comm) {
62  // using execution_space = typename OutputViewType::execution_space;
63  const bool viewsAlias = output.data() == input.data();
64  if (comm.getSize() == 1) {
65  if (!viewsAlias) {
66  // InputViewType and OutputViewType can't be AnonymousSpace
67  // Views, because deep_copy needs to know their memory spaces.
68  // DEEP_COPY REVIEW - NOT TESTED
69  Kokkos::deep_copy(output, input);
70  }
71  return;
72  }
73 
74  // we must ensure MPI can handle the pointers we pass it
75  // if GPUAware, we are done
76  // otherwise, if the views use GPUs, then we should copy them
77  using Layout = typename TempView::UnifiedContiguousLayout<InputViewType, OutputViewType>::type;
78  // if one or both is already in the correct layout, toLayout returns the same view
79  auto inputContig = TempView::toLayout<InputViewType, Layout>(input);
80  auto outputContig = TempView::toLayout<InputViewType, Layout>(output);
82  allReduceRawContiguous(outputContig, inputContig, comm);
83 
84  } else {
85  auto inputMPI = TempView::toMPISafe<decltype(inputContig), false>(inputContig);
86  auto outputMPI = TempView::toMPISafe<decltype(outputContig), false>(outputContig);
87  allReduceRawContiguous(outputMPI, inputMPI, comm);
88  // DEEP_COPY REVIEW - Could be either
89  Kokkos::deep_copy(outputContig, outputMPI);
90  }
91  // DEEP_COPY REVIEW - Could be either
92  Kokkos::deep_copy(output, outputContig);
93 }
94 
95 } // namespace Details
96 } // namespace Tpetra
97 
98 #endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator. ...
Declaration of Tpetra::Details::isInterComm.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.