Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_allReduceView.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_ALLREDUCEVIEW_HPP
43 #define TPETRA_DETAILS_ALLREDUCEVIEW_HPP
44 
47 #include "Kokkos_Core.hpp"
48 #include "Teuchos_CommHelpers.hpp"
49 #include "Tpetra_Details_temporaryViewUtils.hpp"
50 #include <limits>
51 #include <type_traits>
52 
55 
56 namespace Tpetra {
57 namespace Details {
58 
59 template<typename InputViewType, typename OutputViewType>
60 static void
61 allReduceRawContiguous (const OutputViewType& output,
62  const InputViewType& input,
63  const Teuchos::Comm<int>& comm)
64 {
65  using Teuchos::outArg;
66  using Teuchos::REDUCE_SUM;
67  using Teuchos::reduceAll;
68  using ValueType = typename InputViewType::non_const_value_type;
69  size_t count = input.span();
70  TEUCHOS_ASSERT( count <= size_t (INT_MAX) );
71  if(isInterComm(comm) && input.data() == output.data())
72  {
73  //Can't do in-place collective on an intercomm,
74  //so use a separate copy as the input.
75  typename InputViewType::array_layout layout(input.extent(0), input.extent(1), input.extent(2), input.extent(3), input.extent(4), input.extent(5), input.extent(6), input.extent(7));
76  Kokkos::View<typename InputViewType::non_const_data_type, typename InputViewType::array_layout, typename InputViewType::device_type>
77  tempInput(Kokkos::ViewAllocateWithoutInitializing("tempInput"), layout);
78  // DEEP_COPY REVIEW - This could be either DEVICE-TO-DEVICE or HOST-TO-HOST
79  // Either way, MPI is called right afterwards, meaning we'd need a sync on device
80  Kokkos::deep_copy(tempInput, input);
81  reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
82  tempInput.data(), output.data());
83  }
84  else
85  reduceAll<int, ValueType> (comm, REDUCE_SUM, static_cast<int> (count),
86  input.data(), output.data());
87 }
88 
92 template<class InputViewType, class OutputViewType>
93 static void
94 allReduceView (const OutputViewType& output,
95  const InputViewType& input,
96  const Teuchos::Comm<int>& comm)
97 {
98 
99  // using execution_space = typename OutputViewType::execution_space;
100  const bool viewsAlias = output.data () == input.data ();
101  if (comm.getSize () == 1) {
102  if (! viewsAlias) {
103  // InputViewType and OutputViewType can't be AnonymousSpace
104  // Views, because deep_copy needs to know their memory spaces.
105  // DEEP_COPY REVIEW - NOT TESTED
106  Kokkos::deep_copy (output, input);
107  }
108  return;
109  }
110 
111  // we must ensure MPI can handle the pointers we pass it
112  // if GPUAware, we are done
113  // otherwise, if the views use GPUs, then we should copy them
114  using Layout = typename TempView::UnifiedContiguousLayout<InputViewType, OutputViewType>::type;
115  //if one or both is already in the correct layout, toLayout returns the same view
116  auto inputContig = TempView::toLayout<InputViewType, Layout>(input);
117  auto outputContig = TempView::toLayout<InputViewType, Layout>(output);
119  {
120  allReduceRawContiguous(outputContig, inputContig, comm);
121 
122  }
123  else
124  {
125  auto inputMPI = TempView::toMPISafe<decltype(inputContig), false>(inputContig);
126  auto outputMPI = TempView::toMPISafe<decltype(outputContig), false>(outputContig);
127  allReduceRawContiguous(outputMPI, inputMPI, comm);
128  // DEEP_COPY REVIEW - Could be either
129  Kokkos::deep_copy(outputContig, outputMPI);
130  }
131  // DEEP_COPY REVIEW - Could be either
132  Kokkos::deep_copy(output, outputContig);
133 }
134 
135 } // namespace Details
136 } // namespace Tpetra
137 
138 #endif // TPETRA_DETAILS_ALLREDUCEVIEW_HPP
static bool assumeMpiIsGPUAware()
Whether to assume that MPI is CUDA aware.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static void allReduceView(const OutputViewType &output, const InputViewType &input, const Teuchos::Comm< int > &comm)
All-reduce from input Kokkos::View to output Kokkos::View.
bool isInterComm(const Teuchos::Comm< int > &)
Return true if and only if the input communicator wraps an MPI intercommunicator. ...
Declaration of Tpetra::Details::isInterComm.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.