Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_idot.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_IDOT_HPP
43 #define TPETRA_DETAILS_IDOT_HPP
44 
60 
61 #include "Tpetra_iallreduce.hpp"
62 #include "Tpetra_MultiVector.hpp"
63 #include "Tpetra_Vector.hpp"
64 #include "Teuchos_CommHelpers.hpp"
65 #include "KokkosBlas1_dot.hpp"
66 #include <stdexcept>
67 #include <sstream>
68 
69 namespace Tpetra {
70 namespace Details {
71 
72 // Temporary helper to get read-only view from multivector in requested space.
73 // TODO: when https://github.com/kokkos/kokkos/issues/3850 is resolved,
74 // remove this and just use templated getLocalView<Device>(ReadOnly).
75 template<typename MV>
76 struct GetReadOnly
77 {
78  using DevView = typename MV::dual_view_type::t_dev::const_type;
79  using HostView = typename MV::dual_view_type::t_host::const_type;
80 
81  template<typename exec_space>
82  static DevView get(const MV& x, typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
83  {
84  return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
85  }
86 
87  template<typename exec_space>
88  static HostView get(const MV& x, typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
89  {
90  return x.getLocalViewHost(Tpetra::Access::ReadOnly);
91  }
92 };
93 
95 
96 template<class MV, class ResultView, bool runOnDevice>
97 void idotLocal(const ResultView& localResult,
98  const MV& X,
99  const MV& Y)
100 {
101  using pair_type = Kokkos::pair<size_t, size_t>;
102  using exec_space = typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
103  //if the execution space can access localResult, use it directly. Otherwise need to make a copy.
104  static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
105  "idotLocal: Execution space must be able to access localResult");
106  //If Dot executes on Serial, it requires the result to be HostSpace. If localResult is CudaUVMSpace,
107  //we can just treat it like HostSpace.
108  Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
109  localResultUnmanaged(localResult.data(), localResult.extent(0));
110  const size_t numRows = X.getLocalLength ();
111  const pair_type rowRange (0, numRows);
112  const size_t X_numVecs = X.getNumVectors ();
113  const size_t Y_numVecs = Y.getNumVectors ();
114  const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
115  // Check compatibility of number of columns; allow special cases of
116  // a multivector dot a single vector, or vice versa.
117  if (X_numVecs != Y_numVecs &&
118  X_numVecs != size_t (1) &&
119  Y_numVecs != size_t (1)) {
120  std::ostringstream os;
121  os << "Tpetra::idot: X.getNumVectors() = " << X_numVecs
122  << " != Y.getNumVectors() = " << Y_numVecs
123  << ", but neither is 1.";
124  throw std::invalid_argument (os.str ());
125  }
126  auto X_lcl = GetReadOnly<MV>::template get<exec_space>(X);
127  auto Y_lcl = GetReadOnly<MV>::template get<exec_space>(Y);
128  //Can the multivector dot kernel be used?
129  bool useMVDot = X.isConstantStride() && Y.isConstantStride() && X_numVecs == Y_numVecs;
130  if(useMVDot)
131  {
132  if (numVecs == size_t (1)) {
133  auto X_lcl_1d = Kokkos::subview (X_lcl, rowRange, 0);
134  auto Y_lcl_1d = Kokkos::subview (Y_lcl, rowRange, 0);
135  auto result_0d = Kokkos::subview (localResultUnmanaged, 0);
136  KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
137  }
138  else {
139  auto X_lcl_2d = Kokkos::subview (X_lcl, rowRange, pair_type (0, X_numVecs));
140  auto Y_lcl_2d = Kokkos::subview (Y_lcl, rowRange, pair_type (0, Y_numVecs));
141  KokkosBlas::dot (localResultUnmanaged, X_lcl_2d, Y_lcl_2d);
142  }
143  }
144  else
145  {
146  auto XWhichVectors = Tpetra::getMultiVectorWhichVectors(X);
147  auto YWhichVectors = Tpetra::getMultiVectorWhichVectors(Y);
148  //Need to compute each dot individually, using 1D subviews of X_lcl and Y_lcl
149  for(size_t vec = 0; vec < numVecs; vec++) {
150  //Get the Vectors for each column of X and Y that will be dotted together.
151  //Note: "which vectors" is not populated for constant stride MVs (but it's the identity mapping)
152  size_t Xj = (numVecs == X_numVecs) ? vec : 0;
153  Xj = X.isConstantStride() ? Xj : XWhichVectors[Xj];
154  size_t Yj = (numVecs == Y_numVecs) ? vec : 0;
155  Yj = Y.isConstantStride() ? Yj : YWhichVectors[Yj];
156  auto Xcol = Kokkos::subview(X_lcl, rowRange, Xj);
157  auto Ycol = Kokkos::subview(Y_lcl, rowRange, Yj);
158 
159  //Compute the rank-1 dot product, and place the result in an element of localResult
160  KokkosBlas::dot(Kokkos::subview(localResultUnmanaged, vec), Xcol, Ycol);
161  }
162  }
163 }
164 
165 //Helper to avoid extra instantiations of KokkosBlas::dot and iallreduce.
166 template<typename MV, typename ResultView>
167 struct IdotHelper
168 {
169  using dot_type = typename MV::dot_type;
170 
171  //First version: use result directly
172  template<typename exec_space>
173  static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
174  const ResultView& globalResult, const MV& X, const MV& Y,
175  typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
176 
177  {
178  constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
179  idotLocal<MV, ResultView, runOnDevice>(globalResult, X, Y);
180  //Fence because we're giving result of device kernel to MPI
181  if(runOnDevice)
182  exec_space().fence();
183  auto comm = X.getMap()->getComm();
184  return iallreduce(globalResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
185  }
186 
187  //Second version: use a temporary local result, because exec_space can't write to globalResult
188  template<typename exec_space>
189  static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
190  const ResultView& globalResult, const MV& X, const MV& Y,
191  typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
192  {
193  constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
194  Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing("idot:localResult"), X.getNumVectors());
195  idotLocal<MV, decltype(localResult), runOnDevice>(localResult, X, Y);
196  //Fence because we're giving result of device kernel to MPI
197  if(runOnDevice)
198  exec_space().fence();
199  auto comm = X.getMap()->getComm();
200  return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
201  }
202 
203 };
204 
205 
208 template<class MV, class ResultView>
209 std::shared_ptr< ::Tpetra::Details::CommRequest>
210 idotImpl(const ResultView& globalResult,
211  const MV& X,
212  const MV& Y)
213 {
214  static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
215  "Tpetra::idot: result view's element type must match MV::dot_type");
216 
217  // Execution space to use for dot kernel(s) is whichever has access to up-to-date X.
218  if(X.need_sync_device())
219  {
220  //run on host.
221  return IdotHelper<MV, ResultView>::template run<Kokkos::DefaultHostExecutionSpace>(globalResult, X, Y);
222  }
223  else
224  {
225  //run on device.
226  return IdotHelper<MV, ResultView>::template run<typename MV::execution_space>(globalResult, X, Y);
227  }
228 }
229 } // namespace Details
230 
231 //
232 // SKIP DOWN TO HERE
233 //
234 
287 template<class SC, class LO, class GO, class NT>
288 std::shared_ptr< ::Tpetra::Details::CommRequest>
289 idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
290  const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
291  const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
292 {
293  using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
294  const size_t X_numVecs = X.getNumVectors ();
295  const size_t Y_numVecs = Y.getNumVectors ();
296  const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
297  Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
298  resultView(resultRaw, numVecs);
299  return Details::idotImpl(resultView, X, Y);
300 }
301 
364 template<class SC, class LO, class GO, class NT>
365 std::shared_ptr< ::Tpetra::Details::CommRequest>
366 idot (const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
367  typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
368  const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
369  const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
370 {
371  return Details::idotImpl(result, X, Y);
372 }
373 
415 template<class SC, class LO, class GO, class NT>
416 std::shared_ptr< ::Tpetra::Details::CommRequest>
417 idot (const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
418  typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
419  const ::Tpetra::Vector<SC, LO, GO, NT>& X,
420  const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
421 {
422  using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
423  using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
424  Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>> result1D(result.data(), 1);
425  return Details::idotImpl(result1D, X, Y);
426 }
427 
428 } // namespace Tpetra
429 
430 #endif // TPETRA_DETAILS_IDOT_HPP
std::shared_ptr< ::Tpetra::Details::CommRequest > idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
Internal (common) version of idot, a global dot product that uses a non-blocking MPI reduction...
void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
Compute dot product locally. Where the kernel runs controlled by runOnDevice.
Definition: Tpetra_idot.hpp:97
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs, and raw pointer or raw array output.
Declaration of Tpetra::iallreduce.