Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_COPYCONVERT_HPP
11 #define TPETRA_DETAILS_COPYCONVERT_HPP
12 
17 
18 #include "TpetraCore_config.h"
19 #include "Kokkos_Core.hpp"
20 #if KOKKOS_VERSION >= 40799
21 #include "KokkosKernels_ArithTraits.hpp"
22 #else
23 #include "Kokkos_ArithTraits.hpp"
24 #endif
25 #include <sstream>
26 #include <stdexcept>
27 #include <type_traits>
28 
29 namespace Tpetra {
30 namespace Details {
31 
32 //
33 // Implementation details for copyConvert (see below).
34 // Users should skip over this anonymous namespace.
35 //
36 namespace { // (anonymous)
37 
38 // We need separate implementations for both (T,complex) and
39 // (complex,T), but we can't just overload for both cases, because
40 // that would be ambiguous (e.g., (complex,complex)).
41 template <class OutputValueType,
42  class InputValueType,
43  const bool outputIsComplex =
44 #if KOKKOS_VERSION >= 40799
45  KokkosKernels::ArithTraits<OutputValueType>::is_complex,
46 #else
47  Kokkos::ArithTraits<OutputValueType>::is_complex,
48 #endif
49  const bool inputIsComplex =
50 #if KOKKOS_VERSION >= 40799
51  KokkosKernels::ArithTraits<InputValueType>::is_complex>
52 #else
53  Kokkos::ArithTraits<InputValueType>::is_complex>
54 #endif
55 struct ConvertValue {
56  static KOKKOS_INLINE_FUNCTION void
57  convert(OutputValueType& dst, const InputValueType& src) {
58  // This looks trivial, but it actually invokes OutputValueType's
59  // constructor, so that needs to be marked as a __host__
60  // __device__ function (e.g., via the KOKKOS_FUNCTION or
61  // KOKKOS_INLINE_FUNCTION macros).
62  dst = OutputValueType(src);
63  }
64 };
65 
66 template <class OutputRealType, class InputComplexType>
67 struct ConvertValue<OutputRealType, InputComplexType, false, true> {
68  static KOKKOS_INLINE_FUNCTION void
69  convert(OutputRealType& dst,
70  const InputComplexType& src) {
71  // OutputRealType's constructor needs to be marked with either
72  // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
73 #if KOKKOS_VERSION >= 40799
74  using KAI = KokkosKernels::ArithTraits<InputComplexType>;
75 #else
76  using KAI = Kokkos::ArithTraits<InputComplexType>;
77 #endif
78  dst = OutputRealType(KAI::real(src));
79  }
80 };
81 
82 template <class OutputComplexType, class InputRealType>
83 struct ConvertValue<OutputComplexType, InputRealType, true, false> {
84  static KOKKOS_INLINE_FUNCTION void
85  convert(OutputComplexType& dst,
86  const InputRealType& src) {
87  // OutputComplexType's constructor needs to be marked with
88  // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
89  using output_mag_type =
90 #if KOKKOS_VERSION >= 40799
91  typename KokkosKernels::ArithTraits<OutputComplexType>::mag_type;
92 #else
93  typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
94 #endif
95 #if KOKKOS_VERSION >= 40799
96  using KAM = KokkosKernels::ArithTraits<output_mag_type>;
97 #else
98  using KAM = Kokkos::ArithTraits<output_mag_type>;
99 #endif
100  dst = OutputComplexType(src, KAM::zero());
101  }
102 };
103 
104 template <class OutputValueType,
105  class InputValueType>
106 KOKKOS_INLINE_FUNCTION void
107 convertValue(OutputValueType& dst, const InputValueType& src) {
108  ConvertValue<OutputValueType, InputValueType>::convert(dst, src);
109 }
110 
// Functor that implements copyConvert for one row index at a time.
//
// This primary template is intentionally empty; the actual work is
// done by the partial specializations on the rank parameter, which
// defaults to the rank of OutputViewType.
template <class OutputViewType,
          class InputViewType,
          const int rank = static_cast<int>(OutputViewType::rank)>
class CopyConvertFunctor {
};
119 
120 template <class OutputViewType,
121  class InputViewType>
122 class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
123  private:
124  static_assert(static_cast<int>(OutputViewType::rank) == 1 &&
125  static_cast<int>(InputViewType::rank) == 1,
126  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
127  "OutputViewType and InputViewType must both have rank 1.");
128  OutputViewType dst_;
129  InputViewType src_;
130 
131  public:
132  using index_type = typename OutputViewType::size_type;
133 
134  CopyConvertFunctor(const OutputViewType& dst,
135  const InputViewType& src)
136  : dst_(dst)
137  , src_(src) {}
138 
139  KOKKOS_INLINE_FUNCTION void
140  operator()(const index_type i) const {
141  convertValue(dst_(i), src_(i));
142  }
143 };
144 
145 template <class OutputViewType,
146  class InputViewType>
147 class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
148  public:
149  using index_type = typename OutputViewType::size_type;
150 
151  private:
152  static_assert(static_cast<int>(OutputViewType::rank) == 2 &&
153  static_cast<int>(InputViewType::rank) == 2,
154  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
155  "OutputViewType and InputViewType must both have rank 2.");
156  OutputViewType dst_;
157  InputViewType src_;
158  index_type numCols_;
159 
160  public:
161  CopyConvertFunctor(const OutputViewType& dst,
162  const InputViewType& src)
163  : dst_(dst)
164  , src_(src)
165  , numCols_(dst.extent(1)) {}
166 
167  KOKKOS_INLINE_FUNCTION void
168  operator()(const index_type i) const {
169  const index_type numCols = numCols_;
170  for (index_type j = 0; j < numCols; ++j) {
171  convertValue(dst_(i, j), src_(i, j));
172  }
173  }
174 };
175 
// Compile-time predicate: true if copyConvert may hand the copy
// from InputViewType to OutputViewType straight to Kokkos::deep_copy.
//
// That requires identical (non-const) value types, plus either the
// same memory space or the same array layout.
template <class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
 private:
  using output_value_type = typename OutputViewType::non_const_value_type;
  using input_value_type  = typename InputViewType::non_const_value_type;

  using output_memory_space = typename OutputViewType::memory_space;
  using input_memory_space  = typename InputViewType::memory_space;

  using output_layout = typename OutputViewType::array_layout;
  using input_layout  = typename InputViewType::array_layout;

  static constexpr bool valueTypesMatch =
      std::is_same<output_value_type, input_value_type>::value;
  static constexpr bool memorySpacesMatch =
      std::is_same<output_memory_space, input_memory_space>::value;
  static constexpr bool layoutsMatch =
      std::is_same<output_layout, input_layout>::value;

 public:
  static constexpr bool value =
      valueTypesMatch && (memorySpacesMatch || layoutsMatch);
};
194 
213 template <class OutputViewType,
214  class InputViewType,
215  const bool canUseKokkosDeepCopy =
216  CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
217  const bool outputExecSpaceCanAccessInputMemSpace =
218  Kokkos::SpaceAccessibility<
219  typename OutputViewType::memory_space,
220  typename InputViewType::memory_space>::accessible>
221 struct CopyConvertImpl {
222  static void
223  run(const OutputViewType& dst,
224  const InputViewType& src);
225 };
226 
228 template <class OutputViewType,
229  class InputViewType,
230  const bool outputExecSpaceCanAccessInputMemSpace>
231 struct CopyConvertImpl<OutputViewType, InputViewType,
232  true, outputExecSpaceCanAccessInputMemSpace> {
233  static void
234  run(const OutputViewType& dst,
235  const InputViewType& src) {
236  // NOTE: It's important to do the addition _inside_ the
237  // reinterpret-cast. If you reinterpret_cast the separate
238  // results, you may get the wrong answer (e.g., because
239  // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
240  // virtual addresses). I'm speaking from experience here.
241  const ptrdiff_t dst_beg = reinterpret_cast<ptrdiff_t>(dst.data());
242  const ptrdiff_t dst_end =
243  reinterpret_cast<ptrdiff_t>(dst.data() + dst.span());
244  const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t>(src.data());
245  const ptrdiff_t src_end =
246  reinterpret_cast<ptrdiff_t>(src.data() + src.span());
247 
248  if (dst_end > src_beg && src_end > dst_beg) {
249  // dst and src alias each other, so we can't call
250  // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
251  // and throws, at least in debug mode). Instead, we make
252  // temporary host storage (create_mirror always makes a new
253  // allocation, unlike create_mirror_view). Use host because
254  // it's cheaper to allocate. Hopefully users aren't doing
255  // aliased copies in a tight loop.
256  auto src_copy = Kokkos::create_mirror(Kokkos::HostSpace(), src);
257  // DEEP_COPY REVIEW - NOT TESTED
258  Kokkos::deep_copy(src_copy, src);
259  // DEEP_COPY REVIEW - NOT TESTED
260  Kokkos::deep_copy(dst, src_copy);
261  } else { // no aliasing
262  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
263  using execution_space = typename OutputViewType::execution_space;
264  Kokkos::deep_copy(execution_space(), dst, src);
265  }
266  }
267 };
268 
271 template <class OutputViewType,
272  class InputViewType>
273 struct CopyConvertImpl<OutputViewType,
274  InputViewType,
275  false,
276  true> {
277  static void
278  run(const OutputViewType& dst,
279  const InputViewType& src) {
280  using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
281  using execution_space = typename OutputViewType::execution_space;
282  using index_type = typename OutputViewType::size_type;
283  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
284  Kokkos::parallel_for("Tpetra::Details::copyConvert",
285  range_type(0, dst.extent(0)),
286  functor_type(dst, src));
287  }
288 };
289 
296 template <class OutputViewType,
297  class InputViewType>
298 struct CopyConvertImpl<OutputViewType, InputViewType, false, false> {
299  static void
300  run(const OutputViewType& dst,
301  const InputViewType& src) {
302  using output_memory_space = typename OutputViewType::memory_space;
303  using output_execution_space = typename OutputViewType::execution_space;
304  auto src_outputSpaceCopy =
305  Kokkos::create_mirror_view(output_memory_space(), src);
306  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
307  Kokkos::deep_copy(output_execution_space(), src_outputSpaceCopy, src);
308 
309  // The output View's execution space can access
310  // outputSpaceCopy's data, so we can run the functor now.
311  using output_space_copy_type = decltype(src_outputSpaceCopy);
312  using functor_type =
313  CopyConvertFunctor<OutputViewType, output_space_copy_type>;
314  using execution_space = typename OutputViewType::execution_space;
315  using index_type = typename OutputViewType::size_type;
316  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
317  Kokkos::parallel_for("Tpetra::Details::copyConvert",
318  range_type(0, dst.extent(0)),
319  functor_type(dst, src_outputSpaceCopy));
320  }
321 };
322 } // namespace
323 
332 template <class OutputViewType,
333  class InputViewType>
334 void copyConvert(const OutputViewType& dst,
335  const InputViewType& src) {
336  static_assert(Kokkos::is_view<OutputViewType>::value,
337  "OutputViewType must be a Kokkos::View.");
338  static_assert(Kokkos::is_view<InputViewType>::value,
339  "InputViewType must be a Kokkos::View.");
340  static_assert(std::is_same<typename OutputViewType::value_type,
341  typename OutputViewType::non_const_value_type>::value,
342  "OutputViewType must be a nonconst Kokkos::View.");
343  static_assert(static_cast<int>(OutputViewType::rank) ==
344  static_cast<int>(InputViewType::rank),
345  "src and dst must have the same rank.");
346 
347  if (dst.extent(0) != src.extent(0)) {
348  std::ostringstream os;
349  os << "Tpetra::Details::copyConvert: "
350  << "dst.extent(0) = " << dst.extent(0)
351  << " != src.extent(0) = " << src.extent(0)
352  << ".";
353  throw std::invalid_argument(os.str());
354  }
355  if (static_cast<int>(OutputViewType::rank) > 1 &&
356  dst.extent(1) != src.extent(1)) {
357  std::ostringstream os;
358  os << "Tpetra::Details::copyConvert: "
359  << "dst.extent(1) = " << dst.extent(1)
360  << " != src.extent(1) = " << src.extent(1)
361  << ".";
362  throw std::invalid_argument(os.str());
363  }
364 
365  // Canonicalize the View types in order to avoid redundant instantiations.
366  using output_view_type =
367  Kokkos::View<typename OutputViewType::non_const_data_type,
368  typename OutputViewType::array_layout,
369  typename OutputViewType::device_type>;
370  using input_view_type =
371  Kokkos::View<typename InputViewType::const_data_type,
372  typename InputViewType::array_layout,
373  typename InputViewType::device_type>;
374  CopyConvertImpl<output_view_type, input_view_type>::run(dst, src);
375 }
376 
377 } // namespace Details
378 } // namespace Tpetra
379 
380 #endif // TPETRA_DETAILS_COPYCONVERT_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...