Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_COPYCONVERT_HPP
11 #define TPETRA_DETAILS_COPYCONVERT_HPP
12 
17 
18 #include "TpetraCore_config.h"
19 #include "Kokkos_Core.hpp"
20 #include "Kokkos_ArithTraits.hpp"
21 #include <sstream>
22 #include <stdexcept>
23 #include <type_traits>
24 
25 namespace Tpetra {
26 namespace Details {
27 
28 //
29 // Implementation details for copyConvert (see below).
30 // Users should skip over this anonymous namespace.
31 //
32 namespace { // (anonymous)
33 
34  // We need separate implementations for both (T,complex) and
35  // (complex,T), but we can't just overload for both cases, because
36  // that would be ambiguous (e.g., (complex,complex)).
37  template<class OutputValueType,
38  class InputValueType,
39  const bool outputIsComplex =
40  Kokkos::ArithTraits<OutputValueType>::is_complex,
41  const bool inputIsComplex =
42  Kokkos::ArithTraits<InputValueType>::is_complex>
43  struct ConvertValue
44  {
45  static KOKKOS_INLINE_FUNCTION void
46  convert (OutputValueType& dst, const InputValueType& src)
47  {
48  // This looks trivial, but it actually invokes OutputValueType's
49  // constructor, so that needs to be marked as a __host__
50  // __device__ function (e.g., via the KOKKOS_FUNCTION or
51  // KOKKOS_INLINE_FUNCTION macros).
52  dst = OutputValueType (src);
53  }
54  };
55 
56  template<class OutputRealType, class InputComplexType>
57  struct ConvertValue<OutputRealType, InputComplexType, false, true>
58  {
59  static KOKKOS_INLINE_FUNCTION void
60  convert (OutputRealType& dst,
61  const InputComplexType& src)
62  {
63  // OutputRealType's constructor needs to be marked with either
64  // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
65  using KAI = Kokkos::ArithTraits<InputComplexType>;
66  dst = OutputRealType (KAI::real (src));
67  }
68  };
69 
70  template<class OutputComplexType, class InputRealType>
71  struct ConvertValue<OutputComplexType, InputRealType, true, false>
72  {
73  static KOKKOS_INLINE_FUNCTION void
74  convert (OutputComplexType& dst,
75  const InputRealType& src)
76  {
77  // OutputComplexType's constructor needs to be marked with
78  // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
79  using output_mag_type =
80  typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
81  using KAM = Kokkos::ArithTraits<output_mag_type>;
82  dst = OutputComplexType (src, KAM::zero ());
83  }
84  };
85 
86  template<class OutputValueType,
87  class InputValueType>
88  KOKKOS_INLINE_FUNCTION void
89  convertValue (OutputValueType& dst, const InputValueType& src) {
90  ConvertValue<OutputValueType, InputValueType>::convert (dst, src);
91  }
92 
// Functor that copies src into dst entry by entry, converting each
// value.  The third template parameter is the View rank; it defaults
// to OutputViewType::rank.
//
// This primary template is empty (it has no operator()); the partial
// specializations for rank 1 and rank 2 provide the implementations.
template<class OutputViewType,
         class InputViewType,
         int rank = static_cast<int> (OutputViewType::rank)>
class CopyConvertFunctor
{
};
101 
102  template<class OutputViewType,
103  class InputViewType>
104  class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
105  private:
106  static_assert
107  (static_cast<int> (OutputViewType::rank) == 1 &&
108  static_cast<int> (InputViewType::rank) == 1,
109  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
110  "OutputViewType and InputViewType must both have rank 1.");
111  OutputViewType dst_;
112  InputViewType src_;
113 
114  public:
115  using index_type = typename OutputViewType::size_type;
116 
117  CopyConvertFunctor (const OutputViewType& dst,
118  const InputViewType& src) :
119  dst_ (dst),
120  src_ (src)
121  {}
122 
123  KOKKOS_INLINE_FUNCTION void
124  operator () (const index_type i) const {
125  convertValue (dst_(i), src_(i));
126  }
127  };
128 
129  template<class OutputViewType,
130  class InputViewType>
131  class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
132  public:
133  using index_type = typename OutputViewType::size_type;
134 
135  private:
136  static_assert
137  (static_cast<int> (OutputViewType::rank) == 2 &&
138  static_cast<int> (InputViewType::rank) == 2,
139  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
140  "OutputViewType and InputViewType must both have rank 2.");
141  OutputViewType dst_;
142  InputViewType src_;
143  index_type numCols_;
144 
145  public:
146  CopyConvertFunctor (const OutputViewType& dst,
147  const InputViewType& src) :
148  dst_ (dst),
149  src_ (src),
150  numCols_ (dst.extent (1))
151  {}
152 
153  KOKKOS_INLINE_FUNCTION void
154  operator () (const index_type i) const {
155  const index_type numCols = numCols_;
156  for (index_type j = 0; j < numCols; ++j) {
157  convertValue (dst_(i,j), src_(i,j));
158  }
159  }
160  };
161 
// Compile-time predicate used as the default for CopyConvertImpl's
// canUseKokkosDeepCopy parameter.
//
// `value` is true exactly when the two View types have the same
// non-const value type AND additionally share either their memory
// space or their array layout.
template<class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
  // Same entry type, ignoring const?
  static constexpr bool valueTypesMatch =
    std::is_same<typename OutputViewType::non_const_value_type,
                 typename InputViewType::non_const_value_type>::value;
  // Same memory space?
  static constexpr bool memorySpacesMatch =
    std::is_same<typename OutputViewType::memory_space,
                 typename InputViewType::memory_space>::value;
  // Same array layout?
  static constexpr bool layoutsMatch =
    std::is_same<typename OutputViewType::array_layout,
                 typename InputViewType::array_layout>::value;

public:
  static constexpr bool value =
    (valueTypesMatch && memorySpacesMatch) ||
    (valueTypesMatch && layoutsMatch);
};
180 
199  template<class OutputViewType,
200  class InputViewType,
201  const bool canUseKokkosDeepCopy =
202  CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
203  const bool outputExecSpaceCanAccessInputMemSpace =
204  Kokkos::SpaceAccessibility<
205  typename OutputViewType::memory_space,
206  typename InputViewType::memory_space>::accessible>
207  struct CopyConvertImpl {
208  static void
209  run (const OutputViewType& dst,
210  const InputViewType& src);
211  };
212 
214  template<class OutputViewType,
215  class InputViewType,
216  const bool outputExecSpaceCanAccessInputMemSpace>
217  struct CopyConvertImpl<OutputViewType, InputViewType,
218  true, outputExecSpaceCanAccessInputMemSpace>
219  {
220  static void
221  run (const OutputViewType& dst,
222  const InputViewType& src)
223  {
224  // NOTE: It's important to do the addition _inside_ the
225  // reinterpret-cast. If you reinterpret_cast the separate
226  // results, you may get the wrong answer (e.g., because
227  // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
228  // virtual addresses). I'm speaking from experience here.
229  const ptrdiff_t dst_beg =reinterpret_cast<ptrdiff_t> (dst.data ());
230  const ptrdiff_t dst_end =
231  reinterpret_cast<ptrdiff_t> (dst.data () + dst.span ());
232  const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t> (src.data ());
233  const ptrdiff_t src_end =
234  reinterpret_cast<ptrdiff_t> (src.data () + src.span ());
235 
236  if (dst_end > src_beg && src_end > dst_beg) {
237  // dst and src alias each other, so we can't call
238  // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
239  // and throws, at least in debug mode). Instead, we make
240  // temporary host storage (create_mirror always makes a new
241  // allocation, unlike create_mirror_view). Use host because
242  // it's cheaper to allocate. Hopefully users aren't doing
243  // aliased copies in a tight loop.
244  auto src_copy = Kokkos::create_mirror (Kokkos::HostSpace (), src);
245  // DEEP_COPY REVIEW - NOT TESTED
246  Kokkos::deep_copy (src_copy, src);
247  // DEEP_COPY REVIEW - NOT TESTED
248  Kokkos::deep_copy (dst, src_copy);
249  }
250  else { // no aliasing
251  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
252  using execution_space = typename OutputViewType::execution_space;
253  Kokkos::deep_copy (execution_space(), dst, src);
254  }
255  }
256  };
257 
260  template<class OutputViewType,
261  class InputViewType>
262  struct CopyConvertImpl<OutputViewType,
263  InputViewType,
264  false,
265  true>
266  {
267  static void
268  run (const OutputViewType& dst,
269  const InputViewType& src)
270  {
271  using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
272  using execution_space = typename OutputViewType::execution_space;
273  using index_type = typename OutputViewType::size_type;
274  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
275  Kokkos::parallel_for ("Tpetra::Details::copyConvert",
276  range_type (0, dst.extent (0)),
277  functor_type (dst, src));
278  }
279  };
280 
287  template<class OutputViewType,
288  class InputViewType>
289  struct CopyConvertImpl<OutputViewType, InputViewType, false, false>
290  {
291  static void
292  run (const OutputViewType& dst,
293  const InputViewType& src)
294  {
295  using output_memory_space = typename OutputViewType::memory_space;
296  using output_execution_space = typename OutputViewType::execution_space;
297  auto src_outputSpaceCopy =
298  Kokkos::create_mirror_view (output_memory_space (), src);
299  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
300  Kokkos::deep_copy (output_execution_space(), src_outputSpaceCopy, src);
301 
302  // The output View's execution space can access
303  // outputSpaceCopy's data, so we can run the functor now.
304  using output_space_copy_type = decltype (src_outputSpaceCopy);
305  using functor_type =
306  CopyConvertFunctor<OutputViewType, output_space_copy_type>;
307  using execution_space = typename OutputViewType::execution_space;
308  using index_type = typename OutputViewType::size_type;
309  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
310  Kokkos::parallel_for ("Tpetra::Details::copyConvert",
311  range_type (0, dst.extent (0)),
312  functor_type (dst, src_outputSpaceCopy));
313  }
314  };
315 } // namespace (anonymous)
316 
325 template<class OutputViewType,
326  class InputViewType>
327 void
328 copyConvert (const OutputViewType& dst,
329  const InputViewType& src)
330 {
331  static_assert (Kokkos::is_view<OutputViewType>::value,
332  "OutputViewType must be a Kokkos::View.");
333  static_assert (Kokkos::is_view<InputViewType>::value,
334  "InputViewType must be a Kokkos::View.");
335  static_assert (std::is_same<typename OutputViewType::value_type,
336  typename OutputViewType::non_const_value_type>::value,
337  "OutputViewType must be a nonconst Kokkos::View.");
338  static_assert (static_cast<int> (OutputViewType::rank) ==
339  static_cast<int> (InputViewType::rank),
340  "src and dst must have the same rank.");
341 
342  if (dst.extent (0) != src.extent (0)) {
343  std::ostringstream os;
344  os << "Tpetra::Details::copyConvert: "
345  << "dst.extent(0) = " << dst.extent (0)
346  << " != src.extent(0) = " << src.extent (0)
347  << ".";
348  throw std::invalid_argument (os.str ());
349  }
350  if (static_cast<int> (OutputViewType::rank) > 1 &&
351  dst.extent (1) != src.extent (1)) {
352  std::ostringstream os;
353  os << "Tpetra::Details::copyConvert: "
354  << "dst.extent(1) = " << dst.extent (1)
355  << " != src.extent(1) = " << src.extent (1)
356  << ".";
357  throw std::invalid_argument (os.str ());
358  }
359 
360  // Canonicalize the View types in order to avoid redundant instantiations.
361  using output_view_type =
362  Kokkos::View<typename OutputViewType::non_const_data_type,
363  typename OutputViewType::array_layout,
364  typename OutputViewType::device_type>;
365  using input_view_type =
366  Kokkos::View<typename InputViewType::const_data_type,
367  typename InputViewType::array_layout,
368  typename InputViewType::device_type>;
369  CopyConvertImpl<output_view_type, input_view_type>::run (dst, src);
370 }
371 
372 } // namespace Details
373 } // namespace Tpetra
374 
375 #endif // TPETRA_DETAILS_COPYCONVERT_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...