Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_copyConvert.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_COPYCONVERT_HPP
45 #define TPETRA_DETAILS_COPYCONVERT_HPP
46 
51 
52 #include "TpetraCore_config.h"
53 #include "Kokkos_Core.hpp"
54 #include "Kokkos_ArithTraits.hpp"
55 #include <sstream>
56 #include <stdexcept>
57 #include <type_traits>
58 
59 namespace Tpetra {
60 namespace Details {
61 
62 //
63 // Implementation details for copyConvert (see below).
64 // Users should skip over this anonymous namespace.
65 //
66 namespace { // (anonymous)
67 
68  // We need separate implementations for both (T,complex) and
69  // (complex,T), but we can't just overload for both cases, because
70  // that would be ambiguous (e.g., (complex,complex)).
71  template<class OutputValueType,
72  class InputValueType,
73  const bool outputIsComplex =
74  Kokkos::ArithTraits<OutputValueType>::is_complex,
75  const bool inputIsComplex =
76  Kokkos::ArithTraits<InputValueType>::is_complex>
77  struct ConvertValue
78  {
79  static KOKKOS_INLINE_FUNCTION void
80  convert (OutputValueType& dst, const InputValueType& src)
81  {
82  // This looks trivial, but it actually invokes OutputValueType's
83  // constructor, so that needs to be marked as a __host__
84  // __device__ function (e.g., via the KOKKOS_FUNCTION or
85  // KOKKOS_INLINE_FUNCTION macros).
86  dst = OutputValueType (src);
87  }
88  };
89 
90  template<class OutputRealType, class InputComplexType>
91  struct ConvertValue<OutputRealType, InputComplexType, false, true>
92  {
93  static KOKKOS_INLINE_FUNCTION void
94  convert (OutputRealType& dst,
95  const InputComplexType& src)
96  {
97  // OutputRealType's constructor needs to be marked with either
98  // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
99  using KAI = Kokkos::ArithTraits<InputComplexType>;
100  dst = OutputRealType (KAI::real (src));
101  }
102  };
103 
104  template<class OutputComplexType, class InputRealType>
105  struct ConvertValue<OutputComplexType, InputRealType, true, false>
106  {
107  static KOKKOS_INLINE_FUNCTION void
108  convert (OutputComplexType& dst,
109  const InputRealType& src)
110  {
111  // OutputComplexType's constructor needs to be marked with
112  // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
113  using output_mag_type =
114  typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
115  using KAM = Kokkos::ArithTraits<output_mag_type>;
116  dst = OutputComplexType (src, KAM::zero ());
117  }
118  };
119 
120  template<class OutputValueType,
121  class InputValueType>
122  KOKKOS_INLINE_FUNCTION void
123  convertValue (OutputValueType& dst, const InputValueType& src) {
124  ConvertValue<OutputValueType, InputValueType>::convert (dst, src);
125  }
126 
// Functor that implements the element-wise part of copyConvert.
//
// Template parameters:
//   OutputViewType  type of the destination View.
//   InputViewType   type of the source View.
//   rank            rank of the Views; defaults to OutputViewType::rank.
//
// This primary template is only a fallback for ranks that have no
// partial specialization.  It has no usable constructor or call
// operator, so instantiating it can never produce working code.  The
// static_assert turns that situation into a readable diagnostic
// instead of a "no matching constructor" error at the point of use.
template<class OutputViewType,
         class InputViewType,
         const int rank = static_cast<int> (OutputViewType::rank)>
class CopyConvertFunctor {
  static_assert
    (rank == 1 || rank == 2,
     "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
     "only rank-1 and rank-2 Views are supported.");
};
135 
136  template<class OutputViewType,
137  class InputViewType>
138  class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
139  private:
140  static_assert
141  (static_cast<int> (OutputViewType::rank) == 1 &&
142  static_cast<int> (InputViewType::rank) == 1,
143  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
144  "OutputViewType and InputViewType must both have rank 1.");
145  OutputViewType dst_;
146  InputViewType src_;
147 
148  public:
149  using index_type = typename OutputViewType::size_type;
150 
151  CopyConvertFunctor (const OutputViewType& dst,
152  const InputViewType& src) :
153  dst_ (dst),
154  src_ (src)
155  {}
156 
157  KOKKOS_INLINE_FUNCTION void
158  operator () (const index_type i) const {
159  convertValue (dst_(i), src_(i));
160  }
161  };
162 
163  template<class OutputViewType,
164  class InputViewType>
165  class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
166  public:
167  using index_type = typename OutputViewType::size_type;
168 
169  private:
170  static_assert
171  (static_cast<int> (OutputViewType::rank) == 2 &&
172  static_cast<int> (InputViewType::rank) == 2,
173  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
174  "OutputViewType and InputViewType must both have rank 2.");
175  OutputViewType dst_;
176  InputViewType src_;
177  index_type numCols_;
178 
179  public:
180  CopyConvertFunctor (const OutputViewType& dst,
181  const InputViewType& src) :
182  dst_ (dst),
183  src_ (src),
184  numCols_ (dst.extent (1))
185  {}
186 
187  KOKKOS_INLINE_FUNCTION void
188  operator () (const index_type i) const {
189  const index_type numCols = numCols_;
190  for (index_type j = 0; j < numCols; ++j) {
191  convertValue (dst_(i,j), src_(i,j));
192  }
193  }
194  };
195 
// Compile-time predicate: may copyConvert use Kokkos::deep_copy
// directly for this pair of View types?
//
// `value` is true if and only if the two Views have the same
// (non-const) value type AND they share a memory space or an array
// layout (or both).
template<class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
private:
  using dst_value_type = typename OutputViewType::non_const_value_type;
  using src_value_type = typename InputViewType::non_const_value_type;
  using dst_memory_space = typename OutputViewType::memory_space;
  using src_memory_space = typename InputViewType::memory_space;
  using dst_layout = typename OutputViewType::array_layout;
  using src_layout = typename InputViewType::array_layout;

  // No element conversion is needed.
  static constexpr bool valueTypesMatch =
    std::is_same<dst_value_type, src_value_type>::value;
  // Both Views live in the same memory space.
  static constexpr bool memorySpacesMatch =
    std::is_same<dst_memory_space, src_memory_space>::value;
  // Both Views use the same array layout.
  static constexpr bool layoutsMatch =
    std::is_same<dst_layout, src_layout>::value;

public:
  static constexpr bool value =
    valueTypesMatch && (memorySpacesMatch || layoutsMatch);
};
214 
233  template<class OutputViewType,
234  class InputViewType,
235  const bool canUseKokkosDeepCopy =
236  CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
237  const bool outputExecSpaceCanAccessInputMemSpace =
238  Kokkos::SpaceAccessibility<
239  typename OutputViewType::memory_space,
240  typename InputViewType::memory_space>::accessible>
241  struct CopyConvertImpl {
242  static void
243  run (const OutputViewType& dst,
244  const InputViewType& src);
245  };
246 
248  template<class OutputViewType,
249  class InputViewType,
250  const bool outputExecSpaceCanAccessInputMemSpace>
251  struct CopyConvertImpl<OutputViewType, InputViewType,
252  true, outputExecSpaceCanAccessInputMemSpace>
253  {
254  static void
255  run (const OutputViewType& dst,
256  const InputViewType& src)
257  {
258  // NOTE: It's important to do the addition _inside_ the
259  // reinterpret-cast. If you reinterpret_cast the separate
260  // results, you may get the wrong answer (e.g., because
261  // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
262  // virtual addresses). I'm speaking from experience here.
263  const ptrdiff_t dst_beg =reinterpret_cast<ptrdiff_t> (dst.data ());
264  const ptrdiff_t dst_end =
265  reinterpret_cast<ptrdiff_t> (dst.data () + dst.span ());
266  const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t> (src.data ());
267  const ptrdiff_t src_end =
268  reinterpret_cast<ptrdiff_t> (src.data () + src.span ());
269 
270  if (dst_end > src_beg && src_end > dst_beg) {
271  // dst and src alias each other, so we can't call
272  // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
273  // and throws, at least in debug mode). Instead, we make
274  // temporary host storage (create_mirror always makes a new
275  // allocation, unlike create_mirror_view). Use host because
276  // it's cheaper to allocate. Hopefully users aren't doing
277  // aliased copies in a tight loop.
278  auto src_copy = Kokkos::create_mirror (Kokkos::HostSpace (), src);
279  // DEEP_COPY REVIEW - NOT TESTED
280  Kokkos::deep_copy (src_copy, src);
281  // DEEP_COPY REVIEW - NOT TESTED
282  Kokkos::deep_copy (dst, src_copy);
283  }
284  else { // no aliasing
285  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
286  using execution_space = typename OutputViewType::execution_space;
287  Kokkos::deep_copy (execution_space(), dst, src);
288  }
289  }
290  };
291 
294  template<class OutputViewType,
295  class InputViewType>
296  struct CopyConvertImpl<OutputViewType,
297  InputViewType,
298  false,
299  true>
300  {
301  static void
302  run (const OutputViewType& dst,
303  const InputViewType& src)
304  {
305  using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
306  using execution_space = typename OutputViewType::execution_space;
307  using index_type = typename OutputViewType::size_type;
308  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
309  Kokkos::parallel_for ("Tpetra::Details::copyConvert",
310  range_type (0, dst.extent (0)),
311  functor_type (dst, src));
312  }
313  };
314 
321  template<class OutputViewType,
322  class InputViewType>
323  struct CopyConvertImpl<OutputViewType, InputViewType, false, false>
324  {
325  static void
326  run (const OutputViewType& dst,
327  const InputViewType& src)
328  {
329  using output_memory_space = typename OutputViewType::memory_space;
330  using output_execution_space = typename OutputViewType::execution_space;
331  auto src_outputSpaceCopy =
332  Kokkos::create_mirror_view (output_memory_space (), src);
333  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
334  Kokkos::deep_copy (output_execution_space(), src_outputSpaceCopy, src);
335 
336  // The output View's execution space can access
337  // outputSpaceCopy's data, so we can run the functor now.
338  using output_space_copy_type = decltype (src_outputSpaceCopy);
339  using functor_type =
340  CopyConvertFunctor<OutputViewType, output_space_copy_type>;
341  using execution_space = typename OutputViewType::execution_space;
342  using index_type = typename OutputViewType::size_type;
343  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
344  Kokkos::parallel_for ("Tpetra::Details::copyConvert",
345  range_type (0, dst.extent (0)),
346  functor_type (dst, src_outputSpaceCopy));
347  }
348  };
349 } // namespace (anonymous)
350 
359 template<class OutputViewType,
360  class InputViewType>
361 void
362 copyConvert (const OutputViewType& dst,
363  const InputViewType& src)
364 {
365  static_assert (Kokkos::is_view<OutputViewType>::value,
366  "OutputViewType must be a Kokkos::View.");
367  static_assert (Kokkos::is_view<InputViewType>::value,
368  "InputViewType must be a Kokkos::View.");
369  static_assert (std::is_same<typename OutputViewType::value_type,
370  typename OutputViewType::non_const_value_type>::value,
371  "OutputViewType must be a nonconst Kokkos::View.");
372  static_assert (static_cast<int> (OutputViewType::rank) ==
373  static_cast<int> (InputViewType::rank),
374  "src and dst must have the same rank.");
375 
376  if (dst.extent (0) != src.extent (0)) {
377  std::ostringstream os;
378  os << "Tpetra::Details::copyConvert: "
379  << "dst.extent(0) = " << dst.extent (0)
380  << " != src.extent(0) = " << src.extent (0)
381  << ".";
382  throw std::invalid_argument (os.str ());
383  }
384  if (static_cast<int> (OutputViewType::rank) > 1 &&
385  dst.extent (1) != src.extent (1)) {
386  std::ostringstream os;
387  os << "Tpetra::Details::copyConvert: "
388  << "dst.extent(1) = " << dst.extent (1)
389  << " != src.extent(1) = " << src.extent (1)
390  << ".";
391  throw std::invalid_argument (os.str ());
392  }
393 
394  // Canonicalize the View types in order to avoid redundant instantiations.
395  using output_view_type =
396  Kokkos::View<typename OutputViewType::non_const_data_type,
397  typename OutputViewType::array_layout,
398  typename OutputViewType::device_type>;
399  using input_view_type =
400  Kokkos::View<typename InputViewType::const_data_type,
401  typename InputViewType::array_layout,
402  typename InputViewType::device_type>;
403  CopyConvertImpl<output_view_type, input_view_type>::run (dst, src);
404 }
405 
406 } // namespace Details
407 } // namespace Tpetra
408 
409 #endif // TPETRA_DETAILS_COPYCONVERT_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...