Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_copyConvert.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_COPYCONVERT_HPP
45 #define TPETRA_DETAILS_COPYCONVERT_HPP
46 
51 
52 #include "TpetraCore_config.h"
53 #include "Kokkos_Core.hpp"
54 #include "Kokkos_ArithTraits.hpp"
55 #include <sstream>
56 #include <stdexcept>
57 #include <type_traits>
58 
59 namespace Tpetra {
60 namespace Details {
61 
62 //
63 // Implementation details for copyConvert (see below).
64 // Users should skip over this anonymous namespace.
65 //
66 namespace { // (anonymous)
67 
68  // We need separate implementations for both (T,complex) and
69  // (complex,T), but we can't just overload for both cases, because
70  // that would be ambiguous (e.g., (complex,complex)).
71  template<class OutputValueType,
72  class InputValueType,
73  const bool outputIsComplex =
74  Kokkos::ArithTraits<OutputValueType>::is_complex,
75  const bool inputIsComplex =
76  Kokkos::ArithTraits<InputValueType>::is_complex>
77  struct ConvertValue
78  {
79  static KOKKOS_INLINE_FUNCTION void
80  convert (OutputValueType& dst, const InputValueType& src)
81  {
82  // This looks trivial, but it actually invokes OutputValueType's
83  // constructor, so that needs to be marked as a __host__
84  // __device__ function (e.g., via the KOKKOS_FUNCTION or
85  // KOKKOS_INLINE_FUNCTION macros).
86  dst = OutputValueType (src);
87  }
88  };
89 
90  template<class OutputRealType, class InputComplexType>
91  struct ConvertValue<OutputRealType, InputComplexType, false, true>
92  {
93  static KOKKOS_INLINE_FUNCTION void
94  convert (OutputRealType& dst,
95  const InputComplexType& src)
96  {
97  // OutputRealType's constructor needs to be marked with either
98  // KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
99  using KAI = Kokkos::ArithTraits<InputComplexType>;
100  dst = OutputRealType (KAI::real (src));
101  }
102  };
103 
104  template<class OutputComplexType, class InputRealType>
105  struct ConvertValue<OutputComplexType, InputRealType, true, false>
106  {
107  static KOKKOS_INLINE_FUNCTION void
108  convert (OutputComplexType& dst,
109  const InputRealType& src)
110  {
111  // OutputComplexType's constructor needs to be marked with
112  // either KOKKOS_FUNCTION or KOKKOS_INLINE_FUNCTION.
113  using output_mag_type =
114  typename Kokkos::ArithTraits<OutputComplexType>::mag_type;
115  using KAM = Kokkos::ArithTraits<output_mag_type>;
116  dst = OutputComplexType (src, KAM::zero ());
117  }
118  };
119 
120  template<class OutputValueType,
121  class InputValueType>
122  KOKKOS_INLINE_FUNCTION void
123  convertValue (OutputValueType& dst, const InputValueType& src) {
124  ConvertValue<OutputValueType, InputValueType>::convert (dst, src);
125  }
126 
// Functor that copies the entries of an input View into an output
// View, converting each entry to the output's value type.
//
// The third template parameter defaults to the rank of the output
// View.  This primary template is deliberately empty; the partial
// specializations for rank 1 and rank 2 provide the implementations.
template<class OutputViewType,
         class InputViewType,
         const int viewRank = static_cast<int> (OutputViewType::Rank)>
class CopyConvertFunctor {};
135 
136  template<class OutputViewType,
137  class InputViewType>
138  class CopyConvertFunctor<OutputViewType, InputViewType, 1> {
139  private:
140  static_assert
141  (static_cast<int> (OutputViewType::Rank) == 1 &&
142  static_cast<int> (InputViewType::Rank) == 1,
143  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
144  "OutputViewType and InputViewType must both have rank 1.");
145  OutputViewType dst_;
146  InputViewType src_;
147 
148  public:
149  using index_type = typename OutputViewType::size_type;
150 
151  CopyConvertFunctor (const OutputViewType& dst,
152  const InputViewType& src) :
153  dst_ (dst),
154  src_ (src)
155  {}
156 
157  KOKKOS_INLINE_FUNCTION void
158  operator () (const index_type i) const {
159  convertValue (dst_(i), src_(i));
160  }
161  };
162 
163  template<class OutputViewType,
164  class InputViewType>
165  class CopyConvertFunctor<OutputViewType, InputViewType, 2> {
166  public:
167  using index_type = typename OutputViewType::size_type;
168 
169  private:
170  static_assert
171  (static_cast<int> (OutputViewType::Rank) == 2 &&
172  static_cast<int> (InputViewType::Rank) == 2,
173  "CopyConvertFunctor (implements Tpetra::Details::copyConvert): "
174  "OutputViewType and InputViewType must both have rank 2.");
175  OutputViewType dst_;
176  InputViewType src_;
177  index_type numCols_;
178 
179  public:
180  CopyConvertFunctor (const OutputViewType& dst,
181  const InputViewType& src) :
182  dst_ (dst),
183  src_ (src),
184  numCols_ (dst.extent (1))
185  {}
186 
187  KOKKOS_INLINE_FUNCTION void
188  operator () (const index_type i) const {
189  const index_type numCols = numCols_;
190  for (index_type j = 0; j < numCols; ++j) {
191  convertValue (dst_(i,j), src_(i,j));
192  }
193  }
194  };
195 
// Compile-time predicate used to select the Kokkos::deep_copy code
// path of CopyConvertImpl.
//
// value is true if and only if the two View types have the same
// non-const value type, and in addition share either their memory
// space or their array layout.
template<class OutputViewType, class InputViewType>
class CanUseKokkosDeepCopy {
private:
  using dst_view = OutputViewType;
  using src_view = InputViewType;

  // No value conversion is needed.
  static constexpr bool valueTypesMatch =
    std::is_same<typename dst_view::non_const_value_type,
                 typename src_view::non_const_value_type>::value;
  // Both Views live in the same memory space.
  static constexpr bool memorySpacesMatch =
    std::is_same<typename dst_view::memory_space,
                 typename src_view::memory_space>::value;
  // Both Views use the same array layout.
  static constexpr bool layoutsMatch =
    std::is_same<typename dst_view::array_layout,
                 typename src_view::array_layout>::value;

public:
  static constexpr bool value =
    valueTypesMatch && (memorySpacesMatch || layoutsMatch);
};
214 
236  template<class OutputViewType,
237  class InputViewType,
238  const bool canUseKokkosDeepCopy =
239  CanUseKokkosDeepCopy<OutputViewType, InputViewType>::value,
240  const bool outputExecSpaceCanAccessInputMemSpace =
241  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
242  typename OutputViewType::memory_space,
243  typename InputViewType::memory_space>::value>
244  struct CopyConvertImpl {
245  static void
246  run (const OutputViewType& dst,
247  const InputViewType& src);
248  };
249 
251  template<class OutputViewType,
252  class InputViewType,
253  const bool outputExecSpaceCanAccessInputMemSpace>
254  struct CopyConvertImpl<OutputViewType, InputViewType,
255  true, outputExecSpaceCanAccessInputMemSpace>
256  {
257  static void
258  run (const OutputViewType& dst,
259  const InputViewType& src)
260  {
261  // NOTE: It's important to do the addition _inside_ the
262  // reinterpret-cast. If you reinterpret_cast the separate
263  // results, you may get the wrong answer (e.g., because
264  // ptrdiff_t is signed, and pointers may have arbitrary 64-bit
265  // virtual addresses). I'm speaking from experience here.
266  const ptrdiff_t dst_beg =reinterpret_cast<ptrdiff_t> (dst.data ());
267  const ptrdiff_t dst_end =
268  reinterpret_cast<ptrdiff_t> (dst.data () + dst.span ());
269  const ptrdiff_t src_beg = reinterpret_cast<ptrdiff_t> (src.data ());
270  const ptrdiff_t src_end =
271  reinterpret_cast<ptrdiff_t> (src.data () + src.span ());
272 
273  if (dst_end > src_beg && src_end > dst_beg) {
274  // dst and src alias each other, so we can't call
275  // Kokkos::deep_copy(dst,src) directly (Kokkos detects this
276  // and throws, at least in debug mode). Instead, we make
277  // temporary host storage (create_mirror always makes a new
278  // allocation, unlike create_mirror_view). Use host because
279  // it's cheaper to allocate. Hopefully users aren't doing
280  // aliased copies in a tight loop.
281  auto src_copy = Kokkos::create_mirror (Kokkos::HostSpace (), src);
282  Kokkos::deep_copy (src_copy, src);
283  Kokkos::deep_copy (dst, src_copy);
284  }
285  else { // no aliasing
286  Kokkos::deep_copy (dst, src);
287  }
288  }
289  };
290 
293  template<class OutputViewType,
294  class InputViewType>
295  struct CopyConvertImpl<OutputViewType,
296  InputViewType,
297  false,
298  true>
299  {
300  static void
301  run (const OutputViewType& dst,
302  const InputViewType& src)
303  {
304  using functor_type = CopyConvertFunctor<OutputViewType, InputViewType>;
305  using execution_space = typename OutputViewType::execution_space;
306  using index_type = typename OutputViewType::size_type;
307  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
308  Kokkos::parallel_for ("Tpetra::Details::copyConvert",
309  range_type (0, dst.extent (0)),
310  functor_type (dst, src));
311  }
312  };
313 
320  template<class OutputViewType,
321  class InputViewType>
322  struct CopyConvertImpl<OutputViewType, InputViewType, false, false>
323  {
324  static void
325  run (const OutputViewType& dst,
326  const InputViewType& src)
327  {
328  using output_memory_space = typename OutputViewType::memory_space;
329  auto src_outputSpaceCopy =
330  Kokkos::create_mirror_view (output_memory_space (), src);
331  Kokkos::deep_copy (src_outputSpaceCopy, src);
332 
333  // The output View's execution space can access
334  // outputSpaceCopy's data, so we can run the functor now.
335  using output_space_copy_type = decltype (src_outputSpaceCopy);
336  using functor_type =
337  CopyConvertFunctor<OutputViewType, output_space_copy_type>;
338  using execution_space = typename OutputViewType::execution_space;
339  using index_type = typename OutputViewType::size_type;
340  using range_type = Kokkos::RangePolicy<execution_space, index_type>;
341  Kokkos::parallel_for ("Tpetra::Details::copyConvert",
342  range_type (0, dst.extent (0)),
343  functor_type (dst, src_outputSpaceCopy));
344  }
345  };
346 } // namespace (anonymous)
347 
356 template<class OutputViewType,
357  class InputViewType>
358 void
359 copyConvert (const OutputViewType& dst,
360  const InputViewType& src)
361 {
362  static_assert (Kokkos::Impl::is_view<OutputViewType>::value,
363  "OutputViewType must be a Kokkos::View.");
364  static_assert (Kokkos::Impl::is_view<InputViewType>::value,
365  "InputViewType must be a Kokkos::View.");
366  static_assert (std::is_same<typename OutputViewType::value_type,
367  typename OutputViewType::non_const_value_type>::value,
368  "OutputViewType must be a nonconst Kokkos::View.");
369  static_assert (static_cast<int> (OutputViewType::Rank) ==
370  static_cast<int> (InputViewType::Rank),
371  "src and dst must have the same rank.");
372 
373  if (dst.extent (0) != src.extent (0)) {
374  std::ostringstream os;
375  os << "Tpetra::Details::copyConvert: "
376  << "dst.extent(0) = " << dst.extent (0)
377  << " != src.extent(0) = " << src.extent (0)
378  << ".";
379  throw std::invalid_argument (os.str ());
380  }
381  if (static_cast<int> (OutputViewType::Rank) > 1 &&
382  dst.extent (1) != src.extent (1)) {
383  std::ostringstream os;
384  os << "Tpetra::Details::copyConvert: "
385  << "dst.extent(1) = " << dst.extent (1)
386  << " != src.extent(1) = " << src.extent (1)
387  << ".";
388  throw std::invalid_argument (os.str ());
389  }
390 
391  // Canonicalize the View types in order to avoid redundant instantiations.
392  using output_view_type =
393  Kokkos::View<typename OutputViewType::non_const_data_type,
394  typename OutputViewType::array_layout,
395  typename OutputViewType::device_type>;
396  using input_view_type =
397  Kokkos::View<typename InputViewType::const_data_type,
398  typename InputViewType::array_layout,
399  typename InputViewType::device_type>;
400  CopyConvertImpl<output_view_type, input_view_type>::run (dst, src);
401 }
402 
403 } // namespace Details
404 } // namespace Tpetra
405 
406 #endif // TPETRA_DETAILS_COPYCONVERT_HPP
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void copyConvert(const OutputViewType &dst, const InputViewType &src)
Copy values from the 1-D Kokkos::View src, to the 1-D Kokkos::View dst, of the same length...