Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
45 #define TPETRA_DETAILS_COPYOFFSETS_HPP
46 
51 
52 #include "TpetraCore_config.h"
53 #include "Kokkos_Core.hpp"
54 #include <limits>
55 #include <type_traits>
56 
57 namespace Tpetra {
58 namespace Details {
59 
60 //
61 // Implementation details for copyOffsets (see below).
62 // Users should skip over this anonymous namespace.
63 //
64 namespace { // (anonymous)
65 
// Compile-time predicate used by the implementation of copyOffsets
// (see below).
//
// T1 is the output integer type and T2 is the input integer type.
// value is true (meaning "no overflow check is needed") when either
//
//   - T1 is strictly wider than T2, or
//   - T1 and T2 have the same width, and T1 is unsigned or both T1
//     and T2 are signed.
//
// Both T1 and T2 are assumed to be built-in integer types.  The two
// bool template parameters are not consulted by the computation.
//
// NOTE(review): the rule yields true for an unsigned T1 paired with a
// signed T2, so negative input values are not accounted for here.
template<class T1, class T2,
         const bool T1_is_signed = std::is_signed<T1>::value,
         const bool T2_is_signed = std::is_signed<T2>::value>
struct OutputCanFitInput {
private:
  // Width relationships between the output (T1) and input (T2) types.
  static const bool outputIsWider = sizeof (T1) > sizeof (T2);
  static const bool sameWidth = sizeof (T1) == sizeof (T2);
  // Signedness relationships.
  static const bool outputIsUnsigned = std::is_unsigned<T1>::value;
  static const bool bothAreSigned =
    std::is_signed<T1>::value && std::is_signed<T2>::value;

public:
  static const bool value =
    outputIsWider || (sameWidth && (outputIsUnsigned || bothAreSigned));
};
83 
84  // Implementation detail of copyOffsets (see below).
85  //
86  // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
87  // Tpetra::Details::FixedHashTable uses this in its "copy"
88  // constructor for converting between different Device types. All
89  // the action happens in the partial specializations for different
90  // values of outputCanFitInput. "Output can fit input" means that
91  // casting the input's value type to the output's value type will
92  // never result in integer overflow.
93  template<class OutputViewType,
94  class InputViewType,
95  const bool outputCanFitInput =
96  OutputCanFitInput<typename OutputViewType::non_const_value_type,
97  typename InputViewType::non_const_value_type>::value>
98  class CopyOffsetsFunctor {};
99 
100  // Specialization for when overflow is possible.
101  template<class OutputViewType, class InputViewType>
102  class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
103  public:
104  typedef typename OutputViewType::execution_space execution_space;
105  typedef typename OutputViewType::size_type size_type;
106  typedef int value_type;
107 
108  typedef typename InputViewType::non_const_value_type input_value_type;
109  typedef typename OutputViewType::non_const_value_type output_value_type;
110 
111  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
112  dst_ (dst),
113  src_ (src),
114  // We know that output_value_type cannot fit all values of
115  // input_value_type, so an input_value_type can fit all values
116  // of output_value_type. This means we can convert from
117  // output_value_type to input_value_type. This is how we test
118  // whether a given input_value_type value can fit in an
119  // output_value_type.
120  minDstVal_ (static_cast<input_value_type> (std::numeric_limits<output_value_type>::min ())),
121  maxDstVal_ (static_cast<input_value_type> (std::numeric_limits<output_value_type>::max ()))
122  {
123  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
124  // a memory space, rather than an execution space, as the first
125  // argument of VerifyExecutionCanAccessMemorySpace.
126  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
127  typename OutputViewType::memory_space,
128  typename InputViewType::memory_space>::value,
129  "CopyOffsetsFunctor (implements copyOffsets): Output "
130  "View's space must be able to access the input View's "
131  "memory space.");
132  }
133 
134  KOKKOS_INLINE_FUNCTION void
135  operator () (const size_type& i, value_type& noOverflow) const {
136  const input_value_type src_i = src_(i);
137  if (src_i < minDstVal_ || src_i > maxDstVal_) {
138  noOverflow = 0;
139  }
140  dst_(i) = static_cast<output_value_type> (src_i);
141  }
142 
143  KOKKOS_INLINE_FUNCTION void init (value_type& noOverflow) const {
144  noOverflow = 1; // success (no overflow)
145  }
146 
147  KOKKOS_INLINE_FUNCTION void
148  join (volatile value_type& result,
149  const volatile value_type& current) const {
150  result = (result>0 && current>0)?1:0; // was there any overflow?
151  }
152 
153  private:
154  OutputViewType dst_;
155  InputViewType src_;
156  input_value_type minDstVal_;
157  input_value_type maxDstVal_;
158  };
159 
160  // Specialization for when overflow is impossible.
161  template<class OutputViewType, class InputViewType>
162  class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
163  public:
164  typedef typename OutputViewType::execution_space execution_space;
165  typedef typename OutputViewType::size_type size_type;
166  typedef int value_type;
167 
168  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
169  dst_ (dst),
170  src_ (src)
171  {
172  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
173  // a memory space, rather than an execution space, as the first
174  // argument of VerifyExecutionCanAccessMemorySpace.
175  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
176  typename OutputViewType::memory_space,
177  typename InputViewType::memory_space>::value,
178  "CopyOffsetsFunctor (implements copyOffsets): Output "
179  "View's space must be able to access the input View's "
180  "memory space.");
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  operator () (const size_type& i, value_type& /* noOverflow */) const {
185  // Overflow is impossible in this case, so there's no need to check.
186  dst_(i) = src_(i);
187  }
188 
189  KOKKOS_INLINE_FUNCTION void init (value_type& noOverflow) const {
190  noOverflow = 1; // success (no overflow)
191  }
192 
193  KOKKOS_INLINE_FUNCTION void
194  join (volatile value_type& result,
195  const volatile value_type& current) const {
196  result = (result>0 && current>0)?1:0; // was there any overflow?
197  }
198 
199  private:
200  OutputViewType dst_;
201  InputViewType src_;
202  };
203 
204  // Implementation detail of copyOffsets (see below).
205  //
206  // We specialize copyOffsets on two different conditions:
207  //
208  // 1. Are the two Views' layouts the same, and do the input and
209  // output Views have the same value type?
210  // 2. Can the output View's execution space access the input View's
211  // memory space?
212  //
213  // If (1) is true, that makes the implementation simple: just call
214  // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
215  // matter the device type). Otherwise, we need a custom copy
216  // functor. If (2) is true, then we can use CopyOffsetsFunctor
217  // directly. Otherwise, we have to copy the input View into the
218  // output View's memory space, before we can use the functor.
219  //
220  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use a
221  // memory space, rather than an execution space, as the first
222  // argument of VerifyExecutionCanAccessMemorySpace.
223  template<class OutputViewType,
224  class InputViewType,
225  const bool sameLayoutsSameOffsetTypes =
226  std::is_same<typename OutputViewType::array_layout,
227  typename InputViewType::array_layout>::value &&
228  std::is_same<typename OutputViewType::non_const_value_type,
229  typename InputViewType::non_const_value_type>::value,
230  const bool outputExecSpaceCanAccessInputMemSpace =
231  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
232  typename OutputViewType::memory_space,
233  typename InputViewType::memory_space>::value>
234  struct CopyOffsetsImpl {
235  static void run (const OutputViewType& dst, const InputViewType& src);
236  };
237 
238  // Specialization for sameLayoutsSameOffsetTypes = true:
239  //
240  // If both input and output Views have the same layout, and both
241  // input and output use the same type for offsets, then we don't
242  // need to check for overflow, and we can use Kokkos::deep_copy
243  // directly. It doesn't matter whether the output execution space
244  // can access the input memory space: Kokkos::deep_copy takes care
245  // of the details.
246  template<class OutputViewType,
247  class InputViewType,
248  const bool outputExecSpaceCanAccessInputMemSpace>
249  struct CopyOffsetsImpl<OutputViewType, InputViewType,
250  true, outputExecSpaceCanAccessInputMemSpace> {
251  static void run (const OutputViewType& dst, const InputViewType& src) {
252  static_assert (std::is_same<typename OutputViewType::non_const_value_type,
253  typename InputViewType::non_const_value_type>::value,
254  "CopyOffsetsImpl (implementation of copyOffsets): In order"
255  " to call this specialization, the input and output must "
256  "use the same offset type.");
257  static_assert (static_cast<int> (OutputViewType::rank) ==
258  static_cast<int> (InputViewType::rank),
259  "CopyOffsetsImpl (implementation of copyOffsets): In order"
260  " to call this specialization, src and dst must have the "
261  "same rank.");
262  static_assert (std::is_same<typename OutputViewType::array_layout,
263  typename InputViewType::array_layout>::value,
264  "CopyOffsetsImpl (implementation of copyOffsets): In order"
265  " to call this specialization, src and dst must have the "
266  "the same array_layout.");
267  Kokkos::deep_copy (dst, src);
268  }
269  };
270 
271  // Specializations for sameLayoutsSameOffsetTypes = false:
272  //
273  // If input and output don't have the same layout, or use different
274  // types for offsets, then we can't use Kokkos::deep_copy directly,
275  // and we may have to check for overflow.
276 
277  // Specialization for sameLayoutsSameOffsetTypes = false and
278  // outputExecSpaceCanAccessInputMemSpace = true:
279  //
280  // If the output execution space can access the input memory space,
281  // then we can use CopyOffsetsFunctor directly.
282  template<class OutputViewType,
283  class InputViewType>
284  struct CopyOffsetsImpl<OutputViewType, InputViewType,
285  false, true> {
286  static void run (const OutputViewType& dst, const InputViewType& src) {
287  static_assert (static_cast<int> (OutputViewType::rank) ==
288  static_cast<int> (InputViewType::rank),
289  "CopyOffsetsImpl (implementation of copyOffsets): "
290  "src and dst must have the same rank.");
291  constexpr bool sameLayoutsSameOffsetTypes =
292  std::is_same<typename OutputViewType::array_layout,
293  typename InputViewType::array_layout>::value &&
294  std::is_same<typename OutputViewType::non_const_value_type,
295  typename InputViewType::non_const_value_type>::value;
296  static_assert (! sameLayoutsSameOffsetTypes,
297  "CopyOffsetsImpl (implements copyOffsets): In order to "
298  "call this specialization, sameLayoutsSameOffsetTypes "
299  "must be false. That is, either the input and output "
300  "must have different array layouts, or their value types "
301  "must differ.");
302  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
303  // a memory space, rather than an execution space, as the first
304  // argument of VerifyExecutionCanAccessMemorySpace.
305  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
306  typename OutputViewType::memory_space,
307  typename InputViewType::memory_space>::value,
308  "CopyOffsetsImpl (implements copyOffsets): In order to "
309  "call this specialization, the output View's space must "
310  "be able to access the input View's memory space.");
311  typedef CopyOffsetsFunctor<OutputViewType, InputViewType> functor_type;
312  typedef typename OutputViewType::execution_space execution_space;
313  typedef typename OutputViewType::size_type size_type;
314  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
315 
316  int noOverflow = 0; // output argument of the reduction
317  Kokkos::parallel_reduce (range_type (0, dst.extent (0)),
318  functor_type (dst, src),
319  noOverflow);
320  TEUCHOS_TEST_FOR_EXCEPTION
321  (noOverflow==0, std::runtime_error, "copyOffsets: One or more values in "
322  "src were too big (in the sense of integer overflow) to fit in dst.");
323  }
324  };
325 
326  // Specialization for sameLayoutsSameOffsetTypes = false and
327  // outputExecSpaceCanAccessInputMemSpace = false.
328  //
329  // If the output execution space canNOT access the input memory
330  // space, then we can't use CopyOffsetsFunctor directly. Instead,
331  // tell Kokkos to copy the input View's data into the output View's
332  // memory space _first_. Since the offset types are different for
333  // this specialization, we can't just call Kokkos::deep_copy
334  // directly between the input and output Views of offsets; that
335  // wouldn't compile.
336  //
337  // This case can and does come up in practice: If the output View's
338  // execution space is Cuda, it cannot currently access host memory
339  // (that's the opposite direction from what UVM allows).
340  // Furthermore, that case specifically requires overflow checking,
341  // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
342  // offset type than Kokkos' host spaces.
343  template<class OutputViewType, class InputViewType>
344  struct CopyOffsetsImpl<OutputViewType, InputViewType,
345  false, false> {
346  static void run (const OutputViewType& dst, const InputViewType& src) {
347  static_assert (static_cast<int> (OutputViewType::rank) ==
348  static_cast<int> (InputViewType::rank),
349  "CopyOffsetsImpl (implementation of copyOffsets): In order"
350  " to call this specialization, src and dst must have the "
351  "same rank.");
352  constexpr bool sameLayoutsSameOffsetTypes =
353  std::is_same<typename OutputViewType::array_layout,
354  typename InputViewType::array_layout>::value &&
355  std::is_same<typename OutputViewType::non_const_value_type,
356  typename InputViewType::non_const_value_type>::value;
357  static_assert (! sameLayoutsSameOffsetTypes,
358  "CopyOffsetsImpl (implements copyOffsets): In order to "
359  "call this specialization, sameLayoutsSameOffsetTypes "
360  "must be false. That is, either the input and output "
361  "must have different array layouts, or their value types "
362  "must differ.");
363 
364  typedef Kokkos::View<typename InputViewType::non_const_value_type*,
365  Kokkos::LayoutLeft,
366  typename OutputViewType::device_type>
367  output_space_copy_type;
368  using Kokkos::ViewAllocateWithoutInitializing;
369  output_space_copy_type
370  outputSpaceCopy (ViewAllocateWithoutInitializing ("outputSpace"),
371  src.extent (0));
372  Kokkos::deep_copy (outputSpaceCopy, src);
373 
374  // The output View's execution space can access
375  // outputSpaceCopy's data, so we can run the functor now.
376  typedef CopyOffsetsFunctor<OutputViewType,
377  output_space_copy_type> functor_type;
378  typedef typename OutputViewType::execution_space execution_space;
379  typedef typename OutputViewType::size_type size_type;
380  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
381 
382  int noOverflow = 0;
383  Kokkos::parallel_reduce (range_type (0, dst.extent (0)),
384  functor_type (dst, outputSpaceCopy),
385  noOverflow);
386  TEUCHOS_TEST_FOR_EXCEPTION
387  (noOverflow==0, std::runtime_error, "copyOffsets: One or more values "
388  "in src were too big (in the sense of integer overflow) to fit in "
389  "dst.");
390  }
391  };
392 } // namespace (anonymous)
393 
405 template<class OutputViewType, class InputViewType>
406 void
407 copyOffsets (const OutputViewType& dst, const InputViewType& src)
408 {
409  static_assert (Kokkos::Impl::is_view<OutputViewType>::value,
410  "OutputViewType (the type of dst) must be a Kokkos::View.");
411  static_assert (Kokkos::Impl::is_view<InputViewType>::value,
412  "InputViewType (the type of src) must be a Kokkos::View.");
413  static_assert (std::is_same<typename OutputViewType::value_type,
414  typename OutputViewType::non_const_value_type>::value,
415  "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
416  static_assert (static_cast<int> (OutputViewType::rank) == 1,
417  "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
418  static_assert (static_cast<int> (InputViewType::rank) == 1,
419  "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
420  static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
421  "The entries of dst must be built-in integers.");
422  static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
423  "The entries of src must be built-in integers.");
424 
425  TEUCHOS_TEST_FOR_EXCEPTION
426  (dst.extent (0) != src.extent (0), std::invalid_argument,
427  "copyOffsets: dst.extent(0) = " << dst.extent (0)
428  << " != src.extent(0) = " << src.extent (0) << ".");
429 
430  CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
431 }
432 
433 } // namespace Details
434 } // namespace Tpetra
435 
436 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.