Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
11 #define TPETRA_DETAILS_COPYOFFSETS_HPP
12 
17 
18 #include "TpetraCore_config.h"
20 #include "Kokkos_Core.hpp"
21 #include <limits>
22 #include <type_traits>
23 
24 namespace Tpetra {
25 namespace Details {
26 
27 //
28 // Implementation details for copyOffsets (see below).
29 // Users should skip over this anonymous namespace.
30 //
31 namespace { // (anonymous)
32 
// Implementation detail of copyOffsets (see below).  Determines
// whether integer overflow is impossible on assignment from an
// InputType to an OutputType, that is, whether every value of
// InputType is representable as an OutputType.
//
// Implicit here is the assumption that both input and output types
// are integers.
//
// CopyOffsetsFunctor uses this to pick the specialization that skips
// overflow checking, so "value" must only be true when no input value
// can wrap.  The three ways assignment can overflow are the same ones
// that OverflowChecker::could_overflow lists:
//
// 1. Signed to unsigned: negative inputs wrap around.
// 2. Wider to narrower: inputs of large magnitude get truncated.
// 3. Same width, unsigned to signed: inputs above the signed max wrap.
template<class OutputType, class InputType>
struct OutputCanFitInput {
private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed = std::is_signed<InputType>::value;

  // Case 1: a negative input has no unsigned representation,
  // regardless of how wide the output type is.
  static constexpr bool negatives_could_wrap =
    ! output_signed && input_signed;
  // Case 2.
  static constexpr bool output_is_narrower =
    sizeof (OutputType) < sizeof (InputType);
  // Case 3.
  static constexpr bool same_width_unsigned_to_signed =
    sizeof (OutputType) == sizeof (InputType) &&
    output_signed && ! input_signed;

public:
  static constexpr bool value =
    ! (negatives_could_wrap ||
       output_is_narrower ||
       same_width_unsigned_to_signed);
};
50 
// Helper for asking "is src < 0?" without provoking compiler warnings
// about "unsigned integer < 0" comparisons.  The second template
// parameter dispatches on whether InputType is signed; only the two
// partial specializations below provide the negative() method.
template<class InputType,
         bool isSignedInput = std::is_signed<InputType>::value>
struct Negative {};

// Signed InputType: do the actual comparison against zero.
template<class InputType>
struct Negative<InputType, true> {
  static KOKKOS_INLINE_FUNCTION bool
  negative (const InputType src) {
    const InputType zero (0);
    return src < zero;
  }
};

// Unsigned InputType: the value can never be negative, so don't
// compare at all.
template<class InputType>
struct Negative<InputType, false> {
  static KOKKOS_INLINE_FUNCTION bool
  negative (const InputType /* src */) {
    return false;
  }
};
71 
72  template<class InputType>
73  KOKKOS_INLINE_FUNCTION bool negative (const InputType src) {
74  return Negative<InputType>::negative (src);
75  }
76 
// Implementation detail of copyOffsets (see below).
//
// Run-time check for whether a given InputType value would overflow
// (in either direction) if cast to OutputType.  Both types are
// assumed to be built-in integers.
//
// The lower and upper bounds are checked separately, and each one is
// only checked when the corresponding OutputType limit is actually
// representable as an InputType.  This matters for the signed to
// narrower unsigned case (e.g., int64_t to uint32_t), which can
// overflow BOTH because of negative values and because of values
// that are too large.
template<class OutputType, class InputType>
struct OverflowChecker {
private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed = std::is_signed<InputType>::value;

  // Can InputType hold values less than min OutputType?  Only if the
  // input is signed, and the output is either unsigned (its min is
  // zero) or a narrower signed type.  In both cases, min OutputType
  // is representable as an InputType.
  static constexpr bool could_precede_min =
    input_signed &&
    (! output_signed || sizeof (OutputType) < sizeof (InputType));

  // Can InputType hold values greater than max OutputType?  Only if
  // the output is narrower, or has the same width but is signed while
  // the input is unsigned.  In both cases, max OutputType is
  // representable as an InputType.
  static constexpr bool could_exceed_max =
    (sizeof (OutputType) < sizeof (InputType)) ||
    (sizeof (OutputType) == sizeof (InputType) &&
     output_signed && ! input_signed);

public:
  // 1. Signed to unsigned could overflow due to negative numbers.
  // 2. Larger to smaller could overflow.
  // 3. Same size but unsigned to signed could overflow.
  static constexpr bool could_overflow =
    could_precede_min || could_exceed_max;

  // Return true if and only if src is not representable as an
  // OutputType.
  KOKKOS_INLINE_FUNCTION bool
  overflows (const InputType src) const
  {
    // We're only comparing InputType with InputType here, so this
    // should not emit warnings (including "unsigned < 0" warnings,
    // since the bound is a variable rather than a literal zero).
    if (could_precede_min && src < minDstVal_) {
      return true;
    }
    if (could_exceed_max && src > maxDstVal_) {
      return true;
    }
    return false;
  }

private:
  // Casting a limit of OutputType to InputType could itself overflow
  // (see #5548), so each bound is only computed from OutputType when
  // it is known to fit.  Otherwise the bound is a placeholder that
  // overflows() never reads.
  InputType minDstVal_ = could_precede_min ?
    static_cast<InputType> (std::numeric_limits<OutputType>::min ()) :
    InputType (0);
  InputType maxDstVal_ = could_exceed_max ?
    static_cast<InputType> (std::numeric_limits<OutputType>::max ()) :
    std::numeric_limits<InputType>::max ();
};
117 
118 
119  template<class OutputViewType, class InputViewType>
120  void
121  errorIfOverflow (const OutputViewType& dst,
122  const InputViewType& src,
123  const size_t overflowCount)
124  {
125  if (overflowCount == 0) {
126  return;
127  }
128 
129  std::ostringstream os;
130  const bool plural = overflowCount != size_t (1);
131  os << "copyOffsets: " << overflowCount << " value" <<
132  (plural ? "s" : "") << " in src were too big (in the "
133  "sense of integer overflow) to fit in dst.";
134 
135  const bool verbose = Details::Behavior::verbose ();
136  if (verbose) {
137  const size_t maxNumToPrint =
139  const size_t srcLen (src.extent (0));
140  if (srcLen <= maxNumToPrint) {
141  auto dst_h = Kokkos::create_mirror_view (dst);
142  auto src_h = Kokkos::create_mirror_view (src);
143  // DEEP_COPY REVIEW - NOT TESTED
144  Kokkos::deep_copy (src_h, src);
145  // DEEP_COPY REVIEW - NOT TESTED
146  Kokkos::deep_copy (dst_h, dst);
147 
148  os << " src: [";
149  for (size_t k = 0; k < srcLen; ++k) {
150  os << src_h[k];
151  if (k + size_t (1) < srcLen) {
152  os << ", ";
153  }
154  }
155  os << "], ";
156 
157  os << " dst: [";
158  for (size_t k = 0; k < srcLen; ++k) {
159  os << dst_h[k];
160  if (k + size_t (1) < srcLen) {
161  os << ", ";
162  }
163  }
164  os << "].";
165  }
166  else {
167  os << " src.extent(0) > " << maxNumToPrint << ", Tpetra's "
168  "verbose print count threshold. To increase this, set the "
169  "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
170  "to the desired threshold and rerun. You do NOT need to "
171  "rebuild Trilinos.";
172  }
173  }
174  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str ());
175  }
176 
177  // Implementation detail of copyOffsets (see below).
178  //
179  // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
180  // Tpetra::Details::FixedHashTable uses this in its "copy"
181  // constructor for converting between different Device types. All
182  // the action happens in the partial specializations for different
183  // values of outputCanFitInput. "Output can fit input" means that
184  // casting the input's value type to the output's value type will
185  // never result in integer overflow.
186  template<class OutputViewType,
187  class InputViewType,
188  const bool outputCanFitInput =
189  OutputCanFitInput<typename OutputViewType::non_const_value_type,
190  typename InputViewType::non_const_value_type>::value>
191  class CopyOffsetsFunctor {};
192 
193  // Specialization for when overflow is possible.
194  template<class OutputViewType, class InputViewType>
195  class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
196  public:
197  using execution_space = typename OutputViewType::execution_space;
198  using size_type = typename OutputViewType::size_type;
199  using value_type = size_t;
200 
201  using input_value_type = typename InputViewType::non_const_value_type;
202  using output_value_type = typename OutputViewType::non_const_value_type;
203 
204  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
205  dst_ (dst), src_ (src)
206  {
207  static_assert (Kokkos::SpaceAccessibility<
208  typename OutputViewType::memory_space,
209  typename InputViewType::memory_space>::accessible,
210  "CopyOffsetsFunctor (implements copyOffsets): Output "
211  "View's space must be able to access the input View's "
212  "memory space.");
213  }
214 
215  KOKKOS_INLINE_FUNCTION void
216  operator () (const size_type i, value_type& overflowCount) const {
217  const input_value_type src_i = src_(i);
218  if (checker_.overflows (src_i)) {
219  ++overflowCount;
220  }
221  dst_(i) = static_cast<output_value_type> (src_i);
222  }
223 
224  KOKKOS_INLINE_FUNCTION void
225  operator () (const size_type i) const {
226  const input_value_type src_i = src_(i);
227  dst_(i) = static_cast<output_value_type> (src_i);
228  }
229 
230  KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
231  overflowCount = 0;
232  }
233 
234  KOKKOS_INLINE_FUNCTION void
235  join (value_type& result,
236  const value_type& current) const {
237  result += current;
238  }
239 
240  private:
241  OutputViewType dst_;
242  InputViewType src_;
243  OverflowChecker<output_value_type, input_value_type> checker_;
244  };
245 
246  // Specialization for when overflow is impossible.
247  template<class OutputViewType, class InputViewType>
248  class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
249  public:
250  using execution_space = typename OutputViewType::execution_space;
251  using size_type = typename OutputViewType::size_type;
252  using value_type = size_t;
253 
254  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
255  dst_ (dst),
256  src_ (src)
257  {
258  static_assert (Kokkos::SpaceAccessibility<
259  typename OutputViewType::memory_space,
260  typename InputViewType::memory_space>::accessible,
261  "CopyOffsetsFunctor (implements copyOffsets): Output "
262  "View's space must be able to access the input View's "
263  "memory space.");
264  }
265 
266  KOKKOS_INLINE_FUNCTION void
267  operator () (const size_type i, value_type& /* overflowCount */) const {
268  // Overflow is impossible in this case, so there's no need to check.
269  dst_(i) = src_(i);
270  }
271 
272  KOKKOS_INLINE_FUNCTION void
273  operator () (const size_type i) const {
274  dst_(i) = src_(i);
275  }
276 
277  KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
278  overflowCount = 0;
279  }
280 
281  KOKKOS_INLINE_FUNCTION void
282  join (value_type& /* result */,
283  const value_type& /* current */) const
284  {}
285 
286  private:
287  OutputViewType dst_;
288  InputViewType src_;
289  };
290 
291  // Implementation detail of copyOffsets (see below).
292  //
293  // We specialize copyOffsets on two different conditions:
294  //
295  // 1. Are the two Views' layouts the same, and do the input and
296  // output Views have the same value type?
297  // 2. Can the output View's execution space access the input View's
298  // memory space?
299  //
300  // If (1) is true, that makes the implementation simple: just call
301  // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
302  // matter the device type). Otherwise, we need a custom copy
303  // functor. If (2) is true, then we can use CopyOffsetsFunctor
304  // directly. Otherwise, we have to copy the input View into the
305  // output View's memory space, before we can use the functor.
306  //
307  template<class OutputViewType,
308  class InputViewType,
309  const bool sameLayoutsSameOffsetTypes =
310  std::is_same<typename OutputViewType::array_layout,
311  typename InputViewType::array_layout>::value &&
312  std::is_same<typename OutputViewType::non_const_value_type,
313  typename InputViewType::non_const_value_type>::value,
314  const bool outputExecSpaceCanAccessInputMemSpace =
315  Kokkos::SpaceAccessibility<
316  typename OutputViewType::memory_space,
317  typename InputViewType::memory_space>::accessible>
318  struct CopyOffsetsImpl {
319  static void run (const OutputViewType& dst, const InputViewType& src);
320  };
321 
322  // Specialization for sameLayoutsSameOffsetTypes = true:
323  //
324  // If both input and output Views have the same layout, and both
325  // input and output use the same type for offsets, then we don't
326  // need to check for overflow, and we can use Kokkos::deep_copy
327  // directly. It doesn't matter whether the output execution space
328  // can access the input memory space: Kokkos::deep_copy takes care
329  // of the details.
330  template<class OutputViewType,
331  class InputViewType,
332  const bool outputExecSpaceCanAccessInputMemSpace>
333  struct CopyOffsetsImpl<OutputViewType, InputViewType,
334  true, outputExecSpaceCanAccessInputMemSpace> {
335  static void run (const OutputViewType& dst, const InputViewType& src) {
336  static_assert (std::is_same<typename OutputViewType::non_const_value_type,
337  typename InputViewType::non_const_value_type>::value,
338  "CopyOffsetsImpl (implementation of copyOffsets): In order"
339  " to call this specialization, the input and output must "
340  "use the same offset type.");
341  static_assert (static_cast<int> (OutputViewType::rank) ==
342  static_cast<int> (InputViewType::rank),
343  "CopyOffsetsImpl (implementation of copyOffsets): In order"
344  " to call this specialization, src and dst must have the "
345  "same rank.");
346  static_assert (std::is_same<typename OutputViewType::array_layout,
347  typename InputViewType::array_layout>::value,
348  "CopyOffsetsImpl (implementation of copyOffsets): In order"
349  " to call this specialization, src and dst must have the "
350  "the same array_layout.");
351  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
352  using execution_space = typename OutputViewType::execution_space;
353  Kokkos::deep_copy (execution_space(), dst, src);
354  }
355  };
356 
357  // Specializations for sameLayoutsSameOffsetTypes = false:
358  //
359  // If input and output don't have the same layout, or use different
360  // types for offsets, then we can't use Kokkos::deep_copy directly,
361  // and we may have to check for overflow.
362 
363  // Specialization for sameLayoutsSameOffsetTypes = false and
364  // outputExecSpaceCanAccessInputMemSpace = true:
365  //
366  // If the output execution space can access the input memory space,
367  // then we can use CopyOffsetsFunctor directly.
368  template<class OutputViewType,
369  class InputViewType>
370  struct CopyOffsetsImpl<OutputViewType, InputViewType,
371  false, true> {
372  static void run (const OutputViewType& dst, const InputViewType& src) {
373  static_assert (static_cast<int> (OutputViewType::rank) ==
374  static_cast<int> (InputViewType::rank),
375  "CopyOffsetsImpl (implementation of copyOffsets): "
376  "src and dst must have the same rank.");
377  constexpr bool sameLayoutsSameOffsetTypes =
378  std::is_same<typename OutputViewType::array_layout,
379  typename InputViewType::array_layout>::value &&
380  std::is_same<typename OutputViewType::non_const_value_type,
381  typename InputViewType::non_const_value_type>::value;
382  static_assert (! sameLayoutsSameOffsetTypes,
383  "CopyOffsetsImpl (implements copyOffsets): In order to "
384  "call this specialization, sameLayoutsSameOffsetTypes "
385  "must be false. That is, either the input and output "
386  "must have different array layouts, or their value types "
387  "must differ.");
388  static_assert (Kokkos::SpaceAccessibility<
389  typename OutputViewType::memory_space,
390  typename InputViewType::memory_space>::accessible,
391  "CopyOffsetsImpl (implements copyOffsets): In order to "
392  "call this specialization, the output View's space must "
393  "be able to access the input View's memory space.");
394  using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
395  using execution_space = typename OutputViewType::execution_space;
396  using size_type = typename OutputViewType::size_type;
397  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
398 
399  const bool debug = Details::Behavior::debug ();
400  if (debug) {
401  size_t overflowCount = 0; // output argument of the reduction
402  Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
403  range_type (0, dst.extent (0)),
404  functor_type (dst, src),
405  overflowCount);
406  errorIfOverflow (dst, src, overflowCount);
407  }
408  else {
409  Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
410  range_type (0, dst.extent (0)),
411  functor_type (dst, src));
412  }
413  }
414  };
415 
416  // Specialization for sameLayoutsSameOffsetTypes = false and
417  // outputExecSpaceCanAccessInputMemSpace = false.
418  //
419  // If the output execution space canNOT access the input memory
420  // space, then we can't use CopyOffsetsFunctor directly. Instead,
421  // tell Kokkos to copy the input View's data into the output View's
422  // memory space _first_. Since the offset types are different for
423  // this specialization, we can't just call Kokkos::deep_copy
424  // directly between the input and output Views of offsets; that
425  // wouldn't compile.
426  //
427  // This case can and does come up in practice: If the output View's
428  // execution space is Cuda, it cannot currently access host memory
429  // (that's the opposite direction from what UVM allows).
430  // Furthermore, that case specifically requires overflow checking,
431  // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
432  // offset type than Kokkos' host spaces.
433  template<class OutputViewType, class InputViewType>
434  struct CopyOffsetsImpl<OutputViewType, InputViewType,
435  false, false> {
436  static void run (const OutputViewType& dst, const InputViewType& src) {
437  static_assert (static_cast<int> (OutputViewType::rank) ==
438  static_cast<int> (InputViewType::rank),
439  "CopyOffsetsImpl (implementation of copyOffsets): In order"
440  " to call this specialization, src and dst must have the "
441  "same rank.");
442  constexpr bool sameLayoutsSameOffsetTypes =
443  std::is_same<typename OutputViewType::array_layout,
444  typename InputViewType::array_layout>::value &&
445  std::is_same<typename OutputViewType::non_const_value_type,
446  typename InputViewType::non_const_value_type>::value;
447  static_assert (! sameLayoutsSameOffsetTypes,
448  "CopyOffsetsImpl (implements copyOffsets): In order to "
449  "call this specialization, sameLayoutsSameOffsetTypes "
450  "must be false. That is, either the input and output "
451  "must have different array layouts, or their value types "
452  "must differ.");
453  using output_space_copy_type =
454  Kokkos::View<typename InputViewType::non_const_value_type*,
455  Kokkos::LayoutLeft, typename OutputViewType::device_type>;
456  using Kokkos::view_alloc;
457  using Kokkos::WithoutInitializing;
458  using execution_space = typename OutputViewType::execution_space;
459  output_space_copy_type
460  outputSpaceCopy (view_alloc ("outputSpace", WithoutInitializing),
461  src.extent (0));
462  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
463  Kokkos::deep_copy (execution_space(), outputSpaceCopy, src);
464 
465  // The output View's execution space can access
466  // outputSpaceCopy's data, so we can run the functor now.
467  using functor_type =
468  CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
469  using size_type = typename OutputViewType::size_type;
470  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
471 
472  const bool debug = Details::Behavior::debug ();
473  if (debug) {
474  size_t overflowCount = 0;
475  Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
476  range_type (0, dst.extent (0)),
477  functor_type (dst, outputSpaceCopy),
478  overflowCount);
479  errorIfOverflow (dst, src, overflowCount);
480  }
481  else {
482  Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
483  range_type (0, dst.extent (0)),
484  functor_type (dst, outputSpaceCopy));
485  }
486  }
487  };
488 } // namespace (anonymous)
489 
501 template<class OutputViewType, class InputViewType>
502 void
503 copyOffsets (const OutputViewType& dst, const InputViewType& src)
504 {
505  static_assert (Kokkos::is_view<OutputViewType>::value,
506  "OutputViewType (the type of dst) must be a Kokkos::View.");
507  static_assert (Kokkos::is_view<InputViewType>::value,
508  "InputViewType (the type of src) must be a Kokkos::View.");
509  static_assert (std::is_same<typename OutputViewType::value_type,
510  typename OutputViewType::non_const_value_type>::value,
511  "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
512  static_assert (static_cast<int> (OutputViewType::rank) == 1,
513  "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
514  static_assert (static_cast<int> (InputViewType::rank) == 1,
515  "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
516  static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
517  "The entries of dst must be built-in integers.");
518  static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
519  "The entries of src must be built-in integers.");
520 
521  TEUCHOS_TEST_FOR_EXCEPTION
522  (dst.extent (0) != src.extent (0), std::invalid_argument,
523  "copyOffsets: dst.extent(0) = " << dst.extent (0)
524  << " != src.extent(0) = " << src.extent (0) << ".");
525 
526  CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
527 }
528 
529 } // namespace Details
530 } // namespace Tpetra
531 
532 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.