Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
11 #define TPETRA_DETAILS_COPYOFFSETS_HPP
12 
17 
#include "TpetraCore_config.h"
#include "Kokkos_Core.hpp"
#include <limits>
#include <sstream>
#include <stdexcept>
#include <type_traits>
23 
24 namespace Tpetra {
25 namespace Details {
26 
27 //
28 // Implementation details for copyOffsets (see below).
29 // Users should skip over this anonymous namespace.
30 //
31 namespace { // (anonymous)
32 
// Implementation detail of copyOffsets (see below).  Determines at
// compile time whether every value of InputType is representable as
// an OutputType, i.e., whether integer overflow is impossible on
// assignment from an InputType to an OutputType.
//
// Implicit here is the assumption that both input and output types
// are built-in integers.
//
// The cases:
//   - same signedness: safe iff the output is at least as wide as
//     the input (this includes identical types);
//   - unsigned input, signed output: safe iff the output is strictly
//     wider, since one bit of the output is spent on the sign;
//   - signed input, unsigned output: never safe, because negative
//     inputs wrap around.  This agrees with rule 1 of
//     OverflowChecker::could_overflow.
template <class OutputType, class InputType>
struct OutputCanFitInput {
 private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed  = std::is_signed<InputType>::value;

 public:
  static const bool value =
      (output_signed == input_signed &&
       sizeof(OutputType) >= sizeof(InputType)) ||
      (output_signed && !input_signed &&
       sizeof(OutputType) > sizeof(InputType));
};
50 
51 // Avoid warnings for "unsigned integer < 0" comparisons.
52 template <class InputType,
53  bool input_signed = std::is_signed<InputType>::value>
54 struct Negative {};
55 
56 template <class InputType>
57 struct Negative<InputType, true> {
58  static KOKKOS_INLINE_FUNCTION bool
59  negative(const InputType src) {
60  return src < InputType(0);
61  }
62 };
63 
64 template <class InputType>
65 struct Negative<InputType, false> {
66  static KOKKOS_INLINE_FUNCTION bool
67  negative(const InputType /* src */) {
68  return false;
69  }
70 };
71 
72 template <class InputType>
73 KOKKOS_INLINE_FUNCTION bool negative(const InputType src) {
74  return Negative<InputType>::negative(src);
75 }
76 
77 template <class OutputType, class InputType>
78 struct OverflowChecker {
79  private:
80  static constexpr bool output_signed = std::is_signed<OutputType>::value;
81  static constexpr bool input_signed = std::is_signed<InputType>::value;
82 
83  public:
84  // 1. Signed to unsigned could overflow due to negative numbers.
85  // 2. Larger to smaller could overflow.
86  // 3. Same size but unsigned to signed could overflow.
87  static constexpr bool could_overflow =
88  (!output_signed && input_signed) ||
89  (sizeof(OutputType) < sizeof(InputType)) ||
90  (sizeof(OutputType) == sizeof(InputType) &&
91  output_signed && !input_signed);
92 
93  KOKKOS_INLINE_FUNCTION bool
94  overflows(const InputType src) const {
95  if (!could_overflow) {
96  return false;
97  } else {
98  // Signed to unsigned could overflow due to negative numbers.
99  if (!output_signed && input_signed) {
100  return negative(src);
101  }
102  // We're only comparing InputType with InputType here, so this
103  // should not emit warnings.
104  return src < minDstVal_ || src > maxDstVal_;
105  }
106  }
107 
108  private:
109  // If InputType is unsigned and OutputType is signed, casting max
110  // OutputType to InputType could overflow. See #5548.
111  InputType minDstVal_ = input_signed ? std::numeric_limits<OutputType>::min() : OutputType(0);
112  InputType maxDstVal_ = std::numeric_limits<OutputType>::max();
113 };
114 
115 template <class OutputViewType, class InputViewType>
116 void errorIfOverflow(const OutputViewType& dst,
117  const InputViewType& src,
118  const size_t overflowCount) {
119  if (overflowCount == 0) {
120  return;
121  }
122 
123  std::ostringstream os;
124  const bool plural = overflowCount != size_t(1);
125  os << "copyOffsets: " << overflowCount << " value" << (plural ? "s" : "") << " in src were too big (in the "
126  "sense of integer overflow) to fit in dst.";
127 
128  const bool verbose = Details::Behavior::verbose();
129  if (verbose) {
130  const size_t maxNumToPrint =
132  const size_t srcLen(src.extent(0));
133  if (srcLen <= maxNumToPrint) {
134  auto dst_h = Kokkos::create_mirror_view(dst);
135  auto src_h = Kokkos::create_mirror_view(src);
136  // DEEP_COPY REVIEW - NOT TESTED
137  Kokkos::deep_copy(src_h, src);
138  // DEEP_COPY REVIEW - NOT TESTED
139  Kokkos::deep_copy(dst_h, dst);
140 
141  os << " src: [";
142  for (size_t k = 0; k < srcLen; ++k) {
143  os << src_h[k];
144  if (k + size_t(1) < srcLen) {
145  os << ", ";
146  }
147  }
148  os << "], ";
149 
150  os << " dst: [";
151  for (size_t k = 0; k < srcLen; ++k) {
152  os << dst_h[k];
153  if (k + size_t(1) < srcLen) {
154  os << ", ";
155  }
156  }
157  os << "].";
158  } else {
159  os << " src.extent(0) > " << maxNumToPrint << ", Tpetra's "
160  "verbose print count threshold. To increase this, set the "
161  "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
162  "to the desired threshold and rerun. You do NOT need to "
163  "rebuild Trilinos.";
164  }
165  }
166  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str());
167 }
168 
169 // Implementation detail of copyOffsets (see below).
170 //
171 // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
172 // Tpetra::Details::FixedHashTable uses this in its "copy"
173 // constructor for converting between different Device types. All
174 // the action happens in the partial specializations for different
175 // values of outputCanFitInput. "Output can fit input" means that
176 // casting the input's value type to the output's value type will
177 // never result in integer overflow.
178 template <class OutputViewType,
179  class InputViewType,
180  const bool outputCanFitInput =
181  OutputCanFitInput<typename OutputViewType::non_const_value_type,
182  typename InputViewType::non_const_value_type>::value>
183 class CopyOffsetsFunctor {};
184 
185 // Specialization for when overflow is possible.
186 template <class OutputViewType, class InputViewType>
187 class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
188  public:
189  using execution_space = typename OutputViewType::execution_space;
190  using size_type = typename OutputViewType::size_type;
191  using value_type = size_t;
192 
193  using input_value_type = typename InputViewType::non_const_value_type;
194  using output_value_type = typename OutputViewType::non_const_value_type;
195 
196  CopyOffsetsFunctor(const OutputViewType& dst, const InputViewType& src)
197  : dst_(dst)
198  , src_(src) {
199  static_assert(Kokkos::SpaceAccessibility<
200  typename OutputViewType::memory_space,
201  typename InputViewType::memory_space>::accessible,
202  "CopyOffsetsFunctor (implements copyOffsets): Output "
203  "View's space must be able to access the input View's "
204  "memory space.");
205  }
206 
207  KOKKOS_INLINE_FUNCTION void
208  operator()(const size_type i, value_type& overflowCount) const {
209  const input_value_type src_i = src_(i);
210  if (checker_.overflows(src_i)) {
211  ++overflowCount;
212  }
213  dst_(i) = static_cast<output_value_type>(src_i);
214  }
215 
216  KOKKOS_INLINE_FUNCTION void
217  operator()(const size_type i) const {
218  const input_value_type src_i = src_(i);
219  dst_(i) = static_cast<output_value_type>(src_i);
220  }
221 
222  KOKKOS_INLINE_FUNCTION void init(value_type& overflowCount) const {
223  overflowCount = 0;
224  }
225 
226  KOKKOS_INLINE_FUNCTION void
227  join(value_type& result,
228  const value_type& current) const {
229  result += current;
230  }
231 
232  private:
233  OutputViewType dst_;
234  InputViewType src_;
235  OverflowChecker<output_value_type, input_value_type> checker_;
236 };
237 
238 // Specialization for when overflow is impossible.
239 template <class OutputViewType, class InputViewType>
240 class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
241  public:
242  using execution_space = typename OutputViewType::execution_space;
243  using size_type = typename OutputViewType::size_type;
244  using value_type = size_t;
245 
246  CopyOffsetsFunctor(const OutputViewType& dst, const InputViewType& src)
247  : dst_(dst)
248  , src_(src) {
249  static_assert(Kokkos::SpaceAccessibility<
250  typename OutputViewType::memory_space,
251  typename InputViewType::memory_space>::accessible,
252  "CopyOffsetsFunctor (implements copyOffsets): Output "
253  "View's space must be able to access the input View's "
254  "memory space.");
255  }
256 
257  KOKKOS_INLINE_FUNCTION void
258  operator()(const size_type i, value_type& /* overflowCount */) const {
259  // Overflow is impossible in this case, so there's no need to check.
260  dst_(i) = src_(i);
261  }
262 
263  KOKKOS_INLINE_FUNCTION void
264  operator()(const size_type i) const {
265  dst_(i) = src_(i);
266  }
267 
268  KOKKOS_INLINE_FUNCTION void init(value_type& overflowCount) const {
269  overflowCount = 0;
270  }
271 
272  KOKKOS_INLINE_FUNCTION void
273  join(value_type& /* result */,
274  const value_type& /* current */) const {}
275 
276  private:
277  OutputViewType dst_;
278  InputViewType src_;
279 };
280 
281 // Implementation detail of copyOffsets (see below).
282 //
283 // We specialize copyOffsets on two different conditions:
284 //
285 // 1. Are the two Views' layouts the same, and do the input and
286 // output Views have the same value type?
287 // 2. Can the output View's execution space access the input View's
288 // memory space?
289 //
290 // If (1) is true, that makes the implementation simple: just call
291 // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
292 // matter the device type). Otherwise, we need a custom copy
293 // functor. If (2) is true, then we can use CopyOffsetsFunctor
294 // directly. Otherwise, we have to copy the input View into the
295 // output View's memory space, before we can use the functor.
296 //
297 template <class OutputViewType,
298  class InputViewType,
299  const bool sameLayoutsSameOffsetTypes =
300  std::is_same<typename OutputViewType::array_layout,
301  typename InputViewType::array_layout>::value&&
302  std::is_same<typename OutputViewType::non_const_value_type,
303  typename InputViewType::non_const_value_type>::value,
304  const bool outputExecSpaceCanAccessInputMemSpace =
305  Kokkos::SpaceAccessibility<
306  typename OutputViewType::memory_space,
307  typename InputViewType::memory_space>::accessible>
308 struct CopyOffsetsImpl {
309  static void run(const OutputViewType& dst, const InputViewType& src);
310 };
311 
312 // Specialization for sameLayoutsSameOffsetTypes = true:
313 //
314 // If both input and output Views have the same layout, and both
315 // input and output use the same type for offsets, then we don't
316 // need to check for overflow, and we can use Kokkos::deep_copy
317 // directly. It doesn't matter whether the output execution space
318 // can access the input memory space: Kokkos::deep_copy takes care
319 // of the details.
320 template <class OutputViewType,
321  class InputViewType,
322  const bool outputExecSpaceCanAccessInputMemSpace>
323 struct CopyOffsetsImpl<OutputViewType, InputViewType,
324  true, outputExecSpaceCanAccessInputMemSpace> {
325  static void run(const OutputViewType& dst, const InputViewType& src) {
326  static_assert(std::is_same<typename OutputViewType::non_const_value_type,
327  typename InputViewType::non_const_value_type>::value,
328  "CopyOffsetsImpl (implementation of copyOffsets): In order"
329  " to call this specialization, the input and output must "
330  "use the same offset type.");
331  static_assert(static_cast<int>(OutputViewType::rank) ==
332  static_cast<int>(InputViewType::rank),
333  "CopyOffsetsImpl (implementation of copyOffsets): In order"
334  " to call this specialization, src and dst must have the "
335  "same rank.");
336  static_assert(std::is_same<typename OutputViewType::array_layout,
337  typename InputViewType::array_layout>::value,
338  "CopyOffsetsImpl (implementation of copyOffsets): In order"
339  " to call this specialization, src and dst must have the "
340  "the same array_layout.");
341  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
342  using execution_space = typename OutputViewType::execution_space;
343  Kokkos::deep_copy(execution_space(), dst, src);
344  }
345 };
346 
347 // Specializations for sameLayoutsSameOffsetTypes = false:
348 //
349 // If input and output don't have the same layout, or use different
350 // types for offsets, then we can't use Kokkos::deep_copy directly,
351 // and we may have to check for overflow.
352 
353 // Specialization for sameLayoutsSameOffsetTypes = false and
354 // outputExecSpaceCanAccessInputMemSpace = true:
355 //
356 // If the output execution space can access the input memory space,
357 // then we can use CopyOffsetsFunctor directly.
358 template <class OutputViewType,
359  class InputViewType>
360 struct CopyOffsetsImpl<OutputViewType, InputViewType,
361  false, true> {
362  static void run(const OutputViewType& dst, const InputViewType& src) {
363  static_assert(static_cast<int>(OutputViewType::rank) ==
364  static_cast<int>(InputViewType::rank),
365  "CopyOffsetsImpl (implementation of copyOffsets): "
366  "src and dst must have the same rank.");
367  constexpr bool sameLayoutsSameOffsetTypes =
368  std::is_same<typename OutputViewType::array_layout,
369  typename InputViewType::array_layout>::value &&
370  std::is_same<typename OutputViewType::non_const_value_type,
371  typename InputViewType::non_const_value_type>::value;
372  static_assert(!sameLayoutsSameOffsetTypes,
373  "CopyOffsetsImpl (implements copyOffsets): In order to "
374  "call this specialization, sameLayoutsSameOffsetTypes "
375  "must be false. That is, either the input and output "
376  "must have different array layouts, or their value types "
377  "must differ.");
378  static_assert(Kokkos::SpaceAccessibility<
379  typename OutputViewType::memory_space,
380  typename InputViewType::memory_space>::accessible,
381  "CopyOffsetsImpl (implements copyOffsets): In order to "
382  "call this specialization, the output View's space must "
383  "be able to access the input View's memory space.");
384  using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
385  using execution_space = typename OutputViewType::execution_space;
386  using size_type = typename OutputViewType::size_type;
387  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
388 
389  const bool debug = Details::Behavior::debug();
390  if (debug) {
391  size_t overflowCount = 0; // output argument of the reduction
392  Kokkos::parallel_reduce("Tpetra::Details::copyOffsets",
393  range_type(0, dst.extent(0)),
394  functor_type(dst, src),
395  overflowCount);
396  errorIfOverflow(dst, src, overflowCount);
397  } else {
398  Kokkos::parallel_for("Tpetra::Details::copyOffsets",
399  range_type(0, dst.extent(0)),
400  functor_type(dst, src));
401  }
402  }
403 };
404 
405 // Specialization for sameLayoutsSameOffsetTypes = false and
406 // outputExecSpaceCanAccessInputMemSpace = false.
407 //
408 // If the output execution space canNOT access the input memory
409 // space, then we can't use CopyOffsetsFunctor directly. Instead,
410 // tell Kokkos to copy the input View's data into the output View's
411 // memory space _first_. Since the offset types are different for
412 // this specialization, we can't just call Kokkos::deep_copy
413 // directly between the input and output Views of offsets; that
414 // wouldn't compile.
415 //
416 // This case can and does come up in practice: If the output View's
417 // execution space is Cuda, it cannot currently access host memory
418 // (that's the opposite direction from what UVM allows).
419 // Furthermore, that case specifically requires overflow checking,
420 // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
421 // offset type than Kokkos' host spaces.
422 template <class OutputViewType, class InputViewType>
423 struct CopyOffsetsImpl<OutputViewType, InputViewType,
424  false, false> {
425  static void run(const OutputViewType& dst, const InputViewType& src) {
426  static_assert(static_cast<int>(OutputViewType::rank) ==
427  static_cast<int>(InputViewType::rank),
428  "CopyOffsetsImpl (implementation of copyOffsets): In order"
429  " to call this specialization, src and dst must have the "
430  "same rank.");
431  constexpr bool sameLayoutsSameOffsetTypes =
432  std::is_same<typename OutputViewType::array_layout,
433  typename InputViewType::array_layout>::value &&
434  std::is_same<typename OutputViewType::non_const_value_type,
435  typename InputViewType::non_const_value_type>::value;
436  static_assert(!sameLayoutsSameOffsetTypes,
437  "CopyOffsetsImpl (implements copyOffsets): In order to "
438  "call this specialization, sameLayoutsSameOffsetTypes "
439  "must be false. That is, either the input and output "
440  "must have different array layouts, or their value types "
441  "must differ.");
442  using output_space_copy_type =
443  Kokkos::View<typename InputViewType::non_const_value_type*,
444  Kokkos::LayoutLeft, typename OutputViewType::device_type>;
445  using Kokkos::view_alloc;
446  using Kokkos::WithoutInitializing;
447  using execution_space = typename OutputViewType::execution_space;
448  output_space_copy_type
449  outputSpaceCopy(view_alloc("outputSpace", WithoutInitializing),
450  src.extent(0));
451  // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
452  Kokkos::deep_copy(execution_space(), outputSpaceCopy, src);
453 
454  // The output View's execution space can access
455  // outputSpaceCopy's data, so we can run the functor now.
456  using functor_type =
457  CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
458  using size_type = typename OutputViewType::size_type;
459  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
460 
461  const bool debug = Details::Behavior::debug();
462  if (debug) {
463  size_t overflowCount = 0;
464  Kokkos::parallel_reduce("Tpetra::Details::copyOffsets",
465  range_type(0, dst.extent(0)),
466  functor_type(dst, outputSpaceCopy),
467  overflowCount);
468  errorIfOverflow(dst, src, overflowCount);
469  } else {
470  Kokkos::parallel_for("Tpetra::Details::copyOffsets",
471  range_type(0, dst.extent(0)),
472  functor_type(dst, outputSpaceCopy));
473  }
474  }
475 };
476 } // namespace
477 
489 template <class OutputViewType, class InputViewType>
490 void copyOffsets(const OutputViewType& dst, const InputViewType& src) {
491  static_assert(Kokkos::is_view<OutputViewType>::value,
492  "OutputViewType (the type of dst) must be a Kokkos::View.");
493  static_assert(Kokkos::is_view<InputViewType>::value,
494  "InputViewType (the type of src) must be a Kokkos::View.");
495  static_assert(std::is_same<typename OutputViewType::value_type,
496  typename OutputViewType::non_const_value_type>::value,
497  "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
498  static_assert(static_cast<int>(OutputViewType::rank) == 1,
499  "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
500  static_assert(static_cast<int>(InputViewType::rank) == 1,
501  "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
502  static_assert(std::is_integral<typename std::decay<decltype(dst(0))>::type>::value,
503  "The entries of dst must be built-in integers.");
504  static_assert(std::is_integral<typename std::decay<decltype(src(0))>::type>::value,
505  "The entries of src must be built-in integers.");
506 
507  TEUCHOS_TEST_FOR_EXCEPTION(dst.extent(0) != src.extent(0), std::invalid_argument,
508  "copyOffsets: dst.extent(0) = " << dst.extent(0)
509  << " != src.extent(0) = " << src.extent(0) << ".");
510 
511  CopyOffsetsImpl<OutputViewType, InputViewType>::run(dst, src);
512 }
513 
514 } // namespace Details
515 } // namespace Tpetra
516 
517 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.