10 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
11 #define TPETRA_DETAILS_COPYOFFSETS_HPP
18 #include "TpetraCore_config.h"
20 #include "Kokkos_Core.hpp"
22 #include <type_traits>
// OutputCanFitInput: compile-time trait over two types. `value` is true when
// sizeof(OutputType) is strictly greater than sizeof(InputType), or when the
// two sizes are equal and OutputType is unsigned while InputType is signed.
// NOTE(review): the closing of this struct (and any access specifiers) is not
// visible in this excerpt.
39 template<
class OutputType,
class InputType>
40 struct OutputCanFitInput {
// Signedness of each type, as reported by std::is_signed.
42 static constexpr
bool output_signed = std::is_signed<OutputType>::value;
43 static constexpr
bool input_signed = std::is_signed<InputType>::value;
// The trait result: wider output, or same width with unsigned output and
// signed input.
46 static const bool value =
sizeof (OutputType) >
sizeof (InputType) ||
47 (
sizeof (OutputType) ==
sizeof (InputType) &&
48 ! output_signed && input_signed);
// Negative: helper whose static negative() reports whether a value is below
// zero. It is selected by a bool template parameter that defaults to
// std::is_signed<InputType>::value.
// NOTE(review): the primary template's declaration line and the closing
// braces of the specializations are not visible in this excerpt.
52 template<
class InputType,
53 bool input_signed = std::is_signed<InputType>::value>
// Specialization for the bool parameter == true (a signed InputType under the
// default): compares src against InputType(0).
56 template<
class InputType>
57 struct Negative<InputType, true> {
58 static KOKKOS_INLINE_FUNCTION
bool
59 negative (
const InputType src) {
60 return src < InputType (0);
// Specialization for the bool parameter == false (an unsigned InputType under
// the default). The argument is unnamed, so it is never read. Presumably this
// always returns false; the return statement is not visible here -- TODO
// confirm.
64 template<
class InputType>
65 struct Negative<InputType, false> {
66 static KOKKOS_INLINE_FUNCTION
bool
67 negative (
const InputType ) {
// Free-function wrapper: forwards src to Negative<InputType>::negative. The
// defaulted bool parameter of Negative picks the specialization from
// InputType's signedness, so callers only need to pass the value.
72 template<
class InputType>
73 KOKKOS_INLINE_FUNCTION
bool negative (
const InputType src) {
74 return Negative<InputType>::negative (src);
// OverflowChecker: tells whether an InputType value is out of range for
// OutputType. The decision is split into a compile-time flag
// (could_overflow) and a per-value test (overflows).
// NOTE(review): several lines (branch bodies, closing braces, access
// specifiers) are not visible in this excerpt.
77 template<
class OutputType,
class InputType>
78 struct OverflowChecker {
// Signedness of each type, as reported by std::is_signed.
80 static constexpr
bool output_signed = std::is_signed<OutputType>::value;
81 static constexpr
bool input_signed = std::is_signed<InputType>::value;
// A conversion can overflow when the output is unsigned and the input signed,
// when the output is narrower than the input, or when both have the same
// size with a signed output and an unsigned input.
87 static constexpr
bool could_overflow =
88 (! output_signed && input_signed) ||
89 (
sizeof (OutputType) <
sizeof (InputType)) ||
90 (
sizeof (OutputType) ==
sizeof (InputType) &&
91 output_signed && ! input_signed);
// Per-value test: does src fail to fit in OutputType?
93 KOKKOS_INLINE_FUNCTION
bool
94 overflows (
const InputType src)
const
// Type pairs that cannot overflow take this branch (its body is not visible;
// presumably it returns false -- TODO confirm).
96 if (! could_overflow) {
// Signed input into unsigned output: only negativity is tested on this path.
// NOTE(review): if the signed input is also wider than the unsigned output,
// large positive values are not range-checked here -- confirm intended.
101 if (! output_signed && input_signed) {
102 return negative (src);
// Remaining cases: compare src against the stored bounds of OutputType.
106 return src < minDstVal_ || src > maxDstVal_;
// Bounds of OutputType, stored as InputType so they compare directly with
// src. The lower bound is numeric_limits<OutputType>::min() when the input is
// signed and OutputType(0) otherwise.
113 InputType minDstVal_ = input_signed ?
114 std::numeric_limits<OutputType>::min () : OutputType (0);
115 InputType maxDstVal_ = std::numeric_limits<OutputType>::max ();
// errorIfOverflow: builds a diagnostic for a copy in which overflowCount
// entries of src did not fit in dst, and raises it as std::runtime_error.
//   dst, src       - the output and input views of the copy.
//   overflowCount  - number of overflowing entries found by the caller.
// NOTE(review): the return-type line, the maxNumToPrint initializer, the loop
// bodies and several closing braces are not visible in this excerpt.
119 template<
class OutputViewType,
class InputViewType>
121 errorIfOverflow (
const OutputViewType& dst,
122 const InputViewType& src,
123 const size_t overflowCount)
// Nothing overflowed (branch body not visible; presumably an early return).
125 if (overflowCount == 0) {
129 std::ostringstream os;
// Choose "value" vs "values" so the message reads correctly for a count of 1.
130 const bool plural = overflowCount != size_t (1);
131 os <<
"copyOffsets: " << overflowCount <<
" value" <<
132 (plural ?
"s" :
"") <<
" in src were too big (in the "
133 "sense of integer overflow) to fit in dst.";
// Upper limit on how many entries get printed; its initializer is not
// visible here. The message text below refers to Tpetra's verbose print
// count threshold.
137 const size_t maxNumToPrint =
139 const size_t srcLen (src.extent (0));
// Short enough to print: mirror views of dst and src are created via
// Kokkos::create_mirror_view.
140 if (srcLen <= maxNumToPrint) {
141 auto dst_h = Kokkos::create_mirror_view (dst);
142 auto src_h = Kokkos::create_mirror_view (src);
// Two loops over all srcLen indices. The k + 1 < srcLen test singles out
// every element but the last (presumably to emit a separator -- the loop
// bodies are not visible).
149 for (
size_t k = 0; k < srcLen; ++k) {
151 if (k +
size_t (1) < srcLen) {
158 for (
size_t k = 0; k < srcLen; ++k) {
160 if (k +
size_t (1) < srcLen) {
// Too long to print: explain how to raise the threshold instead.
167 os <<
" src.extent(0) > " << maxNumToPrint <<
", Tpetra's "
168 "verbose print count threshold. To increase this, set the "
169 "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
170 "to the desired threshold and rerun. You do NOT need to "
// The macro is given a constant true condition, so the accumulated message
// is raised as std::runtime_error whenever this point is reached.
174 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str ());
// Primary CopyOffsetsFunctor template: an empty class. Its trailing bool
// parameter defaults to OutputCanFitInput<...>::value, computed from the two
// views' non_const_value_type. Partial specializations on that bool
// (true / false) exist elsewhere in this file.
// NOTE(review): the line declaring the second template parameter is not
// visible in this excerpt.
186 template<
class OutputViewType,
188 const bool outputCanFitInput =
189 OutputCanFitInput<
typename OutputViewType::non_const_value_type,
190 typename InputViewType::non_const_value_type>::value>
191 class CopyOffsetsFunctor {};
// CopyOffsetsFunctor specialization for outputCanFitInput == false. It copies
// src_ into dst_ one entry at a time with a static_cast. The two-argument
// call operator also consults an OverflowChecker for each entry.
// NOTE(review): access specifiers, the view data members, several bodies and
// closing braces are not visible in this excerpt.
194 template<
class OutputViewType,
class InputViewType>
195 class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
// Types taken from the output view; value_type is the type of the overflow
// counter passed by reference to operator(), init and join.
197 using execution_space =
typename OutputViewType::execution_space;
198 using size_type =
typename OutputViewType::size_type;
199 using value_type = size_t;
201 using input_value_type =
typename InputViewType::non_const_value_type;
202 using output_value_type =
typename OutputViewType::non_const_value_type;
// Stores both views and requires, at compile time, that the output view's
// memory space can access the input view's memory space.
204 CopyOffsetsFunctor (
const OutputViewType& dst,
const InputViewType& src) :
205 dst_ (dst), src_ (src)
207 static_assert (Kokkos::SpaceAccessibility<
208 typename OutputViewType::memory_space,
209 typename InputViewType::memory_space>::accessible,
210 "CopyOffsetsFunctor (implements copyOffsets): Output "
211 "View's space must be able to access the input View's "
// Checked form: reads entry i, asks checker_ whether it overflows (the body
// of that branch is not visible; presumably it increments overflowCount),
// then writes the converted value regardless.
215 KOKKOS_INLINE_FUNCTION
void
216 operator () (
const size_type i, value_type& overflowCount)
const {
217 const input_value_type src_i = src_(i);
218 if (checker_.overflows (src_i)) {
221 dst_(i) =
static_cast<output_value_type
> (src_i);
// Unchecked form: converts and stores entry i without any overflow test.
224 KOKKOS_INLINE_FUNCTION
void
225 operator () (
const size_type i)
const {
226 const input_value_type src_i = src_(i);
227 dst_(i) =
static_cast<output_value_type
> (src_i);
// Reduction hook taking the counter by reference (body not visible).
230 KOKKOS_INLINE_FUNCTION
void init (value_type& overflowCount)
const {
// Reduction hook combining `current` into `result` (body not visible).
234 KOKKOS_INLINE_FUNCTION
void
235 join (value_type& result,
236 const value_type& current)
const {
// Range checker for the input -> output value-type conversion.
243 OverflowChecker<output_value_type, input_value_type> checker_;
// CopyOffsetsFunctor specialization for outputCanFitInput == true. It offers
// the same members as the == false specialization (constructor, one- and
// two-argument call operators, init, join), but the counter arguments of the
// two-argument operator() and of join are unnamed, so those functions do not
// read them.
// NOTE(review): the constructor's initializer list, all function bodies, the
// first line of join's signature and the data members are not visible in
// this excerpt.
247 template<
class OutputViewType,
class InputViewType>
248 class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
250 using execution_space =
typename OutputViewType::execution_space;
251 using size_type =
typename OutputViewType::size_type;
252 using value_type = size_t;
// Requires, at compile time, that the output view's memory space can access
// the input view's memory space.
254 CopyOffsetsFunctor (
const OutputViewType& dst,
const InputViewType& src) :
258 static_assert (Kokkos::SpaceAccessibility<
259 typename OutputViewType::memory_space,
260 typename InputViewType::memory_space>::accessible,
261 "CopyOffsetsFunctor (implements copyOffsets): Output "
262 "View's space must be able to access the input View's "
// Reduction-style call operator; the counter parameter is unnamed.
266 KOKKOS_INLINE_FUNCTION
void
267 operator () (
const size_type i, value_type& )
const {
// Plain call operator for entry i (body not visible).
272 KOKKOS_INLINE_FUNCTION
void
273 operator () (
const size_type i)
const {
// Reduction hook taking the counter by reference (body not visible).
277 KOKKOS_INLINE_FUNCTION
void init (value_type& overflowCount)
const {
// Tail of join's signature; its second argument is unnamed.
281 KOKKOS_INLINE_FUNCTION
void
283 const value_type& )
const
// Primary CopyOffsetsImpl template: declares a static run(dst, src) and
// carries two defaulted bool parameters used to select a specialization.
//   sameLayoutsSameOffsetTypes: both views share array_layout AND
//     non_const_value_type.
//   outputExecSpaceCanAccessInputMemSpace: Kokkos::SpaceAccessibility of
//     OutputViewType::memory_space with respect to
//     InputViewType::memory_space.
// NOTE(review): the line declaring the second template parameter and the
// closing of the struct are not visible in this excerpt.
307 template<
class OutputViewType,
309 const bool sameLayoutsSameOffsetTypes =
310 std::is_same<
typename OutputViewType::array_layout,
311 typename InputViewType::array_layout>::value &&
312 std::is_same<
typename OutputViewType::non_const_value_type,
313 typename InputViewType::non_const_value_type>::value,
314 const bool outputExecSpaceCanAccessInputMemSpace =
315 Kokkos::SpaceAccessibility<
316 typename OutputViewType::memory_space,
317 typename InputViewType::memory_space>::accessible>
318 struct CopyOffsetsImpl {
// Declared only; no body is given at this point.
319 static void run (
const OutputViewType& dst,
const InputViewType& src);
// CopyOffsetsImpl specialization for sameLayoutsSameOffsetTypes == true, with
// any value of outputExecSpaceCanAccessInputMemSpace. run() re-checks its
// preconditions with static_asserts: same value type, same rank and same
// array_layout.
// NOTE(review): the line declaring the second template parameter and the
// statements after the execution_space alias (the actual copy) are not
// visible in this excerpt.
330 template<
class OutputViewType,
332 const bool outputExecSpaceCanAccessInputMemSpace>
333 struct CopyOffsetsImpl<OutputViewType, InputViewType,
334 true, outputExecSpaceCanAccessInputMemSpace> {
335 static void run (
const OutputViewType& dst,
const InputViewType& src) {
// Precondition: identical offset (value) types.
336 static_assert (std::is_same<
typename OutputViewType::non_const_value_type,
337 typename InputViewType::non_const_value_type>::value,
338 "CopyOffsetsImpl (implementation of copyOffsets): In order"
339 " to call this specialization, the input and output must "
340 "use the same offset type.");
// Precondition: identical rank.
341 static_assert (static_cast<int> (OutputViewType::rank) ==
342 static_cast<int> (InputViewType::rank),
343 "CopyOffsetsImpl (implementation of copyOffsets): In order"
344 " to call this specialization, src and dst must have the "
// Precondition: identical array layout.
346 static_assert (std::is_same<
typename OutputViewType::array_layout,
347 typename InputViewType::array_layout>::value,
348 "CopyOffsetsImpl (implementation of copyOffsets): In order"
349 " to call this specialization, src and dst must have the "
350 "the same array_layout.");
352 using execution_space =
typename OutputViewType::execution_space;
// CopyOffsetsImpl specialization for views that differ in layout or value
// type, where the output view's space can access the input view's memory
// (both conditions are enforced by the static_asserts in run()). run()
// applies CopyOffsetsFunctor directly to (dst, src) over the index range
// [0, dst.extent(0)).
// NOTE(review): the specialization's template-argument line, the condition
// choosing between the parallel_reduce and parallel_for paths, and several
// closing lines are not visible in this excerpt.
368 template<
class OutputViewType,
370 struct CopyOffsetsImpl<OutputViewType, InputViewType,
372 static void run (
const OutputViewType& dst,
const InputViewType& src) {
373 static_assert (static_cast<int> (OutputViewType::rank) ==
374 static_cast<int> (InputViewType::rank),
375 "CopyOffsetsImpl (implementation of copyOffsets): "
376 "src and dst must have the same rank.");
// Recompute the selector locally so a wrong instantiation fails to compile.
377 constexpr
bool sameLayoutsSameOffsetTypes =
378 std::is_same<
typename OutputViewType::array_layout,
379 typename InputViewType::array_layout>::value &&
380 std::is_same<
typename OutputViewType::non_const_value_type,
381 typename InputViewType::non_const_value_type>::value;
382 static_assert (! sameLayoutsSameOffsetTypes,
383 "CopyOffsetsImpl (implements copyOffsets): In order to "
384 "call this specialization, sameLayoutsSameOffsetTypes "
385 "must be false.  That is, either the input and output "
386 "must have different array layouts, or their value types "
388 static_assert (Kokkos::SpaceAccessibility<
389 typename OutputViewType::memory_space,
390 typename InputViewType::memory_space>::accessible,
391 "CopyOffsetsImpl (implements copyOffsets): In order to "
392 "call this specialization, the output View's space must "
393 "be able to access the input View's memory space.");
394 using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
395 using execution_space =
typename OutputViewType::execution_space;
396 using size_type =
typename OutputViewType::size_type;
397 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
// Checked path: the reduction produces overflowCount (its result argument is
// on a line not visible here), which errorIfOverflow then inspects.
401 size_t overflowCount = 0;
402 Kokkos::parallel_reduce (
"Tpetra::Details::copyOffsets",
403 range_type (0, dst.extent (0)),
404 functor_type (dst, src),
406 errorIfOverflow (dst, src, overflowCount);
// Unchecked path: plain element-wise copy with no overflow counting.
409 Kokkos::parallel_for (
"Tpetra::Details::copyOffsets",
410 range_type (0, dst.extent (0)),
411 functor_type (dst, src));
// CopyOffsetsImpl specialization that stages the input through a temporary
// view. run() declares outputSpaceCopy, a rank-1 LayoutLeft view holding the
// input's value type on the output view's device_type, and then runs
// CopyOffsetsFunctor from outputSpaceCopy into dst.
// NOTE(review): the specialization's template-argument line (presumably
// <..., false, false> -- TODO confirm), the extent argument and the copy of
// src into outputSpaceCopy, the start of the functor_type alias, and the
// condition choosing between parallel_reduce and parallel_for are not
// visible in this excerpt.
433 template<
class OutputViewType,
class InputViewType>
434 struct CopyOffsetsImpl<OutputViewType, InputViewType,
436 static void run (
const OutputViewType& dst,
const InputViewType& src) {
437 static_assert (static_cast<int> (OutputViewType::rank) ==
438 static_cast<int> (InputViewType::rank),
439 "CopyOffsetsImpl (implementation of copyOffsets): In order"
440 " to call this specialization, src and dst must have the "
// Recompute the selector locally so a wrong instantiation fails to compile.
442 constexpr
bool sameLayoutsSameOffsetTypes =
443 std::is_same<
typename OutputViewType::array_layout,
444 typename InputViewType::array_layout>::value &&
445 std::is_same<
typename OutputViewType::non_const_value_type,
446 typename InputViewType::non_const_value_type>::value;
447 static_assert (! sameLayoutsSameOffsetTypes,
448 "CopyOffsetsImpl (implements copyOffsets): In order to "
449 "call this specialization, sameLayoutsSameOffsetTypes "
450 "must be false.  That is, either the input and output "
451 "must have different array layouts, or their value types "
// Staging view type: input value type, LayoutLeft, output view's device.
453 using output_space_copy_type =
454 Kokkos::View<
typename InputViewType::non_const_value_type*,
455 Kokkos::LayoutLeft,
typename OutputViewType::device_type>;
456 using Kokkos::view_alloc;
457 using Kokkos::WithoutInitializing;
458 using execution_space =
typename OutputViewType::execution_space;
// Allocated with WithoutInitializing, i.e. without filling the new view.
459 output_space_copy_type
460 outputSpaceCopy (view_alloc (
"outputSpace", WithoutInitializing),
// Right-hand side of the functor_type alias: the functor reads from the
// staging view type rather than from InputViewType.
468 CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
469 using size_type =
typename OutputViewType::size_type;
470 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
// Checked path: count overflows while copying, then report them. Note that
// errorIfOverflow receives the original src, not the staging view.
474 size_t overflowCount = 0;
475 Kokkos::parallel_reduce (
"Tpetra::Details::copyOffsets",
476 range_type (0, dst.extent (0)),
477 functor_type (dst, outputSpaceCopy),
479 errorIfOverflow (dst, src, overflowCount);
// Unchecked path: plain element-wise copy with no overflow counting.
482 Kokkos::parallel_for (
"Tpetra::Details::copyOffsets",
483 range_type (0, dst.extent (0)),
484 functor_type (dst, outputSpaceCopy));
// copyOffsets: entry point that validates its two view arguments and then
// dispatches to CopyOffsetsImpl<OutputViewType, InputViewType>::run.
// Compile-time requirements (static_asserts below): both arguments are
// Kokkos::Views, dst is nonconst, both are rank 1, and both hold built-in
// integers.
// Run-time requirement: dst.extent(0) == src.extent(0); a mismatch is
// reported as std::invalid_argument.
// NOTE(review): the function's signature line is not visible in this
// excerpt; the name is taken from the error-message text below.
501 template<
class OutputViewType,
class InputViewType>
505 static_assert (Kokkos::is_view<OutputViewType>::value,
506 "OutputViewType (the type of dst) must be a Kokkos::View.");
507 static_assert (Kokkos::is_view<InputViewType>::value,
508 "InputViewType (the type of src) must be a Kokkos::View.");
// value_type equal to non_const_value_type means dst's entries are writable.
509 static_assert (std::is_same<
typename OutputViewType::value_type,
510 typename OutputViewType::non_const_value_type>::value,
511 "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
512 static_assert (static_cast<int> (OutputViewType::rank) == 1,
513 "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
514 static_assert (static_cast<int> (InputViewType::rank) == 1,
515 "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
// The element type is obtained from the type of dst(0) / src(0), decayed to
// strip references and cv-qualifiers before the is_integral test.
516 static_assert (std::is_integral<
typename std::decay<decltype (dst(0)) >::type>::value,
517 "The entries of dst must be built-in integers.");
518 static_assert (std::is_integral<
typename std::decay<decltype (src(0)) >::type>::value,
519 "The entries of src must be built-in integers.");
// Lengths must match before any copying is attempted.
521 TEUCHOS_TEST_FOR_EXCEPTION
522 (dst.extent (0) != src.extent (0), std::invalid_argument,
523 "copyOffsets: dst.extent(0) = " << dst.extent (0)
524 <<
" != src.extent(0) = " << src.extent (0) <<
".");
// The defaulted bool parameters of CopyOffsetsImpl select the specialization.
526 CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
532 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.