10 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
11 #define TPETRA_DETAILS_COPYOFFSETS_HPP
18 #include "TpetraCore_config.h"
20 #include "Kokkos_Core.hpp"
22 #include <type_traits>
/// \brief Compile-time trait: can every value of InputType be converted
///   to OutputType without changing its value?
///
/// Both types are assumed to be built-in integer types.  The result is
/// the exact complement of OverflowChecker::could_overflow for the same
/// pair of types:
///
///  - same signedness: OutputType must be at least as wide as InputType;
///  - signed output, unsigned input: OutputType must be strictly wider,
///    since one bit of the output is spent on the sign;
///  - unsigned output, signed input: never, because negative inputs have
///    no representation in OutputType regardless of width.
///
/// \tparam OutputType Integer type being converted to.
/// \tparam InputType  Integer type being converted from.
template <class OutputType, class InputType>
struct OutputCanFitInput {
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed  = std::is_signed<InputType>::value;

  /// True if and only if no InputType value can overflow OutputType.
  static const bool value =
      (output_signed == input_signed)
          ? (sizeof(OutputType) >= sizeof(InputType))
          : (output_signed && sizeof(OutputType) > sizeof(InputType));
};
// Negative: helper class template used to ask "is this value negative?"
// for an arbitrary integer type.  The second template parameter defaults
// to std::is_signed<InputType>::value, so the signed or unsigned
// specialization below is selected from InputType alone.
// NOTE(review): the primary template's own declaration line is not
// visible in this excerpt.
52 template <
class InputType,
53 bool input_signed = std::is_signed<InputType>::value>
// Specialization for signed InputType: negative(src) is true exactly
// when src compares less than InputType(0).
56 template <
class InputType>
57 struct Negative<InputType, true> {
58 static KOKKOS_INLINE_FUNCTION
bool
59 negative(
const InputType src) {
60 return src < InputType(0);
// Specialization for unsigned InputType.  The argument is left unnamed,
// i.e. it is not inspected.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably it returns false unconditionally -- confirm.
64 template <
class InputType>
65 struct Negative<InputType, false> {
66 static KOKKOS_INLINE_FUNCTION
bool
67 negative(
const InputType ) {
72 template <
class InputType>
73 KOKKOS_INLINE_FUNCTION
bool negative(
const InputType src) {
74 return Negative<InputType>::negative(src);
77 template <
class OutputType,
class InputType>
78 struct OverflowChecker {
80 static constexpr
bool output_signed = std::is_signed<OutputType>::value;
81 static constexpr
bool input_signed = std::is_signed<InputType>::value;
87 static constexpr
bool could_overflow =
88 (!output_signed && input_signed) ||
89 (
sizeof(OutputType) <
sizeof(InputType)) ||
90 (
sizeof(OutputType) ==
sizeof(InputType) &&
91 output_signed && !input_signed);
93 KOKKOS_INLINE_FUNCTION
bool
94 overflows(
const InputType src)
const {
95 if (!could_overflow) {
99 if (!output_signed && input_signed) {
100 return negative(src);
104 return src < minDstVal_ || src > maxDstVal_;
111 InputType minDstVal_ = input_signed ? std::numeric_limits<OutputType>::min() : OutputType(0);
112 InputType maxDstVal_ = std::numeric_limits<OutputType>::max();
// errorIfOverflow: given the number of entries of src that overflowed
// while being copied into dst, build a human-readable message and throw
// std::runtime_error (via TEUCHOS_TEST_FOR_EXCEPTION with a condition of
// `true`).
//
// dst           [in] Output View of the copy.
// src           [in] Input View of the copy.
// overflowCount [in] Number of overflowing entries that were counted.
115 template <
class OutputViewType,
class InputViewType>
116 void errorIfOverflow(
const OutputViewType& dst,
117 const InputViewType& src,
118 const size_t overflowCount) {
// Nothing overflowed.
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably it returns without throwing -- confirm.
119 if (overflowCount == 0) {
123 std::ostringstream os;
// Pick "value" vs. "values" so that the message reads correctly for a
// count of exactly one.
124 const bool plural = overflowCount != size_t(1);
125 os <<
"copyOffsets: " << overflowCount <<
" value" << (plural ?
"s" :
"") <<
" in src were too big (in the "
126 "sense of integer overflow) to fit in dst.";
// Upper bound on src's length for which the Views are inspected below.
// NOTE(review): the initializer is not visible in this excerpt; the
// message further down calls it "Tpetra's verbose print count
// threshold".
130 const size_t maxNumToPrint =
132 const size_t srcLen(src.extent(0));
// Short enough: create Kokkos mirror Views of dst and src.
133 if (srcLen <= maxNumToPrint) {
134 auto dst_h = Kokkos::create_mirror_view(dst);
135 auto src_h = Kokkos::create_mirror_view(src);
// Two passes over [0, srcLen).  The test k + 1 < srcLen is true for every
// entry except the last.
// NOTE(review): the loop bodies are not visible in this excerpt;
// presumably they print the entries with a separator between them.
142 for (
size_t k = 0; k < srcLen; ++k) {
144 if (k +
size_t(1) < srcLen) {
151 for (
size_t k = 0; k < srcLen; ++k) {
153 if (k +
size_t(1) < srcLen) {
// Message used when src is longer than maxNumToPrint; it tells the user
// which environment variable raises the threshold.
159 os <<
" src.extent(0) > " << maxNumToPrint <<
", Tpetra's "
160 "verbose print count threshold. To increase this, set the "
161 "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
162 "to the desired threshold and rerun. You do NOT need to "
// Unconditionally throw std::runtime_error carrying the message built
// above.
166 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
// CopyOffsetsFunctor (primary template): intentionally empty.  The real
// implementations are the partial specializations on the last (bool)
// template parameter, which defaults to
// OutputCanFitInput<output value type, input value type>::value.
178 template <
class OutputViewType,
180 const bool outputCanFitInput =
181 OutputCanFitInput<
typename OutputViewType::non_const_value_type,
182 typename InputViewType::non_const_value_type>::value>
183 class CopyOffsetsFunctor {};
// CopyOffsetsFunctor specialization for outputCanFitInput == false.
// It holds an OverflowChecker (checker_) and consults it in the
// two-argument operator() below.
186 template <
class OutputViewType,
class InputViewType>
187 class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
189 using execution_space =
typename OutputViewType::execution_space;
190 using size_type =
typename OutputViewType::size_type;
// Type of the second argument of the two-argument operator(), init(),
// and join().
191 using value_type = size_t;
193 using input_value_type =
typename InputViewType::non_const_value_type;
194 using output_value_type =
typename OutputViewType::non_const_value_type;
// Constructor taking the output and input Views.
// NOTE(review): the member initializer list is not visible in this
// excerpt; the operators below read src_ and write dst_.
196 CopyOffsetsFunctor(
const OutputViewType& dst,
const InputViewType& src)
// Compile-time requirement: the output View's memory space must be able
// to access the input View's memory space.
199 static_assert(Kokkos::SpaceAccessibility<
200 typename OutputViewType::memory_space,
201 typename InputViewType::memory_space>::accessible,
202 "CopyOffsetsFunctor (implements copyOffsets): Output "
203 "View's space must be able to access the input View's "
// Two-argument form: read entry i of src_, ask checker_ whether it
// overflows the output type, and store the static_cast result in
// dst_(i).
// NOTE(review): the body of the `if` is not visible in this excerpt;
// presumably it increments overflowCount -- confirm.
207 KOKKOS_INLINE_FUNCTION
void
208 operator()(
const size_type i, value_type& overflowCount)
const {
209 const input_value_type src_i = src_(i);
210 if (checker_.overflows(src_i)) {
213 dst_(i) =
static_cast<output_value_type
>(src_i);
// One-argument form: copy entry i with a static_cast and no overflow
// check.
216 KOKKOS_INLINE_FUNCTION
void
217 operator()(
const size_type i)
const {
218 const input_value_type src_i = src_(i);
219 dst_(i) =
static_cast<output_value_type
>(src_i);
// init() and join() take value_type arguments.
// NOTE(review): their bodies are not visible in this excerpt; presumably
// they zero and sum the overflow count, respectively -- confirm.
222 KOKKOS_INLINE_FUNCTION
void init(value_type& overflowCount)
const {
226 KOKKOS_INLINE_FUNCTION
void
227 join(value_type& result,
228 const value_type& current)
const {
// Overflow test used by the two-argument operator().
235 OverflowChecker<output_value_type, input_value_type> checker_;
// CopyOffsetsFunctor specialization for outputCanFitInput == true.
// It exposes the same member functions as the `false` specialization,
// but leaves the value_type argument of the two-argument operator() and
// the second argument of the last member function unnamed (unused).
239 template <
class OutputViewType,
class InputViewType>
240 class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
242 using execution_space =
typename OutputViewType::execution_space;
243 using size_type =
typename OutputViewType::size_type;
244 using value_type = size_t;
// Constructor taking the output and input Views.
// NOTE(review): the member initializer list is not visible in this
// excerpt.
246 CopyOffsetsFunctor(
const OutputViewType& dst,
const InputViewType& src)
// Compile-time requirement: the output View's memory space must be able
// to access the input View's memory space.
249 static_assert(Kokkos::SpaceAccessibility<
250 typename OutputViewType::memory_space,
251 typename InputViewType::memory_space>::accessible,
252 "CopyOffsetsFunctor (implements copyOffsets): Output "
253 "View's space must be able to access the input View's "
// Two-argument form; the value_type argument is unnamed.
// NOTE(review): the body is not visible in this excerpt.
257 KOKKOS_INLINE_FUNCTION
void
258 operator()(
const size_type i, value_type& )
const {
// One-argument form.
// NOTE(review): the body is not visible in this excerpt; presumably it
// copies entry i from the input to the output -- confirm.
263 KOKKOS_INLINE_FUNCTION
void
264 operator()(
const size_type i)
const {
// NOTE(review): the body of init() is not visible in this excerpt.
268 KOKKOS_INLINE_FUNCTION
void init(value_type& overflowCount)
const {
// Last member function: its final argument is unnamed and its body is
// empty.
// NOTE(review): the line carrying its name and first parameter is not
// visible in this excerpt; by analogy with the `false` specialization it
// is presumably join().
272 KOKKOS_INLINE_FUNCTION
void
274 const value_type& )
const {}
// CopyOffsetsImpl (primary template): declares the static entry point
// run(dst, src).  The two bool template parameters have defaults
// computed from the View types:
//  - sameLayoutsSameOffsetTypes: both Views have the same array_layout
//    AND the same non_const_value_type;
//  - outputExecSpaceCanAccessInputMemSpace:
//    Kokkos::SpaceAccessibility<output memory_space,
//    input memory_space>::accessible.
// The partial specializations of this struct provide the definitions of
// run().
297 template <
class OutputViewType,
299 const bool sameLayoutsSameOffsetTypes =
300 std::is_same<
typename OutputViewType::array_layout,
301 typename InputViewType::array_layout>::value&&
302 std::is_same<
typename OutputViewType::non_const_value_type,
303 typename InputViewType::non_const_value_type>::value,
304 const bool outputExecSpaceCanAccessInputMemSpace =
305 Kokkos::SpaceAccessibility<
306 typename OutputViewType::memory_space,
307 typename InputViewType::memory_space>::accessible>
308 struct CopyOffsetsImpl {
309 static void run(
const OutputViewType& dst,
const InputViewType& src);
// CopyOffsetsImpl specialization for sameLayoutsSameOffsetTypes == true,
// with any value of outputExecSpaceCanAccessInputMemSpace.
// run() re-checks its preconditions at compile time: same value type,
// same rank, same array layout.
// NOTE(review): the statements of run() after the `using execution_space`
// alias are not visible in this excerpt.
320 template <
class OutputViewType,
322 const bool outputExecSpaceCanAccessInputMemSpace>
323 struct CopyOffsetsImpl<OutputViewType, InputViewType,
324 true, outputExecSpaceCanAccessInputMemSpace> {
325 static void run(
const OutputViewType& dst,
const InputViewType& src) {
// Precondition: identical offset (value) types.
326 static_assert(std::is_same<
typename OutputViewType::non_const_value_type,
327 typename InputViewType::non_const_value_type>::value,
328 "CopyOffsetsImpl (implementation of copyOffsets): In order"
329 " to call this specialization, the input and output must "
330 "use the same offset type.");
// Precondition: identical ranks.
331 static_assert(static_cast<int>(OutputViewType::rank) ==
332 static_cast<int>(InputViewType::rank),
333 "CopyOffsetsImpl (implementation of copyOffsets): In order"
334 " to call this specialization, src and dst must have the "
// Precondition: identical array layouts.
// NOTE(review): the concatenated message below reads "must have the the
// same array_layout." (duplicated "the").
336 static_assert(std::is_same<
typename OutputViewType::array_layout,
337 typename InputViewType::array_layout>::value,
338 "CopyOffsetsImpl (implementation of copyOffsets): In order"
339 " to call this specialization, src and dst must have the "
340 "the same array_layout.");
342 using execution_space =
typename OutputViewType::execution_space;
// CopyOffsetsImpl specialization whose run() statically requires that
// the Views differ in array layout or value type
// (sameLayoutsSameOffsetTypes is false) and that the output View's
// memory space can access the input View's memory space.
// NOTE(review): the line carrying the specialization's bool arguments is
// not visible in this excerpt.
358 template <
class OutputViewType,
360 struct CopyOffsetsImpl<OutputViewType, InputViewType,
362 static void run(
const OutputViewType& dst,
const InputViewType& src) {
// Precondition: identical ranks.
363 static_assert(static_cast<int>(OutputViewType::rank) ==
364 static_cast<int>(InputViewType::rank),
365 "CopyOffsetsImpl (implementation of copyOffsets): "
366 "src and dst must have the same rank.");
// Recompute the first bool template parameter locally so that it can be
// checked with a static_assert.
367 constexpr
bool sameLayoutsSameOffsetTypes =
368 std::is_same<
typename OutputViewType::array_layout,
369 typename InputViewType::array_layout>::value &&
370 std::is_same<
typename OutputViewType::non_const_value_type,
371 typename InputViewType::non_const_value_type>::value;
372 static_assert(!sameLayoutsSameOffsetTypes,
373 "CopyOffsetsImpl (implements copyOffsets): In order to "
374 "call this specialization, sameLayoutsSameOffsetTypes "
375 "must be false. That is, either the input and output "
376 "must have different array layouts, or their value types "
// Precondition: the output View's space can read the input View
// directly.
378 static_assert(Kokkos::SpaceAccessibility<
379 typename OutputViewType::memory_space,
380 typename InputViewType::memory_space>::accessible,
381 "CopyOffsetsImpl (implements copyOffsets): In order to "
382 "call this specialization, the output View's space must "
383 "be able to access the input View's memory space.");
// The element-wise copy runs over [0, dst.extent(0)) in the output
// View's execution space, using CopyOffsetsFunctor built from (dst, src).
384 using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
385 using execution_space =
typename OutputViewType::execution_space;
386 using size_type =
typename OutputViewType::size_type;
387 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
// Checked launch: a parallel_reduce followed by errorIfOverflow on
// overflowCount.
// NOTE(review): the condition choosing between this launch and the
// parallel_for below, and the final argument of parallel_reduce, are not
// visible in this excerpt.
391 size_t overflowCount = 0;
392 Kokkos::parallel_reduce(
"Tpetra::Details::copyOffsets",
393 range_type(0, dst.extent(0)),
394 functor_type(dst, src),
396 errorIfOverflow(dst, src, overflowCount);
// Unchecked launch: a plain parallel_for over the same range with the
// same functor type.
398 Kokkos::parallel_for(
"Tpetra::Details::copyOffsets",
399 range_type(0, dst.extent(0)),
400 functor_type(dst, src));
// CopyOffsetsImpl specialization that stages the input through a
// temporary View on the output View's device and then runs
// CopyOffsetsFunctor on (dst, temporary).  run() statically requires
// that the Views differ in array layout or value type.
// NOTE(review): the line carrying the specialization's bool arguments is
// not visible in this excerpt; presumably this is the case where the
// output View's space cannot access the input View's memory space.
422 template <
class OutputViewType,
class InputViewType>
423 struct CopyOffsetsImpl<OutputViewType, InputViewType,
425 static void run(
const OutputViewType& dst,
const InputViewType& src) {
// Precondition: identical ranks.
426 static_assert(static_cast<int>(OutputViewType::rank) ==
427 static_cast<int>(InputViewType::rank),
428 "CopyOffsetsImpl (implementation of copyOffsets): In order"
429 " to call this specialization, src and dst must have the "
// Recompute the first bool template parameter locally so that it can be
// checked with a static_assert.
431 constexpr
bool sameLayoutsSameOffsetTypes =
432 std::is_same<
typename OutputViewType::array_layout,
433 typename InputViewType::array_layout>::value &&
434 std::is_same<
typename OutputViewType::non_const_value_type,
435 typename InputViewType::non_const_value_type>::value;
436 static_assert(!sameLayoutsSameOffsetTypes,
437 "CopyOffsetsImpl (implements copyOffsets): In order to "
438 "call this specialization, sameLayoutsSameOffsetTypes "
439 "must be false. That is, either the input and output "
440 "must have different array layouts, or their value types "
// Temporary View type: one-dimensional, LayoutLeft, holding the INPUT's
// value type on the OUTPUT's device_type.
442 using output_space_copy_type =
443 Kokkos::View<
typename InputViewType::non_const_value_type*,
444 Kokkos::LayoutLeft,
typename OutputViewType::device_type>;
445 using Kokkos::view_alloc;
446 using Kokkos::WithoutInitializing;
447 using execution_space =
typename OutputViewType::execution_space;
// Allocate the temporary, labeled "outputSpace", with the
// WithoutInitializing allocation property.
// NOTE(review): its extent argument and the step that fills it from src
// are not visible in this excerpt.
448 output_space_copy_type
449 outputSpaceCopy(view_alloc(
"outputSpace", WithoutInitializing),
457 CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
458 using size_type =
typename OutputViewType::size_type;
459 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
// Checked launch: a parallel_reduce over [0, dst.extent(0)) followed by
// errorIfOverflow.  The error report is given the original src, not the
// temporary.
// NOTE(review): the condition choosing between this launch and the
// parallel_for below, and the final argument of parallel_reduce, are not
// visible in this excerpt.
463 size_t overflowCount = 0;
464 Kokkos::parallel_reduce(
"Tpetra::Details::copyOffsets",
465 range_type(0, dst.extent(0)),
466 functor_type(dst, outputSpaceCopy),
468 errorIfOverflow(dst, src, overflowCount);
// Unchecked launch: a plain parallel_for over the same range with the
// same functor type.
470 Kokkos::parallel_for(
"Tpetra::Details::copyOffsets",
471 range_type(0, dst.extent(0)),
472 functor_type(dst, outputSpaceCopy));
489 template <
class OutputViewType,
class InputViewType>
490 void copyOffsets(
const OutputViewType& dst,
const InputViewType& src) {
491 static_assert(Kokkos::is_view<OutputViewType>::value,
492 "OutputViewType (the type of dst) must be a Kokkos::View.");
493 static_assert(Kokkos::is_view<InputViewType>::value,
494 "InputViewType (the type of src) must be a Kokkos::View.");
495 static_assert(std::is_same<
typename OutputViewType::value_type,
496 typename OutputViewType::non_const_value_type>::value,
497 "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
498 static_assert(static_cast<int>(OutputViewType::rank) == 1,
499 "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
500 static_assert(static_cast<int>(InputViewType::rank) == 1,
501 "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
502 static_assert(std::is_integral<
typename std::decay<decltype(dst(0))>::type>::value,
503 "The entries of dst must be built-in integers.");
504 static_assert(std::is_integral<
typename std::decay<decltype(src(0))>::type>::value,
505 "The entries of src must be built-in integers.");
507 TEUCHOS_TEST_FOR_EXCEPTION(dst.extent(0) != src.extent(0), std::invalid_argument,
508 "copyOffsets: dst.extent(0) = " << dst.extent(0)
509 <<
" != src.extent(0) = " << src.extent(0) <<
".");
511 CopyOffsetsImpl<OutputViewType, InputViewType>::run(dst, src);
517 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.