Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // ************************************************************************
39 // @HEADER
40 */
41 
42 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
43 #define TPETRA_DETAILS_COPYOFFSETS_HPP
44 
49 
50 #include "TpetraCore_config.h"
52 #include "Kokkos_Core.hpp"
53 #include <limits>
54 #include <type_traits>
55 
56 namespace Tpetra {
57 namespace Details {
58 
59 //
60 // Implementation details for copyOffsets (see below).
61 // Users should skip over this anonymous namespace.
62 //
63 namespace { // (anonymous)
64 
// Implementation detail of copyOffsets (see below).  Compile-time
// predicate: value is true exactly when every possible InputType
// value is representable as an OutputType, so that assignment from
// InputType to OutputType can never overflow.
//
// Implicit here is the assumption that both input and output types
// are built-in integers.  A conversion can lose information in
// exactly three ways:
//
//   1. Signed input to unsigned output (negative values), no matter
//      how wide the output is.
//   2. Wider input to narrower output.
//   3. Unsigned input to signed output of the same width.
//
// value is true if and only if none of these applies.  This makes
// the trait the exact complement of OverflowChecker::could_overflow.
template<class OutputType, class InputType>
struct OutputCanFitInput {
private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed = std::is_signed<InputType>::value;
  static constexpr bool output_wider =
    sizeof (OutputType) > sizeof (InputType);
  static constexpr bool same_width =
    sizeof (OutputType) == sizeof (InputType);

public:
  static constexpr bool value =
    // Same signedness: the output only needs to be at least as wide.
    (output_signed == input_signed && (output_wider || same_width)) ||
    // Unsigned to signed: the output needs at least one extra bit.
    (output_signed && ! input_signed && output_wider);
};
82 
83  // Avoid warnings for "unsigned integer < 0" comparisons.
84  template<class InputType,
85  bool input_signed = std::is_signed<InputType>::value>
86  struct Negative {};
87 
88  template<class InputType>
89  struct Negative<InputType, true> {
90  static KOKKOS_INLINE_FUNCTION bool
91  negative (const InputType src) {
92  return src < InputType (0);
93  }
94  };
95 
96  template<class InputType>
97  struct Negative<InputType, false> {
98  static KOKKOS_INLINE_FUNCTION bool
99  negative (const InputType /* src */) {
100  return false;
101  }
102  };
103 
104  template<class InputType>
105  KOKKOS_INLINE_FUNCTION bool negative (const InputType src) {
106  return Negative<InputType>::negative (src);
107  }
108 
109  template<class OutputType, class InputType>
110  struct OverflowChecker {
111  private:
112  static constexpr bool output_signed = std::is_signed<OutputType>::value;
113  static constexpr bool input_signed = std::is_signed<InputType>::value;
114 
115  public:
116  // 1. Signed to unsigned could overflow due to negative numbers.
117  // 2. Larger to smaller could overflow.
118  // 3. Same size but unsigned to signed could overflow.
119  static constexpr bool could_overflow =
120  (! output_signed && input_signed) ||
121  (sizeof (OutputType) < sizeof (InputType)) ||
122  (sizeof (OutputType) == sizeof (InputType) &&
123  output_signed && ! input_signed);
124 
125  KOKKOS_INLINE_FUNCTION bool
126  overflows (const InputType src) const
127  {
128  if (! could_overflow) {
129  return false;
130  }
131  else {
132  // Signed to unsigned could overflow due to negative numbers.
133  if (! output_signed && input_signed) {
134  return negative (src);
135  }
136  // We're only comparing InputType with InputType here, so this
137  // should not emit warnings.
138  return src < minDstVal_ || src > maxDstVal_;
139  }
140  }
141 
142  private:
143  // If InputType is unsigned and OutputType is signed, casting max
144  // OutputType to InputType could overflow. See #5548.
145  InputType minDstVal_ = input_signed ?
146  std::numeric_limits<OutputType>::min () : OutputType (0);
147  InputType maxDstVal_ = std::numeric_limits<OutputType>::max ();
148  };
149 
150 
151  template<class OutputViewType, class InputViewType>
152  void
153  errorIfOverflow (const OutputViewType& dst,
154  const InputViewType& src,
155  const size_t overflowCount)
156  {
157  if (overflowCount == 0) {
158  return;
159  }
160 
161  std::ostringstream os;
162  const bool plural = overflowCount != size_t (1);
163  os << "copyOffsets: " << overflowCount << " value" <<
164  (plural ? "s" : "") << " in src were too big (in the "
165  "sense of integer overflow) to fit in dst.";
166 
167  const bool verbose = Details::Behavior::verbose ();
168  if (verbose) {
169  const size_t maxNumToPrint =
171  const size_t srcLen (src.extent (0));
172  if (srcLen <= maxNumToPrint) {
173  auto dst_h = Kokkos::create_mirror_view (dst);
174  auto src_h = Kokkos::create_mirror_view (src);
175  Kokkos::deep_copy (src_h, src);
176  Kokkos::deep_copy (dst_h, dst);
177 
178  os << " src: [";
179  for (size_t k = 0; k < srcLen; ++k) {
180  os << src_h[k];
181  if (k + size_t (1) < srcLen) {
182  os << ", ";
183  }
184  }
185  os << "], ";
186 
187  os << " dst: [";
188  for (size_t k = 0; k < srcLen; ++k) {
189  os << dst_h[k];
190  if (k + size_t (1) < srcLen) {
191  os << ", ";
192  }
193  }
194  os << "].";
195  }
196  else {
197  os << " src.extent(0) > " << maxNumToPrint << ", Tpetra's "
198  "verbose print count threshold. To increase this, set the "
199  "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
200  "to the desired threshold and rerun. You do NOT need to "
201  "rebuild Trilinos.";
202  }
203  }
204  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str ());
205  }
206 
207  // Implementation detail of copyOffsets (see below).
208  //
209  // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
210  // Tpetra::Details::FixedHashTable uses this in its "copy"
211  // constructor for converting between different Device types. All
212  // the action happens in the partial specializations for different
213  // values of outputCanFitInput. "Output can fit input" means that
214  // casting the input's value type to the output's value type will
215  // never result in integer overflow.
216  template<class OutputViewType,
217  class InputViewType,
218  const bool outputCanFitInput =
219  OutputCanFitInput<typename OutputViewType::non_const_value_type,
220  typename InputViewType::non_const_value_type>::value>
221  class CopyOffsetsFunctor {};
222 
223  // Specialization for when overflow is possible.
224  template<class OutputViewType, class InputViewType>
225  class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
226  public:
227  using execution_space = typename OutputViewType::execution_space;
228  using size_type = typename OutputViewType::size_type;
229  using value_type = size_t;
230 
231  using input_value_type = typename InputViewType::non_const_value_type;
232  using output_value_type = typename OutputViewType::non_const_value_type;
233 
234  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
235  dst_ (dst), src_ (src)
236  {
237  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
238  // a memory space, rather than an execution space, as the first
239  // argument of VerifyExecutionCanAccessMemorySpace.
240  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
241  typename OutputViewType::memory_space,
242  typename InputViewType::memory_space>::value,
243  "CopyOffsetsFunctor (implements copyOffsets): Output "
244  "View's space must be able to access the input View's "
245  "memory space.");
246  }
247 
248  KOKKOS_INLINE_FUNCTION void
249  operator () (const size_type i, value_type& overflowCount) const {
250  const input_value_type src_i = src_(i);
251  if (checker_.overflows (src_i)) {
252  ++overflowCount;
253  }
254  dst_(i) = static_cast<output_value_type> (src_i);
255  }
256 
257  KOKKOS_INLINE_FUNCTION void
258  operator () (const size_type i) const {
259  const input_value_type src_i = src_(i);
260  dst_(i) = static_cast<output_value_type> (src_i);
261  }
262 
263  KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
264  overflowCount = 0;
265  }
266 
267  KOKKOS_INLINE_FUNCTION void
268  join (volatile value_type& result,
269  const volatile value_type& current) const {
270  result += current;
271  }
272 
273  private:
274  OutputViewType dst_;
275  InputViewType src_;
276  OverflowChecker<output_value_type, input_value_type> checker_;
277  };
278 
279  // Specialization for when overflow is impossible.
280  template<class OutputViewType, class InputViewType>
281  class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
282  public:
283  using execution_space = typename OutputViewType::execution_space;
284  using size_type = typename OutputViewType::size_type;
285  using value_type = size_t;
286 
287  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
288  dst_ (dst),
289  src_ (src)
290  {
291  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
292  // a memory space, rather than an execution space, as the first
293  // argument of VerifyExecutionCanAccessMemorySpace.
294  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
295  typename OutputViewType::memory_space,
296  typename InputViewType::memory_space>::value,
297  "CopyOffsetsFunctor (implements copyOffsets): Output "
298  "View's space must be able to access the input View's "
299  "memory space.");
300  }
301 
302  KOKKOS_INLINE_FUNCTION void
303  operator () (const size_type i, value_type& /* overflowCount */) const {
304  // Overflow is impossible in this case, so there's no need to check.
305  dst_(i) = src_(i);
306  }
307 
308  KOKKOS_INLINE_FUNCTION void
309  operator () (const size_type i) const {
310  dst_(i) = src_(i);
311  }
312 
313  KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
314  overflowCount = 0;
315  }
316 
317  KOKKOS_INLINE_FUNCTION void
318  join (volatile value_type& /* result */,
319  const volatile value_type& /* current */) const
320  {}
321 
322  private:
323  OutputViewType dst_;
324  InputViewType src_;
325  };
326 
327  // Implementation detail of copyOffsets (see below).
328  //
329  // We specialize copyOffsets on two different conditions:
330  //
331  // 1. Are the two Views' layouts the same, and do the input and
332  // output Views have the same value type?
333  // 2. Can the output View's execution space access the input View's
334  // memory space?
335  //
336  // If (1) is true, that makes the implementation simple: just call
337  // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
338  // matter the device type). Otherwise, we need a custom copy
339  // functor. If (2) is true, then we can use CopyOffsetsFunctor
340  // directly. Otherwise, we have to copy the input View into the
341  // output View's memory space, before we can use the functor.
342  //
343  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use a
344  // memory space, rather than an execution space, as the first
345  // argument of VerifyExecutionCanAccessMemorySpace.
346  template<class OutputViewType,
347  class InputViewType,
348  const bool sameLayoutsSameOffsetTypes =
349  std::is_same<typename OutputViewType::array_layout,
350  typename InputViewType::array_layout>::value &&
351  std::is_same<typename OutputViewType::non_const_value_type,
352  typename InputViewType::non_const_value_type>::value,
353  const bool outputExecSpaceCanAccessInputMemSpace =
354  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
355  typename OutputViewType::memory_space,
356  typename InputViewType::memory_space>::value>
357  struct CopyOffsetsImpl {
358  static void run (const OutputViewType& dst, const InputViewType& src);
359  };
360 
361  // Specialization for sameLayoutsSameOffsetTypes = true:
362  //
363  // If both input and output Views have the same layout, and both
364  // input and output use the same type for offsets, then we don't
365  // need to check for overflow, and we can use Kokkos::deep_copy
366  // directly. It doesn't matter whether the output execution space
367  // can access the input memory space: Kokkos::deep_copy takes care
368  // of the details.
369  template<class OutputViewType,
370  class InputViewType,
371  const bool outputExecSpaceCanAccessInputMemSpace>
372  struct CopyOffsetsImpl<OutputViewType, InputViewType,
373  true, outputExecSpaceCanAccessInputMemSpace> {
374  static void run (const OutputViewType& dst, const InputViewType& src) {
375  static_assert (std::is_same<typename OutputViewType::non_const_value_type,
376  typename InputViewType::non_const_value_type>::value,
377  "CopyOffsetsImpl (implementation of copyOffsets): In order"
378  " to call this specialization, the input and output must "
379  "use the same offset type.");
380  static_assert (static_cast<int> (OutputViewType::rank) ==
381  static_cast<int> (InputViewType::rank),
382  "CopyOffsetsImpl (implementation of copyOffsets): In order"
383  " to call this specialization, src and dst must have the "
384  "same rank.");
385  static_assert (std::is_same<typename OutputViewType::array_layout,
386  typename InputViewType::array_layout>::value,
387  "CopyOffsetsImpl (implementation of copyOffsets): In order"
388  " to call this specialization, src and dst must have the "
389  "the same array_layout.");
390  Kokkos::deep_copy (dst, src);
391  }
392  };
393 
394  // Specializations for sameLayoutsSameOffsetTypes = false:
395  //
396  // If input and output don't have the same layout, or use different
397  // types for offsets, then we can't use Kokkos::deep_copy directly,
398  // and we may have to check for overflow.
399 
400  // Specialization for sameLayoutsSameOffsetTypes = false and
401  // outputExecSpaceCanAccessInputMemSpace = true:
402  //
403  // If the output execution space can access the input memory space,
404  // then we can use CopyOffsetsFunctor directly.
405  template<class OutputViewType,
406  class InputViewType>
407  struct CopyOffsetsImpl<OutputViewType, InputViewType,
408  false, true> {
409  static void run (const OutputViewType& dst, const InputViewType& src) {
410  static_assert (static_cast<int> (OutputViewType::rank) ==
411  static_cast<int> (InputViewType::rank),
412  "CopyOffsetsImpl (implementation of copyOffsets): "
413  "src and dst must have the same rank.");
414  constexpr bool sameLayoutsSameOffsetTypes =
415  std::is_same<typename OutputViewType::array_layout,
416  typename InputViewType::array_layout>::value &&
417  std::is_same<typename OutputViewType::non_const_value_type,
418  typename InputViewType::non_const_value_type>::value;
419  static_assert (! sameLayoutsSameOffsetTypes,
420  "CopyOffsetsImpl (implements copyOffsets): In order to "
421  "call this specialization, sameLayoutsSameOffsetTypes "
422  "must be false. That is, either the input and output "
423  "must have different array layouts, or their value types "
424  "must differ.");
425  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
426  // a memory space, rather than an execution space, as the first
427  // argument of VerifyExecutionCanAccessMemorySpace.
428  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
429  typename OutputViewType::memory_space,
430  typename InputViewType::memory_space>::value,
431  "CopyOffsetsImpl (implements copyOffsets): In order to "
432  "call this specialization, the output View's space must "
433  "be able to access the input View's memory space.");
434  using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
435  using execution_space = typename OutputViewType::execution_space;
436  using size_type = typename OutputViewType::size_type;
437  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
438 
439  const bool debug = Details::Behavior::debug ();
440  if (debug) {
441  size_t overflowCount = 0; // output argument of the reduction
442  Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
443  range_type (0, dst.extent (0)),
444  functor_type (dst, src),
445  overflowCount);
446  errorIfOverflow (dst, src, overflowCount);
447  }
448  else {
449  Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
450  range_type (0, dst.extent (0)),
451  functor_type (dst, src));
452  }
453  }
454  };
455 
456  // Specialization for sameLayoutsSameOffsetTypes = false and
457  // outputExecSpaceCanAccessInputMemSpace = false.
458  //
459  // If the output execution space canNOT access the input memory
460  // space, then we can't use CopyOffsetsFunctor directly. Instead,
461  // tell Kokkos to copy the input View's data into the output View's
462  // memory space _first_. Since the offset types are different for
463  // this specialization, we can't just call Kokkos::deep_copy
464  // directly between the input and output Views of offsets; that
465  // wouldn't compile.
466  //
467  // This case can and does come up in practice: If the output View's
468  // execution space is Cuda, it cannot currently access host memory
469  // (that's the opposite direction from what UVM allows).
470  // Furthermore, that case specifically requires overflow checking,
471  // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
472  // offset type than Kokkos' host spaces.
473  template<class OutputViewType, class InputViewType>
474  struct CopyOffsetsImpl<OutputViewType, InputViewType,
475  false, false> {
476  static void run (const OutputViewType& dst, const InputViewType& src) {
477  static_assert (static_cast<int> (OutputViewType::rank) ==
478  static_cast<int> (InputViewType::rank),
479  "CopyOffsetsImpl (implementation of copyOffsets): In order"
480  " to call this specialization, src and dst must have the "
481  "same rank.");
482  constexpr bool sameLayoutsSameOffsetTypes =
483  std::is_same<typename OutputViewType::array_layout,
484  typename InputViewType::array_layout>::value &&
485  std::is_same<typename OutputViewType::non_const_value_type,
486  typename InputViewType::non_const_value_type>::value;
487  static_assert (! sameLayoutsSameOffsetTypes,
488  "CopyOffsetsImpl (implements copyOffsets): In order to "
489  "call this specialization, sameLayoutsSameOffsetTypes "
490  "must be false. That is, either the input and output "
491  "must have different array layouts, or their value types "
492  "must differ.");
493  using output_space_copy_type =
494  Kokkos::View<typename InputViewType::non_const_value_type*,
495  Kokkos::LayoutLeft, typename OutputViewType::device_type>;
496  using Kokkos::view_alloc;
497  using Kokkos::WithoutInitializing;
498  output_space_copy_type
499  outputSpaceCopy (view_alloc ("outputSpace", WithoutInitializing),
500  src.extent (0));
501  Kokkos::deep_copy (outputSpaceCopy, src);
502 
503  // The output View's execution space can access
504  // outputSpaceCopy's data, so we can run the functor now.
505  using functor_type =
506  CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
507  using execution_space = typename OutputViewType::execution_space;
508  using size_type = typename OutputViewType::size_type;
509  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
510 
511  const bool debug = Details::Behavior::debug ();
512  if (debug) {
513  size_t overflowCount = 0;
514  Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
515  range_type (0, dst.extent (0)),
516  functor_type (dst, outputSpaceCopy),
517  overflowCount);
518  errorIfOverflow (dst, src, overflowCount);
519  }
520  else {
521  Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
522  range_type (0, dst.extent (0)),
523  functor_type (dst, outputSpaceCopy));
524  }
525  }
526  };
527 } // namespace (anonymous)
528 
540 template<class OutputViewType, class InputViewType>
541 void
542 copyOffsets (const OutputViewType& dst, const InputViewType& src)
543 {
544  static_assert (Kokkos::Impl::is_view<OutputViewType>::value,
545  "OutputViewType (the type of dst) must be a Kokkos::View.");
546  static_assert (Kokkos::Impl::is_view<InputViewType>::value,
547  "InputViewType (the type of src) must be a Kokkos::View.");
548  static_assert (std::is_same<typename OutputViewType::value_type,
549  typename OutputViewType::non_const_value_type>::value,
550  "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
551  static_assert (static_cast<int> (OutputViewType::rank) == 1,
552  "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
553  static_assert (static_cast<int> (InputViewType::rank) == 1,
554  "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
555  static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
556  "The entries of dst must be built-in integers.");
557  static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
558  "The entries of src must be built-in integers.");
559 
560  TEUCHOS_TEST_FOR_EXCEPTION
561  (dst.extent (0) != src.extent (0), std::invalid_argument,
562  "copyOffsets: dst.extent(0) = " << dst.extent (0)
563  << " != src.extent(0) = " << src.extent (0) << ".");
564 
565  CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
566 }
567 
568 } // namespace Details
569 } // namespace Tpetra
570 
571 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.