Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
45 #define TPETRA_DETAILS_COPYOFFSETS_HPP
46 
51 
52 #include "TpetraCore_config.h"
54 #include "Kokkos_Core.hpp"
55 #include <limits>
56 #include <type_traits>
57 
58 namespace Tpetra {
59 namespace Details {
60 
61 //
62 // Implementation details for copyOffsets (see below).
63 // Users should skip over this anonymous namespace.
64 //
65 namespace { // (anonymous)
66 
// Implementation detail of copyOffsets (see below).  Compile-time
// test for whether integer overflow is impossible on assignment
// from an InputType to an OutputType, that is, whether every
// InputType value is representable as an OutputType.
//
// Implicit here is the assumption that both input and output types
// are built-in integers.
//
// The result is true in exactly two situations:
//
//   1. OutputType is strictly wider than InputType, and it is not
//      the case that InputType is signed while OutputType is
//      unsigned (negative input values never fit in an unsigned
//      output, no matter how wide).
//   2. Both types have the same size and the same signedness.
//
// CopyOffsetsFunctor uses this value to select between its checking
// and non-checking specializations.
template<class OutputType, class InputType>
struct OutputCanFitInput {
private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed = std::is_signed<InputType>::value;

public:
  static const bool value =
    // Wider output: safe unless negative (signed) input would have
    // to be stored in an unsigned output.
    (sizeof (OutputType) > sizeof (InputType) &&
     (output_signed || ! input_signed)) ||
    // Same width: only safe if the value ranges coincide.
    (sizeof (OutputType) == sizeof (InputType) &&
     output_signed == input_signed);
};
84 
85  // Avoid warnings for "unsigned integer < 0" comparisons.
86  template<class InputType,
87  bool input_signed = std::is_signed<InputType>::value>
88  struct Negative {};
89 
90  template<class InputType>
91  struct Negative<InputType, true> {
92  static KOKKOS_INLINE_FUNCTION bool
93  negative (const InputType src) {
94  return src < InputType (0);
95  }
96  };
97 
98  template<class InputType>
99  struct Negative<InputType, false> {
100  static KOKKOS_INLINE_FUNCTION bool
101  negative (const InputType /* src */) {
102  return false;
103  }
104  };
105 
106  template<class InputType>
107  KOKKOS_INLINE_FUNCTION bool negative (const InputType src) {
108  return Negative<InputType>::negative (src);
109  }
110 
111  template<class OutputType, class InputType>
112  struct OverflowChecker {
113  private:
114  static constexpr bool output_signed = std::is_signed<OutputType>::value;
115  static constexpr bool input_signed = std::is_signed<InputType>::value;
116 
117  public:
118  // 1. Signed to unsigned could overflow due to negative numbers.
119  // 2. Larger to smaller could overflow.
120  // 3. Same size but unsigned to signed could overflow.
121  static constexpr bool could_overflow =
122  (! output_signed && input_signed) ||
123  (sizeof (OutputType) < sizeof (InputType)) ||
124  (sizeof (OutputType) == sizeof (InputType) &&
125  output_signed && ! input_signed);
126 
127  KOKKOS_INLINE_FUNCTION bool
128  overflows (const InputType src) const
129  {
130  if (! could_overflow) {
131  return false;
132  }
133  else {
134  // Signed to unsigned could overflow due to negative numbers.
135  if (! output_signed && input_signed) {
136  return negative (src);
137  }
138  // We're only comparing InputType with InputType here, so this
139  // should not emit warnings.
140  return src < minDstVal_ || src > maxDstVal_;
141  }
142  }
143 
144  private:
145  // If InputType is unsigned and OutputType is signed, casting max
146  // OutputType to InputType could overflow. See #5548.
147  InputType minDstVal_ = input_signed ?
148  std::numeric_limits<OutputType>::min () : OutputType (0);
149  InputType maxDstVal_ = std::numeric_limits<OutputType>::max ();
150  };
151 
152 
153  template<class OutputViewType, class InputViewType>
154  void
155  errorIfOverflow (const OutputViewType& dst,
156  const InputViewType& src,
157  const size_t overflowCount)
158  {
159  if (overflowCount == 0) {
160  return;
161  }
162 
163  std::ostringstream os;
164  const bool plural = overflowCount != size_t (1);
165  os << "copyOffsets: " << overflowCount << " value" <<
166  (plural ? "s" : "") << " in src were too big (in the "
167  "sense of integer overflow) to fit in dst.";
168 
169  const bool verbose = Details::Behavior::verbose ();
170  if (verbose) {
171  constexpr size_t maxNumToPrint = 100;
172  const size_t srcLen (src.extent (0));
173  if (srcLen <= maxNumToPrint) {
174  auto dst_h = Kokkos::create_mirror_view (dst);
175  auto src_h = Kokkos::create_mirror_view (src);
176  Kokkos::deep_copy (src_h, src);
177  Kokkos::deep_copy (dst_h, dst);
178 
179  os << " src: [";
180  for (size_t k = 0; k < srcLen; ++k) {
181  os << src_h[k];
182  if (k + size_t (1) < srcLen) {
183  os << ", ";
184  }
185  }
186  os << "], ";
187 
188  os << " dst: [";
189  for (size_t k = 0; k < srcLen; ++k) {
190  os << dst_h[k];
191  if (k + size_t (1) < srcLen) {
192  os << ", ";
193  }
194  }
195  os << "].";
196  }
197  else {
198  os << " src and dst are too long to print.";
199  }
200  }
201  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str ());
202  }
203 
204  // Implementation detail of copyOffsets (see below).
205  //
206  // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
207  // Tpetra::Details::FixedHashTable uses this in its "copy"
208  // constructor for converting between different Device types. All
209  // the action happens in the partial specializations for different
210  // values of outputCanFitInput. "Output can fit input" means that
211  // casting the input's value type to the output's value type will
212  // never result in integer overflow.
213  template<class OutputViewType,
214  class InputViewType,
215  const bool outputCanFitInput =
216  OutputCanFitInput<typename OutputViewType::non_const_value_type,
217  typename InputViewType::non_const_value_type>::value>
218  class CopyOffsetsFunctor {};
219 
220  // Specialization for when overflow is possible.
221  template<class OutputViewType, class InputViewType>
222  class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
223  public:
224  using execution_space = typename OutputViewType::execution_space;
225  using size_type = typename OutputViewType::size_type;
226  using value_type = size_t;
227 
228  using input_value_type = typename InputViewType::non_const_value_type;
229  using output_value_type = typename OutputViewType::non_const_value_type;
230 
231  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
232  dst_ (dst), src_ (src)
233  {
234  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
235  // a memory space, rather than an execution space, as the first
236  // argument of VerifyExecutionCanAccessMemorySpace.
237  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
238  typename OutputViewType::memory_space,
239  typename InputViewType::memory_space>::value,
240  "CopyOffsetsFunctor (implements copyOffsets): Output "
241  "View's space must be able to access the input View's "
242  "memory space.");
243  }
244 
245  KOKKOS_INLINE_FUNCTION void
246  operator () (const size_type i, value_type& overflowCount) const {
247  const input_value_type src_i = src_(i);
248  if (checker_.overflows (src_i)) {
249  ++overflowCount;
250  }
251  dst_(i) = static_cast<output_value_type> (src_i);
252  }
253 
254  KOKKOS_INLINE_FUNCTION void
255  operator () (const size_type i) const {
256  const input_value_type src_i = src_(i);
257  dst_(i) = static_cast<output_value_type> (src_i);
258  }
259 
260  KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
261  overflowCount = 0;
262  }
263 
264  KOKKOS_INLINE_FUNCTION void
265  join (volatile value_type& result,
266  const volatile value_type& current) const {
267  result += current;
268  }
269 
270  private:
271  OutputViewType dst_;
272  InputViewType src_;
273  OverflowChecker<output_value_type, input_value_type> checker_;
274  };
275 
276  // Specialization for when overflow is impossible.
277  template<class OutputViewType, class InputViewType>
278  class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
279  public:
280  using execution_space = typename OutputViewType::execution_space;
281  using size_type = typename OutputViewType::size_type;
282  using value_type = size_t;
283 
284  CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
285  dst_ (dst),
286  src_ (src)
287  {
288  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
289  // a memory space, rather than an execution space, as the first
290  // argument of VerifyExecutionCanAccessMemorySpace.
291  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
292  typename OutputViewType::memory_space,
293  typename InputViewType::memory_space>::value,
294  "CopyOffsetsFunctor (implements copyOffsets): Output "
295  "View's space must be able to access the input View's "
296  "memory space.");
297  }
298 
299  KOKKOS_INLINE_FUNCTION void
300  operator () (const size_type i, value_type& /* overflowCount */) const {
301  // Overflow is impossible in this case, so there's no need to check.
302  dst_(i) = src_(i);
303  }
304 
305  KOKKOS_INLINE_FUNCTION void
306  operator () (const size_type i) const {
307  dst_(i) = src_(i);
308  }
309 
310  KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
311  overflowCount = 0;
312  }
313 
314  KOKKOS_INLINE_FUNCTION void
315  join (volatile value_type& /* result */,
316  const volatile value_type& /* current */) const
317  {}
318 
319  private:
320  OutputViewType dst_;
321  InputViewType src_;
322  };
323 
324  // Implementation detail of copyOffsets (see below).
325  //
326  // We specialize copyOffsets on two different conditions:
327  //
328  // 1. Are the two Views' layouts the same, and do the input and
329  // output Views have the same value type?
330  // 2. Can the output View's execution space access the input View's
331  // memory space?
332  //
333  // If (1) is true, that makes the implementation simple: just call
334  // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
335  // matter the device type). Otherwise, we need a custom copy
336  // functor. If (2) is true, then we can use CopyOffsetsFunctor
337  // directly. Otherwise, we have to copy the input View into the
338  // output View's memory space, before we can use the functor.
339  //
340  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use a
341  // memory space, rather than an execution space, as the first
342  // argument of VerifyExecutionCanAccessMemorySpace.
343  template<class OutputViewType,
344  class InputViewType,
345  const bool sameLayoutsSameOffsetTypes =
346  std::is_same<typename OutputViewType::array_layout,
347  typename InputViewType::array_layout>::value &&
348  std::is_same<typename OutputViewType::non_const_value_type,
349  typename InputViewType::non_const_value_type>::value,
350  const bool outputExecSpaceCanAccessInputMemSpace =
351  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
352  typename OutputViewType::memory_space,
353  typename InputViewType::memory_space>::value>
354  struct CopyOffsetsImpl {
355  static void run (const OutputViewType& dst, const InputViewType& src);
356  };
357 
358  // Specialization for sameLayoutsSameOffsetTypes = true:
359  //
360  // If both input and output Views have the same layout, and both
361  // input and output use the same type for offsets, then we don't
362  // need to check for overflow, and we can use Kokkos::deep_copy
363  // directly. It doesn't matter whether the output execution space
364  // can access the input memory space: Kokkos::deep_copy takes care
365  // of the details.
366  template<class OutputViewType,
367  class InputViewType,
368  const bool outputExecSpaceCanAccessInputMemSpace>
369  struct CopyOffsetsImpl<OutputViewType, InputViewType,
370  true, outputExecSpaceCanAccessInputMemSpace> {
371  static void run (const OutputViewType& dst, const InputViewType& src) {
372  static_assert (std::is_same<typename OutputViewType::non_const_value_type,
373  typename InputViewType::non_const_value_type>::value,
374  "CopyOffsetsImpl (implementation of copyOffsets): In order"
375  " to call this specialization, the input and output must "
376  "use the same offset type.");
377  static_assert (static_cast<int> (OutputViewType::rank) ==
378  static_cast<int> (InputViewType::rank),
379  "CopyOffsetsImpl (implementation of copyOffsets): In order"
380  " to call this specialization, src and dst must have the "
381  "same rank.");
382  static_assert (std::is_same<typename OutputViewType::array_layout,
383  typename InputViewType::array_layout>::value,
384  "CopyOffsetsImpl (implementation of copyOffsets): In order"
385  " to call this specialization, src and dst must have the "
386  "the same array_layout.");
387  Kokkos::deep_copy (dst, src);
388  }
389  };
390 
391  // Specializations for sameLayoutsSameOffsetTypes = false:
392  //
393  // If input and output don't have the same layout, or use different
394  // types for offsets, then we can't use Kokkos::deep_copy directly,
395  // and we may have to check for overflow.
396 
397  // Specialization for sameLayoutsSameOffsetTypes = false and
398  // outputExecSpaceCanAccessInputMemSpace = true:
399  //
400  // If the output execution space can access the input memory space,
401  // then we can use CopyOffsetsFunctor directly.
402  template<class OutputViewType,
403  class InputViewType>
404  struct CopyOffsetsImpl<OutputViewType, InputViewType,
405  false, true> {
406  static void run (const OutputViewType& dst, const InputViewType& src) {
407  static_assert (static_cast<int> (OutputViewType::rank) ==
408  static_cast<int> (InputViewType::rank),
409  "CopyOffsetsImpl (implementation of copyOffsets): "
410  "src and dst must have the same rank.");
411  constexpr bool sameLayoutsSameOffsetTypes =
412  std::is_same<typename OutputViewType::array_layout,
413  typename InputViewType::array_layout>::value &&
414  std::is_same<typename OutputViewType::non_const_value_type,
415  typename InputViewType::non_const_value_type>::value;
416  static_assert (! sameLayoutsSameOffsetTypes,
417  "CopyOffsetsImpl (implements copyOffsets): In order to "
418  "call this specialization, sameLayoutsSameOffsetTypes "
419  "must be false. That is, either the input and output "
420  "must have different array layouts, or their value types "
421  "must differ.");
422  // NOTE (mfh 29 Jan 2016): See kokkos/kokkos#178 for why we use
423  // a memory space, rather than an execution space, as the first
424  // argument of VerifyExecutionCanAccessMemorySpace.
425  static_assert (Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
426  typename OutputViewType::memory_space,
427  typename InputViewType::memory_space>::value,
428  "CopyOffsetsImpl (implements copyOffsets): In order to "
429  "call this specialization, the output View's space must "
430  "be able to access the input View's memory space.");
431  using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
432  using execution_space = typename OutputViewType::execution_space;
433  using size_type = typename OutputViewType::size_type;
434  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
435 
436  const bool debug = Details::Behavior::debug ();
437  if (debug) {
438  size_t overflowCount = 0; // output argument of the reduction
439  Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
440  range_type (0, dst.extent (0)),
441  functor_type (dst, src),
442  overflowCount);
443  errorIfOverflow (dst, src, overflowCount);
444  }
445  else {
446  Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
447  range_type (0, dst.extent (0)),
448  functor_type (dst, src));
449  }
450  }
451  };
452 
453  // Specialization for sameLayoutsSameOffsetTypes = false and
454  // outputExecSpaceCanAccessInputMemSpace = false.
455  //
456  // If the output execution space canNOT access the input memory
457  // space, then we can't use CopyOffsetsFunctor directly. Instead,
458  // tell Kokkos to copy the input View's data into the output View's
459  // memory space _first_. Since the offset types are different for
460  // this specialization, we can't just call Kokkos::deep_copy
461  // directly between the input and output Views of offsets; that
462  // wouldn't compile.
463  //
464  // This case can and does come up in practice: If the output View's
465  // execution space is Cuda, it cannot currently access host memory
466  // (that's the opposite direction from what UVM allows).
467  // Furthermore, that case specifically requires overflow checking,
468  // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
469  // offset type than Kokkos' host spaces.
470  template<class OutputViewType, class InputViewType>
471  struct CopyOffsetsImpl<OutputViewType, InputViewType,
472  false, false> {
473  static void run (const OutputViewType& dst, const InputViewType& src) {
474  static_assert (static_cast<int> (OutputViewType::rank) ==
475  static_cast<int> (InputViewType::rank),
476  "CopyOffsetsImpl (implementation of copyOffsets): In order"
477  " to call this specialization, src and dst must have the "
478  "same rank.");
479  constexpr bool sameLayoutsSameOffsetTypes =
480  std::is_same<typename OutputViewType::array_layout,
481  typename InputViewType::array_layout>::value &&
482  std::is_same<typename OutputViewType::non_const_value_type,
483  typename InputViewType::non_const_value_type>::value;
484  static_assert (! sameLayoutsSameOffsetTypes,
485  "CopyOffsetsImpl (implements copyOffsets): In order to "
486  "call this specialization, sameLayoutsSameOffsetTypes "
487  "must be false. That is, either the input and output "
488  "must have different array layouts, or their value types "
489  "must differ.");
490  using output_space_copy_type =
491  Kokkos::View<typename InputViewType::non_const_value_type*,
492  Kokkos::LayoutLeft, typename OutputViewType::device_type>;
493  using Kokkos::view_alloc;
494  using Kokkos::WithoutInitializing;
495  output_space_copy_type
496  outputSpaceCopy (view_alloc ("outputSpace", WithoutInitializing),
497  src.extent (0));
498  Kokkos::deep_copy (outputSpaceCopy, src);
499 
500  // The output View's execution space can access
501  // outputSpaceCopy's data, so we can run the functor now.
502  using functor_type =
503  CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
504  using execution_space = typename OutputViewType::execution_space;
505  using size_type = typename OutputViewType::size_type;
506  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
507 
508  const bool debug = Details::Behavior::debug ();
509  if (debug) {
510  size_t overflowCount = 0;
511  Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
512  range_type (0, dst.extent (0)),
513  functor_type (dst, outputSpaceCopy),
514  overflowCount);
515  errorIfOverflow (dst, src, overflowCount);
516  }
517  else {
518  Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
519  range_type (0, dst.extent (0)),
520  functor_type (dst, outputSpaceCopy));
521  }
522  }
523  };
524 } // namespace (anonymous)
525 
537 template<class OutputViewType, class InputViewType>
538 void
539 copyOffsets (const OutputViewType& dst, const InputViewType& src)
540 {
541  static_assert (Kokkos::Impl::is_view<OutputViewType>::value,
542  "OutputViewType (the type of dst) must be a Kokkos::View.");
543  static_assert (Kokkos::Impl::is_view<InputViewType>::value,
544  "InputViewType (the type of src) must be a Kokkos::View.");
545  static_assert (std::is_same<typename OutputViewType::value_type,
546  typename OutputViewType::non_const_value_type>::value,
547  "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
548  static_assert (static_cast<int> (OutputViewType::rank) == 1,
549  "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
550  static_assert (static_cast<int> (InputViewType::rank) == 1,
551  "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
552  static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
553  "The entries of dst must be built-in integers.");
554  static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
555  "The entries of src must be built-in integers.");
556 
557  TEUCHOS_TEST_FOR_EXCEPTION
558  (dst.extent (0) != src.extent (0), std::invalid_argument,
559  "copyOffsets: dst.extent(0) = " << dst.extent (0)
560  << " != src.extent(0) = " << src.extent (0) << ".");
561 
562  CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
563 }
564 
565 } // namespace Details
566 } // namespace Tpetra
567 
568 #endif // TPETRA_DETAILS_COPYOFFSETS_HPP
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
static bool debug()
Whether Tpetra is in debug mode.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static bool verbose()
Whether Tpetra is in verbose mode.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.