Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_FixedHashTable_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
11 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
12 
13 #include "Tpetra_Details_FixedHashTable_decl.hpp"
15 #ifdef TPETRA_USE_MURMUR_HASH
16 #include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
17 #endif // TPETRA_USE_MURMUR_HASH
18 #if KOKKOS_VERSION >= 40799
19 #include "KokkosKernels_ArithTraits.hpp"
20 #else
21 #include "Kokkos_ArithTraits.hpp"
22 #endif
23 #include "Teuchos_TypeNameTraits.hpp"
25 #include <type_traits>
26 
27 namespace Tpetra {
28 namespace Details {
29 //
30 // This namespace stores utility functions and Kokkos
31 // functors for use in FixedHashTable construction.
32 //
33 namespace FHT {
34 
35 // Is it worth actually using building the FixedHashTable using
36 // parallel threads, instead of just counting in a sequential loop?
37 //
38 // The parallel version of FixedHashTable construction isn't just a
39 // parallelization of the sequential loops. It incurs additional
40 // overheads. For example, the CountBuckets kernel uses atomic update
41 // instructions to count the number of "buckets" per offsets array
42 // (ptr) entry. Atomic updates have overhead, even if only one thread
43 // issues them. The Kokkos kernels are still correct in that case,
44 // but I would rather not incur overhead then. It might make sense to
45 // set the minimum number of threads to something greater than 1, but
46 // we would need experiments to find out.
47 template <class ExecSpace>
48 bool worthBuildingFixedHashTableInParallel() {
49  return ExecSpace().concurrency() > 1;
50 }
51 
52 //
53 // Functors for FixedHashTable initialization
54 //
55 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
56 // should consider replacing all of these functors with in-line
57 // lambdas. The only issue is that we would need to bake the
58 // execution space into the policy, since the default execution space
59 // might differ from the one Tpetra wants to use.
60 
70 template <class CountsViewType,
71  class KeysViewType,
72  class SizeType = typename KeysViewType::size_type>
73 class CountBuckets {
74  public:
75  typedef CountsViewType counts_view_type;
76  typedef KeysViewType keys_view_type;
77  typedef typename CountsViewType::execution_space execution_space;
78  typedef typename CountsViewType::memory_space memory_space;
79  typedef SizeType size_type;
80  typedef typename keys_view_type::non_const_value_type key_type;
81  // mfh 21 May 2015: Having a device_type typedef in the functor
82  // along with an execution_space typedef causes compilation issues.
83  // This is because one of Kokkos' partial specializations picks up
84  // on the device_type typedef, and another picks up on the
85  // execution_space typedef. The former is a legacy of a previous
86  // design iteration of Kokkos, which did not separate memory and
87  // execution spaces.
89 
95  CountBuckets(const counts_view_type& counts,
96  const keys_view_type& keys,
97  const size_type size)
98  : counts_(counts)
99  , keys_(keys)
100  , size_(size) {}
101 
106  KOKKOS_INLINE_FUNCTION void
107  operator()(const size_type& i) const {
108  typedef typename hash_type::result_type hash_value_type;
109 
110  const hash_value_type hashVal = hash_type::hashFunc(keys_[i], size_);
111  Kokkos::atomic_inc(&counts_[hashVal]);
112  }
113 
114  using value_type = Kokkos::pair<int, key_type>;
115 
119  KOKKOS_INLINE_FUNCTION void
120  operator()(const size_type& i, value_type& dst) const {
121  using hash_value_type = typename hash_type::result_type;
122 
123  const key_type keyVal = keys_[i];
124  const hash_value_type hashVal = hash_type::hashFunc(keyVal, size_);
125  if (hashVal < hash_value_type(0) ||
126  hashVal >= hash_value_type(counts_.extent(0))) {
127  dst.first = 1;
128  dst.second = keyVal;
129  } else {
130  Kokkos::atomic_inc(&counts_[hashVal]);
131  }
132  }
133 
135  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
136  dst.first = 0;
137  dst.second = key_type(0);
138  }
139 
140  KOKKOS_INLINE_FUNCTION void
141  join(value_type& dst,
142  const value_type& src) const {
143  const bool good = dst.first == 0 || src.first == 0;
144  dst.first = good ? 0 : dst.first;
145  // leave dst.second as it is, to get the "first" key
146  }
147 
148  private:
150  counts_view_type counts_;
152  keys_view_type keys_;
154  size_type size_;
155 };
156 
167 template <class KeyType>
169  KOKKOS_INLINE_FUNCTION
171 #if KOKKOS_VERSION >= 40799
172  : minKey_(::KokkosKernels::ArithTraits<KeyType>::max())
173 #else
174  : minKey_(::Kokkos::ArithTraits<KeyType>::max())
175 #endif
176  ,
177  // min() for a floating-point type returns the minimum _positive_
178  // normalized value. This is different than for integer types.
179  // lowest() is new in C++11 and returns the least value, always
180  // negative for signed finite types.
181  //
182  // mfh 23 May 2015: I have heard reports that
183  // std::numeric_limits<int>::lowest() does not exist with the
184  // Intel compiler. I'm not sure if the users in question actually
185  // enabled C++11. However, it's easy enough to work around this
186  // issue. The standard floating-point types are signed and have a
187  // sign bit, so lowest() is just -max(). For integer types, we
188  // can use min() instead.
189 #if KOKKOS_VERSION >= 40799
190  maxKey_(::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max())
191 #else
192  maxKey_(::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max())
193 #endif
194  , success_(true) {
195  static_assert(std::is_arithmetic<KeyType>::value,
196  "FillPairsResult: "
197  "KeyType must be some kind of number type.");
198  }
199 
200  KOKKOS_INLINE_FUNCTION
201  FillPairsResult(const KeyType& initMinKey,
202  const KeyType& initMaxKey)
203  : minKey_(initMinKey)
204  , maxKey_(initMaxKey)
205  , success_(true) {
206  static_assert(std::is_arithmetic<KeyType>::value,
207  "FillPairsResult: "
208  "KeyType must be some kind of number type.");
209  }
210 
211  KeyType minKey_;
212  KeyType maxKey_;
213  bool success_;
214 };
215 
245 template <class PairsViewType,
246  class KeysViewType,
247  class CountsViewType,
248  class SizeType = typename KeysViewType::size_type>
249 class FillPairs {
250  public:
251  typedef typename CountsViewType::non_const_type counts_view_type;
252  typedef typename counts_view_type::const_type offsets_view_type;
253 
254  typedef PairsViewType pairs_view_type;
255  typedef typename KeysViewType::const_type keys_view_type;
256  typedef typename offsets_view_type::execution_space execution_space;
257  typedef typename offsets_view_type::memory_space memory_space;
258  typedef SizeType size_type;
259 
260  typedef typename keys_view_type::non_const_value_type key_type;
261  typedef typename pairs_view_type::non_const_value_type pair_type;
262 
264 
265  // mfh 23 May 2015: Having a device_type typedef in the functor
266  // along with an execution_space typedef causes compilation issues.
267  // This is because one of Kokkos' partial specializations picks up
268  // on the device_type typedef, and another picks up on the
269  // execution_space typedef. The former is a legacy of a previous
270  // design iteration of Kokkos, which did not separate memory and
271  // execution spaces.
273 
284  FillPairs(const pairs_view_type& pairs,
285  const counts_view_type& counts,
286  const offsets_view_type& ptr,
287  const keys_view_type& keys,
288  const typename pair_type::second_type startingValue)
289  : pairs_(pairs)
290  , counts_(counts)
291  , ptr_(ptr)
292  , keys_(keys)
293  , size_(counts.extent(0))
294  , startingValue_(startingValue)
295 #if KOKKOS_VERSION >= 40799
296  , initMinKey_(::KokkosKernels::ArithTraits<key_type>::max())
297  , initMaxKey_(::KokkosKernels::ArithTraits<key_type>::is_integer ? ::KokkosKernels::ArithTraits<key_type>::min() : -::KokkosKernels::ArithTraits<key_type>::max()){}
298 #else
299  , initMinKey_(::Kokkos::ArithTraits<key_type>::max())
300  , initMaxKey_(::Kokkos::ArithTraits<key_type>::is_integer ? ::Kokkos::ArithTraits<key_type>::min() : -::Kokkos::ArithTraits<key_type>::max()) {
301  }
302 #endif
303 
322  FillPairs(const pairs_view_type& pairs,
323  const counts_view_type& counts,
324  const offsets_view_type& ptr,
325  const keys_view_type& keys,
326  const typename pair_type::second_type startingValue,
327  const key_type initMinKey,
328  const key_type initMaxKey)
329  : pairs_(pairs)
330  , counts_(counts)
331  , ptr_(ptr)
332  , keys_(keys)
333  , size_(counts.extent(0))
334  , startingValue_(startingValue)
335  , initMinKey_(initMinKey)
336  , initMaxKey_(initMaxKey) {
337  }
338 
340  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
341  dst.minKey_ = initMinKey_;
342  dst.maxKey_ = initMaxKey_;
343  dst.success_ = true;
344  }
345 
346  KOKKOS_INLINE_FUNCTION void
347  join(value_type& dst,
348  const value_type& src) const {
349  if (src.maxKey_ > dst.maxKey_) {
350  dst.maxKey_ = src.maxKey_;
351  }
352  if (src.minKey_ < dst.minKey_) {
353  dst.minKey_ = src.minKey_;
354  }
355  dst.success_ = dst.success_ && src.success_;
356  }
357 
362  KOKKOS_INLINE_FUNCTION void
363  operator()(const size_type& i, value_type& dst) const {
364  typedef typename hash_type::result_type hash_value_type;
365  typedef typename offsets_view_type::non_const_value_type offset_type;
366  typedef typename pair_type::second_type val_type;
367  typedef typename std::remove_reference<decltype(counts_[0])>::type atomic_incr_type;
368 
369  const key_type key = keys_[i];
370  if (key > dst.maxKey_) {
371  dst.maxKey_ = key;
372  }
373  if (key < dst.minKey_) {
374  dst.minKey_ = key;
375  }
376  const val_type theVal = startingValue_ + static_cast<val_type>(i);
377  const hash_value_type hashVal = hash_type::hashFunc(key, size_);
378 
379  // Return the old count; decrement afterwards.
380  const offset_type count = Kokkos::atomic_fetch_add(&counts_[hashVal], atomic_incr_type(-1));
381  if (count == 0) {
382  dst.success_ = false; // FAILURE!
383  } else {
384  const offset_type curPos = ptr_[hashVal + 1] - count;
385 
386  pairs_[curPos].first = key;
387  pairs_[curPos].second = theVal;
388  }
389  }
390 
391  private:
392  pairs_view_type pairs_;
393  counts_view_type counts_;
394  offsets_view_type ptr_;
395  keys_view_type keys_;
396  size_type size_;
397  typename pair_type::second_type startingValue_;
399  key_type initMinKey_;
401  key_type initMaxKey_;
402 };
403 
427 template <class OffsetsViewType,
428  class PairsViewType,
429  class SizeType = typename OffsetsViewType::size_type>
431  public:
432  typedef typename OffsetsViewType::const_type offsets_view_type;
433  typedef typename PairsViewType::const_type pairs_view_type;
434  typedef typename offsets_view_type::execution_space execution_space;
435  typedef typename offsets_view_type::memory_space memory_space;
436  typedef SizeType size_type;
437 
438  // The result of the check is whether the table has one or more duplicates.
439  typedef int value_type;
440 
445  CheckForDuplicateKeys(const pairs_view_type& pairs,
446  const offsets_view_type& ptr)
447  : pairs_(pairs)
448  , ptr_(ptr)
449  , size_(ptr_.extent(0) == 0 ? size_type(0) : ptr_.extent(0) - 1) {}
450 
452  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
453  dst = 0;
454  }
455 
457  KOKKOS_INLINE_FUNCTION void
458  join(value_type& dst,
459  const value_type& src) const {
460  dst = dst + src > 0 ? 1 : 0;
461  }
462 
464  KOKKOS_INLINE_FUNCTION void
465  operator()(const size_type& i, value_type& dst) const {
466  typedef typename offsets_view_type::non_const_value_type offset_type;
467  typedef typename pairs_view_type::non_const_value_type pair_type;
468  typedef typename pair_type::first_type key_type;
469 
470  if (dst > 0) {
471  return; // we've already found duplicate keys elsewhere
472  } else {
473  const offset_type beg = ptr_[i];
474  const offset_type end = ptr_[i + 1];
475  bool foundDuplicateKey = false;
476  // This is an ~ n^2 algorithm in the worst case, where n is the
477  // max number of keys that hash to the same bucket. However, if
478  // the hash function is reasonable, n should be much less than
479  // the total number of keys.
480  for (offset_type j = beg + 1; j < end; ++j) {
481  const key_type curKey = pairs_[j].first;
482  for (offset_type k = beg; k < j; ++k) {
483  if (pairs_[k].first == curKey) {
484  foundDuplicateKey = true;
485  break;
486  }
487  }
488  }
489  dst = (dst > 0) || foundDuplicateKey ? 1 : 0;
490  }
491  }
492 
493  private:
494  pairs_view_type pairs_;
495  offsets_view_type ptr_;
496  size_type size_;
497 };
498 
499 } // namespace FHT
500 
501 //
502 // Here begins the actual implementation of FixedHashTable.
503 //
504 
505 template <class KeyType, class ValueType, class DeviceType>
508  : minVal_(0)
509  , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
510  , checkedForDuplicateKeys_(false) {
511  const ValueType startingValue = static_cast<ValueType>(0);
512  const KeyType initMinKey = this->minKey_;
513  const KeyType initMaxKey = this->maxKey_;
514  this->init(keys, startingValue, initMinKey, initMaxKey,
515  initMinKey, initMinKey, false);
516 }
517 
518 template <class KeyType, class ValueType, class DeviceType>
520  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys)
521  : minVal_(0)
522  , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
523  , checkedForDuplicateKeys_(false) {
524  typedef typename keys_type::non_const_type nonconst_keys_type;
525 
526  // mfh 01 May 2015: I don't trust that
527  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
528  // so I ensure this manually.
529  const ValueType startingValue = static_cast<ValueType>(0);
530  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
531  keys.size());
532  using Kokkos::ViewAllocateWithoutInitializing;
533  nonconst_keys_type keys_d(ViewAllocateWithoutInitializing("FixedHashTable::keys"),
534  keys_k.extent(0));
535  // DEEP_COPY REVIEW - NOT TESTED
536  Kokkos::deep_copy(keys_d, keys_k);
537  const KeyType initMinKey = this->minKey_;
538  const KeyType initMaxKey = this->maxKey_;
539  this->init(keys_d, startingValue, initMinKey, initMaxKey,
540  initMinKey, initMinKey, false);
541 }
542 
543 template <class KeyType, class ValueType, class DeviceType>
545  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
546  const ValueType startingValue)
547  : minVal_(startingValue)
548  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
549  , checkedForDuplicateKeys_(false) {
550  typedef typename keys_type::non_const_type nonconst_keys_type;
551 
552  // mfh 01 May 2015: I don't trust that
553  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
554  // so I ensure this manually.
555  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
556  keys.size());
557  using Kokkos::ViewAllocateWithoutInitializing;
558  nonconst_keys_type keys_d(ViewAllocateWithoutInitializing("FixedHashTable::keys"),
559  keys_k.extent(0));
560  // DEEP_COPY REVIEW - HOST-TO_DEVICE
561  Kokkos::deep_copy(execution_space(), keys_d, keys_k);
562 
563 #if KOKKOS_VERSION >= 40799
564  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
565 #else
566  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
567 #endif
568  // min() for a floating-point type returns the minimum _positive_
569  // normalized value. This is different than for integer types.
570  // lowest() is new in C++11 and returns the least value, always
571  // negative for signed finite types.
572  //
573  // mfh 23 May 2015: I have heard reports that
574  // std::numeric_limits<int>::lowest() does not exist with the Intel
575  // compiler. I'm not sure if the users in question actually enabled
576  // C++11. However, it's easy enough to work around this issue. The
577  // standard floating-point types are signed and have a sign bit, so
578  // lowest() is just -max(). For integer types, we can use min()
579  // instead.
580 #if KOKKOS_VERSION >= 40799
581  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
582 #else
583  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
584 #endif
585  this->init(keys_d, startingValue, initMinKey, initMaxKey,
586  initMinKey, initMinKey, false);
587 }
588 
589 template <class KeyType, class ValueType, class DeviceType>
592  const KeyType firstContigKey,
593  const KeyType lastContigKey,
594  const ValueType startingValue)
595  : minVal_(startingValue)
596  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
597  , firstContigKey_(firstContigKey)
598  , lastContigKey_(lastContigKey)
599  , checkedForDuplicateKeys_(false) {
600 #if KOKKOS_VERSION >= 40799
601  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
602 #else
603  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
604 #endif
605  // min() for a floating-point type returns the minimum _positive_
606  // normalized value. This is different than for integer types.
607  // lowest() is new in C++11 and returns the least value, always
608  // negative for signed finite types.
609  //
610  // mfh 23 May 2015: I have heard reports that
611  // std::numeric_limits<int>::lowest() does not exist with the Intel
612  // compiler. I'm not sure if the users in question actually enabled
613  // C++11. However, it's easy enough to work around this issue. The
614  // standard floating-point types are signed and have a sign bit, so
615  // lowest() is just -max(). For integer types, we can use min()
616  // instead.
617 #if KOKKOS_VERSION >= 40799
618  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
619 #else
620  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
621 #endif
622  this->init(keys, startingValue, initMinKey, initMaxKey,
623  firstContigKey, lastContigKey, true);
624 }
625 
626 template <class KeyType, class ValueType, class DeviceType>
628  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
629  const KeyType firstContigKey,
630  const KeyType lastContigKey,
631  const ValueType startingValue)
632  : minVal_(startingValue)
633  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
634  , firstContigKey_(firstContigKey)
635  , lastContigKey_(lastContigKey)
636  , checkedForDuplicateKeys_(false) {
637  typedef typename keys_type::non_const_type nonconst_keys_type;
638 
639  // mfh 01 May 2015: I don't trust that
640  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
641  // so I ensure this manually.
642  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
643  keys.size());
644  using Kokkos::ViewAllocateWithoutInitializing;
645  nonconst_keys_type keys_d(ViewAllocateWithoutInitializing("FixedHashTable::keys"),
646  keys_k.extent(0));
647  // DEEP_COPY REVIEW - NOT TESTED
648  Kokkos::deep_copy(keys_d, keys_k);
649 
650 #if KOKKOS_VERSION >= 40799
651  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
652 #else
653  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
654 #endif
655  // min() for a floating-point type returns the minimum _positive_
656  // normalized value. This is different than for integer types.
657  // lowest() is new in C++11 and returns the least value, always
658  // negative for signed finite types.
659  //
660  // mfh 23 May 2015: I have heard reports that
661  // std::numeric_limits<int>::lowest() does not exist with the Intel
662  // compiler. I'm not sure if the users in question actually enabled
663  // C++11. However, it's easy enough to work around this issue. The
664  // standard floating-point types are signed and have a sign bit, so
665  // lowest() is just -max(). For integer types, we can use min()
666  // instead.
667 #if KOKKOS_VERSION >= 40799
668  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
669 #else
670  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
671 #endif
672  this->init(keys_d, startingValue, initMinKey, initMaxKey,
673  firstContigKey, lastContigKey, true);
674 }
675 
676 template <class KeyType, class ValueType, class DeviceType>
679  const ValueType startingValue)
680  : minVal_(startingValue)
681  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
682  , checkedForDuplicateKeys_(false) {
683 #if KOKKOS_VERSION >= 40799
684  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
685 #else
686  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
687 #endif
688  // min() for a floating-point type returns the minimum _positive_
689  // normalized value. This is different than for integer types.
690  // lowest() is new in C++11 and returns the least value, always
691  // negative for signed finite types.
692  //
693  // mfh 23 May 2015: I have heard reports that
694  // std::numeric_limits<int>::lowest() does not exist with the Intel
695  // compiler. I'm not sure if the users in question actually enabled
696  // C++11. However, it's easy enough to work around this issue. The
697  // standard floating-point types are signed and have a sign bit, so
698  // lowest() is just -max(). For integer types, we can use min()
699  // instead.
700 #if KOKKOS_VERSION >= 40799
701  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
702 #else
703  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
704 #endif
705  this->init(keys, startingValue, initMinKey, initMaxKey,
706  initMinKey, initMinKey, false);
707 }
708 
709 template <class KeyType, class ValueType, class DeviceType>
711  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
712  const Teuchos::ArrayView<const ValueType>& vals)
713  : contiguousValues_(false)
714  , checkedForDuplicateKeys_(false) {
715  // mfh 01 May 2015: I don't trust that
716  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
717  // so I ensure this manually.
718  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
719  keys.size());
720  host_input_vals_type vals_k(vals.size() == 0 ? NULL : vals.getRawPtr(),
721  vals.size());
722 #if KOKKOS_VERSION >= 40799
723  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
724 #else
725  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
726 #endif
727  // min() for a floating-point type returns the minimum _positive_
728  // normalized value. This is different than for integer types.
729  // lowest() is new in C++11 and returns the least value, always
730  // negative for signed finite types.
731  //
732  // mfh 23 May 2015: I have heard reports that
733  // std::numeric_limits<int>::lowest() does not exist with the Intel
734  // compiler. I'm not sure if the users in question actually enabled
735  // C++11. However, it's easy enough to work around this issue. The
736  // standard floating-point types are signed and have a sign bit, so
737  // lowest() is just -max(). For integer types, we can use min()
738  // instead.
739 #if KOKKOS_VERSION >= 40799
740  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
741 #else
742  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
743 #endif
744  this->init(keys_k, vals_k, initMinKey, initMaxKey);
745 }
746 
747 template <class KeyType, class ValueType, class DeviceType>
749  init(const keys_type& keys,
750  ValueType startingValue,
751  KeyType initMinKey,
752  KeyType initMaxKey,
753  KeyType firstContigKey,
754  KeyType lastContigKey,
755  const bool computeInitContigKeys) {
756  using Kokkos::subview;
757  using Kokkos::ViewAllocateWithoutInitializing;
758  using Teuchos::TypeNameTraits;
759  typedef typename std::decay<decltype(keys.extent(0))>::type size_type;
760  Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(7-arg)");
761  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
762 
763  const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
764  {
765 #if KOKKOS_VERSION >= 40799
766  const offset_type theMaxVal = ::KokkosKernels::ArithTraits<offset_type>::max();
767 #else
768  const offset_type theMaxVal = ::Kokkos::ArithTraits<offset_type>::max();
769 #endif
770  const size_type maxValST = static_cast<size_type>(theMaxVal);
771  TEUCHOS_TEST_FOR_EXCEPTION(keys.extent(0) > maxValST, std::invalid_argument, prefix << "The "
772  "number of keys "
773  << keys.extent(0) << " does not fit in "
774  "offset_type = "
775  << TypeNameTraits<offset_type>::name() << ", whose "
776  "max value is "
777  << theMaxVal << ". This means that it is not possible to "
778  "use this constructor.");
779  }
780 #if KOKKOS_VERSION >= 40799
781  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) >
782  static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
783  std::invalid_argument,
784  "Tpetra::Details::FixedHashTable: The number of "
785  "keys "
786  << numKeys << " is greater than the maximum representable "
787  "ValueType value "
788  << ::KokkosKernels::ArithTraits<ValueType>::max() << ". "
789  "This means that it is not possible to use this constructor.");
790 #else
791  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) >
792  static_cast<unsigned long long>(::Kokkos::ArithTraits<ValueType>::max()),
793  std::invalid_argument,
794  "Tpetra::Details::FixedHashTable: The number of "
795  "keys "
796  << numKeys << " is greater than the maximum representable "
797  "ValueType value "
798  << ::Kokkos::ArithTraits<ValueType>::max() << ". "
799  "This means that it is not possible to use this constructor.");
800 #endif
801  TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error, prefix << "This class currently only works when the number of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
802  "developers.");
803 
804  const bool buildInParallel =
805  FHT::worthBuildingFixedHashTableInParallel<execution_space>();
806  const bool debug = ::Tpetra::Details::Behavior::debug();
807 
808  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
809  // could change that by setting up ptr and val as Kokkos::DualView
810  // instances. If we do that, since we are filling on host for now,
811  // we want to make sure that we only zero-fill ptr on host
812  // initially, and that we don't fill val at all. Once we finish
813  // Kokkos-izing all the set-up kernels, we won't need DualView for
814  // either ptr or val.
815 
816  if (computeInitContigKeys) {
817  // Find the first and last initial contiguous keys. If we find a
818  // long sequence of initial contiguous keys, we can save space by
819  // not storing them explicitly as pairs in the hash table.
820  //
821  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
822  // ("min index such that the difference between the current key and
823  // the next != 1"), which takes multiple passes over the data. We
824  // could fuse it with CountBuckets (only update counts on 'final'
825  // pass). However, we're really just moving this sequential search
826  // out of Map's constructor here, so there is no loss in doing it
827  // sequentially for now. Later, we can work on parallelization.
828  if (numKeys > 0) {
829  // FIXME: make it a parallel kernel with no host copy
830  auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
831  keys);
832  firstContigKey_ = keys_h[0];
833  // Start with one plus, then decrement at the end. That lets us do
834  // only one addition per loop iteration, rather than two (if we test
835  // against lastContigKey + 1 and then increment lastContigKey).
836  lastContigKey_ = firstContigKey_ + 1;
837 
838  // We will only store keys in the table that are not part of the
839  // initial contiguous sequence. It's possible for the initial
840  // contiguous sequence to be trivial, which for a nonzero number of
841  // keys means that the "sequence" has length 1.
842  for (offset_type k = 1; k < numKeys; ++k) {
843  if (lastContigKey_ != keys_h[k]) {
844  break;
845  }
846  ++lastContigKey_;
847  }
848  --lastContigKey_;
849  }
850  } else {
851  firstContigKey_ = firstContigKey;
852  lastContigKey_ = lastContigKey;
853  }
854 
855  offset_type startIndex;
856  if (numKeys > 0) {
857  initMinKey = std::min(initMinKey, firstContigKey_);
858  initMaxKey = std::max(initMaxKey, lastContigKey_);
859  startIndex = static_cast<offset_type>(lastContigKey_ - firstContigKey_);
860  } else {
861  startIndex = 0;
862  }
863 
864  const offset_type theNumKeys = numKeys - startIndex;
865  const offset_type size = hash_type::getRecommendedSize(theNumKeys);
866 #ifdef HAVE_TPETRA_DEBUG
867  TEUCHOS_TEST_FOR_EXCEPTION(
868  size == 0 && numKeys != 0, std::logic_error,
869  "Tpetra::Details::FixedHashTable constructor: "
870  "getRecommendedSize("
871  << numKeys << ") returned zero, "
872  "even though the number of keys "
873  << numKeys << " is nonzero. "
874  "Please report this bug to the Tpetra developers.");
875 #endif // HAVE_TPETRA_DEBUG
876  keys_type theKeys =
877  subview(keys, std::pair<offset_type, offset_type>(startIndex, numKeys));
878 
879  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
880  // fence here, but we do before other allocations.
881 
882  // The array of counts must be separate from the array of offsets,
883  // in order for parallel_scan to work correctly.
884  typedef typename ptr_type::non_const_type counts_type;
885  counts_type counts("Tpetra::FixedHashTable::counts", size);
886 
887  //
888  // Count the number of "buckets" per offsets array (ptr) entry.
889  //
890 
891  // Will only create the mirror for buildInParallel false - but then use it in two places
892  typename keys_type::host_mirror_type theKeysHost;
893 
894  // The Kokkos kernel uses atomic update instructions to count the
895  // number of "buckets" per offsets array (ptr) entry. Atomic
896  // updates incur overhead, even in the sequential case. The Kokkos
897  // kernel is still correct in that case, but I would rather not
898  // incur overhead then.
899  if (buildInParallel) {
900  FHT::CountBuckets<counts_type, keys_type> functor(counts, theKeys, size);
901  using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
902  const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
903  if (debug) {
904  using key_type = typename keys_type::non_const_value_type;
905  Kokkos::pair<int, key_type> err;
906  Kokkos::parallel_reduce(kernelLabel, range_type(0, theNumKeys),
907  functor, err);
908  TEUCHOS_TEST_FOR_EXCEPTION(err.first != 0, std::logic_error,
909  "Tpetra::Details::FixedHashTable "
910  "constructor: CountBuckets found a key "
911  << err.second << " that "
912  "results in an out-of-bounds hash value.");
913  } else {
914  Kokkos::parallel_for(kernelLabel, range_type(0, theNumKeys), functor);
915  }
916  } else {
917  Kokkos::HostSpace hostMemSpace;
918  theKeysHost = Kokkos::create_mirror_view(theKeys);
919  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
920  Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
921  auto countsHost = Kokkos::create_mirror_view(hostMemSpace, counts);
922 
923  for (offset_type k = 0; k < theNumKeys; ++k) {
924  using key_type = typename keys_type::non_const_value_type;
925  const key_type key = theKeysHost[k];
926 
927  using hash_value_type = typename hash_type::result_type;
928  const hash_value_type hashVal = hash_type::hashFunc(key, size);
929  TEUCHOS_TEST_FOR_EXCEPTION(hashVal < hash_value_type(0) ||
930  hashVal >= hash_value_type(countsHost.extent(0)),
931  std::logic_error,
932  "Tpetra::Details::FixedHashTable "
933  "constructor: Sequential CountBuckets found a key "
934  << key
935  << " that results in an out-of-bounds hash value.");
936 
937  ++countsHost[hashVal];
938  }
939  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
940  Kokkos::deep_copy(execution_space(), counts, countsHost);
941  }
942 
943  // KJ: This fence is not required for the 2-argument deep_copy which calls
944  // fence, but will be required if switched to the 3-argumemt deep_copy which
945  // passes a space. The 3-argument form does not fence.
946  execution_space().fence();
947 
948  // Kokkos::View fills with zeros by default.
949  typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
950 
951  // Compute row offsets via prefix sum:
952  //
953  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
954  //
955  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
956  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
957  // formula is ptr[i+1] += ptr[i].
958  //
959  // parallel_scan does not incur overhead with Kokkos::Serial, but
960  // with actual parallel execution spaces, it does require multiple
961  // passes over the data. Thus, it still makes sense to have a
962  // sequential fall-back.
963 
965  if (buildInParallel) {
966  computeOffsetsFromCounts(ptr, counts);
967  }
968 
969  if (!buildInParallel || debug) {
970  Kokkos::HostSpace hostMemSpace;
971  auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
972  auto ptr_h = Kokkos::create_mirror_view(hostMemSpace, ptr);
973 
974 #ifdef KOKKOS_ENABLE_SERIAL
975  Kokkos::Serial hostExecSpace;
976 #else
977  Kokkos::DefaultHostExecutionSpace hostExecSpace;
978 #endif // KOKKOS_ENABLE_SERIAL
979 
980  computeOffsetsFromCounts(hostExecSpace, ptr_h, counts_h);
981  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
982  Kokkos::deep_copy(execution_space(), ptr, ptr_h);
983 
984  if (debug) {
985  bool bad = false;
986  for (offset_type i = 0; i < size; ++i) {
987  if (ptr_h[i + 1] != ptr_h[i] + counts_h[i]) {
988  bad = true;
989  }
990  }
991  TEUCHOS_TEST_FOR_EXCEPTION(bad, std::logic_error,
992  "Tpetra::Details::FixedHashTable "
993  "constructor: computeOffsetsFromCounts gave an incorrect "
994  "result.");
995  }
996  }
997 
998  // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
999  // This fence is necessary as we need to make sure that the offset view
1000  // completes before the view is used in the next functor.
1001  execution_space().fence();
1002 
1003  // Allocate the array of (key,value) pairs. Don't fill it with
1004  // zeros, because we will fill it with actual data below.
1005  typedef typename val_type::non_const_type nonconst_val_type;
1006  nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1007  theNumKeys);
1008 
1009  // Fill in the hash table's "values" (the (key,value) pairs).
1010  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1011  typename ptr_type::non_const_type>
1012  functor_type;
1013  typename functor_type::value_type result(initMinKey, initMaxKey);
1014 
1015  const ValueType newStartingValue = startingValue + static_cast<ValueType>(startIndex);
1016  if (buildInParallel) {
1017  functor_type functor(val, counts, ptr, theKeys, newStartingValue,
1018  initMinKey, initMaxKey);
1019  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1020  Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::FillPairs", range_type(0, theNumKeys), functor, result);
1021  } else {
1022  Kokkos::HostSpace hostMemSpace;
1023  auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1024  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1025  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1026  for (offset_type k = 0; k < theNumKeys; ++k) {
1027  typedef typename hash_type::result_type hash_value_type;
1028  const KeyType key = theKeysHost[k];
1029  if (key > result.maxKey_) {
1030  result.maxKey_ = key;
1031  }
1032  if (key < result.minKey_) {
1033  result.minKey_ = key;
1034  }
1035  const ValueType theVal = newStartingValue + static_cast<ValueType>(k);
1036  const hash_value_type hashVal = hash_type::hashFunc(key, size);
1037 
1038  // Return the old count; decrement afterwards.
1039  const offset_type count = counts_h[hashVal];
1040  --counts_h[hashVal];
1041  if (count == 0) {
1042  result.success_ = false; // FAILURE!
1043  break;
1044  } else {
1045  const offset_type curPos = ptr_h[hashVal + 1] - count;
1046  val_h[curPos].first = key;
1047  val_h[curPos].second = theVal;
1048  }
1049  }
1050  Kokkos::deep_copy(counts, counts_h); // restore
1051  Kokkos::deep_copy(val, val_h); // restore
1052  }
1053 
1054  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1055  // reports of exceptions being thrown in Albany.
1056  //
1057  // TEUCHOS_TEST_FOR_EXCEPTION
1058  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1059  // "init: Filling the hash table failed! Please report this bug to the "
1060  // "Tpetra developers.");
1061  (void)result; // prevent build warning (set but unused variable)
1062 
1063  // "Commit" the computed arrays and other computed quantities.
1064  ptr_ = ptr;
1065  val_ = val;
1066  minKey_ = result.minKey_;
1067  maxKey_ = result.maxKey_;
1068  // We've already set firstContigKey_ and lastContigKey_ above.
1069 }
1070 
1071 template <class KeyType, class ValueType, class DeviceType>
1072 void FixedHashTable<KeyType, ValueType, DeviceType>::
1073  init(const host_input_keys_type& keys,
1074  const host_input_vals_type& vals,
1075  KeyType initMinKey,
1076  KeyType initMaxKey) {
1077  Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(4-arg)");
1078  const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
1079 #if KOKKOS_VERSION >= 40799
1080  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
1081  std::invalid_argument,
1082  "Tpetra::Details::FixedHashTable: The number of "
1083  "keys "
1084  << numKeys << " is greater than the maximum representable "
1085  "ValueType value "
1086  << ::KokkosKernels::ArithTraits<ValueType>::max() << ".");
1087 #else
1088  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::Kokkos::ArithTraits<ValueType>::max()),
1089  std::invalid_argument,
1090  "Tpetra::Details::FixedHashTable: The number of "
1091  "keys "
1092  << numKeys << " is greater than the maximum representable "
1093  "ValueType value "
1094  << ::Kokkos::ArithTraits<ValueType>::max() << ".");
1095 #endif
1096  TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error,
1097  "Tpetra::"
1098  "Details::FixedHashTable: This class currently only works when the number "
1099  "of keys is <= INT_MAX = "
1100  << INT_MAX << ". If this is a problem for you"
1101  ", please talk to the Tpetra developers.");
1102 
1103  // There's no need to find the first and last initial contiguous
1104  // keys in this case, because if we reach this init() function, then
1105  // hasContiguousValues() is false and so get() doesn't use the
1106  // initial contiguous sequence of keys.
1107 
1108  const offset_type size = hash_type::getRecommendedSize(numKeys);
1109 #ifdef HAVE_TPETRA_DEBUG
1110  TEUCHOS_TEST_FOR_EXCEPTION(
1111  size == 0 && numKeys != 0, std::logic_error,
1112  "Tpetra::Details::FixedHashTable constructor: "
1113  "getRecommendedSize("
1114  << numKeys << ") returned zero, "
1115  "even though the number of keys "
1116  << numKeys << " is nonzero. "
1117  "Please report this bug to the Tpetra developers.");
1118 #endif // HAVE_TPETRA_DEBUG
1119 
1120  // FIXME: Investigate a couple options:
1121  // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1122  // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1123  // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1124  // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1125  // been "Kokkos-ized".
1126  Kokkos::HostSpace hostMemSpace;
1127  typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
1128  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1129 
1130  // Allocate the array of key,value pairs. Don't waste time filling
1131  // it with zeros, because we will fill it with actual data below.
1132  using Kokkos::ViewAllocateWithoutInitializing;
1133  typedef typename val_type::non_const_type nonconst_val_type;
1134  nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1135  numKeys);
1136  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1137 
1138  // Compute number of entries in each hash table position.
1139  for (offset_type k = 0; k < numKeys; ++k) {
1140  const typename hash_type::result_type hashVal =
1141  hash_type::hashFunc(keys[k], size);
1142  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1143  ++ptr_h[hashVal + 1];
1144  }
1145 
1146  // Compute row offsets via prefix sum:
1147  //
1148  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1149  //
1150  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1151  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1152  // formula is ptr[i+1] += ptr[i].
1153  for (offset_type i = 0; i < size; ++i) {
1154  ptr_h[i + 1] += ptr_h[i];
1155  }
1156  // ptr[0] = 0; // We've already done this when initializing ptr above.
1157 
1158  // curRowStart[i] is the offset of the next element in row i.
1159  typename ptr_type::non_const_type::host_mirror_type curRowStart("Tpetra::FixedHashTable::curRowStart", size);
1160 
1161  // Fill in the hash table.
1162  FHT::FillPairsResult<KeyType> result(initMinKey, initMaxKey);
1163  for (offset_type k = 0; k < numKeys; ++k) {
1164  typedef typename hash_type::result_type hash_value_type;
1165  const KeyType key = keys[k];
1166  if (key > result.maxKey_) {
1167  result.maxKey_ = key;
1168  }
1169  if (key < result.minKey_) {
1170  result.minKey_ = key;
1171  }
1172  const ValueType theVal = vals[k];
1173  if (theVal > maxVal_) {
1174  maxVal_ = theVal;
1175  }
1176  if (theVal < minVal_) {
1177  minVal_ = theVal;
1178  }
1179  const hash_value_type hashVal = hash_type::hashFunc(key, size);
1180 
1181  const offset_type offset = curRowStart[hashVal];
1182  const offset_type curPos = ptr_h[hashVal] + offset;
1183  if (curPos >= ptr_h[hashVal + 1]) {
1184  result.success_ = false; // FAILURE!
1185  } else {
1186  val_h[curPos].first = key;
1187  val_h[curPos].second = theVal;
1188  ++curRowStart[hashVal];
1189  }
1190  }
1191 
1192  TEUCHOS_TEST_FOR_EXCEPTION(!result.success_, std::logic_error,
1193  "Tpetra::Details::FixedHashTable::"
1194  "init: Filling the hash table failed! Please report this bug to the "
1195  "Tpetra developers.");
1196 
1197  // "Commit" the computed arrays and other computed quantities.
1198  Kokkos::deep_copy(ptr, ptr_h);
1199  Kokkos::deep_copy(val, val_h);
1200 
1201  ptr_ = ptr;
1202  val_ = val;
1203  minKey_ = result.minKey_;
1204  maxKey_ = result.maxKey_;
1205  // We've already assigned to minVal_ and maxVal_ above.
1206 }
1207 
1208 template <class KeyType, class ValueType, class DeviceType>
1211  if (!checkedForDuplicateKeys_) {
1212  hasDuplicateKeys_ = checkForDuplicateKeys();
1213  checkedForDuplicateKeys_ = true;
1214  }
1215  return hasDuplicateKeys_;
1216 }
1217 
1218 template <class KeyType, class ValueType, class DeviceType>
1220  checkForDuplicateKeys() const {
1221  const offset_type size = this->getSize();
1222  // It's allowed for the hash table to have a positive number of
1223  // buckets (getSize()), but still store no entries (numPairs()).
1224  // Both cases trivially entail no duplicates.
1225  if (size == 0 || this->numPairs() == 0) {
1226  return false;
1227  } else {
1228  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1229  functor_type functor(val_, ptr_);
1230  int hasDupKeys = 0;
1231  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1232  Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::CheckForDuplicateKeys", range_type(0, size), functor, hasDupKeys);
1233  return hasDupKeys > 0;
1234  }
1235 }
1236 
1237 template <class KeyType, class ValueType, class DeviceType>
1238 std::string
1240  description() const {
1241  std::ostringstream oss;
1242  oss << "FixedHashTable<"
1243  << Teuchos::TypeNameTraits<KeyType>::name() << ","
1244  << Teuchos::TypeNameTraits<ValueType>::name() << ">: "
1245  << "{ numKeys: " << val_.extent(0)
1246  << ", tableSize: " << this->getSize() << " }";
1247  return oss.str();
1248 }
1249 
1250 template <class KeyType, class ValueType, class DeviceType>
1252  describe(Teuchos::FancyOStream& out,
1253  const Teuchos::EVerbosityLevel verbLevel) const {
1254  using std::endl;
1255  using std::setw;
1256  using Teuchos::OSTab;
1257  using Teuchos::rcpFromRef;
1258  using Teuchos::TypeNameTraits;
1259  using Teuchos::VERB_DEFAULT;
1260  using Teuchos::VERB_EXTREME;
1261  using Teuchos::VERB_LOW;
1262  using Teuchos::VERB_NONE;
1263 
1264  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1265  // access to ptr_ and val_ from the host.
1266 
1267  Teuchos::EVerbosityLevel vl = verbLevel;
1268  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1269 
1270  if (vl == VERB_NONE) {
1271  // do nothing
1272  } else if (vl == VERB_LOW) {
1273  out << this->description() << endl;
1274  } else { // MEDIUM, HIGH or EXTREME
1275  out << "FixedHashTable:" << endl;
1276  {
1277  OSTab tab1(rcpFromRef(out));
1278 
1279  // const std::string label = this->getObjectLabel ();
1280  // if (label != "") {
1281  // out << "label: " << label << endl;
1282  // }
1283  out << "Template parameters:" << endl;
1284  {
1285  OSTab tab2(rcpFromRef(out));
1286  out << "KeyType: " << TypeNameTraits<KeyType>::name() << endl
1287  << "ValueType: " << TypeNameTraits<ValueType>::name() << endl;
1288  }
1289 
1290  const offset_type tableSize = this->getSize();
1291  const offset_type numKeys = val_.extent(0);
1292 
1293  out << "Table parameters:" << endl;
1294  {
1295  OSTab tab2(rcpFromRef(out));
1296  out << "numKeys: " << numKeys << endl
1297  << "tableSize: " << tableSize << endl;
1298  }
1299 
1300  if (vl >= VERB_EXTREME) {
1301  out << "Contents: ";
1302  if (tableSize == 0 || numKeys == 0) {
1303  out << "[]" << endl;
1304  } else {
1305  out << "[ " << endl;
1306  {
1307  OSTab tab2(rcpFromRef(out));
1308  for (offset_type i = 0; i < tableSize; ++i) {
1309  OSTab tab3(rcpFromRef(out));
1310  out << "[";
1311  for (offset_type k = ptr_[i]; k < ptr_[i + 1]; ++k) {
1312  out << "(" << val_[k].first << "," << val_[k].second << ")";
1313  if (k + 1 < ptr_[i + 1]) {
1314  out << ", ";
1315  }
1316  }
1317  out << "]" << endl;
1318  } // for each table position i
1319  }
1320  out << "]" << endl;
1321  } // The table contains entries
1322  } // vl >= VERB_EXTREME
1323  }
1324  out << endl;
1325  } // if vl > VERB_LOW
1326 }
1327 
1328 } // namespace Details
1329 } // namespace Tpetra
1330 
1331 // Macro that explicitly instantiates FixedHashTable for the given
1332 // local ordinal (LO), global ordinal (GO), and Kokkos device (NODE::device_type)
1333 // types. Note that FixedHashTable's first two template parameters
1334 // occur in the opposite order of most Tpetra classes. This is
1335 // because FixedHashTable performs global-to-local lookup, and the
1336 // convention in templated C++ lookup tables (such as std::map) is
1337 // <KeyType, ValueType>.
1338 //
1339 // This macro must be explanded within the Tpetra::Details namespace.
1340 // Moreover, we need (GO,LO,Host) and (LO,LO,Host)
1341 #if defined(HAVE_TPETRA_INST_SERIAL)
1342 #if defined(HAVE_TPETRA_INST_INT_INT)
1343 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1344  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>;
1345 #else
1346 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1347  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1348  template class Details::FixedHashTable<LO, LO, typename NODE::device_type>;
1349 #endif
1350 #elif defined(HAVE_TPETRA_INST_OPENMP)
1351 #if defined(HAVE_TPETRA_INST_INT_INT)
1352 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1353  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1354  template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>;
1355 #else
1356 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1357  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1358  template class Details::FixedHashTable<LO, LO, typename NODE::device_type>; \
1359  template class Details::FixedHashTable<GO, LO, Kokkos::Device<Kokkos::Serial, Kokkos::HostSpace>>; \
1360  template class Details::FixedHashTable<LO, LO, Kokkos::Device<Kokkos::Serial, Kokkos::HostSpace>>;
1361 #endif
1362 #else
1363 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1364  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1365  template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>; \
1366  template class Details::FixedHashTable<LO, LO, Kokkos::HostSpace::device_type>;
1367 #endif
1368 #endif
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_DEFAULTED_FUNCTION FixedHashTable()=default
Default constructor; makes an empty table.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &, const offset_type &)
The hash function.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting &quot;buckets&quot; in the FixedHashTable.
static bool debug()
Whether Tpetra is in debug mode.
bool success_
Whether fill succeeded (it can only fail on a bug)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
The hash function for FixedHashTable.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Debug reduce version of above operator().
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
std::string description() const
Implementation of Teuchos::Describable interface.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Reduction result for FillPairs functor below.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.