Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_FixedHashTable_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
11 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
12 
13 #include "Tpetra_Details_FixedHashTable_decl.hpp"
15 #ifdef TPETRA_USE_MURMUR_HASH
16 #include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
17 #endif // TPETRA_USE_MURMUR_HASH
18 #if KOKKOS_VERSION >= 40799
19 #include "KokkosKernels_ArithTraits.hpp"
20 #else
21 #include "Kokkos_ArithTraits.hpp"
22 #endif
23 #include "Teuchos_TypeNameTraits.hpp"
25 #include <type_traits>
26 
27 namespace Tpetra {
28 namespace Details {
29 //
30 // This namespace stores utility functions and Kokkos
31 // functors for use in FixedHashTable construction.
32 //
33 namespace FHT {
34 
35 // Is it worth actually using building the FixedHashTable using
36 // parallel threads, instead of just counting in a sequential loop?
37 //
38 // The parallel version of FixedHashTable construction isn't just a
39 // parallelization of the sequential loops. It incurs additional
40 // overheads. For example, the CountBuckets kernel uses atomic update
41 // instructions to count the number of "buckets" per offsets array
42 // (ptr) entry. Atomic updates have overhead, even if only one thread
43 // issues them. The Kokkos kernels are still correct in that case,
44 // but I would rather not incur overhead then. It might make sense to
45 // set the minimum number of threads to something greater than 1, but
46 // we would need experiments to find out.
47 template <class ExecSpace>
48 bool worthBuildingFixedHashTableInParallel() {
49  return ExecSpace().concurrency() > 1;
50 }
51 
52 //
53 // Functors for FixedHashTable initialization
54 //
55 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
56 // should consider replacing all of these functors with in-line
57 // lambdas. The only issue is that we would need to bake the
58 // execution space into the policy, since the default execution space
59 // might differ from the one Tpetra wants to use.
60 
70 template <class CountsViewType,
71  class KeysViewType,
72  class SizeType = typename KeysViewType::size_type>
73 class CountBuckets {
74  public:
75  typedef CountsViewType counts_view_type;
76  typedef KeysViewType keys_view_type;
77  typedef typename CountsViewType::execution_space execution_space;
78  typedef typename CountsViewType::memory_space memory_space;
79  typedef SizeType size_type;
80  typedef typename keys_view_type::non_const_value_type key_type;
81  // mfh 21 May 2015: Having a device_type typedef in the functor
82  // along with an execution_space typedef causes compilation issues.
83  // This is because one of Kokkos' partial specializations picks up
84  // on the device_type typedef, and another picks up on the
85  // execution_space typedef. The former is a legacy of a previous
86  // design iteration of Kokkos, which did not separate memory and
87  // execution spaces.
89 
95  CountBuckets(const counts_view_type& counts,
96  const keys_view_type& keys,
97  const size_type size)
98  : counts_(counts)
99  , keys_(keys)
100  , size_(size) {}
101 
106  KOKKOS_INLINE_FUNCTION void
107  operator()(const size_type& i) const {
108  typedef typename hash_type::result_type hash_value_type;
109 
110  const hash_value_type hashVal = hash_type::hashFunc(keys_[i], size_);
111  Kokkos::atomic_inc(&counts_[hashVal]);
112  }
113 
114  using value_type = Kokkos::pair<int, key_type>;
115 
119  KOKKOS_INLINE_FUNCTION void
120  operator()(const size_type& i, value_type& dst) const {
121  using hash_value_type = typename hash_type::result_type;
122 
123  const key_type keyVal = keys_[i];
124  const hash_value_type hashVal = hash_type::hashFunc(keyVal, size_);
125  if (hashVal < hash_value_type(0) ||
126  hashVal >= hash_value_type(counts_.extent(0))) {
127  dst.first = 1;
128  dst.second = keyVal;
129  } else {
130  Kokkos::atomic_inc(&counts_[hashVal]);
131  }
132  }
133 
135  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
136  dst.first = 0;
137  dst.second = key_type(0);
138  }
139 
140  KOKKOS_INLINE_FUNCTION void
141  join(value_type& dst,
142  const value_type& src) const {
143  const bool good = dst.first == 0 || src.first == 0;
144  dst.first = good ? 0 : dst.first;
145  // leave dst.second as it is, to get the "first" key
146  }
147 
148  private:
150  counts_view_type counts_;
152  keys_view_type keys_;
154  size_type size_;
155 };
156 
167 template <class KeyType>
169  KOKKOS_INLINE_FUNCTION
171 #if KOKKOS_VERSION >= 40799
172  : minKey_(::KokkosKernels::ArithTraits<KeyType>::max())
173 #else
174  : minKey_(::Kokkos::ArithTraits<KeyType>::max())
175 #endif
176  ,
177  // min() for a floating-point type returns the minimum _positive_
178  // normalized value. This is different than for integer types.
179  // lowest() is new in C++11 and returns the least value, always
180  // negative for signed finite types.
181  //
182  // mfh 23 May 2015: I have heard reports that
183  // std::numeric_limits<int>::lowest() does not exist with the
184  // Intel compiler. I'm not sure if the users in question actually
185  // enabled C++11. However, it's easy enough to work around this
186  // issue. The standard floating-point types are signed and have a
187  // sign bit, so lowest() is just -max(). For integer types, we
188  // can use min() instead.
189 #if KOKKOS_VERSION >= 40799
190  maxKey_(::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max())
191 #else
192  maxKey_(::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max())
193 #endif
194  , success_(true) {
195  static_assert(std::is_arithmetic<KeyType>::value,
196  "FillPairsResult: "
197  "KeyType must be some kind of number type.");
198  }
199 
200  KOKKOS_INLINE_FUNCTION
201  FillPairsResult(const KeyType& initMinKey,
202  const KeyType& initMaxKey)
203  : minKey_(initMinKey)
204  , maxKey_(initMaxKey)
205  , success_(true) {
206  static_assert(std::is_arithmetic<KeyType>::value,
207  "FillPairsResult: "
208  "KeyType must be some kind of number type.");
209  }
210 
211  KeyType minKey_;
212  KeyType maxKey_;
213  bool success_;
214 };
215 
245 template <class PairsViewType,
246  class KeysViewType,
247  class CountsViewType,
248  class SizeType = typename KeysViewType::size_type>
249 class FillPairs {
250  public:
251  typedef typename CountsViewType::non_const_type counts_view_type;
252  typedef typename counts_view_type::const_type offsets_view_type;
253 
254  typedef PairsViewType pairs_view_type;
255  typedef typename KeysViewType::const_type keys_view_type;
256  typedef typename offsets_view_type::execution_space execution_space;
257  typedef typename offsets_view_type::memory_space memory_space;
258  typedef SizeType size_type;
259 
260  typedef typename keys_view_type::non_const_value_type key_type;
261  typedef typename pairs_view_type::non_const_value_type pair_type;
262 
264 
265  // mfh 23 May 2015: Having a device_type typedef in the functor
266  // along with an execution_space typedef causes compilation issues.
267  // This is because one of Kokkos' partial specializations picks up
268  // on the device_type typedef, and another picks up on the
269  // execution_space typedef. The former is a legacy of a previous
270  // design iteration of Kokkos, which did not separate memory and
271  // execution spaces.
273 
284  FillPairs(const pairs_view_type& pairs,
285  const counts_view_type& counts,
286  const offsets_view_type& ptr,
287  const keys_view_type& keys,
288  const typename pair_type::second_type startingValue)
289  : pairs_(pairs)
290  , counts_(counts)
291  , ptr_(ptr)
292  , keys_(keys)
293  , size_(counts.extent(0))
294  , startingValue_(startingValue)
295 #if KOKKOS_VERSION >= 40799
296  , initMinKey_(::KokkosKernels::ArithTraits<key_type>::max())
297  , initMaxKey_(::KokkosKernels::ArithTraits<key_type>::is_integer ? ::KokkosKernels::ArithTraits<key_type>::min() : -::KokkosKernels::ArithTraits<key_type>::max()){}
298 #else
299  , initMinKey_(::Kokkos::ArithTraits<key_type>::max())
300  , initMaxKey_(::Kokkos::ArithTraits<key_type>::is_integer ? ::Kokkos::ArithTraits<key_type>::min() : -::Kokkos::ArithTraits<key_type>::max()) {
301  }
302 #endif
303 
322  FillPairs(const pairs_view_type& pairs,
323  const counts_view_type& counts,
324  const offsets_view_type& ptr,
325  const keys_view_type& keys,
326  const typename pair_type::second_type startingValue,
327  const key_type initMinKey,
328  const key_type initMaxKey)
329  : pairs_(pairs)
330  , counts_(counts)
331  , ptr_(ptr)
332  , keys_(keys)
333  , size_(counts.extent(0))
334  , startingValue_(startingValue)
335  , initMinKey_(initMinKey)
336  , initMaxKey_(initMaxKey) {
337  }
338 
340  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
341  dst.minKey_ = initMinKey_;
342  dst.maxKey_ = initMaxKey_;
343  dst.success_ = true;
344  }
345 
346  KOKKOS_INLINE_FUNCTION void
347  join(value_type& dst,
348  const value_type& src) const {
349  if (src.maxKey_ > dst.maxKey_) {
350  dst.maxKey_ = src.maxKey_;
351  }
352  if (src.minKey_ < dst.minKey_) {
353  dst.minKey_ = src.minKey_;
354  }
355  dst.success_ = dst.success_ && src.success_;
356  }
357 
362  KOKKOS_INLINE_FUNCTION void
363  operator()(const size_type& i, value_type& dst) const {
364  typedef typename hash_type::result_type hash_value_type;
365  typedef typename offsets_view_type::non_const_value_type offset_type;
366  typedef typename pair_type::second_type val_type;
367  typedef typename std::remove_reference<decltype(counts_[0])>::type atomic_incr_type;
368 
369  const key_type key = keys_[i];
370  if (key > dst.maxKey_) {
371  dst.maxKey_ = key;
372  }
373  if (key < dst.minKey_) {
374  dst.minKey_ = key;
375  }
376  const val_type theVal = startingValue_ + static_cast<val_type>(i);
377  const hash_value_type hashVal = hash_type::hashFunc(key, size_);
378 
379  // Return the old count; decrement afterwards.
380  const offset_type count = Kokkos::atomic_fetch_add(&counts_[hashVal], atomic_incr_type(-1));
381  if (count == 0) {
382  dst.success_ = false; // FAILURE!
383  } else {
384  const offset_type curPos = ptr_[hashVal + 1] - count;
385 
386  pairs_[curPos].first = key;
387  pairs_[curPos].second = theVal;
388  }
389  }
390 
391  private:
392  pairs_view_type pairs_;
393  counts_view_type counts_;
394  offsets_view_type ptr_;
395  keys_view_type keys_;
396  size_type size_;
397  typename pair_type::second_type startingValue_;
399  key_type initMinKey_;
401  key_type initMaxKey_;
402 };
403 
427 template <class OffsetsViewType,
428  class PairsViewType,
429  class SizeType = typename OffsetsViewType::size_type>
431  public:
432  typedef typename OffsetsViewType::const_type offsets_view_type;
433  typedef typename PairsViewType::const_type pairs_view_type;
434  typedef typename offsets_view_type::execution_space execution_space;
435  typedef typename offsets_view_type::memory_space memory_space;
436  typedef SizeType size_type;
437 
438  // The result of the check is whether the table has one or more duplicates.
439  typedef int value_type;
440 
445  CheckForDuplicateKeys(const pairs_view_type& pairs,
446  const offsets_view_type& ptr)
447  : pairs_(pairs)
448  , ptr_(ptr)
449  , size_(ptr_.extent(0) == 0 ? size_type(0) : ptr_.extent(0) - 1) {}
450 
452  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const {
453  dst = 0;
454  }
455 
457  KOKKOS_INLINE_FUNCTION void
458  join(value_type& dst,
459  const value_type& src) const {
460  dst = dst + src > 0 ? 1 : 0;
461  }
462 
464  KOKKOS_INLINE_FUNCTION void
465  operator()(const size_type& i, value_type& dst) const {
466  typedef typename offsets_view_type::non_const_value_type offset_type;
467  typedef typename pairs_view_type::non_const_value_type pair_type;
468  typedef typename pair_type::first_type key_type;
469 
470  if (dst > 0) {
471  return; // we've already found duplicate keys elsewhere
472  } else {
473  const offset_type beg = ptr_[i];
474  const offset_type end = ptr_[i + 1];
475  bool foundDuplicateKey = false;
476  // This is an ~ n^2 algorithm in the worst case, where n is the
477  // max number of keys that hash to the same bucket. However, if
478  // the hash function is reasonable, n should be much less than
479  // the total number of keys.
480  for (offset_type j = beg + 1; j < end; ++j) {
481  const key_type curKey = pairs_[j].first;
482  for (offset_type k = beg; k < j; ++k) {
483  if (pairs_[k].first == curKey) {
484  foundDuplicateKey = true;
485  break;
486  }
487  }
488  }
489  dst = (dst > 0) || foundDuplicateKey ? 1 : 0;
490  }
491  }
492 
493  private:
494  pairs_view_type pairs_;
495  offsets_view_type ptr_;
496  size_type size_;
497 };
498 
499 } // namespace FHT
500 
501 //
502 // Here begins the actual implementation of FixedHashTable.
503 //
504 
505 template <class KeyType, class ValueType, class DeviceType>
508  : minVal_(0)
509  , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
510  , checkedForDuplicateKeys_(false) {
511  const ValueType startingValue = static_cast<ValueType>(0);
512  const KeyType initMinKey = this->minKey_;
513  const KeyType initMaxKey = this->maxKey_;
514  this->init(keys, startingValue, initMinKey, initMaxKey,
515  initMinKey, initMinKey, false);
516 }
517 
518 template <class KeyType, class ValueType, class DeviceType>
520  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys)
521  : minVal_(0)
522  , maxVal_(keys.size() == 0 ? static_cast<ValueType>(0) : static_cast<ValueType>(keys.size() - 1))
523  , checkedForDuplicateKeys_(false) {
524  typedef typename keys_type::non_const_type nonconst_keys_type;
525 
526  // mfh 01 May 2015: I don't trust that
527  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
528  // so I ensure this manually.
529  const ValueType startingValue = static_cast<ValueType>(0);
530  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
531  keys.size());
532  using Kokkos::ViewAllocateWithoutInitializing;
533  nonconst_keys_type keys_d(ViewAllocateWithoutInitializing("FixedHashTable::keys"),
534  keys_k.extent(0));
535  // DEEP_COPY REVIEW - NOT TESTED
536  Kokkos::deep_copy(keys_d, keys_k);
537  const KeyType initMinKey = this->minKey_;
538  const KeyType initMaxKey = this->maxKey_;
539  this->init(keys_d, startingValue, initMinKey, initMaxKey,
540  initMinKey, initMinKey, false);
541 }
542 
543 template <class KeyType, class ValueType, class DeviceType>
545  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
546  const ValueType startingValue)
547  : minVal_(startingValue)
548  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
549  , checkedForDuplicateKeys_(false) {
550  typedef typename keys_type::non_const_type nonconst_keys_type;
551 
552  // mfh 01 May 2015: I don't trust that
553  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
554  // so I ensure this manually.
555  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
556  keys.size());
557  using Kokkos::ViewAllocateWithoutInitializing;
558  nonconst_keys_type keys_d(ViewAllocateWithoutInitializing("FixedHashTable::keys"),
559  keys_k.extent(0));
560  // DEEP_COPY REVIEW - HOST-TO_DEVICE
561  Kokkos::deep_copy(execution_space(), keys_d, keys_k);
562 
563 #if KOKKOS_VERSION >= 40799
564  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
565 #else
566  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
567 #endif
568  // min() for a floating-point type returns the minimum _positive_
569  // normalized value. This is different than for integer types.
570  // lowest() is new in C++11 and returns the least value, always
571  // negative for signed finite types.
572  //
573  // mfh 23 May 2015: I have heard reports that
574  // std::numeric_limits<int>::lowest() does not exist with the Intel
575  // compiler. I'm not sure if the users in question actually enabled
576  // C++11. However, it's easy enough to work around this issue. The
577  // standard floating-point types are signed and have a sign bit, so
578  // lowest() is just -max(). For integer types, we can use min()
579  // instead.
580 #if KOKKOS_VERSION >= 40799
581  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
582 #else
583  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
584 #endif
585  this->init(keys_d, startingValue, initMinKey, initMaxKey,
586  initMinKey, initMinKey, false);
587 }
588 
589 template <class KeyType, class ValueType, class DeviceType>
592  const KeyType firstContigKey,
593  const KeyType lastContigKey,
594  const ValueType startingValue)
595  : minVal_(startingValue)
596  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
597  , firstContigKey_(firstContigKey)
598  , lastContigKey_(lastContigKey)
599  , checkedForDuplicateKeys_(false) {
600 #if KOKKOS_VERSION >= 40799
601  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
602 #else
603  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
604 #endif
605  // min() for a floating-point type returns the minimum _positive_
606  // normalized value. This is different than for integer types.
607  // lowest() is new in C++11 and returns the least value, always
608  // negative for signed finite types.
609  //
610  // mfh 23 May 2015: I have heard reports that
611  // std::numeric_limits<int>::lowest() does not exist with the Intel
612  // compiler. I'm not sure if the users in question actually enabled
613  // C++11. However, it's easy enough to work around this issue. The
614  // standard floating-point types are signed and have a sign bit, so
615  // lowest() is just -max(). For integer types, we can use min()
616  // instead.
617 #if KOKKOS_VERSION >= 40799
618  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
619 #else
620  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
621 #endif
622  this->init(keys, startingValue, initMinKey, initMaxKey,
623  firstContigKey, lastContigKey, true);
624 }
625 
626 template <class KeyType, class ValueType, class DeviceType>
628  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
629  const KeyType firstContigKey,
630  const KeyType lastContigKey,
631  const ValueType startingValue)
632  : minVal_(startingValue)
633  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
634  , firstContigKey_(firstContigKey)
635  , lastContigKey_(lastContigKey)
636  , checkedForDuplicateKeys_(false) {
637  typedef typename keys_type::non_const_type nonconst_keys_type;
638 
639  // mfh 01 May 2015: I don't trust that
640  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
641  // so I ensure this manually.
642  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
643  keys.size());
644  using Kokkos::ViewAllocateWithoutInitializing;
645  nonconst_keys_type keys_d(ViewAllocateWithoutInitializing("FixedHashTable::keys"),
646  keys_k.extent(0));
647  // DEEP_COPY REVIEW - NOT TESTED
648  Kokkos::deep_copy(keys_d, keys_k);
649 
650 #if KOKKOS_VERSION >= 40799
651  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
652 #else
653  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
654 #endif
655  // min() for a floating-point type returns the minimum _positive_
656  // normalized value. This is different than for integer types.
657  // lowest() is new in C++11 and returns the least value, always
658  // negative for signed finite types.
659  //
660  // mfh 23 May 2015: I have heard reports that
661  // std::numeric_limits<int>::lowest() does not exist with the Intel
662  // compiler. I'm not sure if the users in question actually enabled
663  // C++11. However, it's easy enough to work around this issue. The
664  // standard floating-point types are signed and have a sign bit, so
665  // lowest() is just -max(). For integer types, we can use min()
666  // instead.
667 #if KOKKOS_VERSION >= 40799
668  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
669 #else
670  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
671 #endif
672  this->init(keys_d, startingValue, initMinKey, initMaxKey,
673  firstContigKey, lastContigKey, true);
674 }
675 
676 template <class KeyType, class ValueType, class DeviceType>
679  const ValueType startingValue)
680  : minVal_(startingValue)
681  , maxVal_(keys.size() == 0 ? startingValue : static_cast<ValueType>(startingValue + keys.size() - 1))
682  , checkedForDuplicateKeys_(false) {
683 #if KOKKOS_VERSION >= 40799
684  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
685 #else
686  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
687 #endif
688  // min() for a floating-point type returns the minimum _positive_
689  // normalized value. This is different than for integer types.
690  // lowest() is new in C++11 and returns the least value, always
691  // negative for signed finite types.
692  //
693  // mfh 23 May 2015: I have heard reports that
694  // std::numeric_limits<int>::lowest() does not exist with the Intel
695  // compiler. I'm not sure if the users in question actually enabled
696  // C++11. However, it's easy enough to work around this issue. The
697  // standard floating-point types are signed and have a sign bit, so
698  // lowest() is just -max(). For integer types, we can use min()
699  // instead.
700 #if KOKKOS_VERSION >= 40799
701  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
702 #else
703  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
704 #endif
705  this->init(keys, startingValue, initMinKey, initMaxKey,
706  initMinKey, initMinKey, false);
707 }
708 
709 template <class KeyType, class ValueType, class DeviceType>
711  FixedHashTable(const Teuchos::ArrayView<const KeyType>& keys,
712  const Teuchos::ArrayView<const ValueType>& vals)
713  : contiguousValues_(false)
714  , checkedForDuplicateKeys_(false) {
715  // mfh 01 May 2015: I don't trust that
716  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
717  // so I ensure this manually.
718  host_input_keys_type keys_k(keys.size() == 0 ? NULL : keys.getRawPtr(),
719  keys.size());
720  host_input_vals_type vals_k(vals.size() == 0 ? NULL : vals.getRawPtr(),
721  vals.size());
722 #if KOKKOS_VERSION >= 40799
723  const KeyType initMinKey = ::KokkosKernels::ArithTraits<KeyType>::max();
724 #else
725  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max();
726 #endif
727  // min() for a floating-point type returns the minimum _positive_
728  // normalized value. This is different than for integer types.
729  // lowest() is new in C++11 and returns the least value, always
730  // negative for signed finite types.
731  //
732  // mfh 23 May 2015: I have heard reports that
733  // std::numeric_limits<int>::lowest() does not exist with the Intel
734  // compiler. I'm not sure if the users in question actually enabled
735  // C++11. However, it's easy enough to work around this issue. The
736  // standard floating-point types are signed and have a sign bit, so
737  // lowest() is just -max(). For integer types, we can use min()
738  // instead.
739 #if KOKKOS_VERSION >= 40799
740  const KeyType initMaxKey = ::KokkosKernels::ArithTraits<KeyType>::is_integer ? ::KokkosKernels::ArithTraits<KeyType>::min() : -::KokkosKernels::ArithTraits<KeyType>::max();
741 #else
742  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ? ::Kokkos::ArithTraits<KeyType>::min() : -::Kokkos::ArithTraits<KeyType>::max();
743 #endif
744  this->init(keys_k, vals_k, initMinKey, initMaxKey);
745 }
746 
747 template <class KeyType, class ValueType, class DeviceType>
749  init(const keys_type& keys,
750  ValueType startingValue,
751  KeyType initMinKey,
752  KeyType initMaxKey,
753  KeyType firstContigKey,
754  KeyType lastContigKey,
755  const bool computeInitContigKeys) {
756  using Kokkos::subview;
757  using Kokkos::ViewAllocateWithoutInitializing;
758  using Teuchos::TypeNameTraits;
759  typedef typename std::decay<decltype(keys.extent(0))>::type size_type;
760  Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(7-arg)");
761  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
762 
763  const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
764  {
765 #if KOKKOS_VERSION >= 40799
766  const offset_type theMaxVal = ::KokkosKernels::ArithTraits<offset_type>::max();
767 #else
768  const offset_type theMaxVal = ::Kokkos::ArithTraits<offset_type>::max();
769 #endif
770  const size_type maxValST = static_cast<size_type>(theMaxVal);
771  TEUCHOS_TEST_FOR_EXCEPTION(keys.extent(0) > maxValST, std::invalid_argument, prefix << "The "
772  "number of keys "
773  << keys.extent(0) << " does not fit in "
774  "offset_type = "
775  << TypeNameTraits<offset_type>::name() << ", whose "
776  "max value is "
777  << theMaxVal << ". This means that it is not possible to "
778  "use this constructor.");
779  }
780  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) >
781 #if KOKKOS_VERSION >= 40799
782  static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
783 #else
784  static_cast<unsigned long long>(::Kokkos::ArithTraits<ValueType>::max()),
785 #endif
786  std::invalid_argument,
787  "Tpetra::Details::FixedHashTable: The number of "
788  "keys "
789  << numKeys << " is greater than the maximum representable "
790  "ValueType value "
791 #if KOKKOS_VERSION >= 40799
792  << ::KokkosKernels::ArithTraits<ValueType>::max() << ". "
793 #else
794  << ::Kokkos::ArithTraits<ValueType>::max() << ". "
795 #endif
796  "This means that it is not possible to use this constructor.");
797  TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error, prefix << "This class currently only works when the number of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
798  "developers.");
799 
800  const bool buildInParallel =
801  FHT::worthBuildingFixedHashTableInParallel<execution_space>();
802  const bool debug = ::Tpetra::Details::Behavior::debug();
803 
804  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
805  // could change that by setting up ptr and val as Kokkos::DualView
806  // instances. If we do that, since we are filling on host for now,
807  // we want to make sure that we only zero-fill ptr on host
808  // initially, and that we don't fill val at all. Once we finish
809  // Kokkos-izing all the set-up kernels, we won't need DualView for
810  // either ptr or val.
811 
812  if (computeInitContigKeys) {
813  // Find the first and last initial contiguous keys. If we find a
814  // long sequence of initial contiguous keys, we can save space by
815  // not storing them explicitly as pairs in the hash table.
816  //
817  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
818  // ("min index such that the difference between the current key and
819  // the next != 1"), which takes multiple passes over the data. We
820  // could fuse it with CountBuckets (only update counts on 'final'
821  // pass). However, we're really just moving this sequential search
822  // out of Map's constructor here, so there is no loss in doing it
823  // sequentially for now. Later, we can work on parallelization.
824  if (numKeys > 0) {
825  // FIXME: make it a parallel kernel with no host copy
826  auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
827  keys);
828  firstContigKey_ = keys_h[0];
829  // Start with one plus, then decrement at the end. That lets us do
830  // only one addition per loop iteration, rather than two (if we test
831  // against lastContigKey + 1 and then increment lastContigKey).
832  lastContigKey_ = firstContigKey_ + 1;
833 
834  // We will only store keys in the table that are not part of the
835  // initial contiguous sequence. It's possible for the initial
836  // contiguous sequence to be trivial, which for a nonzero number of
837  // keys means that the "sequence" has length 1.
838  for (offset_type k = 1; k < numKeys; ++k) {
839  if (lastContigKey_ != keys_h[k]) {
840  break;
841  }
842  ++lastContigKey_;
843  }
844  --lastContigKey_;
845  }
846  } else {
847  firstContigKey_ = firstContigKey;
848  lastContigKey_ = lastContigKey;
849  }
850 
851  offset_type startIndex;
852  if (numKeys > 0) {
853  initMinKey = std::min(initMinKey, firstContigKey_);
854  initMaxKey = std::max(initMaxKey, lastContigKey_);
855  startIndex = static_cast<offset_type>(lastContigKey_ - firstContigKey_);
856  } else {
857  startIndex = 0;
858  }
859 
860  const offset_type theNumKeys = numKeys - startIndex;
861  const offset_type size = hash_type::getRecommendedSize(theNumKeys);
862 #ifdef HAVE_TPETRA_DEBUG
863  TEUCHOS_TEST_FOR_EXCEPTION(
864  size == 0 && numKeys != 0, std::logic_error,
865  "Tpetra::Details::FixedHashTable constructor: "
866  "getRecommendedSize("
867  << numKeys << ") returned zero, "
868  "even though the number of keys "
869  << numKeys << " is nonzero. "
870  "Please report this bug to the Tpetra developers.");
871 #endif // HAVE_TPETRA_DEBUG
872  keys_type theKeys =
873  subview(keys, std::pair<offset_type, offset_type>(startIndex, numKeys));
874 
875  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
876  // fence here, but we do before other allocations.
877 
878  // The array of counts must be separate from the array of offsets,
879  // in order for parallel_scan to work correctly.
880  typedef typename ptr_type::non_const_type counts_type;
881  counts_type counts("Tpetra::FixedHashTable::counts", size);
882 
883  //
884  // Count the number of "buckets" per offsets array (ptr) entry.
885  //
886 
887  // Will only create the mirror for buildInParallel false - but then use it in two places
888  typename keys_type::host_mirror_type theKeysHost;
889 
890  // The Kokkos kernel uses atomic update instructions to count the
891  // number of "buckets" per offsets array (ptr) entry. Atomic
892  // updates incur overhead, even in the sequential case. The Kokkos
893  // kernel is still correct in that case, but I would rather not
894  // incur overhead then.
895  if (buildInParallel) {
896  FHT::CountBuckets<counts_type, keys_type> functor(counts, theKeys, size);
897  using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
898  const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
899  if (debug) {
900  using key_type = typename keys_type::non_const_value_type;
901  Kokkos::pair<int, key_type> err;
902  Kokkos::parallel_reduce(kernelLabel, range_type(0, theNumKeys),
903  functor, err);
904  TEUCHOS_TEST_FOR_EXCEPTION(err.first != 0, std::logic_error,
905  "Tpetra::Details::FixedHashTable "
906  "constructor: CountBuckets found a key "
907  << err.second << " that "
908  "results in an out-of-bounds hash value.");
909  } else {
910  Kokkos::parallel_for(kernelLabel, range_type(0, theNumKeys), functor);
911  }
912  } else {
913  Kokkos::HostSpace hostMemSpace;
914  theKeysHost = Kokkos::create_mirror_view(theKeys);
915  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
916  Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
917  auto countsHost = Kokkos::create_mirror_view(hostMemSpace, counts);
918 
919  for (offset_type k = 0; k < theNumKeys; ++k) {
920  using key_type = typename keys_type::non_const_value_type;
921  const key_type key = theKeysHost[k];
922 
923  using hash_value_type = typename hash_type::result_type;
924  const hash_value_type hashVal = hash_type::hashFunc(key, size);
925  TEUCHOS_TEST_FOR_EXCEPTION(hashVal < hash_value_type(0) ||
926  hashVal >= hash_value_type(countsHost.extent(0)),
927  std::logic_error,
928  "Tpetra::Details::FixedHashTable "
929  "constructor: Sequential CountBuckets found a key "
930  << key
931  << " that results in an out-of-bounds hash value.");
932 
933  ++countsHost[hashVal];
934  }
935  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
936  Kokkos::deep_copy(execution_space(), counts, countsHost);
937  }
938 
939  // KJ: This fence is not required for the 2-argument deep_copy which calls
940  // fence, but will be required if switched to the 3-argumemt deep_copy which
941  // passes a space. The 3-argument form does not fence.
942  execution_space().fence();
943 
944  // Kokkos::View fills with zeros by default.
945  typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
946 
947  // Compute row offsets via prefix sum:
948  //
949  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
950  //
951  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
952  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
953  // formula is ptr[i+1] += ptr[i].
954  //
955  // parallel_scan does not incur overhead with Kokkos::Serial, but
956  // with actual parallel execution spaces, it does require multiple
957  // passes over the data. Thus, it still makes sense to have a
958  // sequential fall-back.
959 
961  if (buildInParallel) {
962  computeOffsetsFromCounts(ptr, counts);
963  }
964 
965  if (!buildInParallel || debug) {
966  Kokkos::HostSpace hostMemSpace;
967  auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
968  auto ptr_h = Kokkos::create_mirror_view(hostMemSpace, ptr);
969 
970 #ifdef KOKKOS_ENABLE_SERIAL
971  Kokkos::Serial hostExecSpace;
972 #else
973  Kokkos::DefaultHostExecutionSpace hostExecSpace;
974 #endif // KOKKOS_ENABLE_SERIAL
975 
976  computeOffsetsFromCounts(hostExecSpace, ptr_h, counts_h);
977  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
978  Kokkos::deep_copy(execution_space(), ptr, ptr_h);
979 
980  if (debug) {
981  bool bad = false;
982  for (offset_type i = 0; i < size; ++i) {
983  if (ptr_h[i + 1] != ptr_h[i] + counts_h[i]) {
984  bad = true;
985  }
986  }
987  TEUCHOS_TEST_FOR_EXCEPTION(bad, std::logic_error,
988  "Tpetra::Details::FixedHashTable "
989  "constructor: computeOffsetsFromCounts gave an incorrect "
990  "result.");
991  }
992  }
993 
994  // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
995  // This fence is necessary as we need to make sure that the offset view
996  // completes before the view is used in the next functor.
997  execution_space().fence();
998 
999  // Allocate the array of (key,value) pairs. Don't fill it with
1000  // zeros, because we will fill it with actual data below.
1001  typedef typename val_type::non_const_type nonconst_val_type;
1002  nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1003  theNumKeys);
1004 
1005  // Fill in the hash table's "values" (the (key,value) pairs).
1006  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1007  typename ptr_type::non_const_type>
1008  functor_type;
1009  typename functor_type::value_type result(initMinKey, initMaxKey);
1010 
1011  const ValueType newStartingValue = startingValue + static_cast<ValueType>(startIndex);
1012  if (buildInParallel) {
1013  functor_type functor(val, counts, ptr, theKeys, newStartingValue,
1014  initMinKey, initMaxKey);
1015  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1016  Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::FillPairs", range_type(0, theNumKeys), functor, result);
1017  } else {
1018  Kokkos::HostSpace hostMemSpace;
1019  auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1020  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1021  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1022  for (offset_type k = 0; k < theNumKeys; ++k) {
1023  typedef typename hash_type::result_type hash_value_type;
1024  const KeyType key = theKeysHost[k];
1025  if (key > result.maxKey_) {
1026  result.maxKey_ = key;
1027  }
1028  if (key < result.minKey_) {
1029  result.minKey_ = key;
1030  }
1031  const ValueType theVal = newStartingValue + static_cast<ValueType>(k);
1032  const hash_value_type hashVal = hash_type::hashFunc(key, size);
1033 
1034  // Return the old count; decrement afterwards.
1035  const offset_type count = counts_h[hashVal];
1036  --counts_h[hashVal];
1037  if (count == 0) {
1038  result.success_ = false; // FAILURE!
1039  break;
1040  } else {
1041  const offset_type curPos = ptr_h[hashVal + 1] - count;
1042  val_h[curPos].first = key;
1043  val_h[curPos].second = theVal;
1044  }
1045  }
1046  Kokkos::deep_copy(counts, counts_h); // restore
1047  Kokkos::deep_copy(val, val_h); // restore
1048  }
1049 
1050  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1051  // reports of exceptions being thrown in Albany.
1052  //
1053  // TEUCHOS_TEST_FOR_EXCEPTION
1054  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1055  // "init: Filling the hash table failed! Please report this bug to the "
1056  // "Tpetra developers.");
1057  (void)result; // prevent build warning (set but unused variable)
1058 
1059  // "Commit" the computed arrays and other computed quantities.
1060  ptr_ = ptr;
1061  val_ = val;
1062  minKey_ = result.minKey_;
1063  maxKey_ = result.maxKey_;
1064  // We've already set firstContigKey_ and lastContigKey_ above.
1065 }
1066 
1067 template <class KeyType, class ValueType, class DeviceType>
1068 void FixedHashTable<KeyType, ValueType, DeviceType>::
1069  init(const host_input_keys_type& keys,
1070  const host_input_vals_type& vals,
1071  KeyType initMinKey,
1072  KeyType initMaxKey) {
1073  Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(4-arg)");
1074  const offset_type numKeys = static_cast<offset_type>(keys.extent(0));
1075 #if KOKKOS_VERSION >= 40799
1076  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::KokkosKernels::ArithTraits<ValueType>::max()),
1077 #else
1078  TEUCHOS_TEST_FOR_EXCEPTION(static_cast<unsigned long long>(numKeys) > static_cast<unsigned long long>(::Kokkos::ArithTraits<ValueType>::max()),
1079 #endif
1080  std::invalid_argument,
1081  "Tpetra::Details::FixedHashTable: The number of "
1082  "keys "
1083  << numKeys << " is greater than the maximum representable "
1084  "ValueType value "
1085 #if KOKKOS_VERSION >= 40799
1086  << ::KokkosKernels::ArithTraits<ValueType>::max() << ".");
1087 #else
1088  << ::Kokkos::ArithTraits<ValueType>::max() << ".");
1089 #endif
1090  TEUCHOS_TEST_FOR_EXCEPTION(numKeys > static_cast<offset_type>(INT_MAX), std::logic_error,
1091  "Tpetra::"
1092  "Details::FixedHashTable: This class currently only works when the number "
1093  "of keys is <= INT_MAX = "
1094  << INT_MAX << ". If this is a problem for you"
1095  ", please talk to the Tpetra developers.");
1096 
1097  // There's no need to find the first and last initial contiguous
1098  // keys in this case, because if we reach this init() function, then
1099  // hasContiguousValues() is false and so get() doesn't use the
1100  // initial contiguous sequence of keys.
1101 
1102  const offset_type size = hash_type::getRecommendedSize(numKeys);
1103 #ifdef HAVE_TPETRA_DEBUG
1104  TEUCHOS_TEST_FOR_EXCEPTION(
1105  size == 0 && numKeys != 0, std::logic_error,
1106  "Tpetra::Details::FixedHashTable constructor: "
1107  "getRecommendedSize("
1108  << numKeys << ") returned zero, "
1109  "even though the number of keys "
1110  << numKeys << " is nonzero. "
1111  "Please report this bug to the Tpetra developers.");
1112 #endif // HAVE_TPETRA_DEBUG
1113 
1114  // FIXME: Investigate a couple options:
1115  // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1116  // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1117  // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1118  // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1119  // been "Kokkos-ized".
1120  Kokkos::HostSpace hostMemSpace;
1121  typename ptr_type::non_const_type ptr("Tpetra::FixedHashTable::ptr", size + 1);
1122  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1123 
1124  // Allocate the array of key,value pairs. Don't waste time filling
1125  // it with zeros, because we will fill it with actual data below.
1126  using Kokkos::ViewAllocateWithoutInitializing;
1127  typedef typename val_type::non_const_type nonconst_val_type;
1128  nonconst_val_type val(ViewAllocateWithoutInitializing("Tpetra::FixedHashTable::pairs"),
1129  numKeys);
1130  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1131 
1132  // Compute number of entries in each hash table position.
1133  for (offset_type k = 0; k < numKeys; ++k) {
1134  const typename hash_type::result_type hashVal =
1135  hash_type::hashFunc(keys[k], size);
1136  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1137  ++ptr_h[hashVal + 1];
1138  }
1139 
1140  // Compute row offsets via prefix sum:
1141  //
1142  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1143  //
1144  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1145  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1146  // formula is ptr[i+1] += ptr[i].
1147  for (offset_type i = 0; i < size; ++i) {
1148  ptr_h[i + 1] += ptr_h[i];
1149  }
1150  // ptr[0] = 0; // We've already done this when initializing ptr above.
1151 
1152  // curRowStart[i] is the offset of the next element in row i.
1153  typename ptr_type::non_const_type::host_mirror_type curRowStart("Tpetra::FixedHashTable::curRowStart", size);
1154 
1155  // Fill in the hash table.
1156  FHT::FillPairsResult<KeyType> result(initMinKey, initMaxKey);
1157  for (offset_type k = 0; k < numKeys; ++k) {
1158  typedef typename hash_type::result_type hash_value_type;
1159  const KeyType key = keys[k];
1160  if (key > result.maxKey_) {
1161  result.maxKey_ = key;
1162  }
1163  if (key < result.minKey_) {
1164  result.minKey_ = key;
1165  }
1166  const ValueType theVal = vals[k];
1167  if (theVal > maxVal_) {
1168  maxVal_ = theVal;
1169  }
1170  if (theVal < minVal_) {
1171  minVal_ = theVal;
1172  }
1173  const hash_value_type hashVal = hash_type::hashFunc(key, size);
1174 
1175  const offset_type offset = curRowStart[hashVal];
1176  const offset_type curPos = ptr_h[hashVal] + offset;
1177  if (curPos >= ptr_h[hashVal + 1]) {
1178  result.success_ = false; // FAILURE!
1179  } else {
1180  val_h[curPos].first = key;
1181  val_h[curPos].second = theVal;
1182  ++curRowStart[hashVal];
1183  }
1184  }
1185 
1186  TEUCHOS_TEST_FOR_EXCEPTION(!result.success_, std::logic_error,
1187  "Tpetra::Details::FixedHashTable::"
1188  "init: Filling the hash table failed! Please report this bug to the "
1189  "Tpetra developers.");
1190 
1191  // "Commit" the computed arrays and other computed quantities.
1192  Kokkos::deep_copy(ptr, ptr_h);
1193  Kokkos::deep_copy(val, val_h);
1194 
1195  ptr_ = ptr;
1196  val_ = val;
1197  minKey_ = result.minKey_;
1198  maxKey_ = result.maxKey_;
1199  // We've already assigned to minVal_ and maxVal_ above.
1200 }
1201 
1202 template <class KeyType, class ValueType, class DeviceType>
1205  if (!checkedForDuplicateKeys_) {
1206  hasDuplicateKeys_ = checkForDuplicateKeys();
1207  checkedForDuplicateKeys_ = true;
1208  }
1209  return hasDuplicateKeys_;
1210 }
1211 
1212 template <class KeyType, class ValueType, class DeviceType>
1214  checkForDuplicateKeys() const {
1215  const offset_type size = this->getSize();
1216  // It's allowed for the hash table to have a positive number of
1217  // buckets (getSize()), but still store no entries (numPairs()).
1218  // Both cases trivially entail no duplicates.
1219  if (size == 0 || this->numPairs() == 0) {
1220  return false;
1221  } else {
1222  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1223  functor_type functor(val_, ptr_);
1224  int hasDupKeys = 0;
1225  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1226  Kokkos::parallel_reduce("Tpetra::Details::FixedHashTable::CheckForDuplicateKeys", range_type(0, size), functor, hasDupKeys);
1227  return hasDupKeys > 0;
1228  }
1229 }
1230 
1231 template <class KeyType, class ValueType, class DeviceType>
1232 std::string
1234  description() const {
1235  std::ostringstream oss;
1236  oss << "FixedHashTable<"
1237  << Teuchos::TypeNameTraits<KeyType>::name() << ","
1238  << Teuchos::TypeNameTraits<ValueType>::name() << ">: "
1239  << "{ numKeys: " << val_.extent(0)
1240  << ", tableSize: " << this->getSize() << " }";
1241  return oss.str();
1242 }
1243 
1244 template <class KeyType, class ValueType, class DeviceType>
1246  describe(Teuchos::FancyOStream& out,
1247  const Teuchos::EVerbosityLevel verbLevel) const {
1248  using std::endl;
1249  using std::setw;
1250  using Teuchos::OSTab;
1251  using Teuchos::rcpFromRef;
1252  using Teuchos::TypeNameTraits;
1253  using Teuchos::VERB_DEFAULT;
1254  using Teuchos::VERB_EXTREME;
1255  using Teuchos::VERB_LOW;
1256  using Teuchos::VERB_NONE;
1257 
1258  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1259  // access to ptr_ and val_ from the host.
1260 
1261  Teuchos::EVerbosityLevel vl = verbLevel;
1262  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1263 
1264  if (vl == VERB_NONE) {
1265  // do nothing
1266  } else if (vl == VERB_LOW) {
1267  out << this->description() << endl;
1268  } else { // MEDIUM, HIGH or EXTREME
1269  out << "FixedHashTable:" << endl;
1270  {
1271  OSTab tab1(rcpFromRef(out));
1272 
1273  // const std::string label = this->getObjectLabel ();
1274  // if (label != "") {
1275  // out << "label: " << label << endl;
1276  // }
1277  out << "Template parameters:" << endl;
1278  {
1279  OSTab tab2(rcpFromRef(out));
1280  out << "KeyType: " << TypeNameTraits<KeyType>::name() << endl
1281  << "ValueType: " << TypeNameTraits<ValueType>::name() << endl;
1282  }
1283 
1284  const offset_type tableSize = this->getSize();
1285  const offset_type numKeys = val_.extent(0);
1286 
1287  out << "Table parameters:" << endl;
1288  {
1289  OSTab tab2(rcpFromRef(out));
1290  out << "numKeys: " << numKeys << endl
1291  << "tableSize: " << tableSize << endl;
1292  }
1293 
1294  if (vl >= VERB_EXTREME) {
1295  out << "Contents: ";
1296  if (tableSize == 0 || numKeys == 0) {
1297  out << "[]" << endl;
1298  } else {
1299  out << "[ " << endl;
1300  {
1301  OSTab tab2(rcpFromRef(out));
1302  for (offset_type i = 0; i < tableSize; ++i) {
1303  OSTab tab3(rcpFromRef(out));
1304  out << "[";
1305  for (offset_type k = ptr_[i]; k < ptr_[i + 1]; ++k) {
1306  out << "(" << val_[k].first << "," << val_[k].second << ")";
1307  if (k + 1 < ptr_[i + 1]) {
1308  out << ", ";
1309  }
1310  }
1311  out << "]" << endl;
1312  } // for each table position i
1313  }
1314  out << "]" << endl;
1315  } // The table contains entries
1316  } // vl >= VERB_EXTREME
1317  }
1318  out << endl;
1319  } // if vl > VERB_LOW
1320 }
1321 
1322 } // namespace Details
1323 } // namespace Tpetra
1324 
1325 // Macro that explicitly instantiates FixedHashTable for the given
1326 // local ordinal (LO), global ordinal (GO), and Kokkos device (NODE::device_type)
1327 // types. Note that FixedHashTable's first two template parameters
1328 // occur in the opposite order of most Tpetra classes. This is
1329 // because FixedHashTable performs global-to-local lookup, and the
1330 // convention in templated C++ lookup tables (such as std::map) is
1331 // <KeyType, ValueType>.
1332 //
1333 // This macro must be explanded within the Tpetra::Details namespace.
1334 // Moreover, we need (GO,LO,Host) and (LO,LO,Host)
1335 #if defined(HAVE_TPETRA_INST_SERIAL) || defined(HAVE_TPETRA_INST_OPENMP)
1336 #if defined(HAVE_TPETRA_INST_INT_INT)
1337 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1338  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>;
1339 #else
1340 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1341  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1342  template class Details::FixedHashTable<LO, LO, typename NODE::device_type>;
1343 #endif
1344 #else
1345 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, NODE) \
1346  template class Details::FixedHashTable<GO, LO, typename NODE::device_type>; \
1347  template class Details::FixedHashTable<GO, LO, Kokkos::HostSpace::device_type>; \
1348  template class Details::FixedHashTable<LO, LO, Kokkos::HostSpace::device_type>;
1349 #endif
1350 #endif
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_DEFAULTED_FUNCTION FixedHashTable()=default
Default constructor; makes an empty table.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &, const offset_type &)
The hash function.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting &quot;buckets&quot; in the FixedHashTable.
static bool debug()
Whether Tpetra is in debug mode.
bool success_
Whether fill succeeded (it can only fail on a bug)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
The hash function for FixedHashTable.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Debug reduce version of above operator().
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
std::string description() const
Implementation of Teuchos::Describable interface.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Reduction result for FillPairs functor below.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.