Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_FixedHashTable_def.hpp
1 // @HEADER
2 // *****************************************************************************
3 // Tpetra: Templated Linear Algebra Services Package
4 //
5 // Copyright 2008 NTESS and the Tpetra contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
11 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
12 
13 #include "Tpetra_Details_FixedHashTable_decl.hpp"
15 #ifdef TPETRA_USE_MURMUR_HASH
16 # include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
17 #endif // TPETRA_USE_MURMUR_HASH
18 #include "Kokkos_ArithTraits.hpp"
19 #include "Teuchos_TypeNameTraits.hpp"
21 #include <type_traits>
22 
23 namespace Tpetra {
24 namespace Details {
25 //
26 // This namespace stores utility functions and Kokkos
27 // functors for use in FixedHashTable construction.
28 //
29 namespace FHT {
30 
31 
32 // Is it worth actually using building the FixedHashTable using
33 // parallel threads, instead of just counting in a sequential loop?
34 //
35 // The parallel version of FixedHashTable construction isn't just a
36 // parallelization of the sequential loops. It incurs additional
37 // overheads. For example, the CountBuckets kernel uses atomic update
38 // instructions to count the number of "buckets" per offsets array
39 // (ptr) entry. Atomic updates have overhead, even if only one thread
40 // issues them. The Kokkos kernels are still correct in that case,
41 // but I would rather not incur overhead then. It might make sense to
42 // set the minimum number of threads to something greater than 1, but
43 // we would need experiments to find out.
44 template<class ExecSpace>
45 bool worthBuildingFixedHashTableInParallel () {
46  return ExecSpace().concurrency() > 1;
47 }
48 
49 //
50 // Functors for FixedHashTable initialization
51 //
52 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
53 // should consider replacing all of these functors with in-line
54 // lambdas. The only issue is that we would need to bake the
55 // execution space into the policy, since the default execution space
56 // might differ from the one Tpetra wants to use.
57 
67 template<class CountsViewType,
68  class KeysViewType,
69  class SizeType = typename KeysViewType::size_type>
70 class CountBuckets {
71 public:
72  typedef CountsViewType counts_view_type;
73  typedef KeysViewType keys_view_type;
74  typedef typename CountsViewType::execution_space execution_space;
75  typedef typename CountsViewType::memory_space memory_space;
76  typedef SizeType size_type;
77  typedef typename keys_view_type::non_const_value_type key_type;
78  // mfh 21 May 2015: Having a device_type typedef in the functor
79  // along with an execution_space typedef causes compilation issues.
80  // This is because one of Kokkos' partial specializations picks up
81  // on the device_type typedef, and another picks up on the
82  // execution_space typedef. The former is a legacy of a previous
83  // design iteration of Kokkos, which did not separate memory and
84  // execution spaces.
86 
92  CountBuckets (const counts_view_type& counts,
93  const keys_view_type& keys,
94  const size_type size) :
95  counts_ (counts),
96  keys_ (keys),
97  size_ (size)
98  {}
99 
104  KOKKOS_INLINE_FUNCTION void
105  operator () (const size_type& i) const
106  {
107  typedef typename hash_type::result_type hash_value_type;
108 
109  const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
110  Kokkos::atomic_inc (&counts_[hashVal]);
111  }
112 
113  using value_type = Kokkos::pair<int, key_type>;
114 
118  KOKKOS_INLINE_FUNCTION void
119  operator () (const size_type& i, value_type& dst) const
120  {
121  using hash_value_type = typename hash_type::result_type;
122 
123  const key_type keyVal = keys_[i];
124  const hash_value_type hashVal = hash_type::hashFunc (keyVal, size_);
125  if (hashVal < hash_value_type (0) ||
126  hashVal >= hash_value_type (counts_.extent (0))) {
127  dst.first = 1;
128  dst.second = keyVal;
129  }
130  else {
131  Kokkos::atomic_inc (&counts_[hashVal]);
132  }
133  }
134 
136  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
137  {
138  dst.first = 0;
139  dst.second = key_type (0);
140  }
141 
142  KOKKOS_INLINE_FUNCTION void
143  join (value_type& dst,
144  const value_type& src) const
145  {
146  const bool good = dst.first == 0 || src.first == 0;
147  dst.first = good ? 0 : dst.first;
148  // leave dst.second as it is, to get the "first" key
149  }
150 
151 private:
153  counts_view_type counts_;
155  keys_view_type keys_;
157  size_type size_;
158 };
159 
170 template<class KeyType>
172  KOKKOS_INLINE_FUNCTION
173  FillPairsResult () :
174  minKey_ (::Kokkos::ArithTraits<KeyType>::max ()),
175  // min() for a floating-point type returns the minimum _positive_
176  // normalized value. This is different than for integer types.
177  // lowest() is new in C++11 and returns the least value, always
178  // negative for signed finite types.
179  //
180  // mfh 23 May 2015: I have heard reports that
181  // std::numeric_limits<int>::lowest() does not exist with the
182  // Intel compiler. I'm not sure if the users in question actually
183  // enabled C++11. However, it's easy enough to work around this
184  // issue. The standard floating-point types are signed and have a
185  // sign bit, so lowest() is just -max(). For integer types, we
186  // can use min() instead.
187  maxKey_ (::Kokkos::ArithTraits<KeyType>::is_integer ?
188  ::Kokkos::ArithTraits<KeyType>::min () :
189  -::Kokkos::ArithTraits<KeyType>::max ()),
190  success_ (true)
191  {
192  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
193  "KeyType must be some kind of number type.");
194  }
195 
196  KOKKOS_INLINE_FUNCTION
197  FillPairsResult (const KeyType& initMinKey,
198  const KeyType& initMaxKey) :
199  minKey_ (initMinKey),
200  maxKey_ (initMaxKey),
201  success_ (true)
202  {
203  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
204  "KeyType must be some kind of number type.");
205  }
206 
207  KeyType minKey_;
208  KeyType maxKey_;
209  bool success_;
210 };
211 
241 template<class PairsViewType,
242  class KeysViewType,
243  class CountsViewType,
244  class SizeType = typename KeysViewType::size_type>
245 class FillPairs {
246 public:
247  typedef typename CountsViewType::non_const_type counts_view_type;
248  typedef typename counts_view_type::const_type offsets_view_type;
249 
250  typedef PairsViewType pairs_view_type;
251  typedef typename KeysViewType::const_type keys_view_type;
252  typedef typename offsets_view_type::execution_space execution_space;
253  typedef typename offsets_view_type::memory_space memory_space;
254  typedef SizeType size_type;
255 
256  typedef typename keys_view_type::non_const_value_type key_type;
257  typedef typename pairs_view_type::non_const_value_type pair_type;
258 
260 
261  // mfh 23 May 2015: Having a device_type typedef in the functor
262  // along with an execution_space typedef causes compilation issues.
263  // This is because one of Kokkos' partial specializations picks up
264  // on the device_type typedef, and another picks up on the
265  // execution_space typedef. The former is a legacy of a previous
266  // design iteration of Kokkos, which did not separate memory and
267  // execution spaces.
269 
280  FillPairs (const pairs_view_type& pairs,
281  const counts_view_type& counts,
282  const offsets_view_type& ptr,
283  const keys_view_type& keys,
284  const typename pair_type::second_type startingValue) :
285  pairs_ (pairs),
286  counts_ (counts),
287  ptr_ (ptr),
288  keys_ (keys),
289  size_ (counts.extent (0)),
290  startingValue_ (startingValue),
291  initMinKey_ (::Kokkos::ArithTraits<key_type>::max ()),
292  initMaxKey_ (::Kokkos::ArithTraits<key_type>::is_integer ?
293  ::Kokkos::ArithTraits<key_type>::min () :
294  -::Kokkos::ArithTraits<key_type>::max ())
295  {}
296 
315  FillPairs (const pairs_view_type& pairs,
316  const counts_view_type& counts,
317  const offsets_view_type& ptr,
318  const keys_view_type& keys,
319  const typename pair_type::second_type startingValue,
320  const key_type initMinKey,
321  const key_type initMaxKey) :
322  pairs_ (pairs),
323  counts_ (counts),
324  ptr_ (ptr),
325  keys_ (keys),
326  size_ (counts.extent (0)),
327  startingValue_ (startingValue),
328  initMinKey_ (initMinKey),
329  initMaxKey_ (initMaxKey)
330  {}
331 
333  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
334  {
335  dst.minKey_ = initMinKey_;
336  dst.maxKey_ = initMaxKey_;
337  dst.success_ = true;
338  }
339 
340  KOKKOS_INLINE_FUNCTION void
341  join (value_type& dst,
342  const value_type& src) const
343  {
344  if (src.maxKey_ > dst.maxKey_) {
345  dst.maxKey_ = src.maxKey_;
346  }
347  if (src.minKey_ < dst.minKey_) {
348  dst.minKey_ = src.minKey_;
349  }
350  dst.success_ = dst.success_ && src.success_;
351  }
352 
357  KOKKOS_INLINE_FUNCTION void
358  operator () (const size_type& i, value_type& dst) const
359  {
360  typedef typename hash_type::result_type hash_value_type;
361  typedef typename offsets_view_type::non_const_value_type offset_type;
362  typedef typename pair_type::second_type val_type;
363  typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
364 
365  const key_type key = keys_[i];
366  if (key > dst.maxKey_) {
367  dst.maxKey_ = key;
368  }
369  if (key < dst.minKey_) {
370  dst.minKey_ = key;
371  }
372  const val_type theVal = startingValue_ + static_cast<val_type> (i);
373  const hash_value_type hashVal = hash_type::hashFunc (key, size_);
374 
375  // Return the old count; decrement afterwards.
376  const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
377  if (count == 0) {
378  dst.success_ = false; // FAILURE!
379  }
380  else {
381  const offset_type curPos = ptr_[hashVal+1] - count;
382 
383  pairs_[curPos].first = key;
384  pairs_[curPos].second = theVal;
385  }
386  }
387 
388 private:
389  pairs_view_type pairs_;
390  counts_view_type counts_;
391  offsets_view_type ptr_;
392  keys_view_type keys_;
393  size_type size_;
394  typename pair_type::second_type startingValue_;
396  key_type initMinKey_;
398  key_type initMaxKey_;
399 };
400 
424 template<class OffsetsViewType,
425  class PairsViewType,
426  class SizeType = typename OffsetsViewType::size_type>
428 public:
429  typedef typename OffsetsViewType::const_type offsets_view_type;
430  typedef typename PairsViewType::const_type pairs_view_type;
431  typedef typename offsets_view_type::execution_space execution_space;
432  typedef typename offsets_view_type::memory_space memory_space;
433  typedef SizeType size_type;
434 
435  // The result of the check is whether the table has one or more duplicates.
436  typedef int value_type;
437 
442  CheckForDuplicateKeys (const pairs_view_type& pairs,
443  const offsets_view_type& ptr) :
444  pairs_ (pairs),
445  ptr_ (ptr),
446  size_ (ptr_.extent (0) == 0 ?
447  size_type (0) :
448  ptr_.extent (0) - 1)
449  {}
450 
452  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
453  {
454  dst = 0;
455  }
456 
458  KOKKOS_INLINE_FUNCTION void
459  join (value_type& dst,
460  const value_type& src) const
461  {
462  dst = dst + src > 0?1:0;
463  }
464 
466  KOKKOS_INLINE_FUNCTION void
467  operator () (const size_type& i, value_type& dst) const
468  {
469  typedef typename offsets_view_type::non_const_value_type offset_type;
470  typedef typename pairs_view_type::non_const_value_type pair_type;
471  typedef typename pair_type::first_type key_type;
472 
473  if (dst>0) {
474  return; // we've already found duplicate keys elsewhere
475  }
476  else {
477  const offset_type beg = ptr_[i];
478  const offset_type end = ptr_[i+1];
479  bool foundDuplicateKey = false;
480  // This is an ~ n^2 algorithm in the worst case, where n is the
481  // max number of keys that hash to the same bucket. However, if
482  // the hash function is reasonable, n should be much less than
483  // the total number of keys.
484  for (offset_type j = beg + 1; j < end; ++j) {
485  const key_type curKey = pairs_[j].first;
486  for (offset_type k = beg; k < j; ++k) {
487  if (pairs_[k].first == curKey) {
488  foundDuplicateKey = true;
489  break;
490  }
491  }
492  }
493  dst = (dst>0) || foundDuplicateKey?1:0;
494  }
495  }
496 
497 private:
498  pairs_view_type pairs_;
499  offsets_view_type ptr_;
500  size_type size_;
501 };
502 
503 } // namespace FHT
504 
505 //
506 // Here begins the actual implementation of FixedHashTable.
507 //
508 
509 template<class KeyType, class ValueType, class DeviceType>
511 FixedHashTable (const keys_type& keys) :
512  minVal_ (0),
513  maxVal_ (keys.size () == 0 ?
514  static_cast<ValueType> (0) :
515  static_cast<ValueType> (keys.size () - 1)),
516  checkedForDuplicateKeys_ (false)
517 {
518  const ValueType startingValue = static_cast<ValueType> (0);
519  const KeyType initMinKey = this->minKey_;
520  const KeyType initMaxKey = this->maxKey_;
521  this->init (keys, startingValue, initMinKey, initMaxKey,
522  initMinKey, initMinKey, false);
523 }
524 
525 template<class KeyType, class ValueType, class DeviceType>
527 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys) :
528  minVal_ (0),
529  maxVal_ (keys.size () == 0 ?
530  static_cast<ValueType> (0) :
531  static_cast<ValueType> (keys.size () - 1)),
532  checkedForDuplicateKeys_ (false)
533 {
534  typedef typename keys_type::non_const_type nonconst_keys_type;
535 
536  // mfh 01 May 2015: I don't trust that
537  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
538  // so I ensure this manually.
539  const ValueType startingValue = static_cast<ValueType> (0);
540  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
541  keys.size ());
542  using Kokkos::ViewAllocateWithoutInitializing;
543  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
544  keys_k.extent (0));
545  // DEEP_COPY REVIEW - NOT TESTED
546  Kokkos::deep_copy (keys_d, keys_k);
547  const KeyType initMinKey = this->minKey_;
548  const KeyType initMaxKey = this->maxKey_;
549  this->init (keys_d, startingValue, initMinKey, initMaxKey,
550  initMinKey, initMinKey, false);
551 }
552 
553 template<class KeyType, class ValueType, class DeviceType>
555 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
556  const ValueType startingValue) :
557  minVal_ (startingValue),
558  maxVal_ (keys.size () == 0 ?
559  startingValue :
560  static_cast<ValueType> (startingValue + keys.size () - 1)),
561  checkedForDuplicateKeys_ (false)
562 {
563  typedef typename keys_type::non_const_type nonconst_keys_type;
564 
565  // mfh 01 May 2015: I don't trust that
566  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
567  // so I ensure this manually.
568  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
569  keys.size ());
570  using Kokkos::ViewAllocateWithoutInitializing;
571  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
572  keys_k.extent (0));
573  // DEEP_COPY REVIEW - HOST-TO_DEVICE
574  Kokkos::deep_copy (execution_space(), keys_d, keys_k);
575 
576  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
577  // min() for a floating-point type returns the minimum _positive_
578  // normalized value. This is different than for integer types.
579  // lowest() is new in C++11 and returns the least value, always
580  // negative for signed finite types.
581  //
582  // mfh 23 May 2015: I have heard reports that
583  // std::numeric_limits<int>::lowest() does not exist with the Intel
584  // compiler. I'm not sure if the users in question actually enabled
585  // C++11. However, it's easy enough to work around this issue. The
586  // standard floating-point types are signed and have a sign bit, so
587  // lowest() is just -max(). For integer types, we can use min()
588  // instead.
589  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
590  ::Kokkos::ArithTraits<KeyType>::min () :
591  -::Kokkos::ArithTraits<KeyType>::max ();
592  this->init (keys_d, startingValue, initMinKey, initMaxKey,
593  initMinKey, initMinKey, false);
594 
595 }
596 
597 template<class KeyType, class ValueType, class DeviceType>
600  const KeyType firstContigKey,
601  const KeyType lastContigKey,
602  const ValueType startingValue) :
603  minVal_ (startingValue),
604  maxVal_ (keys.size () == 0 ?
605  startingValue :
606  static_cast<ValueType> (startingValue + keys.size () - 1)),
607  firstContigKey_ (firstContigKey),
608  lastContigKey_ (lastContigKey),
609  checkedForDuplicateKeys_ (false)
610 {
611  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
612  // min() for a floating-point type returns the minimum _positive_
613  // normalized value. This is different than for integer types.
614  // lowest() is new in C++11 and returns the least value, always
615  // negative for signed finite types.
616  //
617  // mfh 23 May 2015: I have heard reports that
618  // std::numeric_limits<int>::lowest() does not exist with the Intel
619  // compiler. I'm not sure if the users in question actually enabled
620  // C++11. However, it's easy enough to work around this issue. The
621  // standard floating-point types are signed and have a sign bit, so
622  // lowest() is just -max(). For integer types, we can use min()
623  // instead.
624  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
625  ::Kokkos::ArithTraits<KeyType>::min () :
626  -::Kokkos::ArithTraits<KeyType>::max ();
627  this->init (keys, startingValue, initMinKey, initMaxKey,
628  firstContigKey, lastContigKey, true);
629 }
630 
631 template<class KeyType, class ValueType, class DeviceType>
633 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
634  const KeyType firstContigKey,
635  const KeyType lastContigKey,
636  const ValueType startingValue) :
637  minVal_ (startingValue),
638  maxVal_ (keys.size () == 0 ?
639  startingValue :
640  static_cast<ValueType> (startingValue + keys.size () - 1)),
641  firstContigKey_ (firstContigKey),
642  lastContigKey_ (lastContigKey),
643  checkedForDuplicateKeys_ (false)
644 {
645  typedef typename keys_type::non_const_type nonconst_keys_type;
646 
647  // mfh 01 May 2015: I don't trust that
648  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
649  // so I ensure this manually.
650  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
651  keys.size ());
652  using Kokkos::ViewAllocateWithoutInitializing;
653  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
654  keys_k.extent (0));
655  // DEEP_COPY REVIEW - NOT TESTED
656  Kokkos::deep_copy (keys_d, keys_k);
657 
658  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
659  // min() for a floating-point type returns the minimum _positive_
660  // normalized value. This is different than for integer types.
661  // lowest() is new in C++11 and returns the least value, always
662  // negative for signed finite types.
663  //
664  // mfh 23 May 2015: I have heard reports that
665  // std::numeric_limits<int>::lowest() does not exist with the Intel
666  // compiler. I'm not sure if the users in question actually enabled
667  // C++11. However, it's easy enough to work around this issue. The
668  // standard floating-point types are signed and have a sign bit, so
669  // lowest() is just -max(). For integer types, we can use min()
670  // instead.
671  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
672  ::Kokkos::ArithTraits<KeyType>::min () :
673  -::Kokkos::ArithTraits<KeyType>::max ();
674  this->init (keys_d, startingValue, initMinKey, initMaxKey,
675  firstContigKey, lastContigKey, true);
676 }
677 
678 template<class KeyType, class ValueType, class DeviceType>
681  const ValueType startingValue) :
682  minVal_ (startingValue),
683  maxVal_ (keys.size () == 0 ?
684  startingValue :
685  static_cast<ValueType> (startingValue + keys.size () - 1)),
686  checkedForDuplicateKeys_ (false)
687 {
688  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
689  // min() for a floating-point type returns the minimum _positive_
690  // normalized value. This is different than for integer types.
691  // lowest() is new in C++11 and returns the least value, always
692  // negative for signed finite types.
693  //
694  // mfh 23 May 2015: I have heard reports that
695  // std::numeric_limits<int>::lowest() does not exist with the Intel
696  // compiler. I'm not sure if the users in question actually enabled
697  // C++11. However, it's easy enough to work around this issue. The
698  // standard floating-point types are signed and have a sign bit, so
699  // lowest() is just -max(). For integer types, we can use min()
700  // instead.
701  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
702  ::Kokkos::ArithTraits<KeyType>::min () :
703  -::Kokkos::ArithTraits<KeyType>::max ();
704  this->init (keys, startingValue, initMinKey, initMaxKey,
705  initMinKey, initMinKey, false);
706 }
707 
708 template<class KeyType, class ValueType, class DeviceType>
710 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
711  const Teuchos::ArrayView<const ValueType>& vals) :
712  contiguousValues_ (false),
713  checkedForDuplicateKeys_ (false)
714 {
715  // mfh 01 May 2015: I don't trust that
716  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
717  // so I ensure this manually.
718  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
719  keys.size ());
720  host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
721  vals.size ());
722  const KeyType initMinKey = ::Kokkos::ArithTraits<KeyType>::max ();
723  // min() for a floating-point type returns the minimum _positive_
724  // normalized value. This is different than for integer types.
725  // lowest() is new in C++11 and returns the least value, always
726  // negative for signed finite types.
727  //
728  // mfh 23 May 2015: I have heard reports that
729  // std::numeric_limits<int>::lowest() does not exist with the Intel
730  // compiler. I'm not sure if the users in question actually enabled
731  // C++11. However, it's easy enough to work around this issue. The
732  // standard floating-point types are signed and have a sign bit, so
733  // lowest() is just -max(). For integer types, we can use min()
734  // instead.
735  const KeyType initMaxKey = ::Kokkos::ArithTraits<KeyType>::is_integer ?
736  ::Kokkos::ArithTraits<KeyType>::min () :
737  -::Kokkos::ArithTraits<KeyType>::max ();
738  this->init (keys_k, vals_k, initMinKey, initMaxKey);
739 }
740 
741 template<class KeyType, class ValueType, class DeviceType>
742 void
744 init (const keys_type& keys,
745  ValueType startingValue,
746  KeyType initMinKey,
747  KeyType initMaxKey,
748  KeyType firstContigKey,
749  KeyType lastContigKey,
750  const bool computeInitContigKeys)
751 {
752  using Kokkos::subview;
753  using Kokkos::ViewAllocateWithoutInitializing;
754  using Teuchos::TypeNameTraits;
755  typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
756  Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(7-arg)");
757  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
758 
759  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
760  {
761  const offset_type theMaxVal = ::Kokkos::ArithTraits<offset_type>::max ();
762  const size_type maxValST = static_cast<size_type> (theMaxVal);
763  TEUCHOS_TEST_FOR_EXCEPTION
764  (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
765  "number of keys " << keys.extent (0) << " does not fit in "
766  "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
767  "max value is " << theMaxVal << ". This means that it is not possible to "
768  "use this constructor.");
769  }
770  TEUCHOS_TEST_FOR_EXCEPTION
771  (static_cast<unsigned long long> (numKeys) >
772  static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ()),
773  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
774  "keys " << numKeys << " is greater than the maximum representable "
775  "ValueType value " << ::Kokkos::ArithTraits<ValueType>::max () << ". "
776  "This means that it is not possible to use this constructor.");
777  TEUCHOS_TEST_FOR_EXCEPTION
778  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
779  "This class currently only works when the number of keys is <= INT_MAX = "
780  << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
781  "developers.");
782 
783  const bool buildInParallel =
784  FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
785  const bool debug = ::Tpetra::Details::Behavior::debug ();
786 
787  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
788  // could change that by setting up ptr and val as Kokkos::DualView
789  // instances. If we do that, since we are filling on host for now,
790  // we want to make sure that we only zero-fill ptr on host
791  // initially, and that we don't fill val at all. Once we finish
792  // Kokkos-izing all the set-up kernels, we won't need DualView for
793  // either ptr or val.
794 
795  if (computeInitContigKeys) {
796  // Find the first and last initial contiguous keys. If we find a
797  // long sequence of initial contiguous keys, we can save space by
798  // not storing them explicitly as pairs in the hash table.
799  //
800  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
801  // ("min index such that the difference between the current key and
802  // the next != 1"), which takes multiple passes over the data. We
803  // could fuse it with CountBuckets (only update counts on 'final'
804  // pass). However, we're really just moving this sequential search
805  // out of Map's constructor here, so there is no loss in doing it
806  // sequentially for now. Later, we can work on parallelization.
807  if (numKeys > 0) {
808  // FIXME: make it a parallel kernel with no host copy
809  auto keys_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),
810  keys);
811  firstContigKey_ = keys_h[0];
812  // Start with one plus, then decrement at the end. That lets us do
813  // only one addition per loop iteration, rather than two (if we test
814  // against lastContigKey + 1 and then increment lastContigKey).
815  lastContigKey_ = firstContigKey_ + 1;
816 
817  // We will only store keys in the table that are not part of the
818  // initial contiguous sequence. It's possible for the initial
819  // contiguous sequence to be trivial, which for a nonzero number of
820  // keys means that the "sequence" has length 1.
821  for (offset_type k = 1; k < numKeys; ++k) {
822  if (lastContigKey_ != keys_h[k]) {
823  break;
824  }
825  ++lastContigKey_;
826  }
827  --lastContigKey_;
828  }
829  }
830  else {
831  firstContigKey_ = firstContigKey;
832  lastContigKey_ = lastContigKey;
833  }
834 
835  offset_type startIndex;
836  if (numKeys > 0) {
837  initMinKey = std::min (initMinKey, firstContigKey_);
838  initMaxKey = std::max (initMaxKey, lastContigKey_);
839  startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
840  } else {
841  startIndex = 0;
842  }
843 
844  const offset_type theNumKeys = numKeys - startIndex;
845  const offset_type size = hash_type::getRecommendedSize (theNumKeys);
846 #ifdef HAVE_TPETRA_DEBUG
847  TEUCHOS_TEST_FOR_EXCEPTION(
848  size == 0 && numKeys != 0, std::logic_error,
849  "Tpetra::Details::FixedHashTable constructor: "
850  "getRecommendedSize(" << numKeys << ") returned zero, "
851  "even though the number of keys " << numKeys << " is nonzero. "
852  "Please report this bug to the Tpetra developers.");
853 #endif // HAVE_TPETRA_DEBUG
854  keys_type theKeys =
855  subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
856 
857  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
858  // fence here, but we do before other allocations.
859 
860  // The array of counts must be separate from the array of offsets,
861  // in order for parallel_scan to work correctly.
862  typedef typename ptr_type::non_const_type counts_type;
863  counts_type counts ("Tpetra::FixedHashTable::counts", size);
864 
865  //
866  // Count the number of "buckets" per offsets array (ptr) entry.
867  //
868 
869  // Will only create the mirror for buildInParallel false - but then use it in two places
870  typename keys_type::HostMirror theKeysHost;
871 
872  // The Kokkos kernel uses atomic update instructions to count the
873  // number of "buckets" per offsets array (ptr) entry. Atomic
874  // updates incur overhead, even in the sequential case. The Kokkos
875  // kernel is still correct in that case, but I would rather not
876  // incur overhead then.
877  if (buildInParallel) {
878  FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
879  using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
880  const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
881  if (debug) {
882  using key_type = typename keys_type::non_const_value_type;
883  Kokkos::pair<int, key_type> err;
884  Kokkos::parallel_reduce (kernelLabel, range_type (0, theNumKeys),
885  functor, err);
886  TEUCHOS_TEST_FOR_EXCEPTION
887  (err.first != 0, std::logic_error, "Tpetra::Details::FixedHashTable "
888  "constructor: CountBuckets found a key " << err.second << " that "
889  "results in an out-of-bounds hash value.");
890  }
891  else {
892  Kokkos::parallel_for (kernelLabel, range_type (0, theNumKeys), functor);
893  }
894  }
895  else {
896  Kokkos::HostSpace hostMemSpace;
897  theKeysHost = Kokkos::create_mirror_view(theKeys);
898  // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
899  Kokkos::deep_copy(execution_space(), theKeysHost, theKeys);
900  auto countsHost = Kokkos::create_mirror_view (hostMemSpace, counts);
901 
902  for (offset_type k = 0; k < theNumKeys; ++k) {
903  using key_type = typename keys_type::non_const_value_type;
904  const key_type key = theKeysHost[k];
905 
906  using hash_value_type = typename hash_type::result_type;
907  const hash_value_type hashVal = hash_type::hashFunc (key, size);
908  TEUCHOS_TEST_FOR_EXCEPTION
909  (hashVal < hash_value_type (0) ||
910  hashVal >= hash_value_type (countsHost.extent (0)),
911  std::logic_error, "Tpetra::Details::FixedHashTable "
912  "constructor: Sequential CountBuckets found a key " << key
913  << " that results in an out-of-bounds hash value.");
914 
915  ++countsHost[hashVal];
916  }
917  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
918  Kokkos::deep_copy (execution_space(), counts, countsHost);
919  }
920 
921  // KJ: This fence is not required for the 2-argument deep_copy which calls
922  // fence, but will be required if switched to the 3-argumemt deep_copy which
923  // passes a space. The 3-argument form does not fence.
924  execution_space().fence ();
925 
926  // Kokkos::View fills with zeros by default.
927  typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size+1);
928 
929  // Compute row offsets via prefix sum:
930  //
931  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
932  //
933  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
934  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
935  // formula is ptr[i+1] += ptr[i].
936  //
937  // parallel_scan does not incur overhead with Kokkos::Serial, but
938  // with actual parallel execution spaces, it does require multiple
939  // passes over the data. Thus, it still makes sense to have a
940  // sequential fall-back.
941 
943  if (buildInParallel) {
944  computeOffsetsFromCounts (ptr, counts);
945  }
946 
947  if (! buildInParallel || debug) {
948  Kokkos::HostSpace hostMemSpace;
949  auto counts_h = Kokkos::create_mirror_view_and_copy (hostMemSpace, counts);
950  auto ptr_h = Kokkos::create_mirror_view (hostMemSpace, ptr);
951 
952 #ifdef KOKKOS_ENABLE_SERIAL
953  Kokkos::Serial hostExecSpace;
954 #else
955  Kokkos::DefaultHostExecutionSpace hostExecSpace;
956 #endif // KOKKOS_ENABLE_SERIAL
957 
958  computeOffsetsFromCounts (hostExecSpace, ptr_h, counts_h);
959  // DEEP_COPY REVIEW - HOSTMIRROR-TO-DEVICE
960  Kokkos::deep_copy (execution_space(), ptr, ptr_h);
961 
962  if (debug) {
963  bool bad = false;
964  for (offset_type i = 0; i < size; ++i) {
965  if (ptr_h[i+1] != ptr_h[i] + counts_h[i]) {
966  bad = true;
967  }
968  }
969  TEUCHOS_TEST_FOR_EXCEPTION
970  (bad, std::logic_error, "Tpetra::Details::FixedHashTable "
971  "constructor: computeOffsetsFromCounts gave an incorrect "
972  "result.");
973  }
974  }
975 
976  // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
977  // This fence is necessary as we need to make sure that the offset view
978  // completes before the view is used in the next functor.
979  execution_space().fence ();
980 
981  // Allocate the array of (key,value) pairs. Don't fill it with
982  // zeros, because we will fill it with actual data below.
983  typedef typename val_type::non_const_type nonconst_val_type;
984  nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
985  theNumKeys);
986 
987  // Fill in the hash table's "values" (the (key,value) pairs).
988  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
989  typename ptr_type::non_const_type> functor_type;
990  typename functor_type::value_type result (initMinKey, initMaxKey);
991 
992  const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
993  if (buildInParallel) {
994  functor_type functor (val, counts, ptr, theKeys, newStartingValue,
995  initMinKey, initMaxKey);
996  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
997  Kokkos::parallel_reduce ("Tpetra::Details::FixedHashTable::FillPairs", range_type (0, theNumKeys), functor, result);
998  }
999  else {
1000  Kokkos::HostSpace hostMemSpace;
1001  auto counts_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, counts);
1002  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1003  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1004  for (offset_type k = 0; k < theNumKeys; ++k) {
1005  typedef typename hash_type::result_type hash_value_type;
1006  const KeyType key = theKeysHost[k];
1007  if (key > result.maxKey_) {
1008  result.maxKey_ = key;
1009  }
1010  if (key < result.minKey_) {
1011  result.minKey_ = key;
1012  }
1013  const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1014  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1015 
1016  // Return the old count; decrement afterwards.
1017  const offset_type count = counts_h[hashVal];
1018  --counts_h[hashVal];
1019  if (count == 0) {
1020  result.success_ = false; // FAILURE!
1021  break;
1022  }
1023  else {
1024  const offset_type curPos = ptr_h[hashVal+1] - count;
1025  val_h[curPos].first = key;
1026  val_h[curPos].second = theVal;
1027  }
1028  }
1029  Kokkos::deep_copy(counts, counts_h); // restore
1030  Kokkos::deep_copy(val, val_h); // restore
1031  }
1032 
1033  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1034  // reports of exceptions being thrown in Albany.
1035  //
1036  // TEUCHOS_TEST_FOR_EXCEPTION
1037  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1038  // "init: Filling the hash table failed! Please report this bug to the "
1039  // "Tpetra developers.");
1040  (void) result; // prevent build warning (set but unused variable)
1041 
1042  // "Commit" the computed arrays and other computed quantities.
1043  ptr_ = ptr;
1044  val_ = val;
1045  minKey_ = result.minKey_;
1046  maxKey_ = result.maxKey_;
1047  // We've already set firstContigKey_ and lastContigKey_ above.
1048 }
1049 
1050 
1051 template<class KeyType, class ValueType, class DeviceType>
1052 void
1053 FixedHashTable<KeyType, ValueType, DeviceType>::
1054 init (const host_input_keys_type& keys,
1055  const host_input_vals_type& vals,
1056  KeyType initMinKey,
1057  KeyType initMaxKey)
1058 {
1059  Tpetra::Details::ProfilingRegion pr("Tpetra::Details::FixedHashTable::init(4-arg)");
1060  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1061  TEUCHOS_TEST_FOR_EXCEPTION
1062  (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::ArithTraits<ValueType>::max ()),
1063  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1064  "keys " << numKeys << " is greater than the maximum representable "
1065  "ValueType value " << ::Kokkos::ArithTraits<ValueType>::max () << ".");
1066  TEUCHOS_TEST_FOR_EXCEPTION
1067  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1068  "Details::FixedHashTable: This class currently only works when the number "
1069  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1070  ", please talk to the Tpetra developers.");
1071 
1072  // There's no need to find the first and last initial contiguous
1073  // keys in this case, because if we reach this init() function, then
1074  // hasContiguousValues() is false and so get() doesn't use the
1075  // initial contiguous sequence of keys.
1076 
1077  const offset_type size = hash_type::getRecommendedSize (numKeys);
1078 #ifdef HAVE_TPETRA_DEBUG
1079  TEUCHOS_TEST_FOR_EXCEPTION(
1080  size == 0 && numKeys != 0, std::logic_error,
1081  "Tpetra::Details::FixedHashTable constructor: "
1082  "getRecommendedSize(" << numKeys << ") returned zero, "
1083  "even though the number of keys " << numKeys << " is nonzero. "
1084  "Please report this bug to the Tpetra developers.");
1085 #endif // HAVE_TPETRA_DEBUG
1086 
1087  // FIXME: Investigate a couple options:
1088  // 1. Allocate ptr_h, val_h directly on host and only deep_copy to ptr_ and val_ once at the end
1089  // 2. Do all this work as a parallel kernel with the same execution/memory spaces as ptr_ and val_
1090  // An old comment from MFH noted ptr_h should be zero-initialized, while val_h should not be initialized.
1091  // It further noted that we shouldn't need a DualView type arrangement when all setup kernels have
1092  // been "Kokkos-ized".
1093  Kokkos::HostSpace hostMemSpace;
1094  typename ptr_type::non_const_type ptr ("Tpetra::FixedHashTable::ptr", size + 1);
1095  auto ptr_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, ptr);
1096 
1097  // Allocate the array of key,value pairs. Don't waste time filling
1098  // it with zeros, because we will fill it with actual data below.
1099  using Kokkos::ViewAllocateWithoutInitializing;
1100  typedef typename val_type::non_const_type nonconst_val_type;
1101  nonconst_val_type val (ViewAllocateWithoutInitializing ("Tpetra::FixedHashTable::pairs"),
1102  numKeys);
1103  auto val_h = Kokkos::create_mirror_view_and_copy(hostMemSpace, val);
1104 
1105  // Compute number of entries in each hash table position.
1106  for (offset_type k = 0; k < numKeys; ++k) {
1107  const typename hash_type::result_type hashVal =
1108  hash_type::hashFunc (keys[k], size);
1109  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1110  ++ptr_h[hashVal+1];
1111  }
1112 
1113  // Compute row offsets via prefix sum:
1114  //
1115  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1116  //
1117  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1118  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1119  // formula is ptr[i+1] += ptr[i].
1120  for (offset_type i = 0; i < size; ++i) {
1121  ptr_h[i+1] += ptr_h[i];
1122  }
1123  //ptr[0] = 0; // We've already done this when initializing ptr above.
1124 
1125  // curRowStart[i] is the offset of the next element in row i.
1126  typename ptr_type::non_const_type::HostMirror curRowStart ("Tpetra::FixedHashTable::curRowStart", size);
1127 
1128  // Fill in the hash table.
1129  FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1130  for (offset_type k = 0; k < numKeys; ++k) {
1131  typedef typename hash_type::result_type hash_value_type;
1132  const KeyType key = keys[k];
1133  if (key > result.maxKey_) {
1134  result.maxKey_ = key;
1135  }
1136  if (key < result.minKey_) {
1137  result.minKey_ = key;
1138  }
1139  const ValueType theVal = vals[k];
1140  if (theVal > maxVal_) {
1141  maxVal_ = theVal;
1142  }
1143  if (theVal < minVal_) {
1144  minVal_ = theVal;
1145  }
1146  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1147 
1148  const offset_type offset = curRowStart[hashVal];
1149  const offset_type curPos = ptr_h[hashVal] + offset;
1150  if (curPos >= ptr_h[hashVal+1]) {
1151  result.success_ = false; // FAILURE!
1152  }
1153  else {
1154  val_h[curPos].first = key;
1155  val_h[curPos].second = theVal;
1156  ++curRowStart[hashVal];
1157  }
1158  }
1159 
1160  TEUCHOS_TEST_FOR_EXCEPTION
1161  (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1162  "init: Filling the hash table failed! Please report this bug to the "
1163  "Tpetra developers.");
1164 
1165  // "Commit" the computed arrays and other computed quantities.
1166  Kokkos::deep_copy(ptr, ptr_h);
1167  Kokkos::deep_copy(val, val_h);
1168 
1169  ptr_ = ptr;
1170  val_ = val;
1171  minKey_ = result.minKey_;
1172  maxKey_ = result.maxKey_;
1173  // We've already assigned to minVal_ and maxVal_ above.
1174 }
1175 
1176 template <class KeyType, class ValueType, class DeviceType>
1177 bool
1180 {
1181  if (! checkedForDuplicateKeys_) {
1182  hasDuplicateKeys_ = checkForDuplicateKeys ();
1183  checkedForDuplicateKeys_ = true;
1184  }
1185  return hasDuplicateKeys_;
1186 }
1187 
1188 template <class KeyType, class ValueType, class DeviceType>
1189 bool
1191 checkForDuplicateKeys () const
1192 {
1193  const offset_type size = this->getSize ();
1194  // It's allowed for the hash table to have a positive number of
1195  // buckets (getSize()), but still store no entries (numPairs()).
1196  // Both cases trivially entail no duplicates.
1197  if (size == 0 || this->numPairs () == 0) {
1198  return false;
1199  }
1200  else {
1201  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1202  functor_type functor (val_, ptr_);
1203  int hasDupKeys = 0;
1204  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1205  Kokkos::parallel_reduce ("Tpetra::Details::FixedHashTable::CheckForDuplicateKeys", range_type (0, size), functor, hasDupKeys);
1206  return hasDupKeys > 0;
1207  }
1208 }
1209 
1210 template <class KeyType, class ValueType, class DeviceType>
1211 std::string
1213 description () const
1214 {
1215  std::ostringstream oss;
1216  oss << "FixedHashTable<"
1217  << Teuchos::TypeNameTraits<KeyType>::name () << ","
1218  << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1219  << "{ numKeys: " << val_.extent (0)
1220  << ", tableSize: " << this->getSize () << " }";
1221  return oss.str();
1222 }
1223 
1224 template <class KeyType, class ValueType, class DeviceType>
1225 void
1227 describe (Teuchos::FancyOStream& out,
1228  const Teuchos::EVerbosityLevel verbLevel) const
1229 {
1230  using std::endl;
1231  using std::setw;
1232  using Teuchos::OSTab;
1233  using Teuchos::rcpFromRef;
1234  using Teuchos::TypeNameTraits;
1235  using Teuchos::VERB_DEFAULT;
1236  using Teuchos::VERB_NONE;
1237  using Teuchos::VERB_LOW;
1238  using Teuchos::VERB_EXTREME;
1239 
1240  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1241  // access to ptr_ and val_ from the host.
1242 
1243  Teuchos::EVerbosityLevel vl = verbLevel;
1244  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1245 
1246  if (vl == VERB_NONE) {
1247  // do nothing
1248  }
1249  else if (vl == VERB_LOW) {
1250  out << this->description() << endl;
1251  }
1252  else { // MEDIUM, HIGH or EXTREME
1253  out << "FixedHashTable:" << endl;
1254  {
1255  OSTab tab1 (rcpFromRef (out));
1256 
1257  // const std::string label = this->getObjectLabel ();
1258  // if (label != "") {
1259  // out << "label: " << label << endl;
1260  // }
1261  out << "Template parameters:" << endl;
1262  {
1263  OSTab tab2 (rcpFromRef (out));
1264  out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1265  << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1266  }
1267 
1268  const offset_type tableSize = this->getSize ();
1269  const offset_type numKeys = val_.extent (0);
1270 
1271  out << "Table parameters:" << endl;
1272  {
1273  OSTab tab2 (rcpFromRef (out));
1274  out << "numKeys: " << numKeys << endl
1275  << "tableSize: " << tableSize << endl;
1276  }
1277 
1278  if (vl >= VERB_EXTREME) {
1279  out << "Contents: ";
1280  if (tableSize == 0 || numKeys == 0) {
1281  out << "[]" << endl;
1282  } else {
1283  out << "[ " << endl;
1284  {
1285  OSTab tab2 (rcpFromRef (out));
1286  for (offset_type i = 0; i < tableSize; ++i) {
1287  OSTab tab3 (rcpFromRef (out));
1288  out << "[";
1289  for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1290  out << "(" << val_[k].first << "," << val_[k].second << ")";
1291  if (k + 1 < ptr_[i+1]) {
1292  out << ", ";
1293  }
1294  }
1295  out << "]" << endl;
1296  } // for each table position i
1297  }
1298  out << "]" << endl;
1299  } // The table contains entries
1300  } // vl >= VERB_EXTREME
1301  }
1302  out << endl;
1303  } // if vl > VERB_LOW
1304 }
1305 
1306 } // namespace Details
1307 } // namespace Tpetra
1308 
1309 // Macro that explicitly instantiates FixedHashTable for the given local
1310 // ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1311 // template parameters occur in the opposite order of most Tpetra
1312 // classes. This is because FixedHashTable performs global-to-local
1313 // lookup, and the convention in templated C++ lookup tables (such as
1314 // std::map) is <KeyType, ValueType>.
1315 //
1316 // This macro must be explanded within the Tpetra::Details namespace.
1317 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1318  template class FixedHashTable< GO , LO >;
1319 
1320 // Macro that explicitly instantiates FixedHashTable for the given
1321 // local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1322 // types. Note that FixedHashTable's first two template parameters
1323 // occur in the opposite order of most Tpetra classes. This is
1324 // because FixedHashTable performs global-to-local lookup, and the
1325 // convention in templated C++ lookup tables (such as std::map) is
1326 // <KeyType, ValueType>.
1327 //
1328 // This macro must be explanded within the Tpetra::Details namespace.
1329 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1330  template class FixedHashTable< GO , LO , DEVICE >;
1331 
1332 #endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_DEFAULTED_FUNCTION FixedHashTable()=default
Default constructor; makes an empty table.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &, const offset_type &)
The hash function.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting &quot;buckets&quot; in the FixedHashTable.
static bool debug()
Whether Tpetra is in debug mode.
bool success_
Whether fill succeeded (it can only fail on a bug)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
The hash function for FixedHashTable.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
std::string description() const
Implementation of Teuchos::Describable interface.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
KOKKOS_INLINE_FUNCTION void join(value_type &dst, const value_type &src) const
Combine two intermediate reduction results.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Reduction result for FillPairs functor below.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.