Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Tpetra_Details_FixedHashTable_def.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46 
47 #include "Tpetra_Details_FixedHashTable_decl.hpp"
49 #ifdef TPETRA_USE_MURMUR_HASH
50 # include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
51 #endif // TPETRA_USE_MURMUR_HASH
52 #include "Kokkos_ArithTraits.hpp"
53 #include "Teuchos_TypeNameTraits.hpp"
55 #include <type_traits>
56 
57 namespace Tpetra {
58 namespace Details {
59 //
60 // This namespace stores utility functions and Kokkos
61 // functors for use in FixedHashTable construction.
62 //
63 namespace FHT {
64 
65 
66 // Is it worth actually using building the FixedHashTable using
67 // parallel threads, instead of just counting in a sequential loop?
68 //
69 // The parallel version of FixedHashTable construction isn't just a
70 // parallelization of the sequential loops. It incurs additional
71 // overheads. For example, the CountBuckets kernel uses atomic update
72 // instructions to count the number of "buckets" per offsets array
73 // (ptr) entry. Atomic updates have overhead, even if only one thread
74 // issues them. The Kokkos kernels are still correct in that case,
75 // but I would rather not incur overhead then. It might make sense to
76 // set the minimum number of threads to something greater than 1, but
77 // we would need experiments to find out.
78 template<class ExecSpace>
79 bool worthBuildingFixedHashTableInParallel () {
80  return ExecSpace::concurrency() > 1;
81 }
82 
83 //
84 // Functors for FixedHashTable initialization
85 //
86 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
87 // should consider replacing all of these functors with in-line
88 // lambdas. The only issue is that we would need to bake the
89 // execution space into the policy, since the default execution space
90 // might differ from the one Tpetra wants to use.
91 
101 template<class CountsViewType,
102  class KeysViewType,
103  class SizeType = typename KeysViewType::size_type>
105 public:
106  typedef CountsViewType counts_view_type;
107  typedef KeysViewType keys_view_type;
108  typedef typename CountsViewType::execution_space execution_space;
109  typedef typename CountsViewType::memory_space memory_space;
110  typedef SizeType size_type;
111  typedef typename keys_view_type::non_const_value_type key_type;
112  // mfh 21 May 2015: Having a device_type typedef in the functor
113  // along with an execution_space typedef causes compilation issues.
114  // This is because one of Kokkos' partial specializations picks up
115  // on the device_type typedef, and another picks up on the
116  // execution_space typedef. The former is a legacy of a previous
117  // design iteration of Kokkos, which did not separate memory and
118  // execution spaces.
120 
126  CountBuckets (const counts_view_type& counts,
127  const keys_view_type& keys,
128  const size_type size) :
129  counts_ (counts),
130  keys_ (keys),
131  size_ (size)
132  {}
133 
138  KOKKOS_INLINE_FUNCTION void
139  operator () (const size_type& i) const
140  {
141  typedef typename hash_type::result_type hash_value_type;
142 
143  const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
144  Kokkos::atomic_increment (&counts_[hashVal]);
145  }
146 
147  using value_type = Kokkos::pair<int, key_type>;
148 
152  KOKKOS_INLINE_FUNCTION void
153  operator () (const size_type& i, value_type& dst) const
154  {
155  using hash_value_type = typename hash_type::result_type;
156 
157  const key_type keyVal = keys_[i];
158  const hash_value_type hashVal = hash_type::hashFunc (keyVal, size_);
159  if (hashVal < hash_value_type (0) ||
160  hashVal >= hash_value_type (counts_.extent (0))) {
161  dst.first = 1;
162  dst.second = keyVal;
163  }
164  else {
165  Kokkos::atomic_increment (&counts_[hashVal]);
166  }
167  }
168 
170  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
171  {
172  dst.first = 0;
173  dst.second = key_type (0);
174  }
175 
176  KOKKOS_INLINE_FUNCTION void
177  join (volatile value_type& dst,
178  const volatile value_type& src) const
179  {
180  const bool good = dst.first == 0 || src.first == 0;
181  dst.first = good ? 0 : dst.first;
182  // leave dst.second as it is, to get the "first" key
183  }
184 
185 private:
187  counts_view_type counts_;
189  keys_view_type keys_;
191  size_type size_;
192 };
193 
204 template<class KeyType>
206  KOKKOS_INLINE_FUNCTION
207  FillPairsResult () :
208  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
209  // min() for a floating-point type returns the minimum _positive_
210  // normalized value. This is different than for integer types.
211  // lowest() is new in C++11 and returns the least value, always
212  // negative for signed finite types.
213  //
214  // mfh 23 May 2015: I have heard reports that
215  // std::numeric_limits<int>::lowest() does not exist with the
216  // Intel compiler. I'm not sure if the users in question actually
217  // enabled C++11. However, it's easy enough to work around this
218  // issue. The standard floating-point types are signed and have a
219  // sign bit, so lowest() is just -max(). For integer types, we
220  // can use min() instead.
221  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
222  ::Kokkos::Details::ArithTraits<KeyType>::min () :
223  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
224  success_ (true)
225  {
226  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
227  "KeyType must be some kind of number type.");
228  }
229 
230  KOKKOS_INLINE_FUNCTION
231  FillPairsResult (const KeyType& initMinKey,
232  const KeyType& initMaxKey) :
233  minKey_ (initMinKey),
234  maxKey_ (initMaxKey),
235  success_ (true)
236  {
237  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
238  "KeyType must be some kind of number type.");
239  }
240 
241  KeyType minKey_;
242  KeyType maxKey_;
243  bool success_;
244 };
245 
275 template<class PairsViewType,
276  class KeysViewType,
277  class CountsViewType,
278  class SizeType = typename KeysViewType::size_type>
279 class FillPairs {
280 public:
281  typedef typename CountsViewType::non_const_type counts_view_type;
282  typedef typename counts_view_type::const_type offsets_view_type;
283 
284  typedef PairsViewType pairs_view_type;
285  typedef typename KeysViewType::const_type keys_view_type;
286  typedef typename offsets_view_type::execution_space execution_space;
287  typedef typename offsets_view_type::memory_space memory_space;
288  typedef SizeType size_type;
289 
290  typedef typename keys_view_type::non_const_value_type key_type;
291  typedef typename pairs_view_type::non_const_value_type pair_type;
292 
294 
295  // mfh 23 May 2015: Having a device_type typedef in the functor
296  // along with an execution_space typedef causes compilation issues.
297  // This is because one of Kokkos' partial specializations picks up
298  // on the device_type typedef, and another picks up on the
299  // execution_space typedef. The former is a legacy of a previous
300  // design iteration of Kokkos, which did not separate memory and
301  // execution spaces.
303 
314  FillPairs (const pairs_view_type& pairs,
315  const counts_view_type& counts,
316  const offsets_view_type& ptr,
317  const keys_view_type& keys,
318  const typename pair_type::second_type startingValue) :
319  pairs_ (pairs),
320  counts_ (counts),
321  ptr_ (ptr),
322  keys_ (keys),
323  size_ (counts.extent (0)),
324  startingValue_ (startingValue),
325  initMinKey_ (::Kokkos::Details::ArithTraits<key_type>::max ()),
326  initMaxKey_ (::Kokkos::Details::ArithTraits<key_type>::is_integer ?
327  ::Kokkos::Details::ArithTraits<key_type>::min () :
328  -::Kokkos::Details::ArithTraits<key_type>::max ())
329  {}
330 
349  FillPairs (const pairs_view_type& pairs,
350  const counts_view_type& counts,
351  const offsets_view_type& ptr,
352  const keys_view_type& keys,
353  const typename pair_type::second_type startingValue,
354  const key_type initMinKey,
355  const key_type initMaxKey) :
356  pairs_ (pairs),
357  counts_ (counts),
358  ptr_ (ptr),
359  keys_ (keys),
360  size_ (counts.extent (0)),
361  startingValue_ (startingValue),
362  initMinKey_ (initMinKey),
363  initMaxKey_ (initMaxKey)
364  {}
365 
367  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
368  {
369  dst.minKey_ = initMinKey_;
370  dst.maxKey_ = initMaxKey_;
371  dst.success_ = true;
372  }
373 
374  KOKKOS_INLINE_FUNCTION void
375  join (volatile value_type& dst,
376  const volatile value_type& src) const
377  {
378  if (src.maxKey_ > dst.maxKey_) {
379  dst.maxKey_ = src.maxKey_;
380  }
381  if (src.minKey_ < dst.minKey_) {
382  dst.minKey_ = src.minKey_;
383  }
384  dst.success_ = dst.success_ && src.success_;
385  }
386 
391  KOKKOS_INLINE_FUNCTION void
392  operator () (const size_type& i, value_type& dst) const
393  {
394  typedef typename hash_type::result_type hash_value_type;
395  typedef typename offsets_view_type::non_const_value_type offset_type;
396  typedef typename pair_type::second_type val_type;
397  typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
398 
399  const key_type key = keys_[i];
400  if (key > dst.maxKey_) {
401  dst.maxKey_ = key;
402  }
403  if (key < dst.minKey_) {
404  dst.minKey_ = key;
405  }
406  const val_type theVal = startingValue_ + static_cast<val_type> (i);
407  const hash_value_type hashVal = hash_type::hashFunc (key, size_);
408 
409  // Return the old count; decrement afterwards.
410  const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
411  if (count == 0) {
412  dst.success_ = false; // FAILURE!
413  }
414  else {
415  const offset_type curPos = ptr_[hashVal+1] - count;
416 
417  pairs_[curPos].first = key;
418  pairs_[curPos].second = theVal;
419  }
420  }
421 
422 private:
423  pairs_view_type pairs_;
424  counts_view_type counts_;
425  offsets_view_type ptr_;
426  keys_view_type keys_;
427  size_type size_;
428  typename pair_type::second_type startingValue_;
430  key_type initMinKey_;
432  key_type initMaxKey_;
433 };
434 
458 template<class OffsetsViewType,
459  class PairsViewType,
460  class SizeType = typename OffsetsViewType::size_type>
462 public:
463  typedef typename OffsetsViewType::const_type offsets_view_type;
464  typedef typename PairsViewType::const_type pairs_view_type;
465  typedef typename offsets_view_type::execution_space execution_space;
466  typedef typename offsets_view_type::memory_space memory_space;
467  typedef SizeType size_type;
468 
469  // The result of the check is whether the table has one or more duplicates.
470  typedef int value_type;
471 
476  CheckForDuplicateKeys (const pairs_view_type& pairs,
477  const offsets_view_type& ptr) :
478  pairs_ (pairs),
479  ptr_ (ptr),
480  size_ (ptr_.extent (0) == 0 ?
481  size_type (0) :
482  ptr_.extent (0) - 1)
483  {}
484 
486  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
487  {
488  dst = 0;
489  }
490 
492  KOKKOS_INLINE_FUNCTION void
493  join (volatile value_type& dst,
494  const volatile value_type& src) const
495  {
496  dst = dst + src > 0?1:0;
497  }
498 
500  KOKKOS_INLINE_FUNCTION void
501  operator () (const size_type& i, value_type& dst) const
502  {
503  typedef typename offsets_view_type::non_const_value_type offset_type;
504  typedef typename pairs_view_type::non_const_value_type pair_type;
505  typedef typename pair_type::first_type key_type;
506 
507  if (dst>0) {
508  return; // we've already found duplicate keys elsewhere
509  }
510  else {
511  const offset_type beg = ptr_[i];
512  const offset_type end = ptr_[i+1];
513  bool foundDuplicateKey = false;
514  // This is an ~ n^2 algorithm in the worst case, where n is the
515  // max number of keys that hash to the same bucket. However, if
516  // the hash function is reasonable, n should be much less than
517  // the total number of keys.
518  for (offset_type j = beg + 1; j < end; ++j) {
519  const key_type curKey = pairs_[j].first;
520  for (offset_type k = beg; k < j; ++k) {
521  if (pairs_[k].first == curKey) {
522  foundDuplicateKey = true;
523  break;
524  }
525  }
526  }
527  dst = (dst>0) || foundDuplicateKey?1:0;
528  }
529  }
530 
531 private:
532  pairs_view_type pairs_;
533  offsets_view_type ptr_;
534  size_type size_;
535 };
536 
537 } // namespace FHT
538 
539 //
540 // Here begins the actual implementation of FixedHashTable.
541 //
542 
543 template<class KeyType, class ValueType, class DeviceType>
544 bool
546 hasKeys () const {
547  // This also works if FixedHashTable has no entries. getKey()
548  // works in that case, but always returns the flag (invalid).
549  //
550  // FIXME (31 May 2015) This only works because vals_ contains no
551  // padding. If we ever pad within a "row" of vals_, we'll have to
552  // change this.
553  return keys_.extent (0) == val_.extent (0);
554 }
555 
556 template<class KeyType, class ValueType, class DeviceType>
557 void
559 check () const
560 {
561  // const char prefix[] = "Tpetra::Details::FixedHashTable: ";
562  // const char suffix[] = " Please report this bug to the Tpetra developers.";
563 }
564 
565 template<class KeyType, class ValueType, class DeviceType>
568  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
569  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
570  ::Kokkos::Details::ArithTraits<KeyType>::min () :
571  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
572  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
573  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
574  ::Kokkos::Details::ArithTraits<ValueType>::min () :
575  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
576  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
577  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
578  ::Kokkos::Details::ArithTraits<KeyType>::min () :
579  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
580  contiguousValues_ (true), // trivially
581  checkedForDuplicateKeys_ (true), // it's an empty table; no need to check
582  hasDuplicateKeys_ (false)
583 {
584 #ifdef HAVE_TPETRA_DEBUG
585  check ();
586 #endif // HAVE_TPETRA_DEBUG
587 }
588 
589 template<class KeyType, class ValueType, class DeviceType>
591 FixedHashTable (const keys_type& keys) :
592  keys_ (keys),
593  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
594  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
595  ::Kokkos::Details::ArithTraits<KeyType>::min () :
596  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
597  minVal_ (0),
598  maxVal_ (keys.size () == 0 ?
599  static_cast<ValueType> (0) :
600  static_cast<ValueType> (keys.size () - 1)),
601  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
602  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
603  ::Kokkos::Details::ArithTraits<KeyType>::min () :
604  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
605  contiguousValues_ (true),
606  checkedForDuplicateKeys_ (false),
607  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
608 {
609  const ValueType startingValue = static_cast<ValueType> (0);
610  const KeyType initMinKey = this->minKey_;
611  const KeyType initMaxKey = this->maxKey_;
612  this->init (keys, startingValue, initMinKey, initMaxKey,
613  initMinKey, initMinKey, false);
614 
615 #ifdef HAVE_TPETRA_DEBUG
616  check ();
617 #endif // HAVE_TPETRA_DEBUG
618 }
619 
620 template<class KeyType, class ValueType, class DeviceType>
622 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
623  const bool keepKeys) :
624  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
625  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
626  ::Kokkos::Details::ArithTraits<KeyType>::min () :
627  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
628  minVal_ (0),
629  maxVal_ (keys.size () == 0 ?
630  static_cast<ValueType> (0) :
631  static_cast<ValueType> (keys.size () - 1)),
632  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
633  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
634  ::Kokkos::Details::ArithTraits<KeyType>::min () :
635  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
636  contiguousValues_ (true),
637  checkedForDuplicateKeys_ (false),
638  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
639 {
640  typedef typename keys_type::non_const_type nonconst_keys_type;
641 
642  // mfh 01 May 2015: I don't trust that
643  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
644  // so I ensure this manually.
645  const ValueType startingValue = static_cast<ValueType> (0);
646  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
647  keys.size ());
648  using Kokkos::ViewAllocateWithoutInitializing;
649  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
650  keys_k.extent (0));
651  Kokkos::deep_copy (keys_d, keys_k);
652  const KeyType initMinKey = this->minKey_;
653  const KeyType initMaxKey = this->maxKey_;
654  this->init (keys_d, startingValue, initMinKey, initMaxKey,
655  initMinKey, initMinKey, false);
656  if (keepKeys) {
657  keys_ = keys_d;
658 #ifdef HAVE_TPETRA_DEBUG
659  typedef typename keys_type::size_type size_type;
660  TEUCHOS_TEST_FOR_EXCEPTION
661  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
662  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
663  "keepKeys is true, but on return, keys_.extent(0) = " <<
664  keys_.extent (0) << " != keys.size() = " << keys.size () <<
665  ". Please report this bug to the Tpetra developers.");
666 #endif // HAVE_TPETRA_DEBUG
667  }
668 
669 #ifdef HAVE_TPETRA_DEBUG
670  check ();
671 #endif // HAVE_TPETRA_DEBUG
672 }
673 
674 template<class KeyType, class ValueType, class DeviceType>
676 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
677  const ValueType startingValue,
678  const bool keepKeys) :
679  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
680  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
681  ::Kokkos::Details::ArithTraits<KeyType>::min () :
682  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
683  minVal_ (startingValue),
684  maxVal_ (keys.size () == 0 ?
685  startingValue :
686  static_cast<ValueType> (startingValue + keys.size () - 1)),
687  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
688  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
689  ::Kokkos::Details::ArithTraits<KeyType>::min () :
690  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
691  contiguousValues_ (true),
692  checkedForDuplicateKeys_ (false),
693  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
694 {
695  typedef typename keys_type::non_const_type nonconst_keys_type;
696 
697  // mfh 01 May 2015: I don't trust that
698  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
699  // so I ensure this manually.
700  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
701  keys.size ());
702  using Kokkos::ViewAllocateWithoutInitializing;
703  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
704  keys_k.extent (0));
705  Kokkos::deep_copy (keys_d, keys_k);
706 
707  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
708  // min() for a floating-point type returns the minimum _positive_
709  // normalized value. This is different than for integer types.
710  // lowest() is new in C++11 and returns the least value, always
711  // negative for signed finite types.
712  //
713  // mfh 23 May 2015: I have heard reports that
714  // std::numeric_limits<int>::lowest() does not exist with the Intel
715  // compiler. I'm not sure if the users in question actually enabled
716  // C++11. However, it's easy enough to work around this issue. The
717  // standard floating-point types are signed and have a sign bit, so
718  // lowest() is just -max(). For integer types, we can use min()
719  // instead.
720  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
721  ::Kokkos::Details::ArithTraits<KeyType>::min () :
722  -::Kokkos::Details::ArithTraits<KeyType>::max ();
723  this->init (keys_d, startingValue, initMinKey, initMaxKey,
724  initMinKey, initMinKey, false);
725  if (keepKeys) {
726  keys_ = keys_d;
727 #ifdef HAVE_TPETRA_DEBUG
728  typedef typename keys_type::size_type size_type;
729  TEUCHOS_TEST_FOR_EXCEPTION
730  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
731  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
732  "keepKeys is true, but on return, keys_.extent(0) = " <<
733  keys_.extent (0) << " != keys.size() = " << keys.size () <<
734  ". Please report this bug to the Tpetra developers.");
735 #endif // HAVE_TPETRA_DEBUG
736  }
737 
738 #ifdef HAVE_TPETRA_DEBUG
739  check ();
740 #endif // HAVE_TPETRA_DEBUG
741 }
742 
743 template<class KeyType, class ValueType, class DeviceType>
746  const KeyType firstContigKey,
747  const KeyType lastContigKey,
748  const ValueType startingValue,
749  const bool keepKeys) :
750  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
751  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
752  ::Kokkos::Details::ArithTraits<KeyType>::min () :
753  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
754  minVal_ (startingValue),
755  maxVal_ (keys.size () == 0 ?
756  startingValue :
757  static_cast<ValueType> (startingValue + keys.size () - 1)),
758  firstContigKey_ (firstContigKey),
759  lastContigKey_ (lastContigKey),
760  contiguousValues_ (true),
761  checkedForDuplicateKeys_ (false),
762  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
763 {
764  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
765  // min() for a floating-point type returns the minimum _positive_
766  // normalized value. This is different than for integer types.
767  // lowest() is new in C++11 and returns the least value, always
768  // negative for signed finite types.
769  //
770  // mfh 23 May 2015: I have heard reports that
771  // std::numeric_limits<int>::lowest() does not exist with the Intel
772  // compiler. I'm not sure if the users in question actually enabled
773  // C++11. However, it's easy enough to work around this issue. The
774  // standard floating-point types are signed and have a sign bit, so
775  // lowest() is just -max(). For integer types, we can use min()
776  // instead.
777  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
778  ::Kokkos::Details::ArithTraits<KeyType>::min () :
779  -::Kokkos::Details::ArithTraits<KeyType>::max ();
780  this->init (keys, startingValue, initMinKey, initMaxKey,
781  firstContigKey, lastContigKey, true);
782  if (keepKeys) {
783  keys_ = keys;
784 #ifdef HAVE_TPETRA_DEBUG
785  typedef typename keys_type::size_type size_type;
786  TEUCHOS_TEST_FOR_EXCEPTION
787  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
788  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
789  "keepKeys is true, but on return, keys_.extent(0) = " <<
790  keys_.extent (0) << " != keys.size() = " << keys.size () <<
791  ". Please report this bug to the Tpetra developers.");
792 #endif // HAVE_TPETRA_DEBUG
793  }
794 
795 #ifdef HAVE_TPETRA_DEBUG
796  check ();
797 #endif // HAVE_TPETRA_DEBUG
798 }
799 
800 template<class KeyType, class ValueType, class DeviceType>
802 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
803  const KeyType firstContigKey,
804  const KeyType lastContigKey,
805  const ValueType startingValue,
806  const bool keepKeys) :
807  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
808  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
809  ::Kokkos::Details::ArithTraits<KeyType>::min () :
810  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
811  minVal_ (startingValue),
812  maxVal_ (keys.size () == 0 ?
813  startingValue :
814  static_cast<ValueType> (startingValue + keys.size () - 1)),
815  firstContigKey_ (firstContigKey),
816  lastContigKey_ (lastContigKey),
817  contiguousValues_ (true),
818  checkedForDuplicateKeys_ (false),
819  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
820 {
821  typedef typename keys_type::non_const_type nonconst_keys_type;
822 
823  // mfh 01 May 2015: I don't trust that
824  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
825  // so I ensure this manually.
826  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
827  keys.size ());
828  using Kokkos::ViewAllocateWithoutInitializing;
829  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
830  keys_k.extent (0));
831  Kokkos::deep_copy (keys_d, keys_k);
832 
833  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
834  // min() for a floating-point type returns the minimum _positive_
835  // normalized value. This is different than for integer types.
836  // lowest() is new in C++11 and returns the least value, always
837  // negative for signed finite types.
838  //
839  // mfh 23 May 2015: I have heard reports that
840  // std::numeric_limits<int>::lowest() does not exist with the Intel
841  // compiler. I'm not sure if the users in question actually enabled
842  // C++11. However, it's easy enough to work around this issue. The
843  // standard floating-point types are signed and have a sign bit, so
844  // lowest() is just -max(). For integer types, we can use min()
845  // instead.
846  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
847  ::Kokkos::Details::ArithTraits<KeyType>::min () :
848  -::Kokkos::Details::ArithTraits<KeyType>::max ();
849  this->init (keys_d, startingValue, initMinKey, initMaxKey,
850  firstContigKey, lastContigKey, true);
851  if (keepKeys) {
852  keys_ = keys_d;
853 #ifdef HAVE_TPETRA_DEBUG
854  typedef typename keys_type::size_type size_type;
855  TEUCHOS_TEST_FOR_EXCEPTION
856  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
857  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
858  "keepKeys is true, but on return, keys_.extent(0) = " <<
859  keys_.extent (0) << " != keys.size() = " << keys.size () <<
860  ". Please report this bug to the Tpetra developers.");
861 #endif // HAVE_TPETRA_DEBUG
862  }
863 
864 #ifdef HAVE_TPETRA_DEBUG
865  check ();
866 #endif // HAVE_TPETRA_DEBUG
867 }
868 
869 template<class KeyType, class ValueType, class DeviceType>
872  const ValueType startingValue) :
873  keys_ (keys),
874  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
875  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
876  ::Kokkos::Details::ArithTraits<KeyType>::min () :
877  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
878  minVal_ (startingValue),
879  maxVal_ (keys.size () == 0 ?
880  startingValue :
881  static_cast<ValueType> (startingValue + keys.size () - 1)),
882  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
883  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
884  ::Kokkos::Details::ArithTraits<KeyType>::min () :
885  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
886  contiguousValues_ (true),
887  checkedForDuplicateKeys_ (false),
888  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
889 {
890  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
891  // min() for a floating-point type returns the minimum _positive_
892  // normalized value. This is different than for integer types.
893  // lowest() is new in C++11 and returns the least value, always
894  // negative for signed finite types.
895  //
896  // mfh 23 May 2015: I have heard reports that
897  // std::numeric_limits<int>::lowest() does not exist with the Intel
898  // compiler. I'm not sure if the users in question actually enabled
899  // C++11. However, it's easy enough to work around this issue. The
900  // standard floating-point types are signed and have a sign bit, so
901  // lowest() is just -max(). For integer types, we can use min()
902  // instead.
903  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
904  ::Kokkos::Details::ArithTraits<KeyType>::min () :
905  -::Kokkos::Details::ArithTraits<KeyType>::max ();
906  this->init (keys, startingValue, initMinKey, initMaxKey,
907  initMinKey, initMinKey, false);
908 
909 #ifdef HAVE_TPETRA_DEBUG
910  check ();
911 #endif // HAVE_TPETRA_DEBUG
912 }
913 
914 template<class KeyType, class ValueType, class DeviceType>
916 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
917  const Teuchos::ArrayView<const ValueType>& vals) :
918  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
919  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
920  ::Kokkos::Details::ArithTraits<KeyType>::min () :
921  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
922  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
923  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
924  ::Kokkos::Details::ArithTraits<ValueType>::min () :
925  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
926  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
927  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
928  ::Kokkos::Details::ArithTraits<KeyType>::min () :
929  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
930  contiguousValues_ (false),
931  checkedForDuplicateKeys_ (false),
932  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
933 {
934  // mfh 01 May 2015: I don't trust that
935  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
936  // so I ensure this manually.
937  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
938  keys.size ());
939  host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
940  vals.size ());
941  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
942  // min() for a floating-point type returns the minimum _positive_
943  // normalized value. This is different than for integer types.
944  // lowest() is new in C++11 and returns the least value, always
945  // negative for signed finite types.
946  //
947  // mfh 23 May 2015: I have heard reports that
948  // std::numeric_limits<int>::lowest() does not exist with the Intel
949  // compiler. I'm not sure if the users in question actually enabled
950  // C++11. However, it's easy enough to work around this issue. The
951  // standard floating-point types are signed and have a sign bit, so
952  // lowest() is just -max(). For integer types, we can use min()
953  // instead.
954  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
955  ::Kokkos::Details::ArithTraits<KeyType>::min () :
956  -::Kokkos::Details::ArithTraits<KeyType>::max ();
957  this->init (keys_k, vals_k, initMinKey, initMaxKey);
958 
959 #ifdef HAVE_TPETRA_DEBUG
960  check ();
961 #endif // HAVE_TPETRA_DEBUG
962 }
963 
964 template<class KeyType, class ValueType, class DeviceType>
965 void
967 init (const keys_type& keys,
968  ValueType startingValue,
969  KeyType initMinKey,
970  KeyType initMaxKey,
971  KeyType firstContigKey,
972  KeyType lastContigKey,
973  const bool computeInitContigKeys)
974 {
975  using Kokkos::subview;
976  using Kokkos::ViewAllocateWithoutInitializing;
977  using Teuchos::TypeNameTraits;
978  typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
979  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
980 
981  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
982  {
983  const offset_type theMaxVal = ::Kokkos::Details::ArithTraits<offset_type>::max ();
984  const size_type maxValST = static_cast<size_type> (theMaxVal);
985  TEUCHOS_TEST_FOR_EXCEPTION
986  (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
987  "number of keys " << keys.extent (0) << " does not fit in "
988  "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
989  "max value is " << theMaxVal << ". This means that it is not possible to "
990  "use this constructor.");
991  }
992  TEUCHOS_TEST_FOR_EXCEPTION
993  (static_cast<unsigned long long> (numKeys) >
994  static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
995  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
996  "keys " << numKeys << " is greater than the maximum representable "
997  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ". "
998  "This means that it is not possible to use this constructor.");
999  TEUCHOS_TEST_FOR_EXCEPTION
1000  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
1001  "This class currently only works when the number of keys is <= INT_MAX = "
1002  << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
1003  "developers.");
1004 
1005  const bool buildInParallel =
1006  FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
1007  const bool debug = ::Tpetra::Details::Behavior::debug ();
1008 
1009  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1010  // could change that by setting up ptr and val as Kokkos::DualView
1011  // instances. If we do that, since we are filling on host for now,
1012  // we want to make sure that we only zero-fill ptr on host
1013  // initially, and that we don't fill val at all. Once we finish
1014  // Kokkos-izing all the set-up kernels, we won't need DualView for
1015  // either ptr or val.
1016 
1017  if (computeInitContigKeys) {
1018  // Find the first and last initial contiguous keys. If we find a
1019  // long sequence of initial contiguous keys, we can save space by
1020  // not storing them explicitly as pairs in the hash table.
1021  //
1022  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
1023  // ("min index such that the difference between the current key and
1024  // the next != 1"), which takes multiple passes over the data. We
1025  // could fuse it with CountBuckets (only update counts on 'final'
1026  // pass). However, we're really just moving this sequential search
1027  // out of Map's constructor here, so there is no loss in doing it
1028  // sequentially for now. Later, we can work on parallelization.
1029  //
1030  // NOTE (mfh 01 Jun 2015, 28 Mar 2016) The code below assumes UVM.
1031  // It is rational to assume UVM here, because this is "sparse"
1032  // access -- we might not need to look at all the entries of keys,
1033  // so it doesn't necessarily make sense to copy the whole thing
1034  // back to host.
1035  if (numKeys > 0) {
1036  firstContigKey_ = keys[0];
1037  // Start with one plus, then decrement at the end. That lets us do
1038  // only one addition per loop iteration, rather than two (if we test
1039  // against lastContigKey + 1 and then increment lastContigKey).
1040  lastContigKey_ = firstContigKey_ + 1;
1041 
1042  // We will only store keys in the table that are not part of the
1043  // initial contiguous sequence. It's possible for the initial
1044  // contiguous sequence to be trivial, which for a nonzero number of
1045  // keys means that the "sequence" has length 1.
1046  for (offset_type k = 1; k < numKeys; ++k) {
1047  if (lastContigKey_ != keys[k]) {
1048  break;
1049  }
1050  ++lastContigKey_;
1051  }
1052  --lastContigKey_;
1053  }
1054  }
1055  else {
1056  firstContigKey_ = firstContigKey;
1057  lastContigKey_ = lastContigKey;
1058  }
1059 
1060  offset_type startIndex;
1061  if (numKeys > 0) {
1062  initMinKey = std::min (initMinKey, firstContigKey_);
1063  initMaxKey = std::max (initMaxKey, lastContigKey_);
1064  startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
1065  } else {
1066  startIndex = 0;
1067  }
1068 
1069  const offset_type theNumKeys = numKeys - startIndex;
1070  const offset_type size = hash_type::getRecommendedSize (theNumKeys);
1071 #ifdef HAVE_TPETRA_DEBUG
1072  TEUCHOS_TEST_FOR_EXCEPTION(
1073  size == 0 && numKeys != 0, std::logic_error,
1074  "Tpetra::Details::FixedHashTable constructor: "
1075  "getRecommendedSize(" << numKeys << ") returned zero, "
1076  "even though the number of keys " << numKeys << " is nonzero. "
1077  "Please report this bug to the Tpetra developers.");
1078 #endif // HAVE_TPETRA_DEBUG
1079  keys_type theKeys =
1080  subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
1081 
1082  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
1083  // fence here, but we do before other allocations.
1084 
1085  // The array of counts must be separate from the array of offsets,
1086  // in order for parallel_scan to work correctly.
1087  typedef typename ptr_type::non_const_type counts_type;
1088  counts_type counts ("FixedHashTable::counts", size);
1089 
1090  //
1091  // Count the number of "buckets" per offsets array (ptr) entry.
1092  //
1093 
1094  // The Kokkos kernel uses atomic update instructions to count the
1095  // number of "buckets" per offsets array (ptr) entry. Atomic
1096  // updates incur overhead, even in the sequential case. The Kokkos
1097  // kernel is still correct in that case, but I would rather not
1098  // incur overhead then.
1099  if (buildInParallel) {
1100  FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
1101  using execution_space = typename counts_type::execution_space;
1102  using range_type = Kokkos::RangePolicy<execution_space, offset_type>;
1103  const char kernelLabel[] = "Tpetra::Details::FixedHashTable CountBuckets";
1104  if (debug) {
1105  using key_type = typename keys_type::non_const_value_type;
1106  Kokkos::pair<int, key_type> err;
1107  Kokkos::parallel_reduce (kernelLabel, range_type (0, theNumKeys),
1108  functor, err);
1109  TEUCHOS_TEST_FOR_EXCEPTION
1110  (err.first != 0, std::logic_error, "Tpetra::Details::FixedHashTable "
1111  "constructor: CountBuckets found a key " << err.second << " that "
1112  "results in an out-of-bounds hash value.");
1113  }
1114  else {
1115  Kokkos::parallel_for (kernelLabel, range_type (0, theNumKeys), functor);
1116  }
1117  }
1118  else {
1119  // Access to counts is not necessarily contiguous, but is
1120  // irregular and likely touches all pages of the array. Thus, it
1121  // probably makes sense to use a host copy explicitly, rather than
1122  // assume UVM.
1123  auto countsHost = Kokkos::create_mirror_view (counts);
1124  // FIXME (mfh 28 Mar 2016) Does create_mirror_view zero-fill?
1125  Kokkos::deep_copy (countsHost, static_cast<offset_type> (0));
1126 
1127  for (offset_type k = 0; k < theNumKeys; ++k) {
1128  using key_type = typename keys_type::non_const_value_type;
1129  const key_type key = theKeys[k];
1130 
1131  using hash_value_type = typename hash_type::result_type;
1132  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1133  TEUCHOS_TEST_FOR_EXCEPTION
1134  (hashVal < hash_value_type (0) ||
1135  hashVal >= hash_value_type (countsHost.extent (0)),
1136  std::logic_error, "Tpetra::Details::FixedHashTable "
1137  "constructor: Sequential CountBuckets found a key " << key
1138  << " that results in an out-of-bounds hash value.");
1139 
1140  ++countsHost[hashVal];
1141  }
1142  Kokkos::deep_copy (counts, countsHost);
1143  }
1144 
1145  // KJ: This fence is not required for the 2-argument deep_copy which calls
1146  // fence, but will be required if switched to the 3-argumemt deep_copy which
1147  // passes a space. The 3-argument form does not fence.
1148  execution_space().fence ();
1149 
1150  // Kokkos::View fills with zeros by default.
1151  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size+1);
1152 
1153  // Compute row offsets via prefix sum:
1154  //
1155  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1156  //
1157  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1158  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1159  // formula is ptr[i+1] += ptr[i].
1160  //
1161  // parallel_scan does not incur overhead with Kokkos::Serial, but
1162  // with actual parallel execution spaces, it does require multiple
1163  // passes over the data. Thus, it still makes sense to have a
1164  // sequential fall-back.
1165 
1167  if (buildInParallel) {
1168  computeOffsetsFromCounts (ptr, counts);
1169  }
1170 
1171  if (! buildInParallel || debug) {
1172  Kokkos::HostSpace hostMemSpace;
1173  auto counts_h = Kokkos::create_mirror_view (hostMemSpace, counts);
1174  Kokkos::deep_copy (counts_h, counts);
1175  auto ptr_h = Kokkos::create_mirror_view (hostMemSpace, ptr);
1176 
1177 #ifdef KOKKOS_ENABLE_SERIAL
1178  Kokkos::Serial hostExecSpace;
1179 #else
1180  Kokkos::DefaultHostExecutionSpace hostExecSpace;
1181 #endif // KOKKOS_ENABLE_SERIAL
1182 
1183  computeOffsetsFromCounts (hostExecSpace, ptr_h, counts_h);
1184  Kokkos::deep_copy (ptr, ptr_h);
1185 
1186  if (debug) {
1187  bool bad = false;
1188  for (offset_type i = 0; i < size; ++i) {
1189  if (ptr_h[i+1] != ptr_h[i] + counts_h[i]) {
1190  bad = true;
1191  }
1192  }
1193  TEUCHOS_TEST_FOR_EXCEPTION
1194  (bad, std::logic_error, "Tpetra::Details::FixedHashTable "
1195  "constructor: computeOffsetsFromCounts gave an incorrect "
1196  "result.");
1197  }
1198  }
1199 
1200  // KJ: computeOffsetsFromCounts calls parallel_scan which does not fence.
1201  // This fence is necessary as we need to make sure that the offset view
1202  // completes before the view is used in the next functor.
1203  execution_space().fence ();
1204 
1205  // Allocate the array of (key,value) pairs. Don't fill it with
1206  // zeros, because we will fill it with actual data below.
1207  using Kokkos::ViewAllocateWithoutInitializing;
1208  typedef typename val_type::non_const_type nonconst_val_type;
1209  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1210  theNumKeys);
1211 
1212  // Fill in the hash table's "values" (the (key,value) pairs).
1213  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1214  typename ptr_type::non_const_type> functor_type;
1215  typename functor_type::value_type result (initMinKey, initMaxKey);
1216 
1217  const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1218  if (buildInParallel) {
1219  functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1220  initMinKey, initMaxKey);
1221  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1222  Kokkos::parallel_reduce (range_type (0, theNumKeys), functor, result);
1223  }
1224  else {
1225  for (offset_type k = 0; k < theNumKeys; ++k) {
1226  typedef typename hash_type::result_type hash_value_type;
1227  const KeyType key = theKeys[k];
1228  if (key > result.maxKey_) {
1229  result.maxKey_ = key;
1230  }
1231  if (key < result.minKey_) {
1232  result.minKey_ = key;
1233  }
1234  const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1235  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1236 
1237  // Return the old count; decrement afterwards.
1238  //
1239  // NOTE (mfh 28 Mar 2016) This assumes UVM. It might make more
1240  // sense to use a host mirror here, due to the access pattern.
1241  const offset_type count = counts[hashVal];
1242  --counts[hashVal];
1243  if (count == 0) {
1244  result.success_ = false; // FAILURE!
1245  break;
1246  }
1247  else {
1248  // NOTE (mfh 28 Mar 2016) This assumes UVM.
1249  const offset_type curPos = ptr[hashVal+1] - count;
1250 
1251  // NOTE (mfh 28 Mar 2016) This assumes UVM.
1252  val[curPos].first = key;
1253  val[curPos].second = theVal;
1254  }
1255  }
1256  }
1257 
1258  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1259  // reports of exceptions being thrown in Albany.
1260  //
1261  // TEUCHOS_TEST_FOR_EXCEPTION
1262  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1263  // "init: Filling the hash table failed! Please report this bug to the "
1264  // "Tpetra developers.");
1265  (void) result; // prevent build warning (set but unused variable)
1266 
1267  // "Commit" the computed arrays and other computed quantities.
1268  ptr_ = ptr;
1269  val_ = val;
1270  minKey_ = result.minKey_;
1271  maxKey_ = result.maxKey_;
1272  // We've already set firstContigKey_ and lastContigKey_ above.
1273 }
1274 
1275 
1276 template<class KeyType, class ValueType, class DeviceType>
1277 void
1278 FixedHashTable<KeyType, ValueType, DeviceType>::
1279 init (const host_input_keys_type& keys,
1280  const host_input_vals_type& vals,
1281  KeyType initMinKey,
1282  KeyType initMaxKey)
1283 {
1284  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1285  TEUCHOS_TEST_FOR_EXCEPTION
1286  (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1287  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1288  "keys " << numKeys << " is greater than the maximum representable "
1289  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ".");
1290  TEUCHOS_TEST_FOR_EXCEPTION
1291  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1292  "Details::FixedHashTable: This class currently only works when the number "
1293  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1294  ", please talk to the Tpetra developers.");
1295 
1296  // There's no need to find the first and last initial contiguous
1297  // keys in this case, because if we reach this init() function, then
1298  // hasContiguousValues() is false and so get() doesn't use the
1299  // initial contiguous sequence of keys.
1300 
1301  const offset_type size = hash_type::getRecommendedSize (numKeys);
1302 #ifdef HAVE_TPETRA_DEBUG
1303  TEUCHOS_TEST_FOR_EXCEPTION(
1304  size == 0 && numKeys != 0, std::logic_error,
1305  "Tpetra::Details::FixedHashTable constructor: "
1306  "getRecommendedSize(" << numKeys << ") returned zero, "
1307  "even though the number of keys " << numKeys << " is nonzero. "
1308  "Please report this bug to the Tpetra developers.");
1309 #endif // HAVE_TPETRA_DEBUG
1310 
1311  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1312  // could change that by setting up ptr and val as Kokkos::DualView
1313  // instances. If we do that, since we are filling on host for now,
1314  // we want to make sure that we only zero-fill ptr on host
1315  // initially, and that we don't fill val at all. Once we finish
1316  // Kokkos-izing all the set-up kernels, we won't need DualView for
1317  // either ptr or val.
1318 
1319  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size + 1);
1320 
1321  // Allocate the array of key,value pairs. Don't waste time filling
1322  // it with zeros, because we will fill it with actual data below.
1323  using Kokkos::ViewAllocateWithoutInitializing;
1324  typedef typename val_type::non_const_type nonconst_val_type;
1325  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1326  numKeys);
1327 
1328  // Compute number of entries in each hash table position.
1329  for (offset_type k = 0; k < numKeys; ++k) {
1330  const typename hash_type::result_type hashVal =
1331  hash_type::hashFunc (keys[k], size);
1332  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1333  ++ptr[hashVal+1];
1334  }
1335 
1336  // Compute row offsets via prefix sum:
1337  //
1338  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1339  //
1340  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1341  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1342  // formula is ptr[i+1] += ptr[i].
1343  for (offset_type i = 0; i < size; ++i) {
1344  ptr[i+1] += ptr[i];
1345  }
1346  //ptr[0] = 0; // We've already done this when initializing ptr above.
1347 
1348  // curRowStart[i] is the offset of the next element in row i.
1349  typename ptr_type::non_const_type curRowStart ("FixedHashTable::curRowStart", size);
1350 
1351  // Fill in the hash table.
1352  FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1353  for (offset_type k = 0; k < numKeys; ++k) {
1354  typedef typename hash_type::result_type hash_value_type;
1355  const KeyType key = keys[k];
1356  if (key > result.maxKey_) {
1357  result.maxKey_ = key;
1358  }
1359  if (key < result.minKey_) {
1360  result.minKey_ = key;
1361  }
1362  const ValueType theVal = vals[k];
1363  if (theVal > maxVal_) {
1364  maxVal_ = theVal;
1365  }
1366  if (theVal < minVal_) {
1367  minVal_ = theVal;
1368  }
1369  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1370 
1371  const offset_type offset = curRowStart[hashVal];
1372  const offset_type curPos = ptr[hashVal] + offset;
1373  if (curPos >= ptr[hashVal+1]) {
1374  result.success_ = false; // FAILURE!
1375  }
1376  else {
1377  val[curPos].first = key;
1378  val[curPos].second = theVal;
1379  ++curRowStart[hashVal];
1380  }
1381  }
1382 
1383  TEUCHOS_TEST_FOR_EXCEPTION
1384  (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1385  "init: Filling the hash table failed! Please report this bug to the "
1386  "Tpetra developers.");
1387 
1388  // "Commit" the computed arrays and other computed quantities.
1389  ptr_ = ptr;
1390  val_ = val;
1391  minKey_ = result.minKey_;
1392  maxKey_ = result.maxKey_;
1393  // We've already assigned to minVal_ and maxVal_ above.
1394 }
1395 
1396 template <class KeyType, class ValueType, class DeviceType>
1397 bool
1400 {
1401  if (! checkedForDuplicateKeys_) {
1402  hasDuplicateKeys_ = checkForDuplicateKeys ();
1403  checkedForDuplicateKeys_ = true;
1404  }
1405  return hasDuplicateKeys_;
1406 }
1407 
1408 template <class KeyType, class ValueType, class DeviceType>
1409 bool
1411 checkForDuplicateKeys () const
1412 {
1413  const offset_type size = this->getSize ();
1414  // It's allowed for the hash table to have a positive number of
1415  // buckets (getSize()), but still store no entries (numPairs()).
1416  // Both cases trivially entail no duplicates.
1417  if (size == 0 || this->numPairs () == 0) {
1418  return false;
1419  }
1420  else {
1421  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1422  functor_type functor (val_, ptr_);
1423  int hasDupKeys = 0;
1424  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1425  Kokkos::parallel_reduce (range_type (0, size), functor, hasDupKeys);
1426  return hasDupKeys > 0;
1427  }
1428 }
1429 
1430 template <class KeyType, class ValueType, class DeviceType>
1431 std::string
1433 description () const
1434 {
1435  std::ostringstream oss;
1436  oss << "FixedHashTable<"
1437  << Teuchos::TypeNameTraits<KeyType>::name () << ","
1438  << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1439  << "{ numKeys: " << val_.extent (0)
1440  << ", tableSize: " << this->getSize () << " }";
1441  return oss.str();
1442 }
1443 
1444 template <class KeyType, class ValueType, class DeviceType>
1445 void
1447 describe (Teuchos::FancyOStream& out,
1448  const Teuchos::EVerbosityLevel verbLevel) const
1449 {
1450  using std::endl;
1451  using std::setw;
1452  using Teuchos::OSTab;
1453  using Teuchos::rcpFromRef;
1454  using Teuchos::TypeNameTraits;
1455  using Teuchos::VERB_DEFAULT;
1456  using Teuchos::VERB_NONE;
1457  using Teuchos::VERB_LOW;
1458  using Teuchos::VERB_EXTREME;
1459 
1460  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1461  // access to ptr_ and val_ from the host.
1462 
1463  Teuchos::EVerbosityLevel vl = verbLevel;
1464  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1465 
1466  if (vl == VERB_NONE) {
1467  // do nothing
1468  }
1469  else if (vl == VERB_LOW) {
1470  out << this->description() << endl;
1471  }
1472  else { // MEDIUM, HIGH or EXTREME
1473  out << "FixedHashTable:" << endl;
1474  {
1475  OSTab tab1 (rcpFromRef (out));
1476 
1477  // const std::string label = this->getObjectLabel ();
1478  // if (label != "") {
1479  // out << "label: " << label << endl;
1480  // }
1481  out << "Template parameters:" << endl;
1482  {
1483  OSTab tab2 (rcpFromRef (out));
1484  out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1485  << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1486  }
1487 
1488  const offset_type tableSize = this->getSize ();
1489  const offset_type numKeys = val_.extent (0);
1490 
1491  out << "Table parameters:" << endl;
1492  {
1493  OSTab tab2 (rcpFromRef (out));
1494  out << "numKeys: " << numKeys << endl
1495  << "tableSize: " << tableSize << endl;
1496  }
1497 
1498  if (vl >= VERB_EXTREME) {
1499  out << "Contents: ";
1500  if (tableSize == 0 || numKeys == 0) {
1501  out << "[]" << endl;
1502  } else {
1503  out << "[ " << endl;
1504  {
1505  OSTab tab2 (rcpFromRef (out));
1506  for (offset_type i = 0; i < tableSize; ++i) {
1507  OSTab tab3 (rcpFromRef (out));
1508  out << "[";
1509  for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1510  out << "(" << val_[k].first << "," << val_[k].second << ")";
1511  if (k + 1 < ptr_[i+1]) {
1512  out << ", ";
1513  }
1514  }
1515  out << "]" << endl;
1516  } // for each table position i
1517  }
1518  out << "]" << endl;
1519  } // The table contains entries
1520  } // vl >= VERB_EXTREME
1521  }
1522  out << endl;
1523  } // if vl > VERB_LOW
1524 }
1525 
1526 } // namespace Details
1527 } // namespace Tpetra
1528 
1529 // Macro that explicitly instantiates FixedHashTable for the given local
1530 // ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1531 // template parameters occur in the opposite order of most Tpetra
1532 // classes. This is because FixedHashTable performs global-to-local
1533 // lookup, and the convention in templated C++ lookup tables (such as
1534 // std::map) is <KeyType, ValueType>.
1535 //
1536 // This macro must be explanded within the Tpetra::Details namespace.
1537 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1538  template class FixedHashTable< GO , LO >;
1539 
1540 // Macro that explicitly instantiates FixedHashTable for the given
1541 // local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1542 // types. Note that FixedHashTable's first two template parameters
1543 // occur in the opposite order of most Tpetra classes. This is
1544 // because FixedHashTable performs global-to-local lookup, and the
1545 // convention in templated C++ lookup tables (such as std::map) is
1546 // <KeyType, ValueType>.
1547 //
1548 // This macro must be explanded within the Tpetra::Details namespace.
1549 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1550  template class FixedHashTable< GO , LO , DEVICE >;
1551 
1552 #endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void join(volatile value_type &dst, const volatile value_type &src) const
Combine two intermediate reduction results.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting &quot;buckets&quot; in the FixedHashTable.
static bool debug()
Whether Tpetra is in debug mode.
bool hasKeys() const
Whether it is safe to call getKey().
bool success_
Whether fill succeeded (it can only fail on a bug)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
The hash function for FixedHashTable.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
std::string description() const
Implementation of Teuchos::Describable interface.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
FixedHashTable()
Default constructor; makes an empty table.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Reduction result for FillPairs functor below.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra&#39;s behavior.