Tpetra parallel linear algebra  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Tpetra_Details_FixedHashTable_def.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 #ifndef TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
45 #define TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
46 
47 #include "Tpetra_Details_FixedHashTable_decl.hpp"
49 #ifdef TPETRA_USE_MURMUR_HASH
50 # include "Kokkos_Functional.hpp" // hash function used by Kokkos::UnorderedMap
51 #endif // TPETRA_USE_MURMUR_HASH
52 #include "Kokkos_ArithTraits.hpp"
53 #include "Teuchos_TypeNameTraits.hpp"
54 #include <type_traits>
55 
56 namespace Tpetra {
57 namespace Details {
58 //
59 // This namespace stores utility functions and Kokkos
60 // functors for use in FixedHashTable construction.
61 //
62 namespace FHT {
63 
64 
65 // Is it worth actually using building the FixedHashTable using
66 // parallel threads, instead of just counting in a sequential loop?
67 //
68 // The parallel version of FixedHashTable construction isn't just a
69 // parallelization of the sequential loops. It incurs additional
70 // overheads. For example, the CountBuckets kernel uses atomic update
71 // instructions to count the number of "buckets" per offsets array
72 // (ptr) entry. Atomic updates have overhead, even if only one thread
73 // issues them. The Kokkos kernels are still correct in that case,
74 // but I would rather not incur overhead then. It might make sense to
75 // set the minimum number of threads to something greater than 1, but
76 // we would need experiments to find out.
77 template<class ExecSpace>
78 bool worthBuildingFixedHashTableInParallel () {
79  return ExecSpace::concurrency() > 1;
80 }
81 
82 // If the input kokkos::View<const KeyType*, ArrayLayout,
83 // InputExecSpace, Kokkos::MemoryUnamanged> is NOT accessible from the
84 // OutputExecSpace execution space, make and return a deep copy.
85 // Otherwise, just return the original input.
86 //
87 // The point of this is to avoid unnecessary copies, when the input
88 // array of keys comes in as a Teuchos::ArrayView (which we wrap in an
89 // unmanaged Kokkos::View).
90 template<class KeyType,
91  class ArrayLayout,
92  class InputExecSpace,
93  class OutputExecSpace,
94  const bool mustDeepCopy =
95  ! std::is_same<typename InputExecSpace::memory_space,
96  typename OutputExecSpace::memory_space>::value>
97 struct DeepCopyIfNeeded {
98  // The default implementation is trivial; all the work happens in
99  // partial specializations.
100 };
101 
102 // Specialization for when a deep copy is actually needed.
103 template<class KeyType,
104  class ArrayLayout,
105  class InputExecSpace,
106  class OutputExecSpace>
107 struct DeepCopyIfNeeded<KeyType, ArrayLayout, InputExecSpace, OutputExecSpace, true>
108 {
109  typedef Kokkos::View<const KeyType*, ArrayLayout,
110  InputExecSpace, Kokkos::MemoryUnmanaged> input_view_type;
111  // In this case, a deep copy IS needed. As a result, the output
112  // type is a managed Kokkos::View, which differs from the input
113  // type. Clients must get the correct return type from this struct,
114  // either from the typedef below or from 'auto'. Assigning an
115  // unmanaged View to a managed View is a syntax error.
116  typedef Kokkos::View<const KeyType*, ArrayLayout, OutputExecSpace> output_view_type;
117 
118  static output_view_type copy (const input_view_type& src) {
119  typedef typename output_view_type::non_const_type NC;
120 
121  NC dst (Kokkos::ViewAllocateWithoutInitializing (src.tracker ().label ()),
122  src.extent (0));
123  Kokkos::deep_copy (dst, src);
124  return output_view_type (dst);
125  }
126 };
127 
128 // Specialization if no need to make a deep copy.
129 template<class KeyType,
130  class ArrayLayout,
131  class InputExecSpace,
132  class OutputExecSpace>
133 struct DeepCopyIfNeeded<KeyType, ArrayLayout, InputExecSpace, OutputExecSpace, false> {
134  typedef Kokkos::View<const KeyType*, ArrayLayout,
135  InputExecSpace, Kokkos::MemoryUnmanaged> input_view_type;
136  typedef Kokkos::View<const KeyType*, ArrayLayout, OutputExecSpace,
137  Kokkos::MemoryUnmanaged> output_view_type;
138 
139  static output_view_type copy (const input_view_type& src) {
140  return output_view_type (src);
141  }
142 };
143 
144 //
145 // Functors for FixedHashTable initialization
146 //
147 // NOTE (mfh 23 May 2015): Once we can use lambdas with CUDA, we
148 // should consider replacing all of these functors with in-line
149 // lambdas. The only issue is that we would need to bake the
150 // execution space into the policy, since the default execution space
151 // might differ from the one Tpetra wants to use.
152 
162 template<class CountsViewType,
163  class KeysViewType,
164  class SizeType = typename KeysViewType::size_type>
166 public:
167  typedef CountsViewType counts_view_type;
168  typedef KeysViewType keys_view_type;
169  typedef typename CountsViewType::execution_space execution_space;
170  typedef typename CountsViewType::memory_space memory_space;
171  typedef SizeType size_type;
172  typedef typename keys_view_type::non_const_value_type key_type;
173  // mfh 21 May 2015: Having a device_type typedef in the functor
174  // along with an execution_space typedef causes compilation issues.
175  // This is because one of Kokkos' partial specializations picks up
176  // on the device_type typedef, and another picks up on the
177  // execution_space typedef. The former is a legacy of a previous
178  // design iteration of Kokkos, which did not separate memory and
179  // execution spaces.
181 
187  CountBuckets (const counts_view_type& counts,
188  const keys_view_type& keys,
189  const size_type size) :
190  counts_ (counts),
191  keys_ (keys),
192  size_ (size)
193  {}
194 
199  KOKKOS_INLINE_FUNCTION void
200  operator () (const size_type& i) const
201  {
202  typedef typename hash_type::result_type hash_value_type;
203 
204  const hash_value_type hashVal = hash_type::hashFunc (keys_[i], size_);
205  Kokkos::atomic_increment (&counts_[hashVal]);
206  }
207 
208 private:
210  counts_view_type counts_;
212  keys_view_type keys_;
214  size_type size_;
215 };
216 
227 template<class KeyType>
229  KOKKOS_INLINE_FUNCTION
230  FillPairsResult () :
231  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
232  // min() for a floating-point type returns the minimum _positive_
233  // normalized value. This is different than for integer types.
234  // lowest() is new in C++11 and returns the least value, always
235  // negative for signed finite types.
236  //
237  // mfh 23 May 2015: I have heard reports that
238  // std::numeric_limits<int>::lowest() does not exist with the
239  // Intel compiler. I'm not sure if the users in question actually
240  // enabled C++11. However, it's easy enough to work around this
241  // issue. The standard floating-point types are signed and have a
242  // sign bit, so lowest() is just -max(). For integer types, we
243  // can use min() instead.
244  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
245  ::Kokkos::Details::ArithTraits<KeyType>::min () :
246  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
247  success_ (true)
248  {
249  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
250  "KeyType must be some kind of number type.");
251  }
252 
253  KOKKOS_INLINE_FUNCTION
254  FillPairsResult (const KeyType& initMinKey,
255  const KeyType& initMaxKey) :
256  minKey_ (initMinKey),
257  maxKey_ (initMaxKey),
258  success_ (true)
259  {
260  static_assert (std::is_arithmetic<KeyType>::value, "FillPairsResult: "
261  "KeyType must be some kind of number type.");
262  }
263 
264  KeyType minKey_;
265  KeyType maxKey_;
266  bool success_;
267 };
268 
298 template<class PairsViewType,
299  class KeysViewType,
300  class CountsViewType,
301  class SizeType = typename KeysViewType::size_type>
302 class FillPairs {
303 public:
304  typedef typename CountsViewType::non_const_type counts_view_type;
305  typedef typename counts_view_type::const_type offsets_view_type;
306 
307  typedef PairsViewType pairs_view_type;
308  typedef typename KeysViewType::const_type keys_view_type;
309  typedef typename offsets_view_type::execution_space execution_space;
310  typedef typename offsets_view_type::memory_space memory_space;
311  typedef SizeType size_type;
312 
313  typedef typename keys_view_type::non_const_value_type key_type;
314  typedef typename pairs_view_type::non_const_value_type pair_type;
315 
317 
318  // mfh 23 May 2015: Having a device_type typedef in the functor
319  // along with an execution_space typedef causes compilation issues.
320  // This is because one of Kokkos' partial specializations picks up
321  // on the device_type typedef, and another picks up on the
322  // execution_space typedef. The former is a legacy of a previous
323  // design iteration of Kokkos, which did not separate memory and
324  // execution spaces.
326 
337  FillPairs (const pairs_view_type& pairs,
338  const counts_view_type& counts,
339  const offsets_view_type& ptr,
340  const keys_view_type& keys,
341  const typename pair_type::second_type startingValue) :
342  pairs_ (pairs),
343  counts_ (counts),
344  ptr_ (ptr),
345  keys_ (keys),
346  size_ (counts.extent (0)),
347  startingValue_ (startingValue),
348  initMinKey_ (::Kokkos::Details::ArithTraits<key_type>::max ()),
349  initMaxKey_ (::Kokkos::Details::ArithTraits<key_type>::is_integer ?
350  ::Kokkos::Details::ArithTraits<key_type>::min () :
351  -::Kokkos::Details::ArithTraits<key_type>::max ())
352  {}
353 
372  FillPairs (const pairs_view_type& pairs,
373  const counts_view_type& counts,
374  const offsets_view_type& ptr,
375  const keys_view_type& keys,
376  const typename pair_type::second_type startingValue,
377  const key_type initMinKey,
378  const key_type initMaxKey) :
379  pairs_ (pairs),
380  counts_ (counts),
381  ptr_ (ptr),
382  keys_ (keys),
383  size_ (counts.extent (0)),
384  startingValue_ (startingValue),
385  initMinKey_ (initMinKey),
386  initMaxKey_ (initMaxKey)
387  {}
388 
390  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
391  {
392  dst.minKey_ = initMinKey_;
393  dst.maxKey_ = initMaxKey_;
394  dst.success_ = true;
395  }
396 
397  KOKKOS_INLINE_FUNCTION void
398  join (volatile value_type& dst,
399  const volatile value_type& src) const
400  {
401  if (src.maxKey_ > dst.maxKey_) {
402  dst.maxKey_ = src.maxKey_;
403  }
404  if (src.minKey_ < dst.minKey_) {
405  dst.minKey_ = src.minKey_;
406  }
407  dst.success_ = dst.success_ && src.success_;
408  }
409 
414  KOKKOS_INLINE_FUNCTION void
415  operator () (const size_type& i, value_type& dst) const
416  {
417  typedef typename hash_type::result_type hash_value_type;
418  typedef typename offsets_view_type::non_const_value_type offset_type;
419  typedef typename pair_type::second_type val_type;
420  typedef typename std::remove_reference< decltype( counts_[0] ) >::type atomic_incr_type;
421 
422  const key_type key = keys_[i];
423  if (key > dst.maxKey_) {
424  dst.maxKey_ = key;
425  }
426  if (key < dst.minKey_) {
427  dst.minKey_ = key;
428  }
429  const val_type theVal = startingValue_ + static_cast<val_type> (i);
430  const hash_value_type hashVal = hash_type::hashFunc (key, size_);
431 
432  // Return the old count; decrement afterwards.
433  const offset_type count = Kokkos::atomic_fetch_add (&counts_[hashVal], atomic_incr_type(-1));
434  if (count == 0) {
435  dst.success_ = false; // FAILURE!
436  }
437  else {
438  const offset_type curPos = ptr_[hashVal+1] - count;
439 
440  pairs_[curPos].first = key;
441  pairs_[curPos].second = theVal;
442  }
443  }
444 
445 private:
446  pairs_view_type pairs_;
447  counts_view_type counts_;
448  offsets_view_type ptr_;
449  keys_view_type keys_;
450  size_type size_;
451  typename pair_type::second_type startingValue_;
453  key_type initMinKey_;
455  key_type initMaxKey_;
456 };
457 
481 template<class OffsetsViewType,
482  class PairsViewType,
483  class SizeType = typename OffsetsViewType::size_type>
485 public:
486  typedef typename OffsetsViewType::const_type offsets_view_type;
487  typedef typename PairsViewType::const_type pairs_view_type;
488  typedef typename offsets_view_type::execution_space execution_space;
489  typedef typename offsets_view_type::memory_space memory_space;
490  typedef SizeType size_type;
491 
492  // The result of the check is whether the table has one or more duplicates.
493  typedef int value_type;
494 
499  CheckForDuplicateKeys (const pairs_view_type& pairs,
500  const offsets_view_type& ptr) :
501  pairs_ (pairs),
502  ptr_ (ptr),
503  size_ (ptr_.extent (0) == 0 ?
504  size_type (0) :
505  ptr_.extent (0) - 1)
506  {}
507 
509  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
510  {
511  dst = 0;
512  }
513 
515  KOKKOS_INLINE_FUNCTION void
516  join (volatile value_type& dst,
517  const volatile value_type& src) const
518  {
519  dst = dst + src > 0?1:0;
520  }
521 
523  KOKKOS_INLINE_FUNCTION void
524  operator () (const size_type& i, value_type& dst) const
525  {
526  typedef typename offsets_view_type::non_const_value_type offset_type;
527  typedef typename pairs_view_type::non_const_value_type pair_type;
528  typedef typename pair_type::first_type key_type;
529 
530  if (dst>0) {
531  return; // we've already found duplicate keys elsewhere
532  }
533  else {
534  const offset_type beg = ptr_[i];
535  const offset_type end = ptr_[i+1];
536  bool foundDuplicateKey = false;
537  // This is an ~ n^2 algorithm in the worst case, where n is the
538  // max number of keys that hash to the same bucket. However, if
539  // the hash function is reasonable, n should be much less than
540  // the total number of keys.
541  for (offset_type j = beg + 1; j < end; ++j) {
542  const key_type curKey = pairs_[j].first;
543  for (offset_type k = beg; k < j; ++k) {
544  if (pairs_[k].first == curKey) {
545  foundDuplicateKey = true;
546  break;
547  }
548  }
549  }
550  dst = (dst>0) || foundDuplicateKey?1:0;
551  }
552  }
553 
554 private:
555  pairs_view_type pairs_;
556  offsets_view_type ptr_;
557  size_type size_;
558 };
559 
560 } // namespace FHT
561 
562 //
563 // Here begins the actual implementation of FixedHashTable.
564 //
565 
566 template<class KeyType, class ValueType, class DeviceType>
567 bool
569 hasKeys () const {
570  // This also works if FixedHashTable has no entries. getKey()
571  // works in that case, but always returns the flag (invalid).
572  //
573  // FIXME (31 May 2015) This only works because vals_ contains no
574  // padding. If we ever pad within a "row" of vals_, we'll have to
575  // change this.
576  return keys_.extent (0) == val_.extent (0);
577 }
578 
579 template<class KeyType, class ValueType, class DeviceType>
580 void
582 check () const
583 {
584  // const char prefix[] = "Tpetra::Details::FixedHashTable: ";
585  // const char suffix[] = " Please report this bug to the Tpetra developers.";
586 }
587 
588 template<class KeyType, class ValueType, class DeviceType>
591  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
592  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
593  ::Kokkos::Details::ArithTraits<KeyType>::min () :
594  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
595  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
596  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
597  ::Kokkos::Details::ArithTraits<ValueType>::min () :
598  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
599  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
600  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
601  ::Kokkos::Details::ArithTraits<KeyType>::min () :
602  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
603  contiguousValues_ (true), // trivially
604  checkedForDuplicateKeys_ (true), // it's an empty table; no need to check
605  hasDuplicateKeys_ (false)
606 {
607 #ifdef HAVE_TPETRA_DEBUG
608  check ();
609 #endif // HAVE_TPETRA_DEBUG
610 }
611 
612 template<class KeyType, class ValueType, class DeviceType>
614 FixedHashTable (const keys_type& keys) :
615  keys_ (keys),
616  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
617  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
618  ::Kokkos::Details::ArithTraits<KeyType>::min () :
619  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
620  minVal_ (0),
621  maxVal_ (keys.size () == 0 ?
622  static_cast<ValueType> (0) :
623  static_cast<ValueType> (keys.size () - 1)),
624  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
625  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
626  ::Kokkos::Details::ArithTraits<KeyType>::min () :
627  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
628  contiguousValues_ (true),
629  checkedForDuplicateKeys_ (false),
630  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
631 {
632  const ValueType startingValue = static_cast<ValueType> (0);
633  const KeyType initMinKey = this->minKey_;
634  const KeyType initMaxKey = this->maxKey_;
635  this->init (keys, startingValue, initMinKey, initMaxKey,
636  initMinKey, initMinKey, false);
637 
638 #ifdef HAVE_TPETRA_DEBUG
639  check ();
640 #endif // HAVE_TPETRA_DEBUG
641 }
642 
643 template<class KeyType, class ValueType, class DeviceType>
645 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
646  const bool keepKeys) :
647  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
648  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
649  ::Kokkos::Details::ArithTraits<KeyType>::min () :
650  -::Kokkos::Details::ArithTraits<KeyType>::max ()), // to be set in init()
651  minVal_ (0),
652  maxVal_ (keys.size () == 0 ?
653  static_cast<ValueType> (0) :
654  static_cast<ValueType> (keys.size () - 1)),
655  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
656  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
657  ::Kokkos::Details::ArithTraits<KeyType>::min () :
658  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
659  contiguousValues_ (true),
660  checkedForDuplicateKeys_ (false),
661  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
662 {
663  typedef typename keys_type::non_const_type nonconst_keys_type;
664 
665  // mfh 01 May 2015: I don't trust that
666  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
667  // so I ensure this manually.
668  const ValueType startingValue = static_cast<ValueType> (0);
669  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
670  keys.size ());
671  using Kokkos::ViewAllocateWithoutInitializing;
672  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
673  keys_k.extent (0));
674  Kokkos::deep_copy (keys_d, keys_k);
675  const KeyType initMinKey = this->minKey_;
676  const KeyType initMaxKey = this->maxKey_;
677  this->init (keys_d, startingValue, initMinKey, initMaxKey,
678  initMinKey, initMinKey, false);
679  if (keepKeys) {
680  keys_ = keys_d;
681 #ifdef HAVE_TPETRA_DEBUG
682  typedef typename keys_type::size_type size_type;
683  TEUCHOS_TEST_FOR_EXCEPTION
684  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
685  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
686  "keepKeys is true, but on return, keys_.extent(0) = " <<
687  keys_.extent (0) << " != keys.size() = " << keys.size () <<
688  ". Please report this bug to the Tpetra developers.");
689 #endif // HAVE_TPETRA_DEBUG
690  }
691 
692 #ifdef HAVE_TPETRA_DEBUG
693  check ();
694 #endif // HAVE_TPETRA_DEBUG
695 }
696 
697 template<class KeyType, class ValueType, class DeviceType>
699 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
700  const ValueType startingValue,
701  const bool keepKeys) :
702  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
703  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
704  ::Kokkos::Details::ArithTraits<KeyType>::min () :
705  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
706  minVal_ (startingValue),
707  maxVal_ (keys.size () == 0 ?
708  startingValue :
709  static_cast<ValueType> (startingValue + keys.size () - 1)),
710  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
711  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
712  ::Kokkos::Details::ArithTraits<KeyType>::min () :
713  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
714  contiguousValues_ (true),
715  checkedForDuplicateKeys_ (false),
716  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
717 {
718  typedef typename keys_type::non_const_type nonconst_keys_type;
719 
720  // mfh 01 May 2015: I don't trust that
721  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
722  // so I ensure this manually.
723  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
724  keys.size ());
725  using Kokkos::ViewAllocateWithoutInitializing;
726  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
727  keys_k.extent (0));
728  Kokkos::deep_copy (keys_d, keys_k);
729 
730  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
731  // min() for a floating-point type returns the minimum _positive_
732  // normalized value. This is different than for integer types.
733  // lowest() is new in C++11 and returns the least value, always
734  // negative for signed finite types.
735  //
736  // mfh 23 May 2015: I have heard reports that
737  // std::numeric_limits<int>::lowest() does not exist with the Intel
738  // compiler. I'm not sure if the users in question actually enabled
739  // C++11. However, it's easy enough to work around this issue. The
740  // standard floating-point types are signed and have a sign bit, so
741  // lowest() is just -max(). For integer types, we can use min()
742  // instead.
743  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
744  ::Kokkos::Details::ArithTraits<KeyType>::min () :
745  -::Kokkos::Details::ArithTraits<KeyType>::max ();
746  this->init (keys_d, startingValue, initMinKey, initMaxKey,
747  initMinKey, initMinKey, false);
748  if (keepKeys) {
749  keys_ = keys_d;
750 #ifdef HAVE_TPETRA_DEBUG
751  typedef typename keys_type::size_type size_type;
752  TEUCHOS_TEST_FOR_EXCEPTION
753  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
754  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
755  "keepKeys is true, but on return, keys_.extent(0) = " <<
756  keys_.extent (0) << " != keys.size() = " << keys.size () <<
757  ". Please report this bug to the Tpetra developers.");
758 #endif // HAVE_TPETRA_DEBUG
759  }
760 
761 #ifdef HAVE_TPETRA_DEBUG
762  check ();
763 #endif // HAVE_TPETRA_DEBUG
764 }
765 
766 template<class KeyType, class ValueType, class DeviceType>
769  const KeyType firstContigKey,
770  const KeyType lastContigKey,
771  const ValueType startingValue,
772  const bool keepKeys) :
773  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
774  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
775  ::Kokkos::Details::ArithTraits<KeyType>::min () :
776  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
777  minVal_ (startingValue),
778  maxVal_ (keys.size () == 0 ?
779  startingValue :
780  static_cast<ValueType> (startingValue + keys.size () - 1)),
781  firstContigKey_ (firstContigKey),
782  lastContigKey_ (lastContigKey),
783  contiguousValues_ (true),
784  checkedForDuplicateKeys_ (false),
785  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
786 {
787  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
788  // min() for a floating-point type returns the minimum _positive_
789  // normalized value. This is different than for integer types.
790  // lowest() is new in C++11 and returns the least value, always
791  // negative for signed finite types.
792  //
793  // mfh 23 May 2015: I have heard reports that
794  // std::numeric_limits<int>::lowest() does not exist with the Intel
795  // compiler. I'm not sure if the users in question actually enabled
796  // C++11. However, it's easy enough to work around this issue. The
797  // standard floating-point types are signed and have a sign bit, so
798  // lowest() is just -max(). For integer types, we can use min()
799  // instead.
800  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
801  ::Kokkos::Details::ArithTraits<KeyType>::min () :
802  -::Kokkos::Details::ArithTraits<KeyType>::max ();
803  this->init (keys, startingValue, initMinKey, initMaxKey,
804  firstContigKey, lastContigKey, true);
805  if (keepKeys) {
806  keys_ = keys;
807 #ifdef HAVE_TPETRA_DEBUG
808  typedef typename keys_type::size_type size_type;
809  TEUCHOS_TEST_FOR_EXCEPTION
810  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
811  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
812  "keepKeys is true, but on return, keys_.extent(0) = " <<
813  keys_.extent (0) << " != keys.size() = " << keys.size () <<
814  ". Please report this bug to the Tpetra developers.");
815 #endif // HAVE_TPETRA_DEBUG
816  }
817 
818 #ifdef HAVE_TPETRA_DEBUG
819  check ();
820 #endif // HAVE_TPETRA_DEBUG
821 }
822 
823 template<class KeyType, class ValueType, class DeviceType>
825 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
826  const KeyType firstContigKey,
827  const KeyType lastContigKey,
828  const ValueType startingValue,
829  const bool keepKeys) :
830  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
831  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
832  ::Kokkos::Details::ArithTraits<KeyType>::min () :
833  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
834  minVal_ (startingValue),
835  maxVal_ (keys.size () == 0 ?
836  startingValue :
837  static_cast<ValueType> (startingValue + keys.size () - 1)),
838  firstContigKey_ (firstContigKey),
839  lastContigKey_ (lastContigKey),
840  contiguousValues_ (true),
841  checkedForDuplicateKeys_ (false),
842  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
843 {
844  typedef typename keys_type::non_const_type nonconst_keys_type;
845 
846  // mfh 01 May 2015: I don't trust that
847  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
848  // so I ensure this manually.
849  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
850  keys.size ());
851  using Kokkos::ViewAllocateWithoutInitializing;
852  nonconst_keys_type keys_d (ViewAllocateWithoutInitializing ("FixedHashTable::keys"),
853  keys_k.extent (0));
854  Kokkos::deep_copy (keys_d, keys_k);
855 
856  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
857  // min() for a floating-point type returns the minimum _positive_
858  // normalized value. This is different than for integer types.
859  // lowest() is new in C++11 and returns the least value, always
860  // negative for signed finite types.
861  //
862  // mfh 23 May 2015: I have heard reports that
863  // std::numeric_limits<int>::lowest() does not exist with the Intel
864  // compiler. I'm not sure if the users in question actually enabled
865  // C++11. However, it's easy enough to work around this issue. The
866  // standard floating-point types are signed and have a sign bit, so
867  // lowest() is just -max(). For integer types, we can use min()
868  // instead.
869  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
870  ::Kokkos::Details::ArithTraits<KeyType>::min () :
871  -::Kokkos::Details::ArithTraits<KeyType>::max ();
872  this->init (keys_d, startingValue, initMinKey, initMaxKey,
873  firstContigKey, lastContigKey, true);
874  if (keepKeys) {
875  keys_ = keys_d;
876 #ifdef HAVE_TPETRA_DEBUG
877  typedef typename keys_type::size_type size_type;
878  TEUCHOS_TEST_FOR_EXCEPTION
879  (keys_.extent (0) != static_cast<size_type> (keys.size ()),
880  std::logic_error, "Tpetra::Details::FixedHashTable constructor: "
881  "keepKeys is true, but on return, keys_.extent(0) = " <<
882  keys_.extent (0) << " != keys.size() = " << keys.size () <<
883  ". Please report this bug to the Tpetra developers.");
884 #endif // HAVE_TPETRA_DEBUG
885  }
886 
887 #ifdef HAVE_TPETRA_DEBUG
888  check ();
889 #endif // HAVE_TPETRA_DEBUG
890 }
891 
892 template<class KeyType, class ValueType, class DeviceType>
895  const ValueType startingValue) :
896  keys_ (keys),
897  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
898  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
899  ::Kokkos::Details::ArithTraits<KeyType>::min () :
900  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
901  minVal_ (startingValue),
902  maxVal_ (keys.size () == 0 ?
903  startingValue :
904  static_cast<ValueType> (startingValue + keys.size () - 1)),
905  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
906  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
907  ::Kokkos::Details::ArithTraits<KeyType>::min () :
908  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
909  contiguousValues_ (true),
910  checkedForDuplicateKeys_ (false),
911  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
912 {
913  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
914  // min() for a floating-point type returns the minimum _positive_
915  // normalized value. This is different than for integer types.
916  // lowest() is new in C++11 and returns the least value, always
917  // negative for signed finite types.
918  //
919  // mfh 23 May 2015: I have heard reports that
920  // std::numeric_limits<int>::lowest() does not exist with the Intel
921  // compiler. I'm not sure if the users in question actually enabled
922  // C++11. However, it's easy enough to work around this issue. The
923  // standard floating-point types are signed and have a sign bit, so
924  // lowest() is just -max(). For integer types, we can use min()
925  // instead.
926  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
927  ::Kokkos::Details::ArithTraits<KeyType>::min () :
928  -::Kokkos::Details::ArithTraits<KeyType>::max ();
929  this->init (keys, startingValue, initMinKey, initMaxKey,
930  initMinKey, initMinKey, false);
931 
932 #ifdef HAVE_TPETRA_DEBUG
933  check ();
934 #endif // HAVE_TPETRA_DEBUG
935 }
936 
937 template<class KeyType, class ValueType, class DeviceType>
939 FixedHashTable (const Teuchos::ArrayView<const KeyType>& keys,
940  const Teuchos::ArrayView<const ValueType>& vals) :
941  minKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
942  maxKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
943  ::Kokkos::Details::ArithTraits<KeyType>::min () :
944  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
945  minVal_ (::Kokkos::Details::ArithTraits<ValueType>::max ()),
946  maxVal_ (::Kokkos::Details::ArithTraits<ValueType>::is_integer ?
947  ::Kokkos::Details::ArithTraits<ValueType>::min () :
948  -::Kokkos::Details::ArithTraits<ValueType>::max ()),
949  firstContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::max ()),
950  lastContigKey_ (::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
951  ::Kokkos::Details::ArithTraits<KeyType>::min () :
952  -::Kokkos::Details::ArithTraits<KeyType>::max ()),
953  contiguousValues_ (false),
954  checkedForDuplicateKeys_ (false),
955  hasDuplicateKeys_ (false) // to revise in hasDuplicateKeys()
956 {
957  // mfh 01 May 2015: I don't trust that
958  // Teuchos::ArrayView::getRawPtr() returns NULL when the size is 0,
959  // so I ensure this manually.
960  host_input_keys_type keys_k (keys.size () == 0 ? NULL : keys.getRawPtr (),
961  keys.size ());
962  host_input_vals_type vals_k (vals.size () == 0 ? NULL : vals.getRawPtr (),
963  vals.size ());
964  const KeyType initMinKey = ::Kokkos::Details::ArithTraits<KeyType>::max ();
965  // min() for a floating-point type returns the minimum _positive_
966  // normalized value. This is different than for integer types.
967  // lowest() is new in C++11 and returns the least value, always
968  // negative for signed finite types.
969  //
970  // mfh 23 May 2015: I have heard reports that
971  // std::numeric_limits<int>::lowest() does not exist with the Intel
972  // compiler. I'm not sure if the users in question actually enabled
973  // C++11. However, it's easy enough to work around this issue. The
974  // standard floating-point types are signed and have a sign bit, so
975  // lowest() is just -max(). For integer types, we can use min()
976  // instead.
977  const KeyType initMaxKey = ::Kokkos::Details::ArithTraits<KeyType>::is_integer ?
978  ::Kokkos::Details::ArithTraits<KeyType>::min () :
979  -::Kokkos::Details::ArithTraits<KeyType>::max ();
980  this->init (keys_k, vals_k, initMinKey, initMaxKey);
981 
982 #ifdef HAVE_TPETRA_DEBUG
983  check ();
984 #endif // HAVE_TPETRA_DEBUG
985 }
986 
987 template<class KeyType, class ValueType, class DeviceType>
988 void
990 init (const keys_type& keys,
991  ValueType startingValue,
992  KeyType initMinKey,
993  KeyType initMaxKey,
994  KeyType firstContigKey,
995  KeyType lastContigKey,
996  const bool computeInitContigKeys)
997 {
998  using Kokkos::subview;
999  using Kokkos::ViewAllocateWithoutInitializing;
1000  using Teuchos::TypeNameTraits;
1001  typedef typename std::decay<decltype (keys.extent (0)) >::type size_type;
1002  const char prefix[] = "Tpetra::Details::FixedHashTable: ";
1003 
1004  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1005  {
1006  const offset_type theMaxVal = ::Kokkos::Details::ArithTraits<offset_type>::max ();
1007  const size_type maxValST = static_cast<size_type> (theMaxVal);
1008  TEUCHOS_TEST_FOR_EXCEPTION
1009  (keys.extent (0) > maxValST, std::invalid_argument, prefix << "The "
1010  "number of keys " << keys.extent (0) << " does not fit in "
1011  "offset_type = " << TypeNameTraits<offset_type>::name () << ", whose "
1012  "max value is " << theMaxVal << ". This means that it is not possible to "
1013  "use this constructor.");
1014  }
1015  TEUCHOS_TEST_FOR_EXCEPTION
1016  (static_cast<unsigned long long> (numKeys) >
1017  static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1018  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1019  "keys " << numKeys << " is greater than the maximum representable "
1020  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ". "
1021  "This means that it is not possible to use this constructor.");
1022  TEUCHOS_TEST_FOR_EXCEPTION
1023  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, prefix <<
1024  "This class currently only works when the number of keys is <= INT_MAX = "
1025  << INT_MAX << ". If this is a problem for you, please talk to the Tpetra "
1026  "developers.");
1027 
1028  const bool buildInParallel =
1029  FHT::worthBuildingFixedHashTableInParallel<execution_space> ();
1030 
1031  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1032  // could change that by setting up ptr and val as Kokkos::DualView
1033  // instances. If we do that, since we are filling on host for now,
1034  // we want to make sure that we only zero-fill ptr on host
1035  // initially, and that we don't fill val at all. Once we finish
1036  // Kokkos-izing all the set-up kernels, we won't need DualView for
1037  // either ptr or val.
1038 
1039  if (computeInitContigKeys) {
1040  // Find the first and last initial contiguous keys. If we find a
1041  // long sequence of initial contiguous keys, we can save space by
1042  // not storing them explicitly as pairs in the hash table.
1043  //
1044  // NOTE (mfh 01 Jun 2015) Doing this in parallel requires a scan
1045  // ("min index such that the difference between the current key and
1046  // the next != 1"), which takes multiple passes over the data. We
1047  // could fuse it with CountBuckets (only update counts on 'final'
1048  // pass). However, we're really just moving this sequential search
1049  // out of Map's constructor here, so there is no loss in doing it
1050  // sequentially for now. Later, we can work on parallelization.
1051  //
1052  // NOTE (mfh 01 Jun 2015, 28 Mar 2016) The code below assumes UVM.
1053  // It is rational to assume UVM here, because this is "sparse"
1054  // access -- we might not need to look at all the entries of keys,
1055  // so it doesn't necessarily make sense to copy the whole thing
1056  // back to host.
1057  if (numKeys > 0) {
1058  firstContigKey_ = keys[0];
1059  // Start with one plus, then decrement at the end. That lets us do
1060  // only one addition per loop iteration, rather than two (if we test
1061  // against lastContigKey + 1 and then increment lastContigKey).
1062  lastContigKey_ = firstContigKey_ + 1;
1063 
1064  // We will only store keys in the table that are not part of the
1065  // initial contiguous sequence. It's possible for the initial
1066  // contiguous sequence to be trivial, which for a nonzero number of
1067  // keys means that the "sequence" has length 1.
1068  for (offset_type k = 1; k < numKeys; ++k) {
1069  if (lastContigKey_ != keys[k]) {
1070  break;
1071  }
1072  ++lastContigKey_;
1073  }
1074  --lastContigKey_;
1075  }
1076  }
1077  else {
1078  firstContigKey_ = firstContigKey;
1079  lastContigKey_ = lastContigKey;
1080  }
1081 
1082  offset_type startIndex;
1083  if (numKeys > 0) {
1084  initMinKey = std::min (initMinKey, firstContigKey_);
1085  initMaxKey = std::max (initMaxKey, lastContigKey_);
1086  startIndex = static_cast<offset_type> (lastContigKey_ - firstContigKey_);
1087  } else {
1088  startIndex = 0;
1089  }
1090 
1091  const offset_type theNumKeys = numKeys - startIndex;
1092  const offset_type size = hash_type::getRecommendedSize (theNumKeys);
1093 #ifdef HAVE_TPETRA_DEBUG
1094  TEUCHOS_TEST_FOR_EXCEPTION(
1095  size == 0 && numKeys != 0, std::logic_error,
1096  "Tpetra::Details::FixedHashTable constructor: "
1097  "getRecommendedSize(" << numKeys << ") returned zero, "
1098  "even though the number of keys " << numKeys << " is nonzero. "
1099  "Please report this bug to the Tpetra developers.");
1100 #endif // HAVE_TPETRA_DEBUG
1101  keys_type theKeys =
1102  subview (keys, std::pair<offset_type, offset_type> (startIndex, numKeys));
1103 
1104  // FIXME (mfh 28 Mar 2016) For some reason, we don't seem to need a
1105  // fence here, but we do before other allocations.
1106 
1107  // The array of counts must be separate from the array of offsets,
1108  // in order for parallel_scan to work correctly.
1109  typedef typename ptr_type::non_const_type counts_type;
1110  counts_type counts ("FixedHashTable::counts", size);
1111 
1112  //
1113  // Count the number of "buckets" per offsets array (ptr) entry.
1114  //
1115 
1116  // The Kokkos kernel uses atomic update instructions to count the
1117  // number of "buckets" per offsets array (ptr) entry. Atomic
1118  // updates incur overhead, even in the sequential case. The Kokkos
1119  // kernel is still correct in that case, but I would rather not
1120  // incur overhead then.
1121  if (buildInParallel) {
1122  FHT::CountBuckets<counts_type, keys_type> functor (counts, theKeys, size);
1123 
1124  typedef typename counts_type::execution_space execution_space;
1125  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1126  Kokkos::parallel_for (range_type (0, theNumKeys), functor);
1127  }
1128  else {
1129  // Access to counts is not necessarily contiguous, but is
1130  // irregular and likely touches all pages of the array. Thus, it
1131  // probably makes sense to use a host copy explicitly, rather than
1132  // assume UVM.
1133  auto countsHost = Kokkos::create_mirror_view (counts);
1134  // FIXME (mfh 28 Mar 2016) Does create_mirror_view zero-fill?
1135  Kokkos::deep_copy (countsHost, static_cast<offset_type> (0));
1136 
1137  for (offset_type k = 0; k < theNumKeys; ++k) {
1138  typedef typename hash_type::result_type hash_value_type;
1139  const hash_value_type hashVal = hash_type::hashFunc (theKeys[k], size);
1140  ++countsHost[hashVal];
1141  }
1142  Kokkos::deep_copy (counts, countsHost);
1143  }
1144 
1145  // FIXME (mfh 28 Mar 2016) Need a fence here, otherwise SIGSEGV w/
1146  // CUDA when ptr is filled.
1147  execution_space::fence ();
1148 
1149  // Kokkos::View fills with zeros by default.
1150  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size+1);
1151 
1152  // Compute row offsets via prefix sum:
1153  //
1154  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1155  //
1156  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1157  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1158  // formula is ptr[i+1] += ptr[i].
1159  //
1160  // parallel_scan does not incur overhead with Kokkos::Serial, but
1161  // with actual parallel execution spaces, it does require multiple
1162  // passes over the data. Thus, it still makes sense to have a
1163  // sequential fall-back.
1164  if (buildInParallel) {
1166  }
1167  else {
1168  // mfh 28 Mar 2016: We could use UVM here, but it's pretty easy to
1169  // use a host mirror too, so I'll just do that.
1170  typename decltype (ptr)::HostMirror ptrHost = Kokkos::create_mirror_view (ptr);
1171 
1172  ptrHost[0] = 0;
1173  for (offset_type i = 0; i < size; ++i) {
1174  //ptrHost[i+1] += ptrHost[i];
1175  ptrHost[i+1] = ptrHost[i] + counts[i];
1176  }
1177  Kokkos::deep_copy (ptr, ptrHost);
1178  }
1179 
1180  // FIXME (mfh 28 Mar 2016) Need a fence here, otherwise SIGSEGV w/
1181  // CUDA when val is filled.
1182  execution_space::fence ();
1183 
1184  // Allocate the array of (key,value) pairs. Don't fill it with
1185  // zeros, because we will fill it with actual data below.
1186  using Kokkos::ViewAllocateWithoutInitializing;
1187  typedef typename val_type::non_const_type nonconst_val_type;
1188  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1189  theNumKeys);
1190 
1191  // Fill in the hash table's "values" (the (key,value) pairs).
1192  typedef FHT::FillPairs<typename val_type::non_const_type, keys_type,
1193  typename ptr_type::non_const_type> functor_type;
1194  typename functor_type::value_type result (initMinKey, initMaxKey);
1195 
1196  const ValueType newStartingValue = startingValue + static_cast<ValueType> (startIndex);
1197  if (buildInParallel) {
1198  functor_type functor (val, counts, ptr, theKeys, newStartingValue,
1199  initMinKey, initMaxKey);
1200  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1201  Kokkos::parallel_reduce (range_type (0, theNumKeys), functor, result);
1202  }
1203  else {
1204  for (offset_type k = 0; k < theNumKeys; ++k) {
1205  typedef typename hash_type::result_type hash_value_type;
1206  const KeyType key = theKeys[k];
1207  if (key > result.maxKey_) {
1208  result.maxKey_ = key;
1209  }
1210  if (key < result.minKey_) {
1211  result.minKey_ = key;
1212  }
1213  const ValueType theVal = newStartingValue + static_cast<ValueType> (k);
1214  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1215 
1216  // Return the old count; decrement afterwards.
1217  //
1218  // NOTE (mfh 28 Mar 2016) This assumes UVM. It might make more
1219  // sense to use a host mirror here, due to the access pattern.
1220  const offset_type count = counts[hashVal];
1221  --counts[hashVal];
1222  if (count == 0) {
1223  result.success_ = false; // FAILURE!
1224  break;
1225  }
1226  else {
1227  // NOTE (mfh 28 Mar 2016) This assumes UVM.
1228  const offset_type curPos = ptr[hashVal+1] - count;
1229 
1230  // NOTE (mfh 28 Mar 2016) This assumes UVM.
1231  val[curPos].first = key;
1232  val[curPos].second = theVal;
1233  }
1234  }
1235  }
1236 
1237  // FIXME (mfh 01 Jun 2015) Temporarily commented out because of
1238  // reports of exceptions being thrown in Albany.
1239  //
1240  // TEUCHOS_TEST_FOR_EXCEPTION
1241  // (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1242  // "init: Filling the hash table failed! Please report this bug to the "
1243  // "Tpetra developers.");
1244  (void) result; // prevent build warning (set but unused variable)
1245 
1246  // "Commit" the computed arrays and other computed quantities.
1247  ptr_ = ptr;
1248  val_ = val;
1249  minKey_ = result.minKey_;
1250  maxKey_ = result.maxKey_;
1251  // We've already set firstContigKey_ and lastContigKey_ above.
1252 }
1253 
1254 
1255 template<class KeyType, class ValueType, class DeviceType>
1256 void
1257 FixedHashTable<KeyType, ValueType, DeviceType>::
1258 init (const host_input_keys_type& keys,
1259  const host_input_vals_type& vals,
1260  KeyType initMinKey,
1261  KeyType initMaxKey)
1262 {
1263  const offset_type numKeys = static_cast<offset_type> (keys.extent (0));
1264  TEUCHOS_TEST_FOR_EXCEPTION
1265  (static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ()),
1266  std::invalid_argument, "Tpetra::Details::FixedHashTable: The number of "
1267  "keys " << numKeys << " is greater than the maximum representable "
1268  "ValueType value " << ::Kokkos::Details::ArithTraits<ValueType>::max () << ".");
1269  TEUCHOS_TEST_FOR_EXCEPTION
1270  (numKeys > static_cast<offset_type> (INT_MAX), std::logic_error, "Tpetra::"
1271  "Details::FixedHashTable: This class currently only works when the number "
1272  "of keys is <= INT_MAX = " << INT_MAX << ". If this is a problem for you"
1273  ", please talk to the Tpetra developers.");
1274 
1275  // There's no need to find the first and last initial contiguous
1276  // keys in this case, because if we reach this init() function, then
1277  // hasContiguousValues() is false and so get() doesn't use the
1278  // initial contiguous sequence of keys.
1279 
1280  const offset_type size = hash_type::getRecommendedSize (numKeys);
1281 #ifdef HAVE_TPETRA_DEBUG
1282  TEUCHOS_TEST_FOR_EXCEPTION(
1283  size == 0 && numKeys != 0, std::logic_error,
1284  "Tpetra::Details::FixedHashTable constructor: "
1285  "getRecommendedSize(" << numKeys << ") returned zero, "
1286  "even though the number of keys " << numKeys << " is nonzero. "
1287  "Please report this bug to the Tpetra developers.");
1288 #endif // HAVE_TPETRA_DEBUG
1289 
1290  // NOTE (mfh 14 May 2015) This method currently assumes UVM. We
1291  // could change that by setting up ptr and val as Kokkos::DualView
1292  // instances. If we do that, since we are filling on host for now,
1293  // we want to make sure that we only zero-fill ptr on host
1294  // initially, and that we don't fill val at all. Once we finish
1295  // Kokkos-izing all the set-up kernels, we won't need DualView for
1296  // either ptr or val.
1297 
1298  typename ptr_type::non_const_type ptr ("FixedHashTable::ptr", size + 1);
1299 
1300  // Allocate the array of key,value pairs. Don't waste time filling
1301  // it with zeros, because we will fill it with actual data below.
1302  using Kokkos::ViewAllocateWithoutInitializing;
1303  typedef typename val_type::non_const_type nonconst_val_type;
1304  nonconst_val_type val (ViewAllocateWithoutInitializing ("FixedHashTable::pairs"),
1305  numKeys);
1306 
1307  // Compute number of entries in each hash table position.
1308  for (offset_type k = 0; k < numKeys; ++k) {
1309  const typename hash_type::result_type hashVal =
1310  hash_type::hashFunc (keys[k], size);
1311  // Shift over one, so that counts[j] = ptr[j+1]. See below.
1312  ++ptr[hashVal+1];
1313  }
1314 
1315  // Compute row offsets via prefix sum:
1316  //
1317  // ptr[i+1] = \sum_{j=0}^{i} counts[j].
1318  //
1319  // Thus, ptr[i+1] - ptr[i] = counts[i], so that ptr[i+1] = ptr[i] +
1320  // counts[i]. If we stored counts[i] in ptr[i+1] on input, then the
1321  // formula is ptr[i+1] += ptr[i].
1322  for (offset_type i = 0; i < size; ++i) {
1323  ptr[i+1] += ptr[i];
1324  }
1325  //ptr[0] = 0; // We've already done this when initializing ptr above.
1326 
1327  // curRowStart[i] is the offset of the next element in row i.
1328  typename ptr_type::non_const_type curRowStart ("FixedHashTable::curRowStart", size);
1329 
1330  // Fill in the hash table.
1331  FHT::FillPairsResult<KeyType> result (initMinKey, initMaxKey);
1332  for (offset_type k = 0; k < numKeys; ++k) {
1333  typedef typename hash_type::result_type hash_value_type;
1334  const KeyType key = keys[k];
1335  if (key > result.maxKey_) {
1336  result.maxKey_ = key;
1337  }
1338  if (key < result.minKey_) {
1339  result.minKey_ = key;
1340  }
1341  const ValueType theVal = vals[k];
1342  if (theVal > maxVal_) {
1343  maxVal_ = theVal;
1344  }
1345  if (theVal < minVal_) {
1346  minVal_ = theVal;
1347  }
1348  const hash_value_type hashVal = hash_type::hashFunc (key, size);
1349 
1350  const offset_type offset = curRowStart[hashVal];
1351  const offset_type curPos = ptr[hashVal] + offset;
1352  if (curPos >= ptr[hashVal+1]) {
1353  result.success_ = false; // FAILURE!
1354  }
1355  else {
1356  val[curPos].first = key;
1357  val[curPos].second = theVal;
1358  ++curRowStart[hashVal];
1359  }
1360  }
1361 
1362  TEUCHOS_TEST_FOR_EXCEPTION
1363  (! result.success_, std::logic_error, "Tpetra::Details::FixedHashTable::"
1364  "init: Filling the hash table failed! Please report this bug to the "
1365  "Tpetra developers.");
1366 
1367  // "Commit" the computed arrays and other computed quantities.
1368  ptr_ = ptr;
1369  val_ = val;
1370  minKey_ = result.minKey_;
1371  maxKey_ = result.maxKey_;
1372  // We've already assigned to minVal_ and maxVal_ above.
1373 }
1374 
1375 template <class KeyType, class ValueType, class DeviceType>
1376 bool
1379 {
1380  if (! checkedForDuplicateKeys_) {
1381  hasDuplicateKeys_ = checkForDuplicateKeys ();
1382  checkedForDuplicateKeys_ = true;
1383  }
1384  return hasDuplicateKeys_;
1385 }
1386 
1387 template <class KeyType, class ValueType, class DeviceType>
1388 bool
1390 checkForDuplicateKeys () const
1391 {
1392  const offset_type size = this->getSize ();
1393  // It's allowed for the hash table to have a positive number of
1394  // buckets (getSize()), but still store no entries (numPairs()).
1395  // Both cases trivially entail no duplicates.
1396  if (size == 0 || this->numPairs () == 0) {
1397  return false;
1398  }
1399  else {
1400  typedef FHT::CheckForDuplicateKeys<ptr_type, val_type> functor_type;
1401  functor_type functor (val_, ptr_);
1402  int hasDupKeys = 0;
1403  typedef Kokkos::RangePolicy<execution_space, offset_type> range_type;
1404  Kokkos::parallel_reduce (range_type (0, size), functor, hasDupKeys);
1405  return hasDupKeys > 0;
1406  }
1407 }
1408 
1409 template <class KeyType, class ValueType, class DeviceType>
1410 std::string
1412 description () const
1413 {
1414  std::ostringstream oss;
1415  oss << "FixedHashTable<"
1416  << Teuchos::TypeNameTraits<KeyType>::name () << ","
1417  << Teuchos::TypeNameTraits<ValueType>::name () << ">: "
1418  << "{ numKeys: " << val_.extent (0)
1419  << ", tableSize: " << this->getSize () << " }";
1420  return oss.str();
1421 }
1422 
1423 template <class KeyType, class ValueType, class DeviceType>
1424 void
1426 describe (Teuchos::FancyOStream& out,
1427  const Teuchos::EVerbosityLevel verbLevel) const
1428 {
1429  using std::endl;
1430  using std::setw;
1431  using Teuchos::OSTab;
1432  using Teuchos::rcpFromRef;
1433  using Teuchos::TypeNameTraits;
1434  using Teuchos::VERB_DEFAULT;
1435  using Teuchos::VERB_NONE;
1436  using Teuchos::VERB_LOW;
1437  using Teuchos::VERB_EXTREME;
1438 
1439  // NOTE (mfh 14 May 2015) This method currently assumes UVM for
1440  // access to ptr_ and val_ from the host.
1441 
1442  Teuchos::EVerbosityLevel vl = verbLevel;
1443  if (vl == VERB_DEFAULT) vl = VERB_LOW;
1444 
1445  if (vl == VERB_NONE) {
1446  // do nothing
1447  }
1448  else if (vl == VERB_LOW) {
1449  out << this->description() << endl;
1450  }
1451  else { // MEDIUM, HIGH or EXTREME
1452  out << "FixedHashTable:" << endl;
1453  {
1454  OSTab tab1 (rcpFromRef (out));
1455 
1456  // const std::string label = this->getObjectLabel ();
1457  // if (label != "") {
1458  // out << "label: " << label << endl;
1459  // }
1460  out << "Template parameters:" << endl;
1461  {
1462  OSTab tab2 (rcpFromRef (out));
1463  out << "KeyType: " << TypeNameTraits<KeyType>::name () << endl
1464  << "ValueType: " << TypeNameTraits<ValueType>::name () << endl;
1465  }
1466 
1467  const offset_type tableSize = this->getSize ();
1468  const offset_type numKeys = val_.extent (0);
1469 
1470  out << "Table parameters:" << endl;
1471  {
1472  OSTab tab2 (rcpFromRef (out));
1473  out << "numKeys: " << numKeys << endl
1474  << "tableSize: " << tableSize << endl;
1475  }
1476 
1477  if (vl >= VERB_EXTREME) {
1478  out << "Contents: ";
1479  if (tableSize == 0 || numKeys == 0) {
1480  out << "[]" << endl;
1481  } else {
1482  out << "[ " << endl;
1483  {
1484  OSTab tab2 (rcpFromRef (out));
1485  for (offset_type i = 0; i < tableSize; ++i) {
1486  OSTab tab3 (rcpFromRef (out));
1487  out << "[";
1488  for (offset_type k = ptr_[i]; k < ptr_[i+1]; ++k) {
1489  out << "(" << val_[k].first << "," << val_[k].second << ")";
1490  if (k + 1 < ptr_[i+1]) {
1491  out << ", ";
1492  }
1493  }
1494  out << "]" << endl;
1495  } // for each table position i
1496  }
1497  out << "]" << endl;
1498  } // The table contains entries
1499  } // vl >= VERB_EXTREME
1500  }
1501  out << endl;
1502  } // if vl > VERB_LOW
1503 }
1504 
1505 } // namespace Details
1506 } // namespace Tpetra
1507 
1508 // Macro that explicitly instantiates FixedHashTable for the given local
1509 // ordinal (LO) and global ordinal (GO) types. Note that FixedHashTable's
1510 // template parameters occur in the opposite order of most Tpetra
1511 // classes. This is because FixedHashTable performs global-to-local
1512 // lookup, and the convention in templated C++ lookup tables (such as
1513 // std::map) is <KeyType, ValueType>.
1514 //
1515 // This macro must be explanded within the Tpetra::Details namespace.
1516 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT_DEFAULTNODE(LO,GO) \
1517  template class FixedHashTable< GO , LO >;
1518 
1519 // Macro that explicitly instantiates FixedHashTable for the given
1520 // local ordinal (LO), global ordinal (GO), and Kokkos device (DEVICE)
1521 // types. Note that FixedHashTable's first two template parameters
1522 // occur in the opposite order of most Tpetra classes. This is
1523 // because FixedHashTable performs global-to-local lookup, and the
1524 // convention in templated C++ lookup tables (such as std::map) is
1525 // <KeyType, ValueType>.
1526 //
1527 // This macro must be explanded within the Tpetra::Details namespace.
1528 #define TPETRA_DETAILS_FIXEDHASHTABLE_INSTANT(LO, GO, DEVICE) \
1529  template class FixedHashTable< GO , LO , DEVICE >;
1530 
1531 #endif // TPETRA_DETAILS_FIXEDHASHTABLE_DEF_HPP
CheckForDuplicateKeys(const pairs_view_type &pairs, const offsets_view_type &ptr)
Constructor.
Parallel reduce functor for filling the FixedHashTable, and computing the min and max keys...
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue, const key_type initMinKey, const key_type initMaxKey)
Constructor that takes initial min and max key values.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
KOKKOS_INLINE_FUNCTION void join(volatile value_type &dst, const volatile value_type &src) const
Combine two intermediate reduction results.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i) const
Do this for every entry of keys_.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print this object with the given verbosity to the output stream.
ResultType result_type
Type of the return value of the hash function.
Parallel for functor for counting &quot;buckets&quot; in the FixedHashTable.
bool hasKeys() const
Whether it is safe to call getKey().
bool success_
Whether fill succeeded (it can only fail on a bug)
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
The hash function for FixedHashTable.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
std::string description() const
Implementation of Teuchos::Describable interface.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body; do this for every entry of keys_.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
FixedHashTable()
Default constructor; makes an empty table.
FillPairs(const pairs_view_type &pairs, const counts_view_type &counts, const offsets_view_type &ptr, const keys_view_type &keys, const typename pair_type::second_type startingValue)
Constructor.
bool hasDuplicateKeys()
Whether the table has any duplicate keys.
static KOKKOS_INLINE_FUNCTION result_type hashFunc(const argument_type &key, const offset_type &size)
The hash function.
CountBuckets(const counts_view_type &counts, const keys_view_type &keys, const size_type size)
Constructor.
KOKKOS_INLINE_FUNCTION void init(value_type &dst) const
Set the initial value of the reduction result.
Functor for checking whether a FixedHashTable has one or more duplicate entries.
Reduction result for FillPairs functor below.
KOKKOS_INLINE_FUNCTION void operator()(const size_type &i, value_type &dst) const
Parallel loop body.
Kokkos::View< const KeyType *, Kokkos::LayoutLeft, device_type > keys_type
Type of a 1-D Kokkos::View (array) used to store keys.