MurmurHash3.cpp
// @HEADER
// *****************************************************************************
// Tpetra: Templated Linear Algebra Services Package
//
// Copyright 2008 NTESS and the Tpetra contributors.
// SPDX-License-Identifier: BSD-3-Clause
// *****************************************************************************
// @HEADER

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "MurmurHash3.hpp"

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x, y) _rotl(x, y)
#define ROTL64(x, y) _rotl64(x, y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // not defined(_MSC_VER)

namespace { // anonymous

inline uint32_t rotl32(uint32_t x, int8_t r) {
  return (x << r) | (x >> (32 - r));
}

inline uint64_t rotl64(uint64_t x, int8_t r) {
  return (x << r) | (x >> (64 - r));
}

} // namespace

#define ROTL32(x, y) rotl32(x, y)
#define ROTL64(x, y) rotl64(x, y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

#define GETBLOCK(lhs, p, i) \
  {                         \
    lhs = p[(i)];           \
  }
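
// Purely as an illustration of the note above ("do the conversion here"):
// on a big-endian target one could, hypothetically, byte-swap each block
// inside GETBLOCK. The commented-out variant below is only a sketch, is not
// part of the original implementation, and covers only the 32-bit blocks
// used by the x86 routines (the x64 routine reads 64-bit blocks).
//
//   #define GETBLOCK(lhs, p, i)               \
//     {                                       \
//       uint32_t t_raw = (p)[(i)];            \
//       lhs = ((t_raw & 0x000000FFu) << 24) | \
//             ((t_raw & 0x0000FF00u) << 8)  | \
//             ((t_raw & 0x00FF0000u) >> 8)  | \
//             ((t_raw & 0xFF000000u) >> 24);  \
//     }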

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

#define FMIX_32(h)        \
  {                       \
    uint32_t t_h = (h);   \
    t_h ^= t_h >> 16;     \
    t_h *= 0x85ebca6b;    \
    t_h ^= t_h >> 13;     \
    t_h *= 0xc2b2ae35;    \
    t_h ^= t_h >> 16;     \
    h = t_h;              \
  }

//----------

#define FMIX_64(k)                           \
  {                                          \
    uint64_t t_k = (k);                      \
    t_k ^= t_k >> 33;                        \
    t_k *= BIG_CONSTANT(0xff51afd7ed558ccd); \
    t_k ^= t_k >> 33;                        \
    t_k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); \
    t_k ^= t_k >> 33;                        \
    k = t_k;                                 \
  }

//-----------------------------------------------------------------------------

namespace Tpetra {
namespace Details {

void MurmurHash3_x86_32(const void *key, int len,
                        uint32_t seed, void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4);

  for (int i = -nblocks; i; i++) {
    uint32_t k1;
    GETBLOCK(k1, blocks, i);

    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1, 13);
    h1 = h1 * 5 + 0xe6546b64;
  }

  //----------
  // tail

  const uint8_t *tail = (const uint8_t *)(data + nblocks * 4);

  uint32_t k1 = 0;

  // The cases below fall through intentionally, accumulating the remaining
  // 1-3 bytes before mixing them into h1.
  switch (len & 3) {
  case 3: k1 ^= tail[2] << 16; // fall through
  case 2: k1 ^= tail[1] << 8;  // fall through
  case 1:
    k1 ^= tail[0];
    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;
    h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;

  FMIX_32(h1);

  *(uint32_t *)out = h1;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x86_128(const void *key, const int len,
                         uint32_t seed, void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 16;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  const uint32_t *blocks = (const uint32_t *)(data + nblocks * 16);

  for (int i = -nblocks; i; i++) {
    uint32_t k1, k2, k3, k4;
    GETBLOCK(k1, blocks, i * 4 + 0);
    GETBLOCK(k2, blocks, i * 4 + 1);
    GETBLOCK(k3, blocks, i * 4 + 2);
    GETBLOCK(k4, blocks, i * 4 + 3);

    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL32(h1, 19);
    h1 += h2;
    h1 = h1 * 5 + 0x561ccd1b;

    k2 *= c2;
    k2 = ROTL32(k2, 16);
    k2 *= c3;
    h2 ^= k2;

    h2 = ROTL32(h2, 17);
    h2 += h3;
    h2 = h2 * 5 + 0x0bcaa747;

    k3 *= c3;
    k3 = ROTL32(k3, 17);
    k3 *= c4;
    h3 ^= k3;

    h3 = ROTL32(h3, 15);
    h3 += h4;
    h3 = h3 * 5 + 0x96cd1c35;

    k4 *= c4;
    k4 = ROTL32(k4, 18);
    k4 *= c1;
    h4 ^= k4;

    h4 = ROTL32(h4, 13);
    h4 += h1;
    h4 = h4 * 5 + 0x32ac3b17;
  }

  //----------
  // tail

  const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  // The cases below fall through intentionally, accumulating the remaining
  // 1-15 bytes before mixing them into the corresponding hash words.
  switch (len & 15) {
  case 15: k4 ^= tail[14] << 16; // fall through
  case 14: k4 ^= tail[13] << 8;  // fall through
  case 13:
    k4 ^= tail[12] << 0;
    k4 *= c4;
    k4 = ROTL32(k4, 18);
    k4 *= c1;
    h4 ^= k4;
    // fall through

  case 12: k3 ^= tail[11] << 24; // fall through
  case 11: k3 ^= tail[10] << 16; // fall through
  case 10: k3 ^= tail[9] << 8;   // fall through
  case 9:
    k3 ^= tail[8] << 0;
    k3 *= c3;
    k3 = ROTL32(k3, 17);
    k3 *= c4;
    h3 ^= k3;
    // fall through

  case 8: k2 ^= tail[7] << 24; // fall through
  case 7: k2 ^= tail[6] << 16; // fall through
  case 6: k2 ^= tail[5] << 8;  // fall through
  case 5:
    k2 ^= tail[4] << 0;
    k2 *= c2;
    k2 = ROTL32(k2, 16);
    k2 *= c3;
    h2 ^= k2;
    // fall through

  case 4: k1 ^= tail[3] << 24; // fall through
  case 3: k1 ^= tail[2] << 16; // fall through
  case 2: k1 ^= tail[1] << 8;  // fall through
  case 1:
    k1 ^= tail[0] << 0;
    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;
    h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;
  h3 ^= len;
  h4 ^= len;

  h1 += h2;
  h1 += h3;
  h1 += h4;
  h2 += h1;
  h3 += h1;
  h4 += h1;

  FMIX_32(h1);
  FMIX_32(h2);
  FMIX_32(h3);
  FMIX_32(h4);

  h1 += h2;
  h1 += h3;
  h1 += h4;
  h2 += h1;
  h3 += h1;
  h4 += h1;

  ((uint32_t *)out)[0] = h1;
  ((uint32_t *)out)[1] = h2;
  ((uint32_t *)out)[2] = h3;
  ((uint32_t *)out)[3] = h4;
}

//-----------------------------------------------------------------------------

void MurmurHash3_x64_128(const void *key, const int len,
                         const uint32_t seed, void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t *blocks = (const uint64_t *)(data);

  for (int i = 0; i < nblocks; i++) {
    uint64_t k1, k2;
    GETBLOCK(k1, blocks, i * 2 + 0);
    GETBLOCK(k2, blocks, i * 2 + 1);

    k1 *= c1;
    k1 = ROTL64(k1, 31);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL64(h1, 27);
    h1 += h2;
    h1 = h1 * 5 + 0x52dce729;

    k2 *= c2;
    k2 = ROTL64(k2, 33);
    k2 *= c1;
    h2 ^= k2;

    h2 = ROTL64(h2, 31);
    h2 += h1;
    h2 = h2 * 5 + 0x38495ab5;
  }

  //----------
  // tail

  const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  // The cases below fall through intentionally, accumulating the remaining
  // 1-15 bytes before mixing them into h1 and h2.
  switch (len & 15) {
  case 15: k2 ^= uint64_t(tail[14]) << 48; // fall through
  case 14: k2 ^= uint64_t(tail[13]) << 40; // fall through
  case 13: k2 ^= uint64_t(tail[12]) << 32; // fall through
  case 12: k2 ^= uint64_t(tail[11]) << 24; // fall through
  case 11: k2 ^= uint64_t(tail[10]) << 16; // fall through
  case 10: k2 ^= uint64_t(tail[9]) << 8;   // fall through
  case 9:
    k2 ^= uint64_t(tail[8]) << 0;
    k2 *= c2;
    k2 = ROTL64(k2, 33);
    k2 *= c1;
    h2 ^= k2;
    // fall through

  case 8: k1 ^= uint64_t(tail[7]) << 56; // fall through
  case 7: k1 ^= uint64_t(tail[6]) << 48; // fall through
  case 6: k1 ^= uint64_t(tail[5]) << 40; // fall through
  case 5: k1 ^= uint64_t(tail[4]) << 32; // fall through
  case 4: k1 ^= uint64_t(tail[3]) << 24; // fall through
  case 3: k1 ^= uint64_t(tail[2]) << 16; // fall through
  case 2: k1 ^= uint64_t(tail[1]) << 8;  // fall through
  case 1:
    k1 ^= uint64_t(tail[0]) << 0;
    k1 *= c1;
    k1 = ROTL64(k1, 31);
    k1 *= c2;
    h1 ^= k1;
  };

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;

  h1 += h2;
  h2 += h1;

  FMIX_64(h1);
  FMIX_64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t *)out)[0] = h1;
  ((uint64_t *)out)[1] = h2;
}

} // namespace Details
} // namespace Tpetra

//-----------------------------------------------------------------------------
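
For context, the short driver below sketches how these hashes might be called. It is not part of the Tpetra sources: the buffer names and the main() wrapper are illustrative, and it assumes the three functions above are declared in MurmurHash3.hpp within namespace Tpetra::Details, as this translation unit suggests.

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>

#include "MurmurHash3.hpp"

int main() {
  const char key[] = "Tpetra";
  const int len = static_cast<int>(std::strlen(key));
  const uint32_t seed = 0;

  // 32-bit hash, e.g. for hash-table style lookups.
  uint32_t h32 = 0;
  Tpetra::Details::MurmurHash3_x86_32(key, len, seed, &h32);

  // 128-bit hashes. Per the note at the top of the file, the x86 and x64
  // variants are not expected to agree on the same input.
  uint32_t h128_x86[4] = {0, 0, 0, 0};
  Tpetra::Details::MurmurHash3_x86_128(key, len, seed, h128_x86);

  uint64_t h128_x64[2] = {0, 0};
  Tpetra::Details::MurmurHash3_x64_128(key, len, seed, h128_x64);

  std::printf("x86_32 : %08" PRIx32 "\n", h32);
  std::printf("x86_128: %08" PRIx32 " %08" PRIx32 " %08" PRIx32 " %08" PRIx32 "\n",
              h128_x86[0], h128_x86[1], h128_x86[2], h128_x86[3]);
  std::printf("x64_128: %016" PRIx64 " %016" PRIx64 "\n",
              h128_x64[0], h128_x64[1]);
  return 0;
}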