-
-
Notifications
You must be signed in to change notification settings - Fork 180
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
69 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -696,6 +696,7 @@ add_library( | |
${T1HA_SRC} | ||
${SHA_SRC} | ||
mum.cc | ||
mum_v3.cc | ||
jody_hash32.c | ||
jody_hash64.c | ||
${TSIP_SRC} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
#define MUM_V1 | ||
#include "mum.h" | ||
|
||
void mum_hash_test(const void *key, int len, uint32_t seed, void *out) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
/* Copyright (c) 2016 Vladimir Makarov <[email protected]> | ||
/* Copyright (c) 2016, 2017, 2018 | ||
Vladimir Makarov <[email protected]> | ||
Permission is hereby granted, free of charge, to any person | ||
obtaining a copy of this software and associated documentation | ||
|
@@ -58,11 +59,7 @@ typedef unsigned __int64 uint64_t; | |
|
||
#ifdef __GNUC__ | ||
#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused)) | ||
# ifdef __clang__ | ||
# define _MUM_OPTIMIZE(opts) | ||
# else | ||
# define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts))) | ||
# endif | ||
#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts))) | ||
#define _MUM_TARGET(opts) __attribute__((__target__ (opts))) | ||
#else | ||
#define _MUM_ATTRIBUTE_UNUSED | ||
|
@@ -83,10 +80,6 @@ typedef unsigned __int64 uint64_t; | |
#endif | ||
#endif | ||
|
||
#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4)) | ||
#define _MUM_FRESH_GCC | ||
#endif | ||
|
||
/* Here are different primes, randomly generated with equal | ||
probability for each of their bit values. They are used to randomize input | ||
values. */ | ||
|
@@ -97,7 +90,7 @@ static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL; | |
static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL; | ||
static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL; | ||
static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL; | ||
|
||
static uint64_t _mum_primes [] = { | ||
0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f, | ||
0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81, | ||
|
@@ -116,7 +109,7 @@ _mum (uint64_t v, uint64_t p) { | |
multiplication. If we use generic code we actually call a | ||
function doing 128x128->128 bit multiplication. The function is | ||
very slow. */ | ||
lo = v * p, hi; | ||
lo = v * p; | ||
asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p)); | ||
#else | ||
__uint128_t r = (__uint128_t) v * (__uint128_t) p; | ||
|
@@ -133,7 +126,7 @@ _mum (uint64_t v, uint64_t p) { | |
uint64_t rm_1 = hp * lv; | ||
uint64_t rl = lv * lp; | ||
uint64_t t, carry = 0; | ||
|
||
/* We could ignore a carry bit here if we did not care about the | ||
same hash for 32-bit and 64-bit targets. */ | ||
t = rl + (rm_0 << 32); | ||
|
@@ -190,6 +183,17 @@ _mum_le32 (uint32_t v) { | |
#endif | ||
} | ||
|
||
static inline uint64_t | ||
_mum_le16 (uint16_t v) { | ||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH) | ||
return v; | ||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ | ||
return (v >> 8) | ((v & 0xff) << 8); | ||
#else | ||
#error "Unknown endianess" | ||
#endif | ||
} | ||
|
||
/* Macro defining how many times the most nested loop in | ||
_mum_hash_aligned will be unrolled by the compiler (although it can | ||
make its own decision:). Use only a constant here to help a | ||
|
@@ -203,8 +207,10 @@ _mum_le32 (uint32_t v) { | |
#define _MUM_UNROLL_FACTOR_POWER 3 | ||
#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH) | ||
#define _MUM_UNROLL_FACTOR_POWER 4 | ||
#else | ||
#elif defined (MUM_V1) || defined (MUM_V2) | ||
#define _MUM_UNROLL_FACTOR_POWER 2 | ||
#else | ||
#define _MUM_UNROLL_FACTOR_POWER 3 | ||
#endif | ||
#endif | ||
|
||
|
@@ -216,21 +222,35 @@ _mum_le32 (uint32_t v) { | |
|
||
#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER) | ||
|
||
/* Rotate V left by SH. */ | ||
static inline uint64_t _mum_rotl (uint64_t v, int sh) { | ||
return v << sh | v >> (64 - sh); | ||
} | ||
|
||
static inline uint64_t _MUM_OPTIMIZE("unroll-loops") | ||
_mum_hash_aligned (uint64_t start, const void* key, size_t len) { | ||
_mum_hash_aligned (uint64_t start, const void *key, size_t len) { | ||
uint64_t result = start; | ||
const unsigned char* str = (const unsigned char*) key; | ||
const unsigned char *str = (const unsigned char *) key; | ||
uint64_t u64; | ||
size_t i; | ||
size_t n; | ||
|
||
|
||
#ifndef MUM_V2 | ||
result = _mum (result, _mum_block_start_prime); | ||
#endif | ||
while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) { | ||
/* This loop could be vectorized when we have vector insns for | ||
64x64->128-bit multiplication. AVX2 currently only have a | ||
vector insn for 4 32x32->64-bit multiplication. */ | ||
64x64->128-bit multiplication. AVX2 currently only has vector | ||
insns for 4 32x32->64-bit multiplications and for 1 | ||
64x64->128-bit multiplication (pclmulqdq). */ | ||
#if defined (MUM_V1) || defined (MUM_V2) | ||
for (i = 0; i < _MUM_UNROLL_FACTOR; i++) | ||
result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]); | ||
#else | ||
for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2) | ||
result ^= _mum (_mum_le (((uint64_t *) str)[i]) ^ _mum_primes[i], | ||
_mum_le (((uint64_t *) str)[i + 1]) ^ _mum_primes[i + 1]); | ||
#endif | ||
len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t); | ||
str += _MUM_UNROLL_FACTOR * sizeof (uint64_t); | ||
/* We will use the same prime numbers on the next iterations -- | ||
|
@@ -244,14 +264,12 @@ _mum_hash_aligned (uint64_t start, const void* key, size_t len) { | |
switch (len) { | ||
case 7: | ||
u64 = _mum_le32 (*(uint32_t *) str); | ||
u64 |= (uint64_t) str[4] << 32; | ||
u64 |= (uint64_t) str[5] << 40; | ||
u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32; | ||
u64 |= (uint64_t) str[6] << 48; | ||
return result ^ _mum (u64, _mum_tail_prime); | ||
case 6: | ||
u64 = _mum_le32 (*(uint32_t *) str); | ||
u64 |= (uint64_t) str[4] << 32; | ||
u64 |= (uint64_t) str[5] << 40; | ||
u64 |= _mum_le16 (*(uint16_t *) (str + 4)) << 32; | ||
return result ^ _mum (u64, _mum_tail_prime); | ||
case 5: | ||
u64 = _mum_le32 (*(uint32_t *) str); | ||
|
@@ -261,13 +279,11 @@ _mum_hash_aligned (uint64_t start, const void* key, size_t len) { | |
u64 = _mum_le32 (*(uint32_t *) str); | ||
return result ^ _mum (u64, _mum_tail_prime); | ||
case 3: | ||
u64 = str[0]; | ||
u64 |= (uint64_t) str[1] << 8; | ||
u64 = _mum_le16 (*(uint16_t *) str); | ||
u64 |= (uint64_t) str[2] << 16; | ||
return result ^ _mum (u64, _mum_tail_prime); | ||
case 2: | ||
u64 = str[0]; | ||
u64 |= (uint64_t) str[1] << 8; | ||
u64 = _mum_le16 (*(uint16_t *) str); | ||
return result ^ _mum (u64, _mum_tail_prime); | ||
case 1: | ||
u64 = str[0]; | ||
|
@@ -279,23 +295,18 @@ _mum_hash_aligned (uint64_t start, const void* key, size_t len) { | |
/* Final randomization of H. */ | ||
static inline uint64_t | ||
_mum_final (uint64_t h) { | ||
#if defined (MUM_V1) | ||
h ^= _mum (h, _mum_finish_prime1); | ||
h ^= _mum (h, _mum_finish_prime2); | ||
#elif defined (MUM_V2) | ||
h ^= _mum_rotl (h, 33); | ||
h ^= _mum (h, _mum_finish_prime1); | ||
#else | ||
h = _mum (h, h); | ||
#endif | ||
return h; | ||
} | ||
|
||
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC) | ||
|
||
/* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where | ||
it is possible. Although on modern Intel processors MULQ takes | ||
3 cycles vs. 4 for MULX, MULX permits more freedom in insn | ||
scheduling as it uses fewer fixed registers. */ | ||
static inline uint64_t _MUM_TARGET("arch=haswell") | ||
_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) { | ||
return _mum_final (_mum_hash_aligned (seed + len, key, len)); | ||
} | ||
#endif | ||
|
||
#ifndef _MUM_UNALIGNED_ACCESS | ||
#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \ | ||
|| defined(__s390__) || defined(__m32c__) || defined(cris) \ | ||
|
@@ -320,16 +331,16 @@ _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) { | |
|
||
static inline uint64_t | ||
#if defined(__x86_64__) | ||
//_MUM_TARGET("inline-all-stringops") | ||
_MUM_TARGET("inline-all-stringops") | ||
#endif | ||
_mum_hash_default (const void *key, size_t len, uint64_t seed) { | ||
uint64_t result; | ||
const unsigned char *str = (const unsigned char *) key; | ||
size_t block_len; | ||
uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)]; | ||
|
||
result = seed + len; | ||
if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0) | ||
if (((size_t) str & 0x7) == 0) | ||
result = _mum_hash_aligned (result, key, len); | ||
else { | ||
while (len != 0) { | ||
|
@@ -347,7 +358,7 @@ static inline uint64_t | |
_mum_next_factor (void) { | ||
uint64_t start = 0; | ||
int i; | ||
|
||
for (i = 0; i < 8; i++) | ||
start = (start << 8) | rand() % 256; | ||
return start; | ||
|
@@ -380,8 +391,7 @@ mum_hash_init (uint64_t seed) { | |
|
||
/* Process data KEY with the state H and return the updated state. */ | ||
static inline uint64_t | ||
mum_hash_step (uint64_t h, uint64_t key) | ||
{ | ||
mum_hash_step (uint64_t h, uint64_t key) { | ||
return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime); | ||
} | ||
|
||
|
@@ -402,19 +412,11 @@ mum_hash64 (uint64_t key, uint64_t seed) { | |
target endianness and the unroll factor. */ | ||
static inline uint64_t | ||
mum_hash (const void *key, size_t len, uint64_t seed) { | ||
#if defined(__x86_64__) && defined(_MUM_FRESH_GCC) | ||
static int avx2_support = 0; | ||
|
||
if (avx2_support > 0) | ||
return _mum_hash_avx2 (key, len, seed); | ||
else if (! avx2_support) { | ||
__builtin_cpu_init (); | ||
avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1; | ||
if (avx2_support > 0) | ||
return _mum_hash_avx2 (key, len, seed); | ||
} | ||
#endif | ||
#if _MUM_UNALIGNED_ACCESS | ||
return _mum_final (_mum_hash_aligned (seed + len, key, len)); | ||
#else | ||
return _mum_hash_default (key, len, seed); | ||
#endif | ||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#include "mum.h" | ||
|
||
void mum_v3_hash_test(const void *key, int len, uint32_t seed, void *out) { | ||
*(uint64_t *)out = mum_hash(key, len, seed); | ||
} |