From 29fc1ddbf7e432645d6e897314aeed371d9197c4 Mon Sep 17 00:00:00 2001 From: Reini Urban Date: Sat, 14 Dec 2024 16:25:47 +0100 Subject: [PATCH] WIP add aesni-hash-peterrk from smhasher3 --- CMakeLists.txt | 2 +- Hashes.cpp | 2 + Hashes.h | 1 + README.md | 1 + aesnihash-peterrk.hpp | 189 ++++++++++++++++++++++++++++++++++++++++++ doc/epyc.html | 8 ++ doc/i7.html | 8 ++ doc/ryzen3.html | 8 ++ doc/table.html | 8 ++ main.cpp | 2 +- testspeed-i7.sh | 10 +-- testspeed.sh | 2 +- 12 files changed, 233 insertions(+), 8 deletions(-) create mode 100644 aesnihash-peterrk.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c93f857..86e34305 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -700,7 +700,6 @@ add_library( beamsplitter.cpp discohash_512.cpp xxhash.c - ${GX_SRC} metrohash/metrohash64.cpp metrohash/metrohash128.cpp cmetrohash64.c @@ -711,6 +710,7 @@ add_library( # ${FHTW_OBJ} ${T1HA_SRC} ${SHA_SRC} + ${GX_SRC} mum.cc jody_hash32.c jody_hash64.c diff --git a/Hashes.cpp b/Hashes.cpp index b9aac7e2..1c75dc07 100644 --- a/Hashes.cpp +++ b/Hashes.cpp @@ -1365,3 +1365,5 @@ void polymur_test ( const void *key, int len, uint32_t seed, void *out) { *(uint64_t*)out = polymur_hash((const uint8_t*)key, (size_t)len, &g_polymurhashparams, (uint64_t)seed); } + +#include "aesnihash-peterrk.hpp" diff --git a/Hashes.h b/Hashes.h index d3617db4..872c9cb7 100644 --- a/Hashes.h +++ b/Hashes.h @@ -539,6 +539,7 @@ inline void t1ha0_ia32aes_noavx_test(const void * key, int len, uint32_t seed, v // objsize 0-39d: 925 *(uint64_t*)out = t1ha0_ia32aes_noavx(key, len, seed); } +void aesnihash_peterrk(const void * in, int len0, uint32_t seed, void * out); #endif #if defined(__AVX__) inline void t1ha0_ia32aes_avx1_test(const void * key, int len, uint32_t seed, void * out) diff --git a/README.md b/README.md index dd5a48b7..330e06dd 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ SMhasher | [t1ha2_stream](doc/t1ha2_stream.txt) | 13673.22 | 81.12 | 263.88 (3) |1665 | Sparse, 
Permutation, LongNeighbors | | [t1ha2_stream128](doc/t1ha2_stream128.txt) | 13913.43 | 94.60 | 296.15 (4) |1665 | Sparse, Permutation, LongNeighbors | | [aesnihash](doc/aesnihash.txt) | 5365.60 | 57.21 | 255.87 (3) |1209 | fails many tests, machine-specific (x64 AES-NI) | +| [aesni-hash-peterrk](doc/aesni-hash-peterrk.txt)| 29107.73 | 28.86 | 217.57 (1) | | fails many tests, machine-specific (x64 AES-NI) | | [falkhash](doc/falkhash.txt) | 52401.48 | 122.70 | 316.79 (4) | 264 | Sparse, LongNeighbors, machine-specific (x64 AES-NI) | | [MeowHash](doc/MeowHash.txt) | 29969.81 | 64.90 | 273.79 (8) |1764 | Sparse, invertible, machine-specific (x64 AES-NI) | | [MeowHash64low](doc/MeowHash64low.txt) | 29438.45 | 63.76 | 269.41 (4) |1764 | Sparse, invertible, machine-specific (x64 AES-NI) | diff --git a/aesnihash-peterrk.hpp b/aesnihash-peterrk.hpp new file mode 100644 index 00000000..a992db2e --- /dev/null +++ b/aesnihash-peterrk.hpp @@ -0,0 +1,189 @@ +/* + * aesnihash from the PageBloomFilter project + * Copyright (C) 2023 Frank J. T. Wojcik + * Copyright (C) 2023 Ruan Kunliang + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "Platform.h"
+
+#if defined(HAVE_AESNI)
+#include <immintrin.h>
+
+/*
+ * aesnihash_peterrk - seeded 128-bit hash built from AES-NI rounds.
+ *
+ *   in    message bytes; no alignment requirement (loads are unaligned)
+ *   len0  message length in bytes, converted to uint32_t
+ *   seed  32-bit seed, broadcast into lane a
+ *   out   receives the 16-byte digest via an unaligned store
+ *
+ * Messages longer than 80 bytes are first folded 64 bytes at a time
+ * through four lanes, the remainder is absorbed by mix() in 16-byte
+ * blocks, and a final 1..15 byte tail is absorbed as one zero-padded
+ * block.
+ *
+ * The fast tail path loads a full 16 bytes starting at the tail, i.e. it
+ * may read up to 15 bytes past the end of the message. It is only taken
+ * when that load stays inside the 4 KiB page holding the tail start;
+ * otherwise the tail is copied into a zeroed buffer first. Both paths
+ * feed mix() the same block, so the digest does not depend on the path.
+ *
+ * Uses AES-NI plus _mm_shuffle_epi8 (SSSE3) and _mm_lddqu_si128 (SSE3);
+ * only compiled when HAVE_AESNI is defined.
+ */
+void aesnihash_peterrk(const void * in, int len0, uint32_t seed, void * out) {
+    // Lane a starts from the seed, lane b from the length.
+    auto a = _mm_set1_epi32((uint32_t)seed);
+    auto b = _mm_set1_epi32((uint32_t)len0);
+    // m is the fixed round key, s a fixed byte permutation.
+    auto m = _mm_set_epi32(0xdeadbeef, 0xffff0000, 0x01234567, 0x89abcdef);
+    auto s = _mm_set_epi8(3, 7, 11, 15, 2, 6, 10, 14, 1, 5, 9, 13, 0, 4, 8, 12);
+    const uint8_t * msg = (const uint8_t *)in;
+    uint32_t len = (uint32_t)len0;
+
+    // Bulk phase: 64 bytes per iteration; leaves at most 80 bytes.
+    if (len > 80) {
+        auto c = _mm_aesenc_si128(b, m);
+        auto d = _mm_aesdec_si128(a, m);
+        a = _mm_aesenc_si128(a, m);
+        b = _mm_aesdec_si128(b, m);
+        do {
+            // xor 16 message bytes into each of the four lanes
+            a = _mm_xor_si128(a, _mm_lddqu_si128((const __m128i *)msg));
+            b = _mm_xor_si128(b, _mm_lddqu_si128((const __m128i *)(msg + 16)));
+            c = _mm_xor_si128(c, _mm_lddqu_si128((const __m128i *)(msg + 32)));
+            d = _mm_xor_si128(d, _mm_lddqu_si128((const __m128i *)(msg + 48)));
+            // one AES round per lane (enc/dec alternating), then permute
+            a = _mm_shuffle_epi8(_mm_aesenc_si128(a, m), s);
+            b = _mm_shuffle_epi8(_mm_aesdec_si128(b, m), s);
+            c = _mm_shuffle_epi8(_mm_aesenc_si128(c, m), s);
+            d = _mm_shuffle_epi8(_mm_aesdec_si128(d, m), s);
+            msg += 64;
+            len -= 64;
+        } while (len > 80);
+        // Collapse the four lanes back into a and b.
+        c = _mm_aesenc_si128(a, c);
+        d = _mm_aesdec_si128(b, d);
+        a = _mm_aesenc_si128(c, d);
+        b = _mm_aesdec_si128(d, c);
+    }
+
+    // Absorb one 16-byte block into both lanes.
+    auto mix = [&a, &b, m, s]( __m128i x ) {
+        a = _mm_aesenc_si128(x, a);
+        a = _mm_aesenc_si128(a, m);
+        b = _mm_shuffle_epi8(_mm_xor_si128(x, b) , s);
+        b = _mm_shuffle_epi8(_mm_aesdec_si128(b, m), s);
+    };
+
+    // Remaining full 16-byte blocks.
+    while (len >= 16) {
+        mix(_mm_lddqu_si128((const __m128i *)msg));
+        msg += 16;
+        len -= 16;
+    }
+
+    // msg now points at the tail (len < 16). A 16-byte load from here stays
+    // within one 4 KiB page only if it starts at page offset <= 0xff0. The
+    // check must use the tail start, not the last message byte, because the
+    // greedy load below reads forward from msg.
+    const bool greed = ((uintptr_t)msg & 0xfff) <= 0x1000 - 16;
+
+    if (len == 0) {
+        // No tail: perturb the lanes (keeps m & s from register spilling).
+        a = _mm_add_epi8(a, s);
+        b = _mm_add_epi8(b, m);
+    } else if (greed) {
+// Load 16 bytes at addr and clear all but the low n bytes.
+#define GREEDILY_READ(n, addr) \
+    _mm_bsrli_si128(_mm_bslli_si128(_mm_lddqu_si128((const __m128i*)(addr)), (16-(n))), (16-(n)))
+        // The byte shifts take immediate counts, hence one case per length.
+        switch (len) {
+        case 15: mix(GREEDILY_READ(15, msg)); break;
+        case 14: mix(GREEDILY_READ(14, msg)); break;
+        case 13: mix(GREEDILY_READ(13, msg)); break;
+        case 12: mix(GREEDILY_READ(12, msg)); break;
+        case 11: mix(GREEDILY_READ(11, msg)); break;
+        case 10: mix(GREEDILY_READ(10, msg)); break;
+        case  9: mix(GREEDILY_READ(9, msg)); break;
+        case  8: mix(_mm_castpd_si128(_mm_load_sd((const double *)msg))); break;
+        case  7: mix(GREEDILY_READ(7, msg)); break;
+        case  6: mix(GREEDILY_READ(6, msg)); break;
+        case  5: mix(GREEDILY_READ(5, msg)); break;
+        case  4: mix(_mm_castps_si128(_mm_load_ss((const float *)msg))); break;
+        case  3: mix(GREEDILY_READ(3, msg)); break;
+        case  2: mix(GREEDILY_READ(2, msg)); break;
+        case  1: mix(GREEDILY_READ(1, msg)); break;
+        default: break; // unreachable: 1 <= len <= 15 on this branch
+        }
+#undef GREEDILY_READ
+    } else {
+        // Page-safe path: zero-pad the tail in a local buffer so no byte
+        // beyond the message is touched.
+        uint8_t tail[16] = { 0 };
+        for (uint32_t i = 0; i < len; i++) {
+            tail[i] = msg[i];
+        }
+        mix(_mm_loadu_si128((const __m128i *)tail));
+    }
+
+    // Final round combines both lanes into the digest.
+    _mm_storeu_si128((__m128i *)out, _mm_aesenc_si128(a, b));
+}
+
+#endif
+
+#ifdef smhasher3
+//------------------------------------------------------------
+REGISTER_FAMILY(aesnihash_peterrk,
+   $.src_url = "https://github.com/PeterRK/PageBloomFilter/blob/main/src/aesni-hash.h",
+   $.src_status = HashFamilyInfo::SRC_ACTIVE
+ );
+
+REGISTER_HASH(aesnihash_peterrk,
+   $.desc = "AESNI Hash by PeterRK",
+   $.impl = "aesni",
+   $.hash_flags =
+         FLAG_HASH_AES_BASED |
+         FLAG_HASH_SMALL_SEED |
+         FLAG_HASH_ENDIAN_INDEPENDENT,
+   $.impl_flags =
+         FLAG_IMPL_READ_PAST_EOB |
+         FLAG_IMPL_CANONICAL_BOTH |
+         FLAG_IMPL_LICENSE_BSD,
+   $.bits = 128,
+   $.verification_LE = 0xF06DA1B1,
+   $.verification_BE = 0xF06DA1B1,
+   $.hashfn_native = aesnihash_peterrk,
+   $.hashfn_bswap = aesnihash_peterrk
+ );
+#endif
diff --git a/doc/epyc.html b/doc/epyc.html
index 8b62b5bd..e182d69c 100644
--- a/doc/epyc.html
+++ b/doc/epyc.html
@@ -1032,6 +1032,14 @@

SMhasher

fails many tests, machine-specific (x64 AES-NI) +aesni-hash-peterrk +28435.27 +28.83 +126.43 (2) + +fails many tests, machine-specific (x64 AES-NI) + + falkhash 66674.45 83.69 diff --git a/doc/i7.html b/doc/i7.html index 31e2ebb4..45a19aae 100644 --- a/doc/i7.html +++ b/doc/i7.html @@ -1032,6 +1032,14 @@

SMhasher

fails many tests, machine-specific (x64 AES-NI) +aesni-hash-peterrk +32644.92 +22.28 +126.19 (3) + +fails many tests, machine-specific (x64 AES-NI) + + falkhash 46580.43 84.93 diff --git a/doc/ryzen3.html b/doc/ryzen3.html index 72c06fe4..b1d74a85 100644 --- a/doc/ryzen3.html +++ b/doc/ryzen3.html @@ -1016,6 +1016,14 @@

SMhasher

fails many tests, machine-specific (x64 AES-NI) +aesni-hash-peterrk +35244.97 +23.99 +481.13 (26) + +fails many tests, machine-specific (x64 AES-NI) + + falkhash 62025.17 128.28 diff --git a/doc/table.html b/doc/table.html index abc95d90..070ef287 100644 --- a/doc/table.html +++ b/doc/table.html @@ -1000,6 +1000,14 @@

SMhasher

fails many tests, machine-specific (x64 AES-NI) +aesni-hash-peterrk +29107.73 +28.86 +217.57 (1) + +fails many tests, machine-specific (x64 AES-NI) + + falkhash 52401.48 122.70 diff --git a/main.cpp b/main.cpp index 149ccd00..556fc189 100644 --- a/main.cpp +++ b/main.cpp @@ -385,7 +385,7 @@ HashInfo g_hashes[] = #if defined(HAVE_SSE2) && defined(HAVE_AESNI) && !defined(_MSC_VER) { aesnihash_test, 64, 0xA68E0D42, "aesnihash", "majek's seeded aesnihash with aesenc, 64-bit for x64", POOR, {0x70736575} }, - +{ aesnihash_peterrk, 128, 0xF06DA1B1, "aesni-hash-peterrk", "PeterRK's seeded aesnihash with aesenc, 128-bit for x64", POOR, {} }, { aesni128_test, 128, 0xF06DA1B1, "aesni", "aesni 128bit", GOOD,{} }, { aesni64_test, 64, 0x3AA1A480, "aesni-low","aesni 64bit", GOOD,{} }, #endif diff --git a/testspeed-i7.sh b/testspeed-i7.sh index 484f5d55..ceb902de 100755 --- a/testspeed-i7.sh +++ b/testspeed-i7.sh @@ -2,10 +2,10 @@ make -C build ./start-bench.sh if [ -z "$1" ]; then - test -f log.speed-i7-6820 && mv log.speed-i7-6820 log.speed-i7-6820.bak + test -f log.speed-i7 && mv log.speed-i7 log.speed-i7.bak (for g in `build/SMHasher --listnames`; do \ - build/SMHasher --test=Speed,Hashmap $g 2>&1; done) | tee log.speed-i7-6820 - ./speed.pl -h=doc/i7 log.speed-i7-6820 + build/SMHasher --test=Speed,Hashmap $g 2>&1; done) | tee log.speed-i7 + ./speed.pl -h=doc/i7 log.speed-i7 else for g in `build/SMHasher --listnames`; do for p in $@; do @@ -13,7 +13,7 @@ else build/SMHasher --test=Speed,Hashmap $g 2>&1 fi done - done | tee "log.speed-i7-6820-$1" - ./speed.pl -h=doc/i7 "log.speed-i7-6820-$1" + done | tee "log.speed-i7-$1" + ./speed.pl -h=doc/i7 "log.speed-i7-$1" fi ./stop-bench.sh diff --git a/testspeed.sh b/testspeed.sh index afcb7bde..0878cdd3 100755 --- a/testspeed.sh +++ b/testspeed.sh @@ -1,6 +1,6 @@ #!/bin/bash hname="`hostname`" -if [ x$hname = xe495 ]; then +if [ x$hname = xe495-reini ] || [ x$hname = xe495 ]; then ./testspeed-ryzen3.sh $@ exit fi