Skip to content

Commit

Permalink
Fix compilation for AVX512-VNNI builds
Browse files (browse the repository at this point in the history)
Passed f15 STC, VLTC:
Book: f15-base-8k
TC: 10+0.1
Total/Win/Draw/Lose: 4224 / 1872 / 966 / 1386
PTNML: 204 / 298 / 811 / 406 / 393
WinRate: 55.75%
ELO: 39.83[29.99, 49.81]
LOS: 100.00
LLR: 3.31[-2.94, 2.94]

TC: 180+1.8
Total/Win/Draw/Lose: 4314 / 1246 / 2190 / 878
PTNML: 47 / 390 / 994 / 600 / 126
WinRate: 54.27%
ELO: 29.46[22.02, 36.98]
LOS: 100.00
LLR: 3.06[-2.94, 2.94]

Passed f20 STC, VLTC:
Book: f20-base-8k
TC: 10+0.1
Total/Win/Draw/Lose: 10298 / 5436 / 4 / 4858
PTNML: 1043 / 2 / 2770 / 2 / 1332
WinRate: 52.81%
ELO: 19.18[12.28, 26.10]
LOS: 100.00
LLR: 3.07[-2.94, 2.94]

TC: 180+1.8
Total/Win/Draw/Lose: 8888 / 4709 / 21 / 4158
PTNML: 734 / 9 / 2681 / 12 / 1008
WinRate: 53.10%
ELO: 21.23[13.95, 28.53]
LOS: 100.00
LLR: 2.96[-2.94, 2.94]

Passed s15 STC, VLTC:
Book: s15-base-24k
TC: 10+0.1
Total/Win/Draw/Lose: 3820 / 1666 / 953 / 1201
PTNML: 167 / 305 / 674 / 424 / 340
WinRate: 56.09%
ELO: 42.18[32.10, 52.43]
LOS: 100.00
LLR: 3.23[-2.94, 2.94]

TC: 180+1.8
Total/Win/Draw/Lose: 3616 / 911 / 2153 / 552
PTNML: 37 / 282 / 881 / 501 / 107
WinRate: 54.96%
ELO: 34.39[26.91, 41.98]
LOS: 100.00
LLR: 3.15[-2.94, 2.94]

Passed r15 STC, VLTC:
Book: r15-base-40k
TC: 10+0.1
Total/Win/Draw/Lose: 3686 / 1910 / 381 / 1395
PTNML: 231 / 147 / 801 / 204 / 460
WinRate: 56.99%
ELO: 48.52[37.36, 59.89]
LOS: 100.00
LLR: 3.17[-2.94, 2.94]

TC: 180+1.8
Total/Win/Draw/Lose: 2734 / 1151 / 871 / 712
PTNML: 68 / 178 / 557 / 375 / 189
WinRate: 58.03%
ELO: 55.96[44.53, 67.70]
LOS: 100.00
LLR: 3.23[-2.94, 2.94]
  • Loading branch information
dhbloo committed Jun 8, 2024
1 parent ee2ed41 commit 0d1d79c
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 19 deletions.
26 changes: 15 additions & 11 deletions .github/workflows/rapfi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
target:
- {
name: linux-gcc,
os: ubuntu-20.04,
os: ubuntu-22.04,
c_compiler: gcc,
cxx_compiler: g++,
cmake_command: "",
Expand All @@ -29,8 +29,8 @@ jobs:
- {
name: linux-clang,
os: ubuntu-20.04,
c_compiler: clang,
cxx_compiler: clang++,
c_compiler: clang-18,
cxx_compiler: clang++-18,
cmake_command: "",
shell: "bash {0}"
}
Expand All @@ -55,9 +55,11 @@ jobs:
shell: "msys2 {0}"
}
arch:
- { name: avx2, cmake_command: "-DUSE_BMI2=OFF -DUSE_AVX2=ON -DUSE_AVX=ON -DUSE_SSE=ON" }
- { name: avx, cmake_command: "-DUSE_BMI2=OFF -DUSE_AVX2=OFF -DUSE_AVX=ON -DUSE_SSE=ON" }
- { name: sse, cmake_command: "-DUSE_BMI2=OFF -DUSE_AVX2=OFF -DUSE_AVX=OFF -DUSE_SSE=ON" }
- { name: sse, cmake_command: "-DUSE_AVX2=OFF" }
- { name: avx2, cmake_command: "-DUSE_AVX2=ON" }
- { name: avxvnni, cmake_command: "-DUSE_AVX2=ON -DUSE_VNNI=ON" }
- { name: avx512, cmake_command: "-DUSE_AVX2=ON -DUSE_AVX512=ON" }
- { name: avx512vnni, cmake_command: "-DUSE_AVX2=ON -DUSE_AVX512=ON -DUSE_VNNI=ON" }

defaults:
run:
Expand All @@ -71,18 +73,20 @@ jobs:
fetch-depth: 0
submodules: true

- name: Download required linux packages
if: runner.os == 'Linux'
- name: Download latest clang compiler on linux
if: matrix.target.name == 'linux-clang'
run: |
sudo apt update
sudo apt install -y libtbb-dev
wget https://apt.llvm.org/llvm.sh
chmod u+x llvm.sh
echo | sudo ./llvm.sh 18
rm llvm.sh
- name: Setup msys and install required packages
if: runner.os == 'Windows'
uses: msys2/setup-msys2@v2
with:
msystem: ${{matrix.target.msys_sys}}
install: mingw-w64-${{matrix.target.msys_env}}-${{matrix.target.c_compiler}} mingw-w64-${{matrix.target.msys_env}}-tbb mingw-w64-${{matrix.target.msys_env}}-cmake make git
install: mingw-w64-${{matrix.target.msys_env}}-${{matrix.target.c_compiler}} mingw-w64-${{matrix.target.msys_env}}-cmake make git

- name: Extract the bench hash from the commit history
run: |
Expand Down
2 changes: 1 addition & 1 deletion Rapfi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "C
if(USE_AVX512)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512dq -mavx512bw")
if(USE_VNNI)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vnni")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vnni -mavx512vl")
endif()
elseif(USE_AVX2)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma")
Expand Down
51 changes: 51 additions & 0 deletions Rapfi/CMakeSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,34 @@
}
]
},
{
"name": "x64-Release-AVX512VNNI",
"generator": "Ninja",
"configurationType": "Release",
"buildRoot": "${projectDir}\\build\\${name}",
"installRoot": "${projectDir}\\build\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x64_x64" ],
"variables": [
{
"name": "ENABLE_LTO",
"value": "True",
"type": "BOOL"
},
{
"name": "USE_AVX512",
"value": "True",
"type": "BOOL"
},
{
"name": "USE_VNNI",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-Release-ST",
"generator": "Ninja",
Expand Down Expand Up @@ -219,6 +247,29 @@
}
]
},
{
"name": "x64-Clang-Release-AVX512VNNI",
"generator": "Ninja",
"configurationType": "Release",
"buildRoot": "${projectDir}\\build\\${name}",
"installRoot": "${projectDir}\\build\\install\\${name}",
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "clang_cl_x64_x64" ],
"variables": [
{
"name": "USE_AVX512",
"value": "True",
"type": "BOOL"
},
{
"name": "USE_VNNI",
"value": "True",
"type": "BOOL"
}
]
},
{
"name": "x64-Clang-Release-ST",
"generator": "Ninja",
Expand Down
10 changes: 3 additions & 7 deletions Rapfi/eval/mix9litennue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,13 @@ constexpr int MaxOuterChanges[23] = {5, 11, 33, 107, 293, 675, 1
static Evaluation::WeightRegistry<Mix9LiteWeight> Mix9LiteWeightRegistry;

constexpr int Alignment = 16;
constexpr simd::InstructionType IT512 = getInstTypeOfWidth(simd::NativeInstType, 512);
constexpr simd::InstructionType IT256 = getInstTypeOfWidth(simd::NativeInstType, 256);
constexpr simd::InstructionType IT128 = getInstTypeOfWidth(simd::NativeInstType, 128);

template <size_t Size, typename T>
using Batch = std::conditional_t<
simd::detail::VecBatch<Size, T, IT512, true>::NumExtra == 0,
simd::detail::VecBatch<Size, T, IT512>,
std::conditional_t<simd::detail::VecBatch<Size, T, IT256, true>::NumExtra == 0,
simd::detail::VecBatch<Size, T, IT256>,
simd::detail::VecBatch<Size, T, IT128>>>;
using Batch = std::conditional_t<simd::detail::VecBatch<Size, T, IT256, true>::NumExtra == 0,
simd::detail::VecBatch<Size, T, IT256>,
simd::detail::VecBatch<Size, T, IT128>>;

template <typename FT, typename TT, typename Batch>
using Convert = simd::detail::VecCvt<FT, TT, Batch::Inst>;
Expand Down
18 changes: 18 additions & 0 deletions Rapfi/eval/simdops.h
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,11 @@ namespace detail {
static FORCE_INLINE void dot4_u7i8_accum(R &acc, R a, R b)
{
#if defined(USE_VNNI)
#if !defined(USE_AVX512)
acc = _mm_dpbusd_avx_epi32(acc, a, b);
#else
acc = _mm_dpbusd_epi32(acc, a, b);
#endif
#else
R product0 = simde_mm_maddubs_epi16(a, b);
product0 = simde_mm_madd_epi16(product0, simde_mm_set1_epi16(1));
Expand All @@ -840,8 +844,13 @@ namespace detail {
R low7 = simde_mm_andnot_si128(highest_bit, a);

#if defined(USE_VNNI)
#if !defined(USE_AVX512)
msb = _mm_dpbusd_avx_epi32(_mm_setzero_si128(), msb, b); // 0 or 128
low7 = _mm_dpbusd_avx_epi32(_mm_setzero_si128(), low7, b);
#else
msb = _mm_dpbusd_epi32(_mm_setzero_si128(), msb, b); // 0 or 128
low7 = _mm_dpbusd_epi32(_mm_setzero_si128(), low7, b);
#endif
#else
// Multiply a * b in two parts and accumulate neighbouring outputs into int16 values
msb = simde_mm_maddubs_epi16(msb, b); // 0 or 128
Expand Down Expand Up @@ -877,7 +886,11 @@ namespace detail {
static FORCE_INLINE void dot4_u7i8_accum(R &acc, R a, R b)
{
#if defined(USE_VNNI)
#if !defined(USE_AVX512)
acc = _mm256_dpbusd_avx_epi32(acc, a, b);
#else
acc = _mm256_dpbusd_epi32(acc, a, b);
#endif
#else
R product0 = simde_mm256_maddubs_epi16(a, b);
product0 = simde_mm256_madd_epi16(product0, simde_mm256_set1_epi16(1));
Expand All @@ -894,8 +907,13 @@ namespace detail {
R low7 = simde_mm256_andnot_si256(highest_bit, a);

#if defined(USE_VNNI)
#if !defined(USE_AVX512)
msb = _mm256_dpbusd_avx_epi32(_mm256_setzero_si256(), msb, b); // 0 or 128
low7 = _mm256_dpbusd_avx_epi32(_mm256_setzero_si256(), low7, b);
#else
msb = _mm256_dpbusd_epi32(_mm256_setzero_si256(), msb, b); // 0 or 128
low7 = _mm256_dpbusd_epi32(_mm256_setzero_si256(), low7, b);
#endif
#else
// Multiply a * b in two parts and accumulate neighbouring outputs into int16 values
msb = simde_mm256_maddubs_epi16(msb, b); // 0 or 128
Expand Down

0 comments on commit 0d1d79c

Please sign in to comment.