Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVX512 in FLOPS test #16

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion Phenom2_Benchmark/AVXChecker.asm
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
.code
; Checks whether AVX512 (Foundation) instructions can actually be executed.
; Returns 1 in EAX only when all of the following hold, otherwise 0:
;   - CPUID reports standard leaf 7 as available
;   - CPUID.1:ECX bit 27 (OSXSAVE) is set, so XGETBV may be used
;   - XCR0 has bits 1, 2, 5, 6 and 7 set (SSE, AVX, opmask, ZMM_Hi256,
;     Hi16_ZMM state enabled by the operating system)
;   - CPUID.(EAX=7,ECX=0):EBX bit 16 (AVX512F) is set
; RBX is preserved; EAX, ECX and EDX are clobbered by CPUID/XGETBV.
GetAVX512Capability proc
push rbx

; Leaf 0 returns the highest supported standard leaf in EAX;
; querying leaf 7 on a CPU without it gives meaningless data.
xor eax, eax
cpuid
cmp eax, 7
jb AVX512_NotSupported

; Leaf 1, ECX bit 27 = OSXSAVE. Without it XGETBV would fault.
mov eax, 1
cpuid
bt ecx, 27
jnc AVX512_NotSupported

; Read XCR0 and require the OS to have enabled every state component
; AVX512 depends on: 0E6h = bits 1, 2, 5, 6, 7.
xor ecx, ecx
xgetbv
and eax, 0E6h
cmp eax, 0E6h
jne AVX512_NotSupported

; Leaf 7, subleaf 0: EBX bit 16 = AVX512F
mov eax, 7
xor ecx, ecx
cpuid

mov eax, ebx
shr eax, 16 ; Read bit 16
and eax, 1
jmp AVX512_Done

AVX512_NotSupported:
xor eax, eax

AVX512_Done:
pop rbx
ret
GetAVX512Capability endp

; Checks CPUID for AVX capability
GetAVXCapability proc
push rbx
Expand All @@ -16,7 +32,7 @@ GetAVXCapability endp

; Checks CPUID for SSE capability
GetSSECapability proc
push rbx
push rbx

mov eax, 1
cpuid
Expand Down
68 changes: 68 additions & 0 deletions Phenom2_Benchmark/FLOPS.asm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ ADD_AVX macro
vaddps ymm8, ymm8, ymm0
endm

; Emits eight vaddps instructions, one per accumulator zmm1..zmm8,
; each adding zmm0 into its accumulator. No accumulator reads another,
; so the eight additions are mutually independent.
ADD_AVX512 macro
for acc, <zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7, zmm8>
vaddps acc, acc, zmm0
endm
endm

FLOPS_SSE proc
push rbx
push rdi
Expand Down Expand Up @@ -127,4 +138,61 @@ LoopHead:
ret
FLOPS_AVX endp

; AVX512 floating-point add throughput kernel.
; Runs 524288 loop iterations of 128 vaddps zmm instructions
; (16 expansions of ADD_AVX512, 8 instructions each), i.e. 2^30
; single-precision additions in total. The incoming argument is not read.
;
; Under the Windows x64 calling convention XMM6-XMM15 are callee-saved,
; and the kernel overwrites zmm6-zmm8, so the low 128 bits of those three
; registers are spilled to the stack and restored before returning.
; R12 is the only general-purpose register modified, so it is the only
; one saved.
FLOPS_AVX512 proc
push r12

; Spill the callee-saved vector registers the loop body overwrites.
; Unaligned stores are used so no assumption is made about RSP alignment.
sub rsp, 48
vmovups xmmword ptr [rsp], xmm6
vmovups xmmword ptr [rsp + 16], xmm7
vmovups xmmword ptr [rsp + 32], xmm8

mov r12, (1024*1024*1024)/(128 * 16) ; x16 for AVX512

; Set all the AVX512 regs to 0.0
vxorps zmm0, zmm0, zmm0
vxorps zmm1, zmm1, zmm1
vxorps zmm2, zmm2, zmm2
vxorps zmm3, zmm3, zmm3
vxorps zmm4, zmm4, zmm4
vxorps zmm5, zmm5, zmm5
vxorps zmm6, zmm6, zmm6
vxorps zmm7, zmm7, zmm7
vxorps zmm8, zmm8, zmm8

LoopHead:
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512
ADD_AVX512

dec r12
jnz LoopHead

; Clear the upper vector state so later legacy-SSE code in the caller
; does not pay an AVX/SSE transition penalty.
vzeroupper

; Restore the callee-saved vector registers
vmovups xmm6, xmmword ptr [rsp]
vmovups xmm7, xmmword ptr [rsp + 16]
vmovups xmm8, xmmword ptr [rsp + 32]
add rsp, 48

pop r12
ret
FLOPS_AVX512 endp

end
11 changes: 8 additions & 3 deletions Phenom2_Benchmark/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <vector>

// Checker functions for AVX512, AVX and SSE
extern "C" bool GetAVX512Capability();
extern "C" bool GetAVXCapability();
extern "C" bool GetSSECapability();

Expand All @@ -17,6 +18,7 @@ extern "C" void PADDB_MMX(int threadID);
extern "C" void CMOVcc_REG_REG(int threadID);
extern "C" void FLOPS_SSE(int threadID);
extern "C" void FLOPS_AVX(int threadID);
extern "C" void FLOPS_AVX512(int threadID);

// MUL added for Reirei
extern "C" void IMUL_REG_REG(int threadID);
Expand Down Expand Up @@ -85,6 +87,7 @@ int main()
};

// Check for AVX512, AVX and SSE capability
bool AVX512_CAPABLE = GetAVX512Capability();
bool AVX_CAPABLE = GetAVXCapability();
bool SSE_CAPABLE = GetSSECapability();

Expand All @@ -107,6 +110,10 @@ int main()
std::cout<<"AMD Quad Core Phenom II 810 from the year 2009."<< std::endl;
std::cout<<std::endl;

if (AVX512_CAPABLE)
std::cout << "AVX512 CPU detected!" << std::endl;
else
std::cout << "No AVX512 support detected" << std::endl;
if (AVX_CAPABLE)
std::cout<<"AVX CPU detected!"<< std::endl;
else
Expand Down Expand Up @@ -155,9 +162,7 @@ int main()
case 3: currentFunction = SHR_REG_CL; break;
case 4: currentFunction = PADDB_MMX; break;
case 5: currentFunction = CMOVcc_REG_REG; break;
case 6: currentFunction = (void (*)(int)) // Select SSE or AVX
((unsigned long long)FLOPS_SSE * !AVX_CAPABLE +
(unsigned long long)FLOPS_AVX * AVX_CAPABLE); break;
case 6: currentFunction = (AVX512_CAPABLE ? FLOPS_AVX512 : (AVX_CAPABLE ? FLOPS_AVX : FLOPS_SSE)); break;
case 7: currentFunction = IMUL_REG_REG; break;
case 8: threadCount = (1 * (threadCount != 1)) + // Toggle threaded or single thread
(std::thread::hardware_concurrency() * (threadCount == 1)); break;
Expand Down
Binary file modified x64/Release/Phenom2_Benchmark.exe
Binary file not shown.