fix(int_mm): disable for CPU

There are still accuracy issues with int_mm on AMD CPUs. A fix should be available in PyTorch 2.6.
huggingface · Oct 24, 2024 · 30110b3 · 30110b3
1 parent d73696f
commit 30110b3
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/optimum/quanto/library/qbytes_mm.py b/optimum/quanto/library/qbytes_mm.py
@@ -92,7 +92,7 @@ def qbytes_mm_impl_cuda(activations: torch.Tensor, weights: torch.Tensor, output
 def qbytes_mm_impl_cpu(activations: torch.Tensor, weights: torch.Tensor, output_scales: torch.Tensor) -> torch.Tensor:
     if (
         # FIXME: accuracy issues with 2.4.x
-        version.parse(torch.__version__).release > version.parse("2.4.1").release
+        version.parse(torch.__version__).release > version.parse("2.5.0").release
         and activations.dtype == torch.int8
         and weights.dtype == torch.int8
     ):