From e4e8504366810d11bc9889d1b7677b4a0cf766d3 Mon Sep 17 00:00:00 2001
From: Evan Sultanik <evan.sultanik@trailofbits.com>
Date: Wed, 10 Nov 2021 13:04:12 -0500
Subject: [PATCH] Properly handle integer under/overflow in libmagic

---
 polyfile/arithmetic.py | 44 ++++++++++++++++++++++++++++++++++
 polyfile/magic.py      | 54 +++++++++++++++++++++---------------------
 setup.py               |  1 +
 3 files changed, 72 insertions(+), 27 deletions(-)
 create mode 100644 polyfile/arithmetic.py

diff --git a/polyfile/arithmetic.py b/polyfile/arithmetic.py
new file mode 100644
index 00000000..546348ea
--- /dev/null
+++ b/polyfile/arithmetic.py
@@ -0,0 +1,44 @@
+"""C-style integer helpers backed by the `cint` package."""
+
+from typing import Callable, Dict, Tuple
+
+import cint
+
+
+CStyleInt = cint.Cint
+
+
+# Maps (size in bytes, signed?) to the `cint` type constructed for that combination.
+INT_TYPES: Dict[Tuple[int, bool], Callable[[int], CStyleInt]] = {
+    (1, False): cint.U8,
+    (1, True): cint.I8,
+    (2, False): cint.U16,
+    (2, True): cint.I16,
+    (4, False): cint.U32,
+    (4, True): cint.I32,
+    (8, False): cint.U64,
+    (8, True): cint.I64,
+}
+
+
+def make_c_style_int(value: int, num_bytes: int, signed: bool) -> CStyleInt:
+    """Wrap `value` in the `cint` type selected by `num_bytes` and `signed`.
+
+    Raises:
+        NotImplementedError: if `INT_TYPES` has no entry for the requested
+            size/signedness combination.
+    """
+    constructor = INT_TYPES.get((num_bytes, signed))
+    if constructor is None:
+        # A conditional expression (instead of indexing a two-element list with
+        # `signed`) cannot raise IndexError while the message is being built.
+        raise NotImplementedError(
+            f"{num_bytes * 8}-bit {'' if signed else 'un'}signed integers are not yet supported"
+        )
+    return constructor(value)
+
+
+# Expose the factory as `CStyleInt.new(...)`.  Wrapping it in `staticmethod`
+# stops Python from binding an instance as `value` when `new` is looked up on
+# an instance rather than on the class.
+setattr(CStyleInt, "new", staticmethod(make_c_style_int))
diff --git a/polyfile/magic.py b/polyfile/magic.py
index af54aa85..f00ed8d8 100644
--- a/polyfile/magic.py
+++ b/polyfile/magic.py
@@ -25,9 +25,11 @@
 )
 from uuid import UUID
 
+from .arithmetic import CStyleInt, make_c_style_int
 from .iterators import LazyIterableSet
 from .logger import getStatusLogger, TRACE
 
+
 if sys.version_info < (3, 9):
     from typing import Pattern
 else:
@@ -1419,22 +1421,31 @@ class NumericOperator(Enum):
     ALL_BITS_CLEAR = ("^", lambda a, b: not (a & b))  # value from the file (a) must have clear all bits set in b
     NOT = ("!", lambda a, b: not (a == b))
 
-    def __init__(self, symbol: str, test: Union[Callable[[int, int], bool], Callable[[float, float], bool]]):
+    def __init__(self, symbol: str, test: Union[
+            Callable[[int, int], bool],
+            Callable[[float, float], bool],
+            Callable[[CStyleInt, CStyleInt], bool]  # operands as wrapped by IntegerValue.test
+    ]):
         self.symbol: str = symbol
-        self.test: Union[Callable[[int, int], bool], Callable[[float, float], bool]] = test
+        self.test: Union[
+            Callable[[int, int], bool], Callable[[float, float], bool], Callable[[CStyleInt, CStyleInt], bool]
+        ] = test
         NUMERIC_OPERATORS_BY_SYMBOL[symbol] = self
 
     @staticmethod
     def get(symbol: str) -> "NumericOperator":
         return NUMERIC_OPERATORS_BY_SYMBOL[symbol]
 
+    def __str__(self):
+        return self.symbol  # the symbol this operator is registered under, e.g. "!" for NOT
+
 
 class NumericValue(Generic[T]):
     def __init__(self, value: T, operator: NumericOperator = NumericOperator.EQUALS):
         self.value: T = value
         self.operator: NumericOperator = operator
 
-    def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[int], int] = lambda x: x) -> bool:
+    def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[T], T] = lambda x: x) -> bool:
         return self.operator.test(preprocess(to_match), self.value)
 
     @staticmethod
@@ -1450,6 +1461,9 @@ def parse(value: str, num_bytes: int) -> "NumericValue":
             pass
         raise ValueError(f"Could not parse numeric type {value!r}")
 
+    def __str__(self):
+        return f"{self.operator}{self.value!s}"  # operator immediately followed by the value, no separator
+
 
 class NumericWildcard(NumericValue):
     def __init__(self):
@@ -1460,30 +1474,16 @@ def test(self, to_match, unsigned, num_bytes, preprocess: Callable[[int], int] =
 
 
 class IntegerValue(NumericValue[int]):
-    @staticmethod
-    def normalize_signedness(value: int, unsigned: bool, num_bytes: int) -> int:
-        bits = 8 * num_bytes
-        if unsigned:
-            max_value = (1 << bits) - 1
-            min_value = 0
-            if value < 0:
-                # convert the value to a bit-equivalent unsigned value
-                value += 2**bits
-        else:
-            max_value = (1 << bits) >> 1
-            min_value = ~max_value
-            if value > max_value:
-                # convert the value to a bit-equivalent signed value
-                value -= 2 ** bits
-        if not (min_value <= value <= max_value):
-            raise ValueError(f"Invalid integer constant {value} for comparing to a "
-                             f"{['signed', 'n unsigned'][unsigned]} {num_bytes}-byte integer")
-        return value
-
-    def test(self, to_match: int, unsigned: bool, num_bytes: int, preprocess: Callable[[int], int] = lambda x: x) -> bool:
-        to_test = IntegerValue.normalize_signedness(self.value, unsigned, num_bytes)
-        to_match = IntegerValue.normalize_signedness(preprocess(to_match), unsigned, num_bytes)
-        return self.operator.test(to_match, to_test)
+    def test(self, to_match: int, unsigned: bool, num_bytes: int,
+             preprocess: Callable[[CStyleInt], CStyleInt] = lambda x: x) -> bool:
+        """Apply this value's operator to `to_match` after wrapping both operands as C-style integers."""
+        # Both operands are built with the same size and signedness, so the
+        # operator always sees two integers of the same C type.
+        signed = not unsigned
+        expected = make_c_style_int(value=self.value, num_bytes=num_bytes, signed=signed)
+        actual = make_c_style_int(value=to_match, num_bytes=num_bytes, signed=signed)
+        # `preprocess` receives the already-wrapped value, not the raw Python int.
+        return self.operator.test(preprocess(actual), expected)
 
     @staticmethod
     def parse(value: Union[str, bytes], num_bytes: int) -> "IntegerValue":
diff --git a/setup.py b/setup.py
index 59c34353..94b545f6 100644
--- a/setup.py
+++ b/setup.py
@@ -122,6 +122,7 @@ def update(self, n: int):
     python_requires='>=3.6',
     install_requires=[
         "dataclasses;python_version<'3.7'",  # dataclasses were only added in Python 3.7
+        'cint',
         'graphviz',
         'intervaltree',
         'jinja2',