From e4e8504366810d11bc9889d1b7677b4a0cf766d3 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Wed, 10 Nov 2021 13:04:12 -0500 Subject: [PATCH] Properly handle integer under/overflow in libmagic --- polyfile/arithmetic.py | 27 +++++++++++++++++++++ polyfile/magic.py | 54 +++++++++++++++++++++--------------------- setup.py | 1 + 3 files changed, 55 insertions(+), 27 deletions(-) create mode 100644 polyfile/arithmetic.py diff --git a/polyfile/arithmetic.py b/polyfile/arithmetic.py new file mode 100644 index 00000000..546348ea --- /dev/null +++ b/polyfile/arithmetic.py @@ -0,0 +1,27 @@ +from typing import Callable, Dict, Tuple + +import cint + + +CStyleInt = cint.Cint + + +INT_TYPES: Dict[Tuple[int, bool], Callable[[int], CStyleInt]] = { + (1, False): cint.U8, + (1, True): cint.I8, + (2, False): cint.U16, + (2, True): cint.I16, + (4, False): cint.U32, + (4, True): cint.I32, + (8, False): cint.U64, + (8, True): cint.I64 +} + + +def make_c_style_int(value: int, num_bytes: int, signed: bool): + if (num_bytes, signed) not in INT_TYPES: + raise NotImplementedError(f"{num_bytes*8}-bit {['un',''][signed]}signed integers are not yet supported") + return INT_TYPES[(num_bytes, signed)](value) + + +setattr(CStyleInt, "new", make_c_style_int) diff --git a/polyfile/magic.py b/polyfile/magic.py index af54aa85..f00ed8d8 100644 --- a/polyfile/magic.py +++ b/polyfile/magic.py @@ -25,9 +25,11 @@ ) from uuid import UUID +from .arithmetic import CStyleInt, make_c_style_int from .iterators import LazyIterableSet from .logger import getStatusLogger, TRACE + if sys.version_info < (3, 9): from typing import Pattern else: @@ -1419,22 +1421,31 @@ class NumericOperator(Enum): ALL_BITS_CLEAR = ("^", lambda a, b: not (a & b)) # value from the file (a) must have clear all bits set in b NOT = ("!", lambda a, b: not (a == b)) - def __init__(self, symbol: str, test: Union[Callable[[int, int], bool], Callable[[float, float], bool]]): + def __init__(self, symbol: str, test: Union[ + Callable[[int, int], bool], + Callable[[float, float], bool], + Callable[[CStyleInt, CStyleInt], bool] + ]): self.symbol: str = symbol - self.test: Union[Callable[[int, int], bool], Callable[[float, float], bool]] = test + self.test: Union[ + Callable[[int, int], bool], Callable[[float, float], bool], Callable[[CStyleInt, CStyleInt], bool] + ] = test NUMERIC_OPERATORS_BY_SYMBOL[symbol] = self @staticmethod def get(symbol: str) -> "NumericOperator": return NUMERIC_OPERATORS_BY_SYMBOL[symbol] + def __str__(self): + return self.symbol + class NumericValue(Generic[T]): def __init__(self, value: T, operator: NumericOperator = NumericOperator.EQUALS): self.value: T = value self.operator: NumericOperator = operator - def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[int], int] = lambda x: x) -> bool: + def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[T], T] = lambda x: x) -> bool: return self.operator.test(preprocess(to_match), self.value) @staticmethod @@ -1450,6 +1461,9 @@ def parse(value: str, num_bytes: int) -> "NumericValue": pass raise ValueError(f"Could not parse numeric type {value!r}") + def __str__(self): + return f"{self.operator}{self.value!s}" + class NumericWildcard(NumericValue): def __init__(self): @@ -1460,30 +1474,16 @@ def test(self, to_match, unsigned, num_bytes, preprocess: Callable[[int], int] = class IntegerValue(NumericValue[int]): - @staticmethod - def normalize_signedness(value: int, unsigned: bool, num_bytes: int) -> int: - bits = 8 * num_bytes - if unsigned: - max_value = (1 << bits) - 1 - min_value = 0 - if value < 0: - # convert the value to a bit-equivalent unsigned value - value += 2**bits - else: - max_value = (1 << bits) >> 1 - min_value = ~max_value - if value > max_value: - # convert the value to a bit-equivalent signed value - value -= 2 ** bits - if not (min_value <= value <= max_value): - raise ValueError(f"Invalid integer constant {value} for comparing to a " - f"{['signed', 'n unsigned'][unsigned]} {num_bytes}-byte integer") - return value - - def test(self, to_match: int, unsigned: bool, num_bytes: int, preprocess: Callable[[int], int] = lambda x: x) -> bool: - to_test = IntegerValue.normalize_signedness(self.value, unsigned, num_bytes) - to_match = IntegerValue.normalize_signedness(preprocess(to_match), unsigned, num_bytes) - return self.operator.test(to_match, to_test) + def test( + self, + to_match: int, + unsigned: bool, + num_bytes: int, + preprocess: Callable[[CStyleInt], CStyleInt] = lambda x: x + ) -> bool: + to_test = make_c_style_int(value=self.value, num_bytes=num_bytes, signed=not unsigned) + to_match = make_c_style_int(value=to_match, num_bytes=num_bytes, signed=not unsigned) + return self.operator.test(preprocess(to_match), to_test) @staticmethod def parse(value: Union[str, bytes], num_bytes: int) -> "IntegerValue": diff --git a/setup.py b/setup.py index 59c34353..94b545f6 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,7 @@ def update(self, n: int): python_requires='>=3.6', install_requires=[ "dataclasses;python_version<'3.7'", # dataclasses were only added in Python 3.7 + 'cint', 'graphviz', 'intervaltree', 'jinja2',