Skip to content

Commit

Permalink
Properly handle integer under/overflow in libmagic
Browse files Browse the repository at this point in the history
  • Loading branch information
ESultanik committed Nov 10, 2021
1 parent dfb393b commit e4e8504
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 27 deletions.
27 changes: 27 additions & 0 deletions polyfile/arithmetic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Callable, Dict, Tuple

import cint


CStyleInt = cint.Cint


# Lookup table from (width in bytes, is_signed) to the ``cint`` type used to
# represent an integer of that width and signedness.
INT_TYPES: Dict[Tuple[int, bool], Callable[[int], CStyleInt]] = {
    (width, is_signed): int_type
    for width, is_signed, int_type in (
        (1, False, cint.U8),
        (1, True, cint.I8),
        (2, False, cint.U16),
        (2, True, cint.I16),
        (4, False, cint.U32),
        (4, True, cint.I32),
        (8, False, cint.U64),
        (8, True, cint.I64),
    )
}


def make_c_style_int(value: int, num_bytes: int, signed: bool):
    """Build a fixed-width integer object for ``value``.

    The concrete type is chosen from ``INT_TYPES`` using the pair
    ``(num_bytes, signed)`` and is then called with ``value``.

    Args:
        value: The Python integer to wrap.
        num_bytes: Width of the target integer type, in bytes.
        signed: Whether the target integer type is signed.

    Returns:
        Whatever the selected ``INT_TYPES`` entry returns for ``value``.

    Raises:
        NotImplementedError: If ``INT_TYPES`` has no entry for the requested
            width/signedness combination.
    """
    int_type = INT_TYPES.get((num_bytes, signed))
    if int_type is None:
        raise NotImplementedError(f"{num_bytes*8}-bit {['un',''][signed]}signed integers are not yet supported")
    return int_type(value)


# Attach the factory to the shared base type under the name ``new`` so it can
# be reached as ``CStyleInt.new(value, num_bytes, signed)``.
# NOTE(review): this stores a plain function, not a staticmethod; if
# ``CStyleInt`` is an ordinary Python class, looking ``new`` up on an *instance*
# would presumably bind that instance as ``value`` -- confirm that is intended.
setattr(CStyleInt, "new", make_c_style_int)
54 changes: 27 additions & 27 deletions polyfile/magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
)
from uuid import UUID

from .arithmetic import CStyleInt, make_c_style_int
from .iterators import LazyIterableSet
from .logger import getStatusLogger, TRACE


if sys.version_info < (3, 9):
from typing import Pattern
else:
Expand Down Expand Up @@ -1419,22 +1421,31 @@ class NumericOperator(Enum):
ALL_BITS_CLEAR = ("^", lambda a, b: not (a & b)) # value from the file (a) must have clear all bits set in b
NOT = ("!", lambda a, b: not (a == b))

def __init__(self, symbol: str, test: Union[Callable[[int, int], bool], Callable[[float, float], bool]]):
def __init__(self, symbol: str, test: Union[
Callable[[int, int], bool],
Callable[[float, float], bool],
Callable[[CStyleInt, CStyleInt], bool]
]):
self.symbol: str = symbol
self.test: Union[Callable[[int, int], bool], Callable[[float, float], bool]] = test
self.test: Union[
Callable[[int, int], bool], Callable[[float, float], bool], Callable[[CStyleInt, CStyleInt], bool]
] = test
NUMERIC_OPERATORS_BY_SYMBOL[symbol] = self

@staticmethod
def get(symbol: str) -> "NumericOperator":
    """Return the operator that was registered under ``symbol``.

    Operators add themselves to ``NUMERIC_OPERATORS_BY_SYMBOL`` when they are
    constructed; this is a direct lookup in that registry, so an unknown
    symbol propagates whatever error the registry raises for a missing key.
    """
    registered = NUMERIC_OPERATORS_BY_SYMBOL[symbol]
    return registered

def __str__(self):
    """Render the operator as the symbol it was constructed with."""
    symbol = self.symbol
    return symbol


class NumericValue(Generic[T]):
def __init__(self, value: T, operator: NumericOperator = NumericOperator.EQUALS):
    """Store the constant and the operator used to compare against it.

    Args:
        value: The constant that candidate values are tested against.
        operator: The comparison to apply; defaults to ``NumericOperator.EQUALS``.
    """
    self.operator: NumericOperator = operator
    self.value: T = value

def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[int], int] = lambda x: x) -> bool:
def test(self, to_match: T, unsigned: bool, num_bytes: int, preprocess: Callable[[T], T] = lambda x: x) -> bool:
return self.operator.test(preprocess(to_match), self.value)

@staticmethod
Expand All @@ -1450,6 +1461,9 @@ def parse(value: str, num_bytes: int) -> "NumericValue":
pass
raise ValueError(f"Could not parse numeric type {value!r}")

def __str__(self):
    """Render as the operator immediately followed by the constant."""
    operator_text = f"{self.operator}"
    return operator_text + str(self.value)


class NumericWildcard(NumericValue):
def __init__(self):
Expand All @@ -1460,30 +1474,16 @@ def test(self, to_match, unsigned, num_bytes, preprocess: Callable[[int], int] =


class IntegerValue(NumericValue[int]):
@staticmethod
def normalize_signedness(value: int, unsigned: bool, num_bytes: int) -> int:
bits = 8 * num_bytes
if unsigned:
max_value = (1 << bits) - 1
min_value = 0
if value < 0:
# convert the value to a bit-equivalent unsigned value
value += 2**bits
else:
max_value = (1 << bits) >> 1
min_value = ~max_value
if value > max_value:
# convert the value to a bit-equivalent signed value
value -= 2 ** bits
if not (min_value <= value <= max_value):
raise ValueError(f"Invalid integer constant {value} for comparing to a "
f"{['signed', 'n unsigned'][unsigned]} {num_bytes}-byte integer")
return value

def test(
    self,
    to_match: int,
    unsigned: bool,
    num_bytes: int,
    preprocess: Callable[[int], int] = lambda x: x
) -> bool:
    """Compare ``to_match`` against this constant after normalizing both.

    The stored constant is normalized first, then ``preprocess`` is applied to
    ``to_match`` and its result is normalized with the same signedness and
    width. The operator's test receives the candidate first and the constant
    second.
    """
    normalize = IntegerValue.normalize_signedness
    expected = normalize(self.value, unsigned, num_bytes)
    candidate = normalize(preprocess(to_match), unsigned, num_bytes)
    return self.operator.test(candidate, expected)
def test(
        self,
        to_match: int,
        unsigned: bool,
        num_bytes: int,
        preprocess: Callable[[CStyleInt], CStyleInt] = lambda x: x
) -> bool:
    """Compare ``to_match`` against this constant as fixed-width integers.

    Both the stored constant and ``to_match`` are converted with
    ``make_c_style_int`` using the same width and signedness. ``preprocess``
    is applied to the converted candidate (not to the raw Python int), and the
    operator's test receives the candidate first and the constant second.
    """
    is_signed = not unsigned
    expected = make_c_style_int(value=self.value, num_bytes=num_bytes, signed=is_signed)
    candidate = make_c_style_int(value=to_match, num_bytes=num_bytes, signed=is_signed)
    return self.operator.test(preprocess(candidate), expected)

@staticmethod
def parse(value: Union[str, bytes], num_bytes: int) -> "IntegerValue":
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def update(self, n: int):
python_requires='>=3.6',
install_requires=[
"dataclasses;python_version<'3.7'", # dataclasses were only added in Python 3.7
'cint',
'graphviz',
'intervaltree',
'jinja2',
Expand Down

0 comments on commit e4e8504

Please sign in to comment.