From 9e8c5e5dc8ee501bc384a2b4ab6b0a948c86ea74 Mon Sep 17 00:00:00 2001
From: Willi Ballenthin
Date: Mon, 9 Dec 2024 10:14:56 +0000
Subject: [PATCH] BinExport2: better handle weird Ghidra expressions

analogous to the inspect-binexport2 issues reported in #2528 and #2530,
but this fixes the feature extractor.
---
Review note: the IMMEDIATE_INT single-child case now recurses through
_fill_operand_expression_list(..., expression_list), exactly like the
REGISTER case. The earlier revision called
_render_expression_tree(..., o); neither that function nor `o` is bound
anywhere in this patch, so the new branch would likely raise NameError instead
of collecting the child expression (presumably carried over from the
inspect-binexport2 script; confirm against that script).

 .../features/extractors/binexport2/helpers.py | 35 ++++++++++++++++---
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/capa/features/extractors/binexport2/helpers.py b/capa/features/extractors/binexport2/helpers.py
index f23c95cbd..ced5ff1a3 100644
--- a/capa/features/extractors/binexport2/helpers.py
+++ b/capa/features/extractors/binexport2/helpers.py
@@ -208,9 +208,22 @@ def _fill_operand_expression_list(
     children_tree_indexes: list[int] = expression_tree[tree_index]
 
     if expression.type == BinExport2.Expression.REGISTER:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
         expression_list.append(expression)
-        return
+
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # like for aarch64 with vector instructions, indicating vector data size:
+            #
+            #     FADD V0.4S, V1.4S, V2.4S
+            #
+            # see: https://github.com/mandiant/capa/issues/2528
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))
 
     elif expression.type == BinExport2.Expression.SYMBOL:
         assert len(children_tree_indexes) <= 1
@@ -233,9 +246,23 @@ def _fill_operand_expression_list(
             raise NotImplementedError(len(children_tree_indexes))
 
     elif expression.type == BinExport2.Expression.IMMEDIATE_INT:
-        assert len(children_tree_indexes) == 0
+        assert len(children_tree_indexes) <= 1
         expression_list.append(expression)
-        return
+
+        if len(children_tree_indexes) == 0:
+            return
+        elif len(children_tree_indexes) == 1:
+            # the ghidra exporter can produce some weird expressions,
+            # particularly for MSRs, like for:
+            #
+            #     sreg(3, 0, c.0, c.4, 4)
+            #
+            # see: https://github.com/mandiant/capa/issues/2530
+            child_index = children_tree_indexes[0]
+            _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list)
+            return
+        else:
+            raise NotImplementedError(len(children_tree_indexes))
 
     elif expression.type == BinExport2.Expression.SIZE_PREFIX:
         # like: b4