diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e11d0b2c..e68ebe31b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ - binja: move the stack string detection to function level #2516 @xusheng6 - BinExport2: fix handling of incorrect thunk functions #2524 @williballenthin - BinExport2: more precise pruning of expressions @williballenthin +- BinExport2: better handle weird expression trees from Ghidra #2528 #2530 @williballenthin ### capa Explorer Web diff --git a/capa/features/extractors/binexport2/helpers.py b/capa/features/extractors/binexport2/helpers.py index f23c95cbd..3189c70af 100644 --- a/capa/features/extractors/binexport2/helpers.py +++ b/capa/features/extractors/binexport2/helpers.py @@ -208,9 +208,22 @@ def _fill_operand_expression_list( children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.REGISTER: - assert len(children_tree_indexes) == 0 + assert len(children_tree_indexes) <= 1 expression_list.append(expression) - return + + if len(children_tree_indexes) == 0: + return + elif len(children_tree_indexes) == 1: + # like for aarch64 with vector instructions, indicating vector data size: + # + # FADD V0.4S, V1.4S, V2.4S + # + # see: https://github.com/mandiant/capa/issues/2528 + child_index = children_tree_indexes[0] + _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list) + return + else: + raise NotImplementedError(len(children_tree_indexes)) elif expression.type == BinExport2.Expression.SYMBOL: assert len(children_tree_indexes) <= 1 @@ -233,9 +246,23 @@ def _fill_operand_expression_list( raise NotImplementedError(len(children_tree_indexes)) elif expression.type == BinExport2.Expression.IMMEDIATE_INT: - assert len(children_tree_indexes) == 0 + assert len(children_tree_indexes) <= 1 expression_list.append(expression) - return + + if len(children_tree_indexes) == 0: + return + elif len(children_tree_indexes) == 1: + # the ghidra exporter can produce some weird expressions, + # particularly for MSRs, like for: + # + # sreg(3, 0, c.0, c.4, 4) + # + # see: https://github.com/mandiant/capa/issues/2530 + child_index = children_tree_indexes[0] + _fill_operand_expression_list(be2, operand, expression_tree, child_index, expression_list) + return + else: + raise NotImplementedError(len(children_tree_indexes)) elif expression.type == BinExport2.Expression.SIZE_PREFIX: # like: b4 diff --git a/capa/main.py b/capa/main.py index bc8159254..2e3a5900c 100644 --- a/capa/main.py +++ b/capa/main.py @@ -198,7 +198,7 @@ def simple_message_exception_handler( else: print( f"Unexpected exception raised: {exctype}. Please run capa in debug mode (-d/--debug) " - + "to see the stack trace. Please also report your issue on the capa GitHub page so we " + + "to see the stack trace.\nPlease also report your issue on the capa GitHub page so we " + "can improve the code! (https://github.com/mandiant/capa/issues)", file=sys.stderr, ) diff --git a/scripts/inspect-binexport2.py b/scripts/inspect-binexport2.py index 9e205b0d2..1e49c98af 100644 --- a/scripts/inspect-binexport2.py +++ b/scripts/inspect-binexport2.py @@ -81,8 +81,21 @@ def _render_expression_tree( if expression.type == BinExport2.Expression.REGISTER: o.write(expression.symbol) - assert len(children_tree_indexes) == 0 - return + assert len(children_tree_indexes) <= 1 + + if len(children_tree_indexes) == 0: + return + elif len(children_tree_indexes) == 1: + # like for aarch64 with vector instructions, indicating vector data size: + # + # FADD V0.4S, V1.4S, V2.4S + # + # see: https://github.com/mandiant/capa/issues/2528 + child_index = children_tree_indexes[0] + _render_expression_tree(be2, operand, expression_tree, child_index, o) + return + else: + raise NotImplementedError(len(children_tree_indexes)) elif expression.type == BinExport2.Expression.SYMBOL: o.write(expression.symbol) @@ -106,8 +119,22 @@ def _render_expression_tree( elif expression.type == BinExport2.Expression.IMMEDIATE_INT: o.write(f"0x{expression.immediate:X}") - assert len(children_tree_indexes) == 0 - return + assert len(children_tree_indexes) <= 1 + + if len(children_tree_indexes) == 0: + return + elif len(children_tree_indexes) == 1: + # the ghidra exporter can produce some weird expressions, + # particularly for MSRs, like for: + # + # sreg(3, 0, c.0, c.4, 4) + # + # see: https://github.com/mandiant/capa/issues/2530 + child_index = children_tree_indexes[0] + _render_expression_tree(be2, operand, expression_tree, child_index, o) + return + else: + raise NotImplementedError(len(children_tree_indexes)) elif expression.type == BinExport2.Expression.SIZE_PREFIX: # like: b4