From 8616165472424d9949c434a5da26858e7880affb Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 19 Jun 2024 13:47:58 +0200 Subject: [PATCH] Fix segfault with regular expressions that matched the zero-length string. In regular expressions, repetition operators`{0,0}` and `{,0}` are valid, but they always match the zero-length string. For instance, `a{0,0}` and `[a-z]{0,0}` both match the zero-length string. When the whole regular expression consists in one of these repetitions it caused a segfault during the evaluation of the regular expression because the `forward_code_ref` field for the root atom in the atom's tree was null. Closes #2084. --- libyara/atoms.c | 16 ------- libyara/parser.c | 105 ++++++++++++++++++++++++++++++--------------- tests/test-rules.c | 2 + 3 files changed, 73 insertions(+), 50 deletions(-) diff --git a/libyara/atoms.c b/libyara/atoms.c index c09d6d8670..e7e9a15bdb 100644 --- a/libyara/atoms.c +++ b/libyara/atoms.c @@ -1355,22 +1355,6 @@ int yr_atoms_extract_from_re( *atoms = _yr_atoms_list_concat(*atoms, case_insensitive_atoms); } - // No atoms has been extracted, let's add a zero-length atom. - - if (*atoms == NULL) - { - *atoms = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); - - if (*atoms == NULL) - return ERROR_INSUFFICIENT_MEMORY; - - (*atoms)->atom.length = 0; - (*atoms)->backtrack = 0; - (*atoms)->forward_code_ref = re_ast->root_node->forward_code_ref; - (*atoms)->backward_code_ref = YR_ARENA_NULL_REF; - (*atoms)->next = NULL; - } - return ERROR_SUCCESS; } diff --git a/libyara/parser.c b/libyara/parser.c index 2433e52743..dbc47deea3 100644 --- a/libyara/parser.c +++ b/libyara/parser.c @@ -502,19 +502,22 @@ static int _yr_parser_write_string( literal_string->length + 1, // +1 to include terminating NULL &ref); + if (result != ERROR_SUCCESS) + goto cleanup; + string->length = (uint32_t) literal_string->length; string->string = (uint8_t*) yr_arena_ref_to_ptr(compiler->arena, &ref); - if (result == ERROR_SUCCESS) - { - result = yr_atoms_extract_from_string( - &compiler->atoms_config, - (uint8_t*) literal_string->c_string, - (int32_t) literal_string->length, - modifier, - &atom_list, - min_atom_quality); - } + result = yr_atoms_extract_from_string( + &compiler->atoms_config, + (uint8_t*) literal_string->c_string, + (int32_t) literal_string->length, + modifier, + &atom_list, + min_atom_quality); + + if (result != ERROR_SUCCESS) + goto cleanup; } else { @@ -524,20 +527,55 @@ static int _yr_parser_write_string( // variable-length portions. modifier.flags &= ~STRING_FLAGS_FIXED_OFFSET; + // Save the position where the RE forward code starts for later reference. + yr_arena_off_t forward_code_start = yr_arena_get_current_offset( + compiler->arena, YR_RE_CODE_SECTION); + // Emit forwards code result = yr_re_ast_emit_code(re_ast, compiler->arena, false); + if (result != ERROR_SUCCESS) + goto cleanup; + // Emit backwards code - if (result == ERROR_SUCCESS) - result = yr_re_ast_emit_code(re_ast, compiler->arena, true); + result = yr_re_ast_emit_code(re_ast, compiler->arena, true); - if (result == ERROR_SUCCESS) - result = yr_atoms_extract_from_re( - &compiler->atoms_config, - re_ast, - modifier, - &atom_list, - min_atom_quality); + if (result != ERROR_SUCCESS) + goto cleanup; + + // Extract atoms from the regular expression. + result = yr_atoms_extract_from_re( + &compiler->atoms_config, + re_ast, + modifier, + &atom_list, + min_atom_quality); + + if (result != ERROR_SUCCESS) + goto cleanup; + + // If no atom was extracted let's add a zero-length atom. + if (atom_list == NULL) + { + atom_list = (YR_ATOM_LIST_ITEM*) yr_malloc(sizeof(YR_ATOM_LIST_ITEM)); + + if (atom_list == NULL) + { + result = ERROR_INSUFFICIENT_MEMORY; + goto cleanup; + } + + atom_list->atom.length = 0; + atom_list->backtrack = 0; + atom_list->backward_code_ref = YR_ARENA_NULL_REF; + atom_list->next = NULL; + + yr_arena_ptr_to_ref( + compiler->arena, + yr_arena_get_ptr( + compiler->arena, YR_RE_CODE_SECTION, forward_code_start), + &(atom_list->forward_code_ref)); + } } string->flags = modifier.flags; @@ -545,16 +583,16 @@ static int _yr_parser_write_string( string->idx = compiler->current_string_idx; string->fixed_offset = YR_UNDEFINED; - if (result == ERROR_SUCCESS) - { - // Add the string to Aho-Corasick automaton. - result = yr_ac_add_string( - compiler->automaton, - string, - compiler->current_string_idx, - atom_list, - compiler->arena); - } + // Add the string to Aho-Corasick automaton. + result = yr_ac_add_string( + compiler->automaton, + string, + compiler->current_string_idx, + atom_list, + compiler->arena); + + if (result != ERROR_SUCCESS) + goto cleanup; if (modifier.flags & STRING_FLAGS_LITERAL) { @@ -580,6 +618,7 @@ static int _yr_parser_write_string( compiler->current_string_idx++; +cleanup: if (free_literal) yr_free(literal_string); @@ -761,11 +800,9 @@ int yr_parser_reduce_string_declaration( { if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) { - yywarning( - yyscanner, - "unknown escape sequence"); + yywarning(yyscanner, "unknown escape sequence"); } - else + else { snprintf( message, @@ -1148,7 +1185,7 @@ int yr_parser_reduce_string_identifier( YR_STRING* string; YR_COMPILER* compiler = yyget_extra(yyscanner); - if (strcmp(identifier, "$") == 0) // is an anonymous string ? + if (strcmp(identifier, "$") == 0) // is an anonymous string ? { if (compiler->loop_for_of_var_index >= 0) // inside a loop ? { diff --git a/tests/test-rules.c b/tests/test-rules.c index 28cd1fc464..decddedb79 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -2517,6 +2517,8 @@ void test_re() assert_true_regexp("a{0,1}?bc", "abc", "abc"); assert_true_regexp("a{0,1}bc", "bbc", "bc"); assert_true_regexp("a{0,1}?bc", "abc", "bc"); + assert_true_regexp("a{,0}", "a", ""); + assert_true_regexp("a{,0}", "x", ""); assert_true_regexp("aa{0,1}?bc", "abc", "abc"); assert_true_regexp("aa{0,1}?bc", "abc", "abc"); assert_true_regexp("aa{0,1}bc", "abc", "abc");