From dc1fabef96a9617afe39bcfe9a12a97a975f4a03 Mon Sep 17 00:00:00 2001 From: Matteo Pace Date: Mon, 18 Sep 2023 23:36:37 +0200 Subject: [PATCH 1/3] removes multiline from default regex modifiers --- internal/operators/rx.go | 8 ++++---- internal/operators/rx_test.go | 30 +++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/internal/operators/rx.go b/internal/operators/rx.go index e801c9f72..246780fcd 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -24,10 +24,10 @@ type rx struct { var _ plugintypes.Operator = (*rx)(nil) func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { - // (?sm) enables multiline and dotall mode, required by some CRS rules and matching ModSec behavior, see - // - https://stackoverflow.com/a/27680233 - // - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E - data := fmt.Sprintf("(?sm)%s", options.Arguments) + // (?s) enables dotall mode, required by some CRS rules and matching ModSec behavior, see + // - https://github.com/google/re2/wiki/Syntax + // - Flag usage: https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E + data := fmt.Sprintf("(?s)%s", options.Arguments) if matchesArbitraryBytes(data) { // Use binary regex matcher if expression matches non-utf8 bytes. The binary matcher does diff --git a/internal/operators/rx_test.go b/internal/operators/rx_test.go index e9785713b..59b5a34c9 100644 --- a/internal/operators/rx_test.go +++ b/internal/operators/rx_test.go @@ -55,22 +55,42 @@ func TestRx(t *testing.T) { want: true, }, { - // Requires multiline + // Requires multiline disabled by default pattern: `^hello.*world`, input: "test\nhello\nworld", + want: false, + }, + { + // Makes sure multiline can be enabled by the user + pattern: `(?m)^hello.*world`, + input: "test\nhello\nworld", want: true, }, { - // Makes sure, (?sm) passed by the user does not + // Makes sure, (?s) passed by the user does not // break the regex. - pattern: `(?sm)hello.*world`, + pattern: `(?s)hello.*world`, input: "hello\nworld", want: true, }, { // Make sure user flags are also applied - pattern: `(?i)^hello.*world`, - input: "test\nHELLO\nworld", + pattern: `(?i)hello.*world`, + input: "testHELLO\nworld", + want: true, + }, + { + // The so called DOLLAR_ENDONLY modifier in PCRE2 is meant to tweak the meaning of dollar '$' + // so that it matches only at the very end of the string (see: https://www.pcre.org/current/doc/html/pcre2pattern.html#SEC6) + // It seems that re2 already behaves like that by default. + pattern: `123$`, + input: "123\n", + want: false, + }, + { + // Dollar endonly match + pattern: `123$`, + input: "test123", want: true, }, } From 8715d642391008166d048b4fd7cc1b4fbff74f34 Mon Sep 17 00:00:00 2001 From: Matteo Pace Date: Wed, 13 Nov 2024 01:03:34 +0100 Subject: [PATCH 2/3] relies on coraza.rule.no_regex_multiline --- README.md | 1 + internal/operators/multilineregex.go | 8 ++ internal/operators/multilineregex_default.go | 8 ++ internal/operators/rx.go | 17 ++- internal/operators/rx_no_multiline_test.go | 119 +++++++++++++++++++ internal/operators/rx_test.go | 32 ++--- magefile.go | 9 ++ 7 files changed, 165 insertions(+), 29 deletions(-) create mode 100644 internal/operators/multilineregex.go create mode 100644 internal/operators/multilineregex_default.go create mode 100644 internal/operators/rx_no_multiline_test.go diff --git a/README.md b/README.md index 18e346148..27d713a82 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ dictionaries to reduce memory consumption in deployments that launch several cor instances. For more context check [this issue](https://github.com/corazawaf/coraza-caddy/issues/76) * `no_fs_access` - indicates that the target environment has no access to FS in order to not leverage OS' filesystem related functionality e.g. file body buffers. * `coraza.rule.case_sensitive_args_keys` - enables case-sensitive matching for ARGS keys, aligning Coraza behavior with RFC 3986 specification. It will be enabled by default in the next major version. +* `coraza.rule.no_regex_multiline` - disables enabling by default regexes multiline modifiers in `@rx` operator. It aligns with CRS expected behavior, reduces false positives and might improve performances. No multiline regexes by default will be enabled in the next major version. For more context check [this PR](https://github.com/corazawaf/coraza/pull/876) ## E2E Testing diff --git a/internal/operators/multilineregex.go b/internal/operators/multilineregex.go new file mode 100644 index 000000000..83a444275 --- /dev/null +++ b/internal/operators/multilineregex.go @@ -0,0 +1,8 @@ +// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build coraza.rule.no_regex_multiline + +package operators + +var shouldNotUseMultilineRegexesOperatorByDefault = true diff --git a/internal/operators/multilineregex_default.go b/internal/operators/multilineregex_default.go new file mode 100644 index 000000000..6b40c2c45 --- /dev/null +++ b/internal/operators/multilineregex_default.go @@ -0,0 +1,8 @@ +// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !coraza.rule.no_regex_multiline + +package operators + +var shouldNotUseMultilineRegexesOperatorByDefault = false diff --git a/internal/operators/rx.go b/internal/operators/rx.go index 246780fcd..ac0431d73 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -24,10 +24,19 @@ type rx struct { var _ plugintypes.Operator = (*rx)(nil) func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { - // (?s) enables dotall mode, required by some CRS rules and matching ModSec behavior, see - // - https://github.com/google/re2/wiki/Syntax - // - Flag usage: https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E - data := fmt.Sprintf("(?s)%s", options.Arguments) + var data string + if shouldNotUseMultilineRegexesOperatorByDefault { + // (?s) enables dotall mode, required by some CRS rules and matching ModSec behavior, see + // - https://github.com/google/re2/wiki/Syntax + // - Flag usage: https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E + data = fmt.Sprintf("(?s)%s", options.Arguments) + } else { + // TODO: deprecate multiline modifier set by default in Coraza v4 + // CRS rules will explicitly set the multiline modifier when needed + // Having it enabled by default can lead to false positives and less performance + // See https://github.com/corazawaf/coraza/pull/876 + data = fmt.Sprintf("(?sm)%s", options.Arguments) + } if matchesArbitraryBytes(data) { // Use binary regex matcher if expression matches non-utf8 bytes. The binary matcher does diff --git a/internal/operators/rx_no_multiline_test.go b/internal/operators/rx_no_multiline_test.go new file mode 100644 index 000000000..a8816ad4c --- /dev/null +++ b/internal/operators/rx_no_multiline_test.go @@ -0,0 +1,119 @@ +// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build coraza.rule.no_regex_multiline + +package operators + +import ( + "fmt" + "testing" + + "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/corazawaf" +) + +func TestRx(t *testing.T) { + tests := []struct { + pattern string + input string + want bool + }{ + { + pattern: "som(.*)ta", + input: "somedata", + want: true, + }, + { + pattern: "som(.*)ta", + input: "notdata", + want: false, + }, + { + pattern: "ハロー", + input: "ハローワールド", + want: true, + }, + { + pattern: "ハロー", + input: "グッバイワールド", + want: false, + }, + { + pattern: `\xac\xed\x00\x05`, + input: "\xac\xed\x00\x05t\x00\x04test", + want: true, + }, + { + pattern: `\xac\xed\x00\x05`, + input: "\xac\xed\x00t\x00\x04test", + want: false, + }, + { + // Requires dotall + pattern: `hello.*world`, + input: "hello\nworld", + want: true, + }, + { + // Requires multiline disabled by default + pattern: `^hello.*world`, + input: "test\nhello\nworld", + want: false, + }, + { + // Makes sure multiline can be enabled by the user + pattern: `(?m)^hello.*world`, + input: "test\nhello\nworld", + want: true, + }, + { + // Makes sure, (?s) passed by the user does not + // break the regex. + pattern: `(?s)hello.*world`, + input: "hello\nworld", + want: true, + }, + { + // Make sure user flags are also applied + pattern: `(?i)hello.*world`, + input: "testHELLO\nworld", + want: true, + }, + { + // The so called DOLLAR_ENDONLY modifier in PCRE2 is meant to tweak the meaning of dollar '$' + // so that it matches only at the very end of the string (see: https://www.pcre.org/current/doc/html/pcre2pattern.html#SEC6) + // It seems that re2 already behaves like that by default. + pattern: `123$`, + input: "123\n", + want: false, + }, + { + // Dollar endonly match + pattern: `123$`, + input: "test123", + want: true, + }, + } + + for _, tc := range tests { + tt := tc + t.Run(fmt.Sprintf("%s/%s", tt.pattern, tt.input), func(t *testing.T) { + + opts := plugintypes.OperatorOptions{ + Arguments: tt.pattern, + } + rx, err := newRX(opts) + if err != nil { + t.Error(err) + } + waf := corazawaf.NewWAF() + tx := waf.NewTransaction() + tx.Capture = true + res := rx.Evaluate(tx, tt.input) + if res != tt.want { + t.Errorf("want %v, got %v", tt.want, res) + } + }) + } +} diff --git a/internal/operators/rx_test.go b/internal/operators/rx_test.go index 59b5a34c9..ccbb0649d 100644 --- a/internal/operators/rx_test.go +++ b/internal/operators/rx_test.go @@ -1,6 +1,8 @@ // Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors // SPDX-License-Identifier: Apache-2.0 +//go:build !coraza.rule.no_regex_multiline + package operators import ( @@ -55,42 +57,22 @@ func TestRx(t *testing.T) { want: true, }, { - // Requires multiline disabled by default + // Requires multiline pattern: `^hello.*world`, input: "test\nhello\nworld", - want: false, - }, - { - // Makes sure multiline can be enabled by the user - pattern: `(?m)^hello.*world`, - input: "test\nhello\nworld", want: true, }, { - // Makes sure, (?s) passed by the user does not + // Makes sure, (?sm) passed by the user does not // break the regex. - pattern: `(?s)hello.*world`, + pattern: `(?sm)hello.*world`, input: "hello\nworld", want: true, }, { // Make sure user flags are also applied - pattern: `(?i)hello.*world`, - input: "testHELLO\nworld", - want: true, - }, - { - // The so called DOLLAR_ENDONLY modifier in PCRE2 is meant to tweak the meaning of dollar '$' - // so that it matches only at the very end of the string (see: https://www.pcre.org/current/doc/html/pcre2pattern.html#SEC6) - // It seems that re2 already behaves like that by default. - pattern: `123$`, - input: "123\n", - want: false, - }, - { - // Dollar endonly match - pattern: `123$`, - input: "test123", + pattern: `(?i)^hello.*world`, + input: "test\nHELLO\nworld", want: true, }, } diff --git a/magefile.go b/magefile.go index c399e4825..07a0c84ae 100644 --- a/magefile.go +++ b/magefile.go @@ -130,6 +130,15 @@ func Test() error { return err } + // Execute FTW tests with coraza.rule.no_regex_multiline as well + if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "./testing/coreruleset"); err != nil { + return err + } + + if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "-run=^TestRx", "./..."); err != nil { + return err + } + if err := sh.RunV("go", "test", "-tags=coraza.rule.case_sensitive_args_keys", "-run=^TestCaseSensitive", "./..."); err != nil { return err } From 242f972f21938936ba6ba5edeab0f751c7e532d7 Mon Sep 17 00:00:00 2001 From: Matteo Pace Date: Wed, 13 Nov 2024 13:03:40 +0100 Subject: [PATCH 3/3] adds coraza.rule.no_regex_multiline to TagsMatrix --- magefile.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/magefile.go b/magefile.go index 07a0c84ae..c47242479 100644 --- a/magefile.go +++ b/magefile.go @@ -183,7 +183,7 @@ func Coverage() error { if err := sh.RunV("go", "test", tagsCmd, "-coverprofile=build/coverage-ftw.txt", "-covermode=atomic", "-coverpkg=./...", "./testing/coreruleset"); err != nil { return err } - // we run tinygo tag only if memoize_builders is is not enabled + // we run tinygo tag only if memoize_builders is not enabled if !strings.Contains(tags, "memoize_builders") { if tagsCmd != "" { tagsCmd += ",tinygo" @@ -280,6 +280,7 @@ func combinations(tags []string) []string { func TagsMatrix() error { tags := []string{ "coraza.rule.case_sensitive_args_keys", + "coraza.rule.no_regex_multiline", "memoize_builders", "coraza.rule.multiphase_valuation", "no_fs_access",