Skip to content

Commit

Permalink
Fix empty capture groups in regexes (#2109)
Browse files Browse the repository at this point in the history
  • Loading branch information
jneem authored Nov 27, 2024
1 parent 77f6e75 commit 4f2136f
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 8 deletions.
15 changes: 13 additions & 2 deletions core/src/eval/operation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,12 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
"groups",
RichTerm::from(Term::Array(
Array::from_iter(
groups.into_iter().map(|s| Term::Str(s).into())
groups
.into_iter()
// Unmatched groups get turned into empty strings. It
// might be nicer to have a 'Some s / 'None instead,
// but that would be an API break.
.map(|s| Term::Str(s.unwrap_or_default()).into())
),
ArrayAttrs::new().closurized()
))
Expand All @@ -1029,7 +1034,13 @@ impl<R: ImportResolver, C: Cache> VirtualMachine<R, C> {
"groups",
RichTerm::from(Term::Array(
Array::from_iter(
found.groups.into_iter().map(|s| Term::Str(s).into())
found
.groups
.into_iter()
// Unmatched groups get turned into empty strings. It
// might be nicer to have a 'Some s / 'None instead,
// but that would be an API break.
.map(|s| Term::Str(s.unwrap_or_default()).into())
),
ArrayAttrs::new().closurized()
))
Expand Down
11 changes: 6 additions & 5 deletions core/src/term/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ impl NickelString {
let groups = capt
.iter()
.skip(1)
.filter_map(|s_opt| s_opt.map(|s| s.as_str().into()))
.map(|s_opt| s_opt.map(|s| s.as_str().into()))
.collect();

// The indices returned by the `regex` crate are byte offsets into
Expand Down Expand Up @@ -357,7 +357,10 @@ impl Default for NickelString {
pub struct RegexFindResult {
pub matched: NickelString,
pub index: Number,
pub groups: Vec<NickelString>,
/// If a capture group didn't match, we store a `None`. This `None` placeholders
/// make the indexing predictable, so it's possible to associate captures with
/// parenthesis groupings in the original regex.
pub groups: Vec<Option<NickelString>>,
}

/// Errors returned by `NickelString`'s `substring` method.
Expand Down Expand Up @@ -573,9 +576,7 @@ mod grapheme_cluster_preservation {
) -> impl Iterator<Item = regex::Captures<'a>> {
needle.captures_iter(haystack).filter(|c| {
c.iter().all(|maybe_match| {
maybe_match
.map(|m| does_match_start_and_end_on_boundary(haystack, &m))
.unwrap_or(false)
maybe_match.map_or(true, |m| does_match_start_and_end_on_boundary(haystack, &m))
})
})
}
Expand Down
3 changes: 2 additions & 1 deletion core/tests/integration/inputs/stdlib/string_find.ncl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ let { string, .. } = std in
std.string.find "a" "aaa bbb ccc abc" == { groups = [], index = 0, matched = "a" },
std.string.find "([a-z]+)=([0-9]+)" "one=1, two=2, three=3" == { groups = ["one", "1"], index = 0, matched = "one=1" },
std.string.find "(\\d+)\\.(\\d+)\\.(\\d+)" "1.2.3" == { groups = ["1", "2", "3"], index = 0, matched = "1.2.3" },
std.string.find "(\\p{Emoji})=(\\w+)" "πŸ˜€=smiling" == { groups = ["πŸ˜€", "smiling"], index = 0, matched = "πŸ˜€=smiling" }
std.string.find "(\\p{Emoji})=(\\w+)" "πŸ˜€=smiling" == { groups = ["πŸ˜€", "smiling"], index = 0, matched = "πŸ˜€=smiling" },
std.string.find "a(b)?" "ac" == { groups = [""], index = 0, matched = "a" },
]
|> std.test.assert_all

0 comments on commit 4f2136f

Please sign in to comment.