/// Lowercases `text` one `char` at a time.
///
/// Unlike `str::to_lowercase`, this applies no context-sensitive rules, so the
/// result always equals the concatenation of each character's own lowercase
/// expansion. `acronym_match_len` relies on that property.
fn acronym_fold_case(text: &str) -> String {
    text.chars().flat_map(char::to_lowercase).collect()
}

/// Returns the byte length of the shortest non-empty prefix of `text` whose
/// per-character lowercase form equals `folded`, or `None` when `text` does not
/// start with `folded` (ignoring case).
///
/// The returned length is always a `char` boundary of `text`, so it is safe to
/// slice `text` with it.
fn acronym_match_len(text: &str, folded: &str) -> Option<usize> {
    let mut remaining = folded.chars();
    for (offset, ch) in text.char_indices() {
        for lower in ch.to_lowercase() {
            // Also rejects a needle that ends in the middle of one character's
            // multi-char lowercase expansion (`next()` yields `None` there).
            if remaining.next() != Some(lower) {
                return None;
            }
        }
        if remaining.as_str().is_empty() {
            return Some(offset + ch.len_utf8());
        }
    }
    None
}

/// Builds a filter that rewrites every case-insensitive occurrence of an
/// acronym in its input to the acronym's canonical spelling.
///
/// Matching deliberately ignores word boundaries: with `API` configured,
/// `my_api_url` becomes `my_API_url`.
///
/// * `acronyms` - canonical spellings (e.g. `iOS`, `API`). Empty entries are
///   ignored. When two entries differ only by case, the last one wins.
///
/// The returned closure scans the input left to right. At each position the
/// longest matching acronym is substituted and scanning resumes after the
/// matched text, so replacements never overlap and are never re-scanned.
///
/// Matching is done against the original input rather than a lowercased copy,
/// so characters whose lowercase form has a different byte length cannot shift
/// the replacement offsets.
pub fn acronym(acronyms: Vec<String>) -> impl Fn(&str) -> String {
    let mut table: Vec<(String, String)> = acronyms
        .into_iter()
        .map(|canonical| (acronym_fold_case(&canonical), canonical))
        .filter(|(folded, _)| !folded.is_empty())
        .collect();
    // Reverse first so that, after the stable sort, the most recently supplied
    // spelling of a duplicated acronym is the one `dedup_by` keeps.
    table.reverse();
    // Longest key first makes overlapping acronyms resolve deterministically.
    table.sort_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(&b.0)));
    table.dedup_by(|later, kept| later.0 == kept.0);

    move |input: &str| -> String {
        let mut result = String::with_capacity(input.len());
        let mut rest = input;
        while let Some(ch) = rest.chars().next() {
            let hit = table.iter().find_map(|(folded, canonical)| {
                acronym_match_len(rest, folded).map(|len| (len, canonical))
            });
            match hit {
                Some((len, canonical)) => {
                    result.push_str(canonical);
                    rest = &rest[len..];
                }
                None => {
                    result.push(ch);
                    rest = &rest[ch.len_utf8()..];
                }
            }
        }
        result
    }
}