Skip to content

Commit

Permalink
Update acronym filter to ignore word boundaries.
Browse files Browse the repository at this point in the history
  • Loading branch information
jsuereth committed Nov 27, 2024
1 parent 9f1d9a5 commit f27b303
Showing 1 changed file with 11 additions and 11 deletions.
22 changes: 11 additions & 11 deletions crates/weaver_forge/src/extensions/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use minijinja::{Environment, ErrorKind, Value};
use regex::Regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::sync::OnceLock;

/// Add utility filters and tests to the environment.
pub(crate) fn add_filters(env: &mut Environment<'_>, target_config: &WeaverConfig) {
Expand Down Expand Up @@ -112,22 +111,23 @@ fn regex_replace(
/// Builds a filter that rewrites every occurrence of a known acronym to its
/// canonical spelling.
///
/// Matching is case-insensitive and deliberately ignores word boundaries, so
/// with the acronym `API` both `"the api"` and `"apiClient"` are rewritten
/// (to `"the API"` and `"APIClient"`).
///
/// * `acronyms` - canonical spellings. Entries that differ only by case are
///   treated as the same acronym and the last one listed wins. Empty entries
///   are ignored.
///
/// Acronyms are applied shortest key first, so when two acronyms overlap in
/// the input the longest one is applied last and decides the final casing.
/// This keeps the output deterministic regardless of hash-map ordering.
pub fn acronym(acronyms: Vec<String>) -> impl Fn(&str) -> String {
    // Deduplicate on the lowercase key; later entries overwrite earlier ones.
    let acronym_map = acronyms
        .into_iter()
        .map(|acronym| (lowercase_per_char(&acronym), acronym))
        .filter(|(key, _)| !key.is_empty())
        .collect::<HashMap<String, String>>();

    // Fix the application order once, up front: shortest key first, ties
    // broken lexicographically.
    let mut ordered: Vec<(String, String)> = acronym_map.into_iter().collect();
    ordered.sort_unstable_by(|(a, _), (b, _)| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));

    move |input: &str| -> String {
        ordered
            .iter()
            .fold(input.to_owned(), |text, (key, replacement)| {
                replace_ignoring_case(&text, key, replacement)
            })
    }
}

/// Lowercases `text` one character at a time, so that acronym keys and inputs
/// are folded by exactly the same rule.
fn lowercase_per_char(text: &str) -> String {
    text.chars().flat_map(char::to_lowercase).collect()
}

/// Replaces every case-insensitive occurrence of `key` (already lowercased
/// with [`lowercase_per_char`]) in `text` with `replacement`.
///
/// Lowercasing may change the byte length of a character, so byte offsets in
/// the lowercased text cannot be applied to `text` directly. Each lowercased
/// byte is therefore mapped back to the source offset it came from, and the
/// result is rebuilt from source slices.
fn replace_ignoring_case(text: &str, key: &str, replacement: &str) -> String {
    let mut lowered = String::with_capacity(text.len());
    // `source_at[i]` is `Some(offset)` when byte `i` of `lowered` starts the
    // lowercase expansion of the source character at `offset`, else `None`.
    let mut source_at: Vec<Option<usize>> = Vec::with_capacity(text.len() + 1);
    for (offset, ch) in text.char_indices() {
        lowered.extend(ch.to_lowercase());
        source_at.push(Some(offset));
        source_at.resize(lowered.len(), None);
    }
    // Sentinel so a match ending at the very end of the text maps cleanly.
    source_at.push(Some(text.len()));

    let mut out = String::with_capacity(text.len());
    let mut copied_to = 0;
    for (idx, _) in lowered.match_indices(key) {
        // Skip matches that begin or end inside a multi-character expansion:
        // they do not correspond to a whole range of source characters.
        if let (Some(start), Some(end)) = (source_at[idx], source_at[idx + key.len()]) {
            out.push_str(&text[copied_to..start]);
            out.push_str(replacement);
            copied_to = end;
        }
    }
    out.push_str(&text[copied_to..]);
    out
}

Expand Down

0 comments on commit f27b303

Please sign in to comment.