Skip to content

Commit

Permalink
fix: child slice selectors only selecting first matching index (#499)
Browse files Browse the repository at this point in the history
- Fixed a bug where the compiler would erronously mark states
with a single slice transition as unitary, even though such
transitions could match more than one index.
  • Loading branch information
V0ldek authored Apr 3, 2024
1 parent 15486dd commit 68dc3fe
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 9 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/book.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,13 @@ jobs:
target/
key: book-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: cargo install mdbook
if: steps.cache-restore-cargo.outputs.cache-hit != 'true'
uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1
with:
# Name of the crate to install
crate: mdbook
env:
CARGO_TARGET_DIR: target/
- name: cargo install mdbook-katex
if: steps.cache-restore-cargo.outputs.cache-hit != 'true'
uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1
with:
# Name of the crate to install
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/clusterfuzzlite-cron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 600
fuzz-seconds: 1200
mode: 'prune'
output-sarif: true
storage-repo: https://${{ secrets.CLUSTERFUZZLITE_STORAGE_TOKEN }}@github.com/rsonquery/rsonpath-fuzz-storage.git
Expand All @@ -64,7 +64,7 @@ jobs:
uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 600
fuzz-seconds: 1200
mode: 'coverage'
sanitizer: 'coverage'
storage-repo: https://${{ secrets.CLUSTERFUZZLITE_STORAGE_TOKEN }}@github.com/rsonquery/rsonpath-fuzz-storage.git
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ jobs:
target/
key: ${{ matrix.toolchain }}-${{ matrix.target_triple }}-cargo-${{ hashFiles('**/Cargo.toml') }}
- name: cargo install cargo-hack
if: steps.cache-restore-cargo.outputs.cache-hit != 'true'
uses: baptiste0928/cargo-install@94e1849646e5797d0c8b34d8e525124ae9ae1d86 # v3.0.1
with:
# Name of the crate to install
Expand Down
9 changes: 9 additions & 0 deletions crates/rsonpath-lib/src/automaton.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ impl ArrayTransitionLabel {
Self::Slice(s) => s.contains(index),
}
}

fn matches_at_most_once(&self) -> bool {
match self {
Self::Index(_) => true,
Self::Slice(slice) => {
slice.step == JsonUInt::ZERO && slice.end.map_or(false, |end| slice.start.as_u64() + 1 >= end.as_u64())
}
}
}
}

impl From<JsonUInt> for ArrayTransitionLabel {
Expand Down
64 changes: 60 additions & 4 deletions crates/rsonpath-lib/src/automaton/minimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,7 @@ impl<'q> Minimizer<'q> {
debug!("{id} is rejecting");
attrs = attrs.rejecting();
}
if array_transitions.len() + member_transitions.len() == 1 && fallback == Self::rejecting_state() {
debug!("{id} is unitary");
attrs = attrs.unitary();
}

if self.accepting.contains(fallback.0)
|| array_transitions
.iter()
Expand All @@ -213,6 +210,23 @@ impl<'q> Minimizer<'q> {
attrs = attrs.has_array_transition_to_accepting();
}

// Unitarity check:
// 1. Fallback rejects.
// 2. Only one transition that can match at most one element in a JSON, either:
// a) member transition; or
// b) array transition that matches only one index.
let is_unitary = {
fallback == Self::rejecting_state()
&& ((member_transitions.len() == 1 && array_transitions.is_empty())
|| (array_transitions.len() == 1
&& member_transitions.is_empty()
&& array_transitions[0].label.matches_at_most_once()))
};
if is_unitary {
debug!("{id} is unitary");
attrs = attrs.unitary();
}

attrs.into()
}

Expand Down Expand Up @@ -1260,6 +1274,48 @@ mod tests {
assert_eq!(result, expected);
}

#[test]
fn direct_slice() {
// Query = $[1:3]
let label = SimpleSlice::new(1.into(), Some(3.into()), 1.into());

let nfa = NondeterministicAutomaton {
ordered_states: vec![
NfaState::Direct(nfa::Transition::Array(label.into())),
NfaState::Accepting,
],
};

let mut result = minimize(nfa).unwrap();
make_canonical(&mut result);
let expected = Automaton {
states: vec![
StateTable {
array_transitions: smallvec![],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::REJECTING,
},
StateTable {
array_transitions: smallvec![ArrayTransition::new(ArrayTransitionLabel::Slice(label), State(2)),],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING
| StateAttributes::HAS_ARRAY_TRANSITION
| StateAttributes::HAS_ARRAY_TRANSITION_TO_ACCEPTING,
},
StateTable {
array_transitions: smallvec![],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::ACCEPTING,
},
],
};

assert_eq!(result, expected);
}

/// DFA creation is unstable - it can result in many different isomorphic automaton structures.
/// This function relabels the states in a canonical way so that they can be compared for equality.
fn make_canonical(dfa: &mut Automaton) {
Expand Down
28 changes: 28 additions & 0 deletions crates/rsonpath-test/documents/toml/lists.toml
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,31 @@ nodes = ['''[
0
]
]''']

[[queries]]
query = "$[1:3]"
description = "select the second and third elements"

[queries.results]
count = 2
spans = [[31, 33], [39, 278]]
nodes = [
'[]',
'''[
[],
[
[
[],
0
],
[
[],
0
],
[
[],
0
]
]
]'''
]

0 comments on commit 68dc3fe

Please sign in to comment.