Skip to content

Commit

Permalink
Merge pull request #6 from block/block/alexp/anyofchanges
Browse files Browse the repository at this point in the history
Change anyOf behaviour to evaluate to true for empty predicates
  • Loading branch information
acerbusace authored Nov 11, 2024
2 parents 5e8d264 + 817d91b commit 91dbffd
Show file tree
Hide file tree
Showing 12 changed files with 299 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,7 @@ def merge_into(bool_node)
bool_node[occurrence].concat(clauses)
end

# For `any_of: []` we need a way to force the datastore to match no documents, but
# I haven't found any sort of literal `false` we can pass in the compound expression
# or even a literal `1 = 0` as is sometimes used in SQL. Instead, we use this for that
# case.
empty_array = [] # : ::Array[untyped]
ALWAYS_FALSE_FILTER = filter({ids: {values: empty_array}})
ALWAYS_FALSE_FILTER = filter({match_none: {}})
end
end
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def process_filter_hash(bool_node, filter_hash, field_path)
filter_hash.each do |field_or_op, expression|
case filter_node_interpreter.identify_node_type(field_or_op, expression)
when :empty
# This is an "empty" filter predicate and we can ignore it.
# This is an "empty" filter predicate and can be treated as `true`.
when :not
process_not_expression(bool_node, expression, field_path)
when :list_any_filter
Expand Down Expand Up @@ -103,13 +103,15 @@ def filters_on_sub_fields?(expression)
end

def process_not_expression(bool_node, expression, field_path)
return if expression.nil? || expression == {}

sub_filter = build_bool_hash do |inner_node|
process_filter_hash(inner_node, expression, field_path)
process_filter_hash(inner_node, expression || {}, field_path)
end

return unless sub_filter
unless sub_filter
# Since an empty expression is treated as `true`, convert to `false` when negating.
BooleanQuery::ALWAYS_FALSE_FILTER.merge_into(bool_node)
return
end

# Prevent any negated filters from being unnecessarily double-negated by
# converting them to a positive filter (i.e., !!A == A).
Expand Down Expand Up @@ -187,21 +189,32 @@ def process_any_satisfy_filter_expression_on_scalar_list(bool_node, filter, fiel
end
end

# We want to provide the following semantics for `any_of`:
#
# * `filter: {anyOf: []}` -> return no results
# * `filter: {anyOf: [{field: null}]}` -> return all results
# * `filter: {anyOf: [{field: null}, {field: ...}]}` -> return all results
def process_any_of_expression(bool_node, expressions, field_path)
return if expressions.nil? || expressions == {}

if expressions.empty?
# When our `expressions` array is empty, we want to match no documents. However, that's
# not the behavior the datastore will give us if we have an empty array in the query under
# `should`. To get the behavior we want, we need to pass the datastore some filter criteria
# that will evaluate to false for every document.
BooleanQuery::ALWAYS_FALSE_FILTER.merge_into(bool_node)
return
end

shoulds = expressions.filter_map do |expression|
build_bool_hash do |inner_bool_node|
process_filter_hash(inner_bool_node, expression, field_path)
end
end

# When our `shoulds` array is empty, the filtering semantics we want is to match no documents.
# However, that's not the behavior the datastore will give us if we have an empty array in the
# query under `should`. To get the behavior we want, we need to pass the datastore some filter
# criteria that will evaluate to false for every document.
bool_query = shoulds.empty? ? BooleanQuery::ALWAYS_FALSE_FILTER : BooleanQuery.should(*shoulds)
bool_query.merge_into(bool_node)
return if shoulds.size < expressions.size

BooleanQuery.should(*shoulds).merge_into(bool_node)
end

def process_all_of_expression(bool_node, expressions, field_path)
Expand Down Expand Up @@ -333,7 +346,7 @@ def process_list_count_expression(bool_node, expression, field_path)
def build_bool_hash(&block)
bool_node = Hash.new { |h, k| h[k] = [] }.tap(&block)

# To ignore "empty" filter predicates we need to return `nil` here.
# To treat "empty" filter predicates as `true` we need to return `nil` here.
return nil if bool_node.empty?

# According to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html#bool-min-should-match,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def filter_value_set_for_filter_hash(filter_hash, target_field_path_parts, trave
# to a particular field.
def filter_value_set_for_filter_hash_entry(field_or_op, filter_value, target_field_path_parts, traversed_field_path_parts, negate:)
if filter_value.nil?
# Any filter with a `nil` value is effectively ignored by our filtering logic, so we need
# Any filter with a `nil` value is effectively treated as `true` by our filtering logic, so we need
# to return our `@all_values_set` to indicate this filter matches all documents.
@all_values_set
elsif field_or_op == @schema_names.not
Expand Down
59 changes: 43 additions & 16 deletions elasticgraph-graphql/spec/acceptance/search_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ module ElasticGraph
string_hash_of(widget1, :id, :amount_cents)
])

# Verify that we can filter on `DateTime` fields (and that `nil` DateTime filter values are ignored)
# Verify that we can filter on `DateTime` fields (and that `nil` DateTime filter values are treated as `true`)
widgets = list_widgets_with(:created_at,
filter: {created_at: {gte: "2019-07-02T12:00:00Z", lt: nil}},
order_by: [:created_at_ASC])
Expand Down Expand Up @@ -229,7 +229,7 @@ module ElasticGraph
string_hash_of(widget2, :id, created_at: "2019-07-02T12:00:00.000Z")
])

# Verify that we can filter on `Date` fields (and that `nil` Date filter values are ignored)
# Verify that we can filter on `Date` fields (and that `nil` Date filter values are treated as `true`)
widgets = list_widgets_with(:created_on,
filter: {created_on: {lte: "2019-07-02", gte: nil}},
order_by: [:created_on_ASC])
Expand Down Expand Up @@ -310,6 +310,28 @@ module ElasticGraph
string_hash_of(widget3, :id, :amount_cents)
])

# Test `not: {any_of: []}` results in all matching documents
widgets = list_widgets_with(:amount_cents,
filter: {not: {any_of: []}})

expect(widgets).to match([
string_hash_of(widget3, :id, :amount_cents),
string_hash_of(widget2, :id, :amount_cents),
string_hash_of(widget1, :id, :amount_cents)
])

# Test `not: {any_of: [emptyPredicate]}` results in no matching documents
widgets = list_widgets_with(:amount_cents,
filter: {not: {any_of: [{name: {equal_to_any_of: nil}}]}})

expect(widgets).to eq []

# Test `not: {any_of: [emptyPredicate, nonEmptyPredicate]}` results in no matching documents
widgets = list_widgets_with(:amount_cents,
filter: {not: {any_of: [{name: {equal_to_any_of: nil}}, {amount_cents: {gt: 150}}]}})

expect(widgets).to eq []

# Test `equal_to_any_of` with `[nil, other_value]`
widgets = list_widgets_with(:amount_cents,
filter: {name: {equal_to_any_of: [nil, "thing2", "", " ", "\n"]}}, # empty strings should be ignored.,
Expand Down Expand Up @@ -350,17 +372,13 @@ module ElasticGraph
string_hash_of(widget3, :id, :amount_cents)
])

# The negation of an ignored filter is an ignored filter: `{not: {equal_to_any_of: nil}}`
# evaluates to {not: {}} which will be ignored, therefore the filter will match all documents.
# The negation of an empty predicate is an always false filter. `{not: {equal_to_any_of: nil}}`
# evaluates to `{not: {true}}`, therefore the filter will match no documents.
widgets = list_widgets_with(:amount_cents,
filter: {id: {not: {equal_to_any_of: nil}}},
order_by: [:amount_cents_ASC])

expect(widgets).to match([
string_hash_of(widget1, :id, :amount_cents),
string_hash_of(widget2, :id, :amount_cents),
string_hash_of(widget3, :id, :amount_cents)
])
expect(widgets).to match []

# Test that sorting by the same field twice in different directions doesn't fail.
# (The extra sort should be effectively ignored).
Expand Down Expand Up @@ -630,6 +648,14 @@ module ElasticGraph
}}}})
expect(results).to eq [{"id" => "t1"}, {"id" => "t2"}, {"id" => "t3"}]

# Verify `any_of: [emptyPredicate]` returns all results.
results = query_teams_with(filter: {any_of: [{forbes_valuations: nil}]})
expect(results).to eq [{"id" => "t1"}, {"id" => "t2"}, {"id" => "t3"}, {"id" => "t4"}]

# Verify `any_of: [emptyPredicate, nonEmptyPredicate]` returns all results.
results = query_teams_with(filter: {any_of: [{forbes_valuations: nil}, {id: {equal_to_any_of: ["t3"]}}]})
expect(results).to eq [{"id" => "t1"}, {"id" => "t2"}, {"id" => "t3"}, {"id" => "t4"}]

# Verify we can use `any_of` directly under `any_satisfy` on a nested field.
results = query_teams_with(filter: {current_players_nested: {any_satisfy: {any_of: [
{name: {equal_to_any_of: ["Johnny Rocket"]}},
Expand Down Expand Up @@ -757,17 +783,17 @@ module ElasticGraph

# Verify `all_of: [{not: null}]` works as expected.
results = query_teams_with(filter: {seasons_nested: {all_of: [{not: nil}]}})
# All teams should be returned since the `nil` part of the filter expression is pruned.
expect(results).to eq [{"id" => "t1"}, {"id" => "t2"}, {"id" => "t3"}, {"id" => "t4"}]
# No teams should be returned since the `nil` part of the filter expression evaluates to `true`.
expect(results).to eq []

# Verify `all_of: [{not: null}]` works as expected.
results = query_teams_with(filter: {seasons_nested: {all_of: [{all_of: nil}]}})
# All teams should be returned since the `nil` part of the filter expression is pruned.
# All teams should be returned since the `nil` part of the filter expression is treated as `true`.
expect(results).to eq [{"id" => "t1"}, {"id" => "t2"}, {"id" => "t3"}, {"id" => "t4"}]

# Verify `all_of: [{}]` works as expected.
results = query_teams_with(filter: {seasons_nested: {all_of: [{}]}})
# All teams should be returned since the `nil` part of the filter expression is pruned.
# All teams should be returned since the `nil` part of the filter expression is treated as `true`.
expect(results).to eq [{"id" => "t1"}, {"id" => "t2"}, {"id" => "t3"}, {"id" => "t4"}]
end

Expand Down Expand Up @@ -865,14 +891,15 @@ def expect_error_from(filter, *error_snippets)
filter: {options: {color: {equal_to_any_of: nil}}}
)).to contain_exactly(expected_widget1, expected_widget2)

# `{not: emptyPredicate}` should result in an always false filter
expect(list_widgets_with_options_and_inventor(
filter: {options: {color: {not: {equal_to_any_of: nil}}}}
)).to contain_exactly(expected_widget1, expected_widget2)
)).to eq []

# not set to 'nil' should not cause any filtering on that value.
# `{not: nil}` should result in an always false filter
expect(list_widgets_with_options_and_inventor(
filter: {options: {color: {not: nil}}}
)).to contain_exactly(expected_widget1, expected_widget2)
)).to eq []

# On type unions you can filter on a subfield that is present on all subtypes...
expect(list_widgets_with_options_and_inventor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def search_datastore(**options, &before_msearch)
expect(results).to match_array(ids_of(team2))
end

it "correctly ignores nil filters under `any_satisfy`" do
it "correctly treats nil filters under `any_satisfy` treating as `true`" do
results = search_with_filter("current_players_nested", "any_satisfy", {
"name" => {"equal_to_any_of" => nil},
"nicknames" => {"any_satisfy" => {"equal_to_any_of" => nil}}
Expand Down Expand Up @@ -889,7 +889,7 @@ def search_datastore(**options, &before_msearch)
expect(results).to match_array(ids_of(widget2, widget4))
end

specify "`equal_to_any_of: []` or `any_of: []` matches no documents, but `any_predicate: nil` or `field: {}` is ignored, matching all documents" do
specify "`equal_to_any_of: []` or `any_of: []` matches no documents, but `any_predicate: nil` or `field: {}` is treated as `true`, matching all documents" do
index_into(
graphql,
widget1 = build(:widget),
Expand All @@ -898,6 +898,7 @@ def search_datastore(**options, &before_msearch)

expect(search_with_filter("id", "equal_to_any_of", [])).to eq []
expect(search_with_filter("id", "any_of", [])).to eq []
expect(search_with_filter("id", "any_of", [{"any_of" => []}])).to eq []

expect(search_with_filter("id", "equal_to_any_of", nil)).to eq ids_of(widget1, widget2)
expect(search_with_filter("amount_cents", "lt", nil)).to eq ids_of(widget1, widget2)
Expand All @@ -909,6 +910,22 @@ def search_datastore(**options, &before_msearch)
expect(search_with_filter("name_text", "matches_query", nil)).to eq ids_of(widget1, widget2)
expect(search_with_filter("name_text", "matches_phrase", nil)).to eq ids_of(widget1, widget2)
expect(search_with_freeform_filter({"id" => {}})).to eq ids_of(widget1, widget2)
expect(search_with_freeform_filter({"any_of" => [{"id" => nil}]})).to eq ids_of(widget1, widget2)
expect(search_with_freeform_filter({"any_of" => [{"id" => nil}, {"amount_cents" => {"lt" => 0}}]})).to eq ids_of(widget1, widget2)
end

specify "`not: {any_of: []}` matches all documents, but `not: {any_of: [field: nil, ...]}` is treated as `false` matching no documents" do
index_into(
graphql,
widget1 = build(:widget, id: "one"),
widget2 = build(:widget, id: "two")
)

expect(search_with_freeform_filter({"not" => {"any_of" => []}})).to eq ids_of(widget1, widget2)
expect(search_with_freeform_filter({"not" => {"not" => {"any_of" => []}}})).to eq []

expect(search_with_freeform_filter({"not" => {"any_of" => [{"id" => nil}]}})).to eq []
expect(search_with_freeform_filter({"not" => {"any_of" => [{"id" => nil}, {"amount_cents" => {"lt" => 1000}}]}})).to eq []
end

it "`equal_to_any_of:` with `nil` matches documents with null values or not null values" do
Expand Down Expand Up @@ -1157,12 +1174,12 @@ def search_datastore(**options, &before_msearch)
expect(triple_nested_not).to match_array(ids_of(widget1, widget3))
end

it "is ignored when set to nil" do
it "matches no documents when set to `nil`" do
index_into(
graphql,
widget1 = build(:widget, amount_cents: 100),
widget2 = build(:widget, amount_cents: 205),
widget3 = build(:widget, amount_cents: 400)
build(:widget, amount_cents: 100),
build(:widget, amount_cents: 205),
build(:widget, amount_cents: 400)
)

inner_not_result1 = search_with_freeform_filter({"amount_cents" => {
Expand Down Expand Up @@ -1195,9 +1212,51 @@ def search_datastore(**options, &before_msearch)
"amount_cents" => {"equal_to_any_of" => nil}
}})

expect(inner_not_result1).to eq(outer_not_result1).and match_array(ids_of(widget1, widget2, widget3))
expect(inner_not_result2).to eq(outer_not_result2).and match_array(ids_of(widget1))
expect(inner_not_result3).to eq(outer_not_result3).and match_array(ids_of(widget1, widget2, widget3))
inner_not_result4 = search_with_freeform_filter({"amount_cents" => {
"not" => {}
}})

outer_not_result4 = search_with_freeform_filter({"not" => {
"amount_cents" => {}
}})

expect(inner_not_result1).to eq(outer_not_result1).and eq []
expect(inner_not_result2).to eq(outer_not_result2).and eq []
expect(inner_not_result3).to eq(outer_not_result3).and eq []
expect(inner_not_result4).to eq(outer_not_result4).and eq []
end

it "is treated as `true` when set to nil inside `any_of`" do
index_into(
graphql,
widget1 = build(:widget, amount_cents: 100),
build(:widget, amount_cents: 205),
build(:widget, amount_cents: 400)
)

inner_not_result1 = search_with_freeform_filter({"amount_cents" => {
"any_of" => [
{"not" => nil},
{"lt" => 200}
]
}})

outer_not_result1 = search_with_freeform_filter({
"any_of" => [
{
"not" => {
"amount_cents" => nil
}
},
{
"amount_cents" => {
"lt" => 200
}
}
]
})

expect(inner_not_result1).to eq(outer_not_result1).and match_array(ids_of(widget1))
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ class GraphQL
}]
end

it "ignores empty filters" do
it "treats empty filters as `true`" do
query = aggregation_query_of(name: "teams", sub_aggregations: [
nested_sub_aggregation_of(path_in_index: ["current_players_nested"], query: sub_aggregation_query_of(name: "current_players_nested", filter: {
"name" => {"equal_to_any_of" => nil}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ module Aggregation
}]
end

it "ignores an empty filter" do
it "treats an empty filter treating as `true`" do
aggs = {
"target:seasons_nested" => {"doc_count" => 423, "meta" => outer_meta}
}
Expand Down
Loading

0 comments on commit 91dbffd

Please sign in to comment.