Skip to content

Commit b48fcb7

Browse files
Narrow queried indices based on _typename filters.
When a query on an abstract type filters on `__typename`, we can skip any index that contains none of the requested concrete types. For example, querying `DistributionChannel` with `_typename: PhysicalStore` only needs to hit `physical_stores`, not `distribution_channels`. `TypenameIndexPicker` implements this using `FilterValueSetExtractor` for full set-algebra support across `not`, `any_of`, and `all_of` filter combinators. Generated with Claude Code
1 parent 203f849 commit b48fcb7

9 files changed

Lines changed: 510 additions & 2 deletions

File tree

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ class GraphQL
2828
# with minimal effort.
2929
class DatastoreQuery < Support::MemoizableData.define(
3030
:total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
31-
:index_expression_builder, :default_page_size, :search_index_definitions, :max_page_size,
31+
:index_expression_builder, :typename_index_picker, :default_page_size, :search_index_definitions,
32+
:index_definitions_by_possible_typename, :max_page_size,
3233
:client_filters, :internal_filters, :sort, :document_pagination,
3334
:requested_fields, :request_all_fields, :requested_highlights, :request_all_highlights,
3435
:individual_docs_needed, :size_multiplier, :monotonic_clock_deadline, :schema_element_names
@@ -39,6 +40,7 @@ class DatastoreQuery < Support::MemoizableData.define(
3940
require "elastic_graph/graphql/datastore_query/index_expression_builder"
4041
require "elastic_graph/graphql/datastore_query/paginator"
4142
require "elastic_graph/graphql/datastore_query/routing_picker"
43+
require "elastic_graph/graphql/datastore_query/typename_index_picker"
4244

4345
# Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed
4446
# by query), yielding them to the caller, and then post-processing the results. The caller is
@@ -300,6 +302,14 @@ def ignored_values_for_routing
300302
@ignored_values_for_routing ||= search_index_definitions.flat_map { |i| i.ignored_values_for_routing.to_a }.to_set
301303
end
302304

305+
# Returns the index definitions this query needs to search, narrowed according to any
# `__typename` filter found in `client_filters`.
#
# The bare `super` invokes the generated Data accessor, which yields the value that was
# passed to `new_query` as `search_index_definitions:` at construction time (i.e. the
# un-narrowed list). The narrowed result is memoized in `@narrowed_search_index_definitions`.
def search_index_definitions
  @narrowed_search_index_definitions ||= begin
    unnarrowed_definitions = super

    typename_index_picker.pick_index_definitions(
      client_filters.to_a,
      index_definitions_by_possible_typename,
      unnarrowed_definitions
    )
  end
end
312+
303313
def to_datastore_body
304314
@to_datastore_body ||= aggregations_datastore_body
305315
.merge(document_paginator.to_datastore_body)
@@ -360,8 +370,16 @@ def index_expression_builder
360370
)
361371
end
362372

373+
# Lazily builds and memoizes the `TypenameIndexPicker`, wiring it up with this builder's
# filter node interpreter and the schema element names from the runtime metadata.
def typename_index_picker
  return @typename_index_picker if @typename_index_picker

  @typename_index_picker = TypenameIndexPicker.new(
    schema_names: runtime_metadata.schema_element_names,
    filter_node_interpreter: filter_node_interpreter
  )
end
379+
363380
def new_query(
364381
search_index_definitions:,
382+
index_definitions_by_possible_typename: {},
365383
client_filters: [],
366384
internal_filters: [],
367385
sort: [],
@@ -388,9 +406,11 @@ def new_query(
388406
DatastoreQuery.new(
389407
routing_picker: routing_picker,
390408
index_expression_builder: index_expression_builder,
409+
typename_index_picker: typename_index_picker,
391410
logger: logger,
392411
schema_element_names: runtime_metadata.schema_element_names,
393412
search_index_definitions: search_index_definitions,
413+
index_definitions_by_possible_typename: index_definitions_by_possible_typename,
394414
client_filters: client_filters.to_set,
395415
internal_filters: internal_filters.to_set,
396416
sort: sort,
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/graphql/filtering/filter_value_set_extractor"
10+
11+
module ElasticGraph
12+
class GraphQL
13+
class DatastoreQuery
14+
# Responsible for narrowing the set of searched indices based on a `__typename` filter.
# When a query filters on `__typename`, any index that contains none of the requested
# concrete types can be skipped, avoiding unnecessary datastore queries.
#
# Mirrors the structure of `RoutingPicker`, which performs a similar narrowing for shards.
class TypenameIndexPicker
  # Builds the `FilterValueSetExtractor` used to turn filter hashes into a `TypenameValueSet`.
  # `filter_node_interpreter` and `schema_names` are passed straight through to the extractor,
  # along with the "all values" and "no values" sets.
  def initialize(filter_node_interpreter:, schema_names:)
    @filter_value_set_extractor = Filtering::FilterValueSetExtractor.new(
      filter_node_interpreter,
      schema_names,
      TypenameValueSet::ALL,
      TypenameValueSet::EMPTY
    ) do |operator, filter_value|
      # Only `equal_to_any_of` is translated into a concrete set of typenames (with `nil`
      # entries dropped). For every other operator the block returns `nil`.
      if operator == :equal_to_any_of
        TypenameValueSet.of(filter_value.compact)
      end
    end
  end

  # Given a list of `filter_hashes`, a map of `{ type_name => [index_definition] }`, and the
  # full set of `search_index_definitions`, returns the narrowed list of index definitions that
  # need to be searched.
  #
  # Returns `search_index_definitions` unchanged when the type map is empty or when the
  # extractor yields no value set (`nil`). Otherwise returns the index definitions of every
  # typename contained in the extracted set, with duplicates removed: several concrete types
  # may live in the same index, and that index must only be listed once.
  def pick_index_definitions(filter_hashes, index_definitions_by_possible_typename, search_index_definitions)
    # An empty map means the caller did not opt into typename-based narrowing (e.g. a direct
    # `new_query` call that omits `index_definitions_by_possible_typename`).
    return search_index_definitions if index_definitions_by_possible_typename.empty?

    typename_set = @filter_value_set_extractor.extract_filter_value_set(filter_hashes, ["__typename"])
    return search_index_definitions if typename_set.nil?

    index_definitions_by_possible_typename
      .flat_map { |type_name, index_defs| typename_set.include?(type_name) ? index_defs : [] }
      .uniq
  end
end
50+
51+
# An immutable set of type names that supports set algebra even when the set is unbounded.
#
# `type` is either `:inclusive` (the set contains exactly `type_names`) or `:exclusive` (the
# set contains every type name *except* `type_names`). This representation lets `negate`,
# `union` and `intersection` all be computed without enumerating every possible type name.
class TypenameValueSet < Data.define(:type, :type_names)
  # Maps each `type` to the `type` of its complement; used by `negate`.
  INVERTED_TYPES = {inclusive: :exclusive, exclusive: :inclusive}.freeze

  # Builds a set containing exactly the given `type_names`.
  def self.of(type_names)
    # `to_set` returns the receiver itself when given a `Set`, so take a private frozen copy:
    # later mutation of the caller's collection must not change this value object, and the
    # caller's collection must not get frozen as a side effect.
    new(:inclusive, type_names.to_set.dup.freeze)
  end

  # Builds a set containing every type name except the given `type_names`.
  def self.of_all_except(type_names)
    new(:exclusive, type_names.to_set.dup.freeze)
  end

  # The set of all type names (excludes nothing).
  ALL = of_all_except([])
  # The set of no type names (includes nothing).
  EMPTY = of([])

  # Returns true if `type_name` is a member of this set.
  def include?(type_name)
    inclusive? ? type_names.include?(type_name) : !type_names.include?(type_name)
  end

  # Returns the set of type names that are members of both `self` and `other`.
  def intersection(other)
    if inclusive? && other.inclusive?
      # Since both sets are inclusive, we can just delegate to `Set#intersection` here.
      TypenameValueSet.of(type_names.intersection(other.type_names))
    elsif exclusive? && other.exclusive?
      # Since both sets are exclusive, we need to return an exclusive set of the union of the
      # excluded values. For example:
      #
      # s1 = TypenameValueSet.of_all_except(["Widget"]) # everything except Widget
      # s2 = TypenameValueSet.of_all_except(["Address"]) # everything except Address
      #
      # s3 = s1.intersection(s2)
      #
      # Here s3 would be everything except Widget or Address (the same as `of_all_except(["Widget", "Address"])`)
      TypenameValueSet.of_all_except(type_names.union(other.type_names))
    else
      # Since one set is inclusive and one is exclusive, we need an inclusive set of
      # `included_names - excluded_names`. For example:
      #
      # s1 = TypenameValueSet.of(["Widget", "Address"]) # Widget, Address
      # s2 = TypenameValueSet.of_all_except(["Address"]) # everything except Address
      #
      # s3 = s1.intersection(s2)
      #
      # Here s3 would be just Widget.
      included_names = inclusive? ? type_names : other.type_names
      excluded_names = inclusive? ? other.type_names : type_names
      TypenameValueSet.of(included_names - excluded_names)
    end
  end

  # Returns the set of type names that are members of `self`, `other`, or both.
  def union(other)
    if inclusive? && other.inclusive?
      # Since both sets are inclusive, we can just delegate to `Set#union` here.
      TypenameValueSet.of(type_names.union(other.type_names))
    elsif exclusive? && other.exclusive?
      # Since both sets are exclusive, only names excluded from *both* are excluded from the union.
      # For example:
      #
      # s1 = TypenameValueSet.of_all_except(["Widget", "Address"]) # everything except Widget, Address
      # s2 = TypenameValueSet.of_all_except(["Address", "Manufacturer"]) # everything except Address, Manufacturer
      #
      # s3 = s1.union(s2)
      #
      # Widget is in s2 and Manufacturer is in s1, so both appear in the union.
      # Only Address is excluded from both, so s3 is everything except Address
      # (the same as `of_all_except(["Address"])`, i.e. the intersection of the exclusion sets).
      TypenameValueSet.of_all_except(type_names.intersection(other.type_names))
    elsif inclusive?
      # s1 is inclusive, s2 is exclusive: return exclusive set of `excluded - included`.
      # For example:
      #
      # s1 = TypenameValueSet.of(["Widget"]) # Widget
      # s2 = TypenameValueSet.of_all_except(["Widget", "Address"]) # everything except Widget, Address
      #
      # s3 = s1.union(s2)
      #
      # Widget is already in s1, so it appears in the union.
      # Address is excluded from s2 and not in s1, so it remains excluded.
      # Here s3 is everything except Address (the same as `of_all_except(["Address"])`)
      TypenameValueSet.of_all_except(other.type_names - type_names)
    else
      # s1 is exclusive, s2 is inclusive: symmetric to the above.
      TypenameValueSet.of_all_except(type_names - other.type_names)
    end
  end

  # Returns the complement of this set: the same `type_names` with the opposite `type`.
  def negate
    with(type: INVERTED_TYPES.fetch(type))
  end

  # `protected` (rather than `private`) so that `intersection` and `union` can call these
  # predicates on `other`.
  protected

  def inclusive?
    type == :inclusive
  end

  def exclusive?
    type == :exclusive
  end
end
150+
151+
# `TypenameIndexPicker` exists only for use by `DatastoreQuery` and is effectively private.
152+
private_constant :TypenameIndexPicker
153+
# `TypenameValueSet` exists only for use here and is effectively private.
154+
private_constant :TypenameValueSet
155+
156+
# Steep is complaining that it can't find some `DatastoreQuery` methods but they are not in this file...
157+
# @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, with, to_datastore_msearch_header_and_body
158+
end
159+
end
160+
end

elasticgraph-graphql/lib/elastic_graph/graphql/resolvers/query_adapter.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def build_new_query_from(field, args, lookahead, context, monotonic_clock_deadli
6565

6666
initial_query = @datastore_query_builder.new_query(
6767
search_index_definitions: unwrapped_type.search_index_definitions,
68+
index_definitions_by_possible_typename: unwrapped_type.index_definitions_by_possible_typename,
6869
monotonic_clock_deadline: monotonic_clock_deadline
6970
)
7071

elasticgraph-graphql/lib/elastic_graph/graphql/schema/type.rb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,21 @@ def subtypes
134134
.to_set
135135
end
136136

137+
# Builds (and memoizes) a hash keyed by concrete typename whose values are that type's
# index definitions:
#
# - Derived types (e.g. indexed aggregations) share their source document type's indices, so
#   they delegate to the source type. That way `_typename` filters match concrete document
#   typenames rather than the derived type name.
# - Abstract types get one entry per subtype.
# - Any other type gets a single entry for itself.
def index_definitions_by_possible_typename
  return @index_definitions_by_possible_typename if @index_definitions_by_possible_typename

  derived_from = source_type

  @index_definitions_by_possible_typename =
    if derived_from
      derived_from.index_definitions_by_possible_typename
    elsif abstract?
      subtypes.each_with_object({}) do |subtype, definitions_by_name|
        definitions_by_name[subtype.name] = subtype.search_index_definitions
      end
    else
      {name => search_index_definitions}
    end
end
151+
137152
# For derived types (e.g. indexed aggregations), returns the underlying source document type.
138153
# Returns `nil` for non-derived types.
139154
def source_type
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
module ElasticGraph
2+
class GraphQL
3+
class DatastoreQuery
4+
# Signatures for the picker that narrows the searched index definitions based on a
# `__typename` filter.
class TypenameIndexPicker
5+
def initialize: (
6+
filter_node_interpreter: Filtering::FilterNodeInterpreter,
7+
schema_names: SchemaArtifacts::RuntimeMetadata::SchemaElementNames
8+
) -> void
9+
10+
# Positional arguments, in order: the filter hashes, the map of typename to index
# definitions, and the full list of search index definitions. Returns the index
# definitions to search.
def pick_index_definitions: (
11+
::Array[::Hash[::String, untyped]],
12+
::Hash[::String, ::Array[DatastoreCore::_IndexDefinition]],
13+
::Array[DatastoreCore::_IndexDefinition]
14+
) -> ::Array[DatastoreCore::_IndexDefinition]
15+
16+
private
17+
18+
@filter_value_set_extractor: Filtering::FilterValueSetExtractor[TypenameValueSet]
19+
end
20+
21+
# The two kinds of `TypenameValueSet`: `:inclusive` holds exactly the listed type names,
# `:exclusive` holds every type name except the listed ones.
type typenameValueSetType = :inclusive | :exclusive
22+
23+
# Describes the members and `with` methods of the `Data.define(:type, :type_names)`
# superclass that `TypenameValueSet` inherits from in the Ruby implementation.
# NOTE(review): presumably declared separately because the anonymous `Data.define(...)`
# superclass cannot be named directly in RBS -- confirm.
class TypenameValueSetSupertype
24+
attr_reader type: typenameValueSetType
25+
attr_reader type_names: ::Set[::String]
26+
27+
def initialize: (typenameValueSetType, ::Set[::String]) -> void
28+
def self.with: (
29+
type: typenameValueSetType,
30+
type_names: ::Set[::String]
31+
) -> TypenameValueSet
32+
33+
def with: (
34+
?type: typenameValueSetType,
35+
?type_names: ::Set[::String]
36+
) -> TypenameValueSet
37+
end
38+
39+
# A set of type names supporting `union`, `intersection` and `negate` (declared here via the
# `Support::_NegatableSet` interface).
class TypenameValueSet < TypenameValueSetSupertype
40+
include Support::_NegatableSet[TypenameValueSet]
41+
def self.of: (::Enumerable[::String]) -> TypenameValueSet
42+
def self.of_all_except: (::Enumerable[::String]) -> TypenameValueSet
43+
44+
ALL: TypenameValueSet
45+
EMPTY: TypenameValueSet
46+
INVERTED_TYPES: ::Hash[typenameValueSetType, typenameValueSetType]
47+
48+
def include?: (::String) -> bool
49+
50+
# These two predicates are `protected` in the Ruby implementation.
def inclusive?: () -> bool
51+
def exclusive?: () -> bool
52+
end
53+
end
54+
end
55+
end

elasticgraph-graphql/sig/elastic_graph/graphql/schema/type.rbs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ module ElasticGraph
1111
def search_index_definitions: () -> ::Array[DatastoreCore::_IndexDefinition]
1212
def source_type: () -> Type?
1313
def subtypes: () -> ::Set[Type]
14+
def index_definitions_by_possible_typename: () -> ::Hash[::String, ::Array[DatastoreCore::_IndexDefinition]]
1415
def shares_index_with_non_subtypes?: () -> bool
1516
def unwrap_fully: () -> Type
1617
def field_named: (::String) -> Field

elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/merge_spec.rb

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class GraphQL
2121
# These are derived from app state and don't vary in two different queries for the same app,
2222
# so we don't have to deal with merging them.
2323
app_level_attributes = %i[
24-
logger filter_interpreter routing_picker index_expression_builder
24+
logger filter_interpreter routing_picker index_expression_builder typename_index_picker
2525
default_page_size max_page_size schema_element_names
2626
]
2727

@@ -50,6 +50,16 @@ class GraphQL
5050
}.to raise_error ArgumentError, a_string_including("search_index_definitions")
5151
end
5252

53+
it "does not allow `index_definitions_by_possible_typename` to be overridden", covers: :index_definitions_by_possible_typename do
54+
widgets_def = graphql.datastore_core.index_definitions_by_name.fetch("widgets")
55+
56+
query = new_query(index_definitions_by_possible_typename: {"Widget" => [widgets_def]})
57+
58+
expect {
59+
query.merge_with(index_definitions_by_possible_typename: {"Widget" => [widgets_def]})
60+
}.to raise_error ArgumentError, a_string_including("index_definitions_by_possible_typename")
61+
end
62+
5363
%i[client_filters internal_filters].each do |filter_attr|
5464
describe ":#{filter_attr}", covers: filter_attr do
5565
it "can merge `equal_to_any_of` conditions from two separate queries that are on separate fields" do

0 commit comments

Comments
 (0)