Skip to content

Commit b3d6b71

Browse files
Narrow queried indices based on __typename filters (#1229)
When a query on an abstract type filters on `__typename`, we can skip any index that contains none of the requested concrete types. For example, querying `DistributionChannel` with `__typename: {equal_to_any_of: [PhysicalStore]}` only needs to hit `physical_stores`, not `distribution_channels`. `TypenameFilter` determines the subset of type names satisfying the `__typename` filter using full set-algebra support across `not`, `any_of`, and `all_of` combinators. `DatastoreQuery` uses this via `narrowed_search_index_definitions`, which intersects `initial_search_index_definitions` with only the indices that could contain the filtered types. Closes #1179
1 parent 8be4954 commit b3d6b71

10 files changed

Lines changed: 144 additions & 18 deletions

File tree

elasticgraph-graphql/lib/elastic_graph/graphql.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def datastore_query_builder
124124
filter_node_interpreter:,
125125
runtime_metadata:,
126126
logger:,
127+
index_definitions_by_type_name: @datastore_core.index_definitions_by_graphql_type,
127128
default_page_size: @config.default_page_size,
128129
max_page_size: @config.max_page_size
129130
)

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class GraphQL
2929
class DatastoreQuery < Support::MemoizableData.define(
3030
:total_document_count_needed, :aggregations, :logger, :filter_interpreter, :routing_picker,
3131
:index_expression_builder, :default_page_size, :initial_search_index_definitions, :max_page_size,
32+
:typename_filter, :index_definitions_by_type_name,
3233
:client_filters, :internal_filters, :sort, :document_pagination,
3334
:requested_fields, :request_all_fields, :requested_highlights, :request_all_highlights,
3435
:individual_docs_needed, :size_multiplier, :monotonic_clock_deadline, :schema_element_names
@@ -39,6 +40,7 @@ class DatastoreQuery < Support::MemoizableData.define(
3940
require "elastic_graph/graphql/datastore_query/index_expression_builder"
4041
require "elastic_graph/graphql/datastore_query/paginator"
4142
require "elastic_graph/graphql/datastore_query/routing_picker"
43+
require "elastic_graph/graphql/filtering/typename_filter"
4244

4345
# Performs a list of queries by building a hash of datastore msearch header/body tuples (keyed
4446
# by query), yielding them to the caller, and then post-processing the results. The caller is
@@ -141,13 +143,29 @@ def to_datastore_msearch_header_and_body
141143
# @!attribute [r] initial_search_index_definitions
142144
# The index definitions as provided at construction, before any subsequent adjustments.
143145

146+
# Returns the narrowed set of index definitions to search, based on any `__typename` filter
147+
# in the client filters. Falls back to `initial_search_index_definitions` when there is no
148+
# `__typename` filter, i.e. when the client filters place no constraint on `__typename`.
149+
def narrowed_search_index_definitions
150+
@narrowed_search_index_definitions ||= begin
151+
filtered_type_names = typename_filter.filtered_type_names(client_filters.to_a)
152+
153+
if filtered_type_names
154+
possible_index_defs = index_definitions_by_type_name.slice(*filtered_type_names).values.flatten
155+
initial_search_index_definitions & possible_index_defs
156+
else
157+
initial_search_index_definitions
158+
end
159+
end
160+
end
161+
144162
# Returns an index_definition expression string to use for searches. This string can specify
145163
# multiple indices, use wildcards, etc. For info about what is supported, see:
146164
# https://www.elastic.co/guide/en/elasticsearch/reference/current/multi-index.html
147165
def search_index_expression
148166
@search_index_expression ||= index_expression_builder.determine_search_index_expression(
149167
all_filters,
150-
initial_search_index_definitions,
168+
narrowed_search_index_definitions,
151169
# When we have aggregations, we must require indices to search. When we search no indices, the datastore does not return
152170
# the standard aggregations response structure, which causes problems.
153171
require_indices: !aggregations_datastore_body.empty?
@@ -348,14 +366,22 @@ def highlight
348366

349367
# Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
350368
# to build queries when desired.
351-
class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size)
369+
class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size, :index_definitions_by_type_name)
352370
def routing_picker
353371
@routing_picker ||= RoutingPicker.new(
354372
filter_node_interpreter: filter_node_interpreter,
355373
schema_names: runtime_metadata.schema_element_names
356374
)
357375
end
358376

377+
def typename_filter
378+
@typename_filter ||= Filtering::TypenameFilter.new(
379+
filter_node_interpreter: filter_node_interpreter,
380+
schema_names: runtime_metadata.schema_element_names,
381+
known_type_names: index_definitions_by_type_name.keys
382+
)
383+
end
384+
359385
def index_expression_builder
360386
@index_expression_builder ||= IndexExpressionBuilder.new(
361387
filter_node_interpreter: filter_node_interpreter,
@@ -390,10 +416,12 @@ def new_query(
390416

391417
DatastoreQuery.new(
392418
routing_picker: routing_picker,
419+
typename_filter: typename_filter,
393420
index_expression_builder: index_expression_builder,
394421
logger: logger,
395422
schema_element_names: runtime_metadata.schema_element_names,
396423
initial_search_index_definitions: initial_search_index_definitions,
424+
index_definitions_by_type_name: index_definitions_by_type_name,
397425
client_filters: client_filters.to_set,
398426
internal_filters: internal_filters.to_set,
399427
sort: sort,

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ def +(other)
141141
# `Query::IndexExpressionBuilder` exists only for use by `Query` and is effectively private.
142142
private_constant :IndexExpressionBuilder
143143

144-
# Steep is complaining that it can't find some `Query` but they are not in this file...
145-
# @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, with, to_datastore_msearch_header_and_body
144+
# Steep can't find implementations of these `DatastoreQuery` methods because they're defined in `datastore_query.rb`, not in this file.
145+
# @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, narrowed_search_index_definitions, with, to_datastore_msearch_header_and_body
146146
end
147147
end
148148
end

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query/routing_picker.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def extract_eligible_routing_values(filter_hashes, routing_field_paths)
5353
# `Query::RoutingPicker` exists only for use by `Query` and is effectively private.
5454
private_constant :RoutingPicker
5555

56-
# Steep is complaining that it can't find some `Query` but they are not in this file...
57-
# @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, with, to_datastore_msearch_header_and_body
56+
# Steep can't find implementations of these `DatastoreQuery` methods because they're defined in `datastore_query.rb`, not in this file.
57+
# @dynamic shard_routing_values, effective_size, merge_with, search_index_expression, narrowed_search_index_definitions, with, to_datastore_msearch_header_and_body
5858
end
5959
end
6060
end
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/graphql/filtering/filter_value_set_extractor"
10+
11+
module ElasticGraph
12+
class GraphQL
13+
module Filtering
14+
# Responsible for extracting a constrained set of concrete type names from query filters,
15+
# based on a `__typename` filter.
16+
class TypenameFilter
17+
def initialize(filter_node_interpreter:, schema_names:, known_type_names:)
18+
@extractor = FilterValueSetExtractor.for_equality(filter_node_interpreter, schema_names)
19+
@known_type_names = known_type_names
20+
end
21+
22+
# Returns the subset of `known_type_names` that satisfy any `__typename` filter in
23+
# `filter_hashes`. Returns `nil` if the filters place no constraint on `__typename`,
24+
# meaning all type names are potentially matched.
25+
def filtered_type_names(filter_hashes)
26+
typename_set = @extractor.extract_filter_value_set(filter_hashes, ["__typename"])
27+
return nil unless typename_set
28+
29+
if typename_set.inclusive?
30+
typename_set.values.to_a
31+
else
32+
@known_type_names - typename_set.values.to_a
33+
end
34+
end
35+
end
36+
end
37+
end
38+
end

elasticgraph-graphql/sig/elastic_graph/graphql/datastore_query.rbs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ module ElasticGraph
22
class GraphQL
33
class DatastoreQuerySupertype
44
attr_reader initial_search_index_definitions: ::Array[DatastoreCore::_IndexDefinition]
5+
attr_reader index_definitions_by_type_name: ::Hash[::String, ::Array[DatastoreCore::_IndexDefinition]]
56
attr_reader aggregations: ::Hash[::String, Aggregation::Query]
67
attr_reader document_paginator: DatastoreQuery::DocumentPaginator
78
attr_reader total_document_count_needed: bool
89
end
910

1011
# Note: this is a partial signature definition
1112
class DatastoreQuery < DatastoreQuerySupertype
13+
def narrowed_search_index_definitions: () -> ::Array[DatastoreCore::_IndexDefinition]
1214
def shard_routing_values: () -> ::Array[::String]?
1315
def merge_with: (**untyped) -> DatastoreQuery
1416
def search_index_expression: () -> ::String
@@ -21,6 +23,7 @@ module ElasticGraph
2123
def self.new: (
2224
runtime_metadata: SchemaArtifacts::RuntimeMetadata::Schema,
2325
logger: ::Logger,
26+
index_definitions_by_type_name: ::Hash[::String, ::Array[DatastoreCore::_IndexDefinition]],
2427
**untyped
2528
) -> Builder
2629

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
module ElasticGraph
2+
class GraphQL
3+
module Filtering
4+
class TypenameFilter
5+
def initialize: (
6+
filter_node_interpreter: FilterNodeInterpreter,
7+
schema_names: SchemaArtifacts::RuntimeMetadata::SchemaElementNames,
8+
known_type_names: ::Array[::String]
9+
) -> void
10+
11+
def filtered_type_names: (
12+
::Array[::Hash[::String, untyped]]
13+
) -> ::Array[::String]?
14+
15+
private
16+
17+
@extractor: FilterValueSetExtractor[EqualityValueSet]
18+
@known_type_names: ::Array[::String]
19+
end
20+
end
21+
end
22+
end

elasticgraph-graphql/spec/acceptance/search_spec.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,9 @@ module ElasticGraph
701701
it "correctly scopes results to the queried interface level across a multi-level type hierarchy", :expect_search_routing do
702702
established_on_asc = :"#{case_correctly("established_on")}_ASC"
703703
id_desc = :"#{case_correctly("id")}_DESC"
704+
dc_index = index_definition_name_for("distribution_channels")
705+
ps_index = index_definition_name_for("physical_stores")
706+
both_indices = "#{dc_index},#{ps_index}"
704707

705708
# The DistributionChannel hierarchy has two branches:
706709
# DistributionChannel (index: distribution_channels)
@@ -738,33 +741,39 @@ module ElasticGraph
738741
expect(channels.map { |c| c["__typename"] }).to contain_exactly(
739742
*expected_store_typenames, "DirectWholesaler", "BrokerWholesaler"
740743
)
744+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
741745

742746
# Querying at the Retail interface excludes wholesalers, even though they live in
743747
# the same distribution_channels index.
744748
retailers = list_retailers_with(*store_fragments)
745749
expect(retailers.map { |r| r["__typename"] }).to contain_exactly(*expected_store_typenames)
750+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
746751

747752
# Querying at the Store interface likewise excludes wholesalers.
748753
stores = list_stores_with(*store_fragments)
749754
expect(stores.map { |s| s["__typename"] }).to contain_exactly(*expected_store_typenames)
755+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
750756

751757
# Using `nodes` (instead of `edges { node }`) also works. This exercises the `nodes`
752758
# code path where the field type is list-wrapped (e.g. `[Store!]!`).
753759
stores_via_nodes = list_stores_via_nodes_with(*store_fragments)
754760
expect(stores_via_nodes.map { |s| s["__typename"] }).to contain_exactly(*expected_store_typenames)
761+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
755762

756763
# Filters apply within the correct scope at each level.
757764
# At distribution_channels: active=false matches only wholesaler2.
758765
inactive = list_distribution_channels_with(*all_channel_fragments, filter: {active: {equal_to_any_of: [false]}})
759766
expect(inactive.map { |c| c["__typename"] }).to contain_exactly("BrokerWholesaler")
760767
expect(inactive.map { |c| c["id"] }).to contain_exactly(wholesaler2.fetch(:id))
768+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
761769

762770
# At retailers: established_on filter applies, and wholesalers are still excluded.
763771
retailers_after_2020 = list_retailers_with(*store_fragments, filter: {established_on: {gte: "2020-01-01"}})
764772
expect(retailers_after_2020.map { |r| r["id"] }).to contain_exactly(
765773
online_store1.fetch(:id), online_store2.fetch(:id),
766774
physical_store2.fetch(:id)
767775
)
776+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
768777

769778
# Sort by established_on at the stores level spans both indices correctly.
770779
stores_sorted = list_stores_with(*store_fragments, order_by: [established_on_asc])
@@ -774,6 +783,7 @@ module ElasticGraph
774783
online_store2.fetch(:id),
775784
physical_store2.fetch(:id)
776785
])
786+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
777787

778788
# Pagination at the distribution_channels level covers all types.
779789
channels_page, page_info = list_distribution_channels_and_page_info_with(
@@ -783,6 +793,7 @@ module ElasticGraph
783793
)
784794
expect(channels_page.size).to eq(4)
785795
expect(page_info).to include(case_correctly("has_next_page") => true)
796+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
786797

787798
# Filter by ID spans indices and respects the __typename scope at each query level.
788799
stores_by_id = list_stores_with(
@@ -793,11 +804,13 @@ module ElasticGraph
793804
[physical_store1.fetch(:id), "PhysicalStore"],
794805
[online_store1.fetch(:id), "OnlineStore"]
795806
)
807+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
796808

797809
# Aggregations respect the same __typename scoping as document queries.
798810
store_agg_count = call_graphql_query("query { #{case_correctly("store_aggregations")} { nodes { #{case_correctly("count")} } } }")
799811
.dig("data", case_correctly("store_aggregations"), "nodes", 0, case_correctly("count"))
800812
expect(store_agg_count).to eq(expected_store_typenames.size)
813+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
801814

802815
# `_typename` filter allows querying by concrete subtype across multiple indexes and
803816
# branches of the type hierarchy. `DirectWholesaler` is in the shared `distribution_channels`
@@ -808,6 +821,23 @@ module ElasticGraph
808821
filter: {typename_key => {equal_to_any_of: ["DirectWholesaler", "PhysicalStore"]}}
809822
)
810823
expect(wholesaler_or_physical.map { |c| c["__typename"] }).to contain_exactly("DirectWholesaler", "PhysicalStore", "PhysicalStore")
824+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
825+
826+
# A __typename filter targeting only PhysicalStore narrows to just the physical_stores index.
827+
physical_stores_only = list_distribution_channels_with(
828+
*all_channel_fragments,
829+
filter: {typename_key => {equal_to_any_of: ["PhysicalStore"]}}
830+
)
831+
expect(physical_stores_only.map { |c| c["__typename"] }).to contain_exactly("PhysicalStore", "PhysicalStore")
832+
expect(index_search_expressions_from_queries("main").last(1)).to eq [ps_index]
833+
834+
# A __typename filter targeting both wholesaler types narrows to just the distribution_channels index.
835+
wholesalers_only = list_distribution_channels_with(
836+
*all_channel_fragments,
837+
filter: {typename_key => {equal_to_any_of: ["DirectWholesaler", "BrokerWholesaler"]}}
838+
)
839+
expect(wholesalers_only.map { |c| c["__typename"] }).to contain_exactly("DirectWholesaler", "BrokerWholesaler")
840+
expect(index_search_expressions_from_queries("main").last(1)).to eq [dc_index]
811841

812842
# `_typename` filter interacts correctly with automatic `__typename` scoping at a sub-interface level.
813843
# Filtering `retailers` to `OnlineStore OR PhysicalStore` returns all retailers (the full set),
@@ -817,6 +847,7 @@ module ElasticGraph
817847
filter: {typename_key => {equal_to_any_of: ["OnlineStore", "PhysicalStore"]}}
818848
)
819849
expect(all_retailers.map { |r| r["__typename"] }).to contain_exactly(*expected_store_typenames)
850+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
820851

821852
# `_typename` filter also works on aggregations, including across indexes.
822853
wholesaler_or_physical_agg_count = call_graphql_query(<<~QUERY)
@@ -830,6 +861,7 @@ module ElasticGraph
830861
QUERY
831862
.dig("data", case_correctly("distribution_channel_aggregations"), "nodes", 0, case_correctly("count"))
832863
expect(wholesaler_or_physical_agg_count).to eq(wholesaler_or_physical.size)
864+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
833865

834866
# all_highlights resolves against the concrete type (OnlineStore), not the abstract root
835867
# (DistributionChannel). OnlineStore.name is absent from DistributionChannel — without
@@ -842,6 +874,7 @@ module ElasticGraph
842874
{"path" => ["name"], "snippets" => ["<em>Example Marketplace</em>"]}
843875
]
844876
})
877+
expect(index_search_expressions_from_queries("main").last(1)).to eq [both_indices]
845878
end
846879

847880
it "supports querying a type that is both indexed (via interface inheritance) and embedded as a field on another type" do

elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/merge_spec.rb

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,26 @@ class GraphQL
1818
include_context "DatastoreQueryUnitSupport"
1919

2020
before(:context) do
21-
# These are derived from app state and don't vary in two different queries for the same app,
22-
# so we don't have to deal with merging them.
23-
app_level_attributes = %i[
24-
logger filter_interpreter routing_picker index_expression_builder
25-
default_page_size max_page_size schema_element_names
26-
]
27-
28-
@attributes_needing_merge_test_coverage = (DatastoreQuery.members - app_level_attributes).to_set
21+
@attributes_needing_merge_test_coverage = DatastoreQuery::Builder.instance_method(:new_query).parameters.map(&:last).to_set
22+
@attributes_covered = ::Set.new
2923
end
3024

3125
before(:example) do |ex|
3226
Array(ex.metadata[:covers]).each do |attribute|
33-
@attributes_needing_merge_test_coverage.delete(attribute)
27+
if @attributes_needing_merge_test_coverage.include?(attribute)
28+
@attributes_covered << attribute
29+
else
30+
# :nocov: -- only executed when a test has a typo in its `covers:` metadata
31+
raise "Attribute `#{attribute}` (from `covers: :#{attribute}`) does not appear to need coverage. Did you misspell it?"
32+
# :nocov:
33+
end
3434
end
3535
end
3636

3737
after(:context) do
38-
expect(@attributes_needing_merge_test_coverage).to be_empty, "`#merge` tests are expected to cover all attributes, " \
39-
"but the following do not appear to have coverage: #{@attributes_needing_merge_test_coverage}"
38+
untested_attribute = @attributes_needing_merge_test_coverage - @attributes_covered
39+
expect(untested_attribute).to be_empty, "`#merge` tests are expected to cover all attributes, " \
40+
"but the following do not appear to have coverage: #{untested_attribute}"
4041
end
4142

4243
it "does not allow `initial_search_index_definitions` to be overridden", covers: :initial_search_index_definitions do

0 commit comments

Comments
 (0)