Skip to content

Commit a6256d3

Browse files
jwils and claude committed
Support retrieved_from: :doc_values for direct leaf fields (#1110)
Add a narrow secondary retrieval path for fields that should stay returnable in GraphQL while being excluded from stored `_source`.

When a field is marked `retrieved_from: :doc_values`, ElasticGraph:

- keeps the field in GraphQL output types
- excludes the field from datastore `_source` via `_source.excludes`
- requests the field through datastore `docvalue_fields`
- resolves the field from `fields` in datastore hits when `_source` omits it

The change is intentionally narrow: only direct, non-list, non-text GraphQL leaf fields on indexed root document types are supported. Query planning only uses `docvalue_fields` when every participating index definition agrees on the retrieval method.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 182fa35 commit a6256d3

36 files changed

Lines changed: 670 additions & 37 deletions

File tree

config/schema/artifacts/datastore_config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,9 @@ index_templates:
14631463
required: true
14641464
_size:
14651465
enabled: true
1466+
_source:
1467+
excludes:
1468+
- workspace_id2
14661469
settings:
14671470
index.mapping.ignore_malformed: false
14681471
index.mapping.coerce: false
@@ -1505,6 +1508,9 @@ indices:
15051508
dynamic: 'false'
15061509
_size:
15071510
enabled: true
1511+
_source:
1512+
excludes:
1513+
- full_address
15081514
settings:
15091515
index.mapping.ignore_malformed: false
15101516
index.mapping.coerce: false

config/schema/artifacts/runtime_metadata.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,6 +1446,7 @@ index_definitions_by_name:
14461446
__counts.shapes|type:
14471447
source: __self
14481448
full_address:
1449+
retrieved_from: doc_values
14491450
source: __self
14501451
geo_location.lat:
14511452
source: __self
@@ -2613,6 +2614,7 @@ index_definitions_by_name:
26132614
weight_in_ng_str:
26142615
source: __self
26152616
workspace_id2:
2617+
retrieved_from: doc_values
26162618
source: __self
26172619
workspace_name:
26182620
source: workspace

config/schema/artifacts_with_apollo/datastore_config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,9 @@ index_templates:
14631463
required: true
14641464
_size:
14651465
enabled: true
1466+
_source:
1467+
excludes:
1468+
- workspace_id2
14661469
settings:
14671470
index.mapping.ignore_malformed: false
14681471
index.mapping.coerce: false
@@ -1505,6 +1508,9 @@ indices:
15051508
dynamic: 'false'
15061509
_size:
15071510
enabled: true
1511+
_source:
1512+
excludes:
1513+
- full_address
15081514
settings:
15091515
index.mapping.ignore_malformed: false
15101516
index.mapping.coerce: false

config/schema/artifacts_with_apollo/runtime_metadata.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,6 +1475,7 @@ index_definitions_by_name:
14751475
__counts.shapes|type:
14761476
source: __self
14771477
full_address:
1478+
retrieved_from: doc_values
14781479
source: __self
14791480
geo_location.lat:
14801481
source: __self
@@ -2642,6 +2643,7 @@ index_definitions_by_name:
26422643
weight_in_ng_str:
26432644
source: __self
26442645
workspace_id2:
2646+
retrieved_from: doc_values
26452647
source: __self
26462648
workspace_name:
26472649
source: workspace

config/schema/widgets.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@
7575
t.field "id", "ID!"
7676

7777
# Here we use an alternate name for this field since it's the routing field and want to verify
78-
# that `name_in_index` works correctly on routing fields.
79-
t.field "workspace_id", "ID", name_in_index: "workspace_id2"
78+
# that `name_in_index` works correctly on routing fields, including when fetched from doc values.
79+
t.field "workspace_id", "ID", name_in_index: "workspace_id2", retrieved_from: :doc_values
8080

8181
# It's a bit funny we have both `amount_cents` and `cost` but it's nice to be able to test
8282
# aggregations on both a root numeric field and on a nested one, so we are keeping both here.
@@ -367,7 +367,7 @@
367367
# We use `indexing_only: true` here to verify that `id` can be an indexing-only field.
368368
t.field "id", "ID!", indexing_only: true
369369

370-
t.field "full_address", "String!"
370+
t.field "full_address", "String!", retrieved_from: :doc_values
371371
t.field "timestamps", "AddressTimestamps"
372372
t.field "geo_location", "GeoLocation"
373373

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ def ignored_values_for_routing
303303
def to_datastore_body
304304
@to_datastore_body ||= aggregations_datastore_body
305305
.merge(document_paginator.to_datastore_body)
306-
.merge({highlight: highlight, query: filter_interpreter.build_query(all_filters), _source: source}.compact)
306+
.merge({docvalue_fields: docvalue_fields, highlight: highlight, query: filter_interpreter.build_query(all_filters), _source: source}.compact)
307307
end
308308

309309
def aggregations_datastore_body
@@ -323,13 +323,33 @@ def aggregations_datastore_body
323323
# we only ask for the fields we need to return.
324324
def source
325325
return true if request_all_fields
326-
requested_source_fields = requested_fields - ["id"]
326+
requested_source_fields = requested_fields_for_source - ["id"]
327327
return false if requested_source_fields.empty?
328328
# Merging in requested_fields as _source:{includes:} based on Elasticsearch documentation:
329329
# https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#include-exclude
330330
{includes: requested_source_fields.to_a}
331331
end
332332

333+
def docvalue_fields
334+
requested_docvalue_fields =
335+
if request_all_fields
336+
# When requesting all fields we send `_source: true`, but fields excluded from stored
337+
# `_source` (because they use `retrieved_from: :doc_values`) still need an alternative
338+
# retrieval path. We therefore request docvalue_fields for any field that ANY index
339+
# definition stores in doc values, unlike the selective path below which requires
340+
# unanimity across all index definitions.
341+
all_docvalue_fields
342+
else
343+
requested_fields.select do |field_path|
344+
requested_via_doc_values?(field_path)
345+
end
346+
end
347+
348+
return nil if requested_docvalue_fields.empty?
349+
350+
requested_docvalue_fields.to_a
351+
end
352+
333353
def highlight
334354
return nil if !request_all_highlights && requested_highlights.empty?
335355

@@ -343,6 +363,35 @@ def highlight
343363
{fields:, highlight_query:}.compact
344364
end
345365

366+
def requested_fields_for_source
367+
@requested_fields_for_source ||= requested_fields.reject do |field_path|
368+
requested_via_doc_values?(field_path)
369+
end
370+
end
371+
372+
def all_docvalue_fields
373+
@all_docvalue_fields ||= search_index_definitions.flat_map do |index_def|
374+
index_def.fields_by_path.filter_map do |field_path, field|
375+
field_path if field.retrieved_from_doc_values?
376+
end
377+
end.to_set
378+
end
379+
380+
# Returns true only when every participating index definition agrees the field should be
381+
# retrieved via doc values. When they disagree we fall back to `_source` so that source-backed
382+
# indices can return the field normally; the doc-values-backed index will also have the value
383+
# available in `_source` in that case (a disagreement like this should not happen in practice,
384+
# since `retrieved_from` is set once per field definition and propagates to all index definitions).
385+
def requested_via_doc_values?(field_path)
386+
return false if field_path == "id"
387+
388+
field_definitions = search_index_definitions.filter_map do |index_def|
389+
index_def.fields_by_path[field_path]
390+
end
391+
392+
field_definitions.any? && field_definitions.all?(&:retrieved_from_doc_values?)
393+
end
394+
346395
# Encapsulates dependencies of `Query`, giving us something we can expose off of `application`
347396
# to build queries when desired.
348397
class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size)

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/document.rb

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,23 @@
88

99
require "elastic_graph/graphql/decoded_cursor"
1010
require "elastic_graph/support/memoizable_data"
11+
require "elastic_graph/support/hash_util"
1112
require "forwardable"
1213

1314
module ElasticGraph
1415
class GraphQL
1516
module DatastoreResponse
17+
# @private
18+
# Sentinel value to distinguish "no default given" from an explicit `nil` default in {Document#fetch_value_at}.
19+
UNSET = ::Object.new.freeze
20+
1621
# Represents a document fetched from the datastore. Exposes both the raw metadata
1722
# provided by the datastore and the doc payload itself. In addition, you can treat
1823
# it just like a document hash using `#[]` or `#fetch`.
1924
Document = Support::MemoizableData.define(:raw_data, :payload, :decoded_cursor_factory) do
2025
# @implements Document
2126
extend Forwardable
2227

23-
def_delegators :payload, :[], :fetch
24-
2528
def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null)
2629
source = raw_data.fetch("_source") do
2730
{} # : ::Hash[::String, untyped]
@@ -51,6 +54,38 @@ def id
5154
raw_data["_id"]
5255
end
5356

57+
def [](key)
58+
return payload[key] if payload.key?(key)
59+
docvalue_field(key)&.first
60+
end
61+
62+
def fetch(key, default = UNSET)
63+
return payload[key] if payload.key?(key)
64+
if (field_values = docvalue_field(key))
65+
return field_values.first
66+
end
67+
return yield(key) if block_given?
68+
return default unless default.equal?(UNSET)
69+
raise KeyError, "key not found: #{key.inspect}"
70+
end
71+
72+
def fetch_value_at(path, default_value: UNSET)
73+
Support::HashUtil.fetch_value_at_path(payload, path) do
74+
if (field_values = docvalue_field(path.join(".")))
75+
next field_values.first
76+
end
77+
next yield(path) if block_given?
78+
next default_value unless default_value.equal?(UNSET)
79+
raise KeyError, "path not found: #{path.join(".")}"
80+
end
81+
end
82+
83+
def value_at(path)
84+
Support::HashUtil.fetch_value_at_path(payload, path) do
85+
docvalue_field(path.join("."))&.first
86+
end
87+
end
88+
5489
def sort
5590
raw_data["sort"]
5691
end
@@ -77,6 +112,14 @@ def to_s
77112
"#<#{self.class.name} #{datastore_path}>"
78113
end
79114
alias_method :inspect, :to_s
115+
116+
private
117+
118+
# Returns the doc_values field array for the given key, or nil if not present.
119+
# Datastore doc_values are always returned as arrays (e.g. `{"name" => ["Bob"]}`).
120+
def docvalue_field(key)
121+
raw_data.dig("fields", key)
122+
end
80123
end
81124
end
82125
end

elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/search_response.rb

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def filter_results(field_path, values, size)
114114
# `id` within `_source`, given it's available as `_id`.
115115
->(hit) { values.include?(hit.fetch("_id")) }
116116
else
117-
->(hit) { values.intersect?(Support::HashUtil.fetch_leaf_values_at_path(hit.fetch("_source"), field_path).to_set) }
117+
->(hit) { values.intersect?(hit_values_at_path(hit, field_path).to_set) }
118118
end
119119

120120
hits = raw_data.fetch("hits").fetch("hits").select(&filter).first(size)
@@ -131,6 +131,28 @@ def docs_description
131131
(documents.size < 3) ? documents.inspect : "[#{documents.first}, ..., #{documents.last}]"
132132
end
133133

134+
# Extracts leaf values from a hit, checking `_source` first and falling back to `fields`
135+
# (populated by `docvalue_fields` in the query). When a field is excluded from `_source`
136+
# (e.g. `retrieved_from: :doc_values`), the datastore still returns it under the `fields`
137+
# key because the query explicitly requested it via `docvalue_fields`.
138+
def hit_values_at_path(hit, field_path)
139+
source = hit["_source"]
140+
if source
141+
Support::HashUtil.fetch_leaf_values_at_path(source, field_path) do
142+
docvalue_fields_for(hit, field_path)
143+
end
144+
else
145+
docvalue_fields_for(hit, field_path)
146+
end
147+
end
148+
149+
def docvalue_fields_for(hit, field_path)
150+
fields = hit["fields"]
151+
joined_path = field_path.join(".")
152+
raise KeyError, "key not found: #{joined_path}" unless fields
153+
fields.fetch(joined_path)
154+
end
155+
134156
def total_document_count(default: nil)
135157
super() || default || raise(Errors::CountUnavailableError, "#{__method__} is unavailable; set `query.total_document_count_needed = true` to make it available")
136158
end

elasticgraph-graphql/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,14 @@ def initialize(elasticgraph_graphql:, config:)
1919
end
2020

2121
def resolve(field:, object:, args:, context:)
22-
data =
22+
value =
2323
case object
2424
when DatastoreResponse::Document
25-
object.payload
25+
object.value_at(field.path_in_index)
2626
else
27-
object
27+
Support::HashUtil.fetch_value_at_path(object, field.path_in_index) { nil }
2828
end
2929

30-
value = Support::HashUtil.fetch_value_at_path(data, field.path_in_index) { nil }
3130
value = [] if value.nil? && field.type.list?
3231

3332
if field.type.relay_connection?

elasticgraph-graphql/sig/elastic_graph/graphql/datastore_response/document.rbs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
module ElasticGraph
22
class GraphQL
33
module DatastoreResponse
4+
UNSET: ::Object
5+
46
class Document
57
extend Forwardable
68

@@ -29,6 +31,12 @@ module ElasticGraph
2931

3032
def []: (::String) -> untyped
3133
def fetch: (::String) -> untyped
34+
| (::String, untyped) -> untyped
35+
| (::String) { (::String) -> untyped } -> untyped
36+
def fetch_value_at: (::Array[::String]) -> untyped
37+
| (::Array[::String], default_value: untyped) -> untyped
38+
| (::Array[::String]) { (::Array[::String]) -> untyped } -> untyped
39+
def value_at: (::Array[::String]) -> untyped
3240
def index_name: () -> ::String
3341
def index_definition_name: () -> ::String
3442
def id: () -> ::String

0 commit comments

Comments
 (0)