diff --git a/config/schema/artifacts/datastore_config.yaml b/config/schema/artifacts/datastore_config.yaml index 8ed565868..c6595f236 100644 --- a/config/schema/artifacts/datastore_config.yaml +++ b/config/schema/artifacts/datastore_config.yaml @@ -1463,6 +1463,9 @@ index_templates: required: true _size: enabled: true + _source: + excludes: + - workspace_id2 settings: index.mapping.ignore_malformed: false index.mapping.coerce: false @@ -1505,6 +1508,9 @@ indices: dynamic: 'false' _size: enabled: true + _source: + excludes: + - full_address settings: index.mapping.ignore_malformed: false index.mapping.coerce: false diff --git a/config/schema/artifacts/runtime_metadata.yaml b/config/schema/artifacts/runtime_metadata.yaml index 3e367ec99..82847d26b 100644 --- a/config/schema/artifacts/runtime_metadata.yaml +++ b/config/schema/artifacts/runtime_metadata.yaml @@ -1446,6 +1446,7 @@ index_definitions_by_name: __counts.shapes|type: source: __self full_address: + retrieved_from: doc_values source: __self geo_location.lat: source: __self @@ -2613,6 +2614,7 @@ index_definitions_by_name: weight_in_ng_str: source: __self workspace_id2: + retrieved_from: doc_values source: __self workspace_name: source: workspace diff --git a/config/schema/artifacts_with_apollo/datastore_config.yaml b/config/schema/artifacts_with_apollo/datastore_config.yaml index 8ed565868..c6595f236 100644 --- a/config/schema/artifacts_with_apollo/datastore_config.yaml +++ b/config/schema/artifacts_with_apollo/datastore_config.yaml @@ -1463,6 +1463,9 @@ index_templates: required: true _size: enabled: true + _source: + excludes: + - workspace_id2 settings: index.mapping.ignore_malformed: false index.mapping.coerce: false @@ -1505,6 +1508,9 @@ indices: dynamic: 'false' _size: enabled: true + _source: + excludes: + - full_address settings: index.mapping.ignore_malformed: false index.mapping.coerce: false diff --git a/config/schema/artifacts_with_apollo/runtime_metadata.yaml b/config/schema/artifacts_with_apollo/runtime_metadata.yaml index fa616bfb3..06ddd12a2 100644 --- a/config/schema/artifacts_with_apollo/runtime_metadata.yaml +++ b/config/schema/artifacts_with_apollo/runtime_metadata.yaml @@ -1475,6 +1475,7 @@ index_definitions_by_name: __counts.shapes|type: source: __self full_address: + retrieved_from: doc_values source: __self geo_location.lat: source: __self @@ -2642,6 +2643,7 @@ index_definitions_by_name: weight_in_ng_str: source: __self workspace_id2: + retrieved_from: doc_values source: __self workspace_name: source: workspace diff --git a/config/schema/widgets.rb b/config/schema/widgets.rb index 7df5223fc..46ccaa6d9 100644 --- a/config/schema/widgets.rb +++ b/config/schema/widgets.rb @@ -75,8 +75,8 @@ t.field "id", "ID!" # Here we use an alternate name for this field since it's the routing field and want to verify - # that `name_in_index` works correctly on routing fields. - t.field "workspace_id", "ID", name_in_index: "workspace_id2" + # that `name_in_index` works correctly on routing fields, including when fetched from doc values. + t.field "workspace_id", "ID", name_in_index: "workspace_id2", retrieved_from: :doc_values # It's a bit funny we have both `amount_cents` and `cost` but it's nice to be able to test # aggregations on both a root numeric field and on a nested one, so we are keeping both here. @@ -367,7 +367,7 @@ # We use `indexing_only: true` here to verify that `id` can be an indexing-only field. t.field "id", "ID!", indexing_only: true - t.field "full_address", "String!" + t.field "full_address", "String!", retrieved_from: :doc_values t.field "timestamps", "AddressTimestamps" t.field "geo_location", "GeoLocation" diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb index d7369a9d4..bfacca97e 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_query.rb @@ -303,7 +303,7 @@ def ignored_values_for_routing def to_datastore_body @to_datastore_body ||= aggregations_datastore_body .merge(document_paginator.to_datastore_body) - .merge({highlight: highlight, query: filter_interpreter.build_query(all_filters), _source: source}.compact) + .merge({docvalue_fields: docvalue_fields, highlight: highlight, query: filter_interpreter.build_query(all_filters), _source: source}.compact) end def aggregations_datastore_body @@ -323,13 +323,33 @@ def aggregations_datastore_body # we only ask for the fields we need to return. def source return true if request_all_fields - requested_source_fields = requested_fields - ["id"] + requested_source_fields = requested_fields_for_source - ["id"] return false if requested_source_fields.empty? # Merging in requested_fields as _source:{includes:} based on Elasticsearch documentation: # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-source-field.html#include-exclude {includes: requested_source_fields.to_a} end + def docvalue_fields + requested_docvalue_fields = + if request_all_fields + # When requesting all fields we send `_source: true`, but fields excluded from stored + # `_source` (because they use `retrieved_from: :doc_values`) still need an alternative + # retrieval path. We therefore request docvalue_fields for any field that ANY index + # definition stores in doc values, unlike the selective path below which requires + # unanimity across all index definitions. + all_docvalue_fields + else + requested_fields.select do |field_path| + requested_via_doc_values?(field_path) + end + end + + return nil if requested_docvalue_fields.empty? + + requested_docvalue_fields.to_a + end + def highlight return nil if !request_all_highlights && requested_highlights.empty? @@ -343,6 +363,35 @@ def highlight {fields:, highlight_query:}.compact end + def requested_fields_for_source + @requested_fields_for_source ||= requested_fields.reject do |field_path| + requested_via_doc_values?(field_path) + end + end + + def all_docvalue_fields + @all_docvalue_fields ||= search_index_definitions.flat_map do |index_def| + index_def.fields_by_path.filter_map do |field_path, field| + field_path if field.retrieved_from_doc_values? + end + end.to_set + end + + # Returns true only when every participating index definition agrees the field should be + # retrieved via doc values. When they disagree we fall back to `_source` so that source-backed + # indices can return the field normally; the doc-values-backed index will also have the value + # available in `_source` in that case (a disagreement like this should not happen in practice, + # since `retrieved_from` is set once per field definition and propagates to all index definitions). + def requested_via_doc_values?(field_path) + return false if field_path == "id" + + field_definitions = search_index_definitions.filter_map do |index_def| + index_def.fields_by_path[field_path] + end + + field_definitions.any? && field_definitions.all?(&:retrieved_from_doc_values?) + end + # Encapsulates dependencies of `Query`, giving us something we can expose off of `application` # to build queries when desired. class Builder < Support::MemoizableData.define(:runtime_metadata, :logger, :filter_interpreter, :filter_node_interpreter, :default_page_size, :max_page_size) diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/document.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/document.rb index 9524b21d0..24e2c3370 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/document.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/document.rb @@ -8,11 +8,16 @@ require "elastic_graph/graphql/decoded_cursor" require "elastic_graph/support/memoizable_data" +require "elastic_graph/support/hash_util" require "forwardable" module ElasticGraph class GraphQL module DatastoreResponse + # @private + # Sentinel value to distinguish "no default given" from an explicit `nil` default in {Document#fetch_value_at}. + UNSET = ::Object.new.freeze + # Represents a document fetched from the datastore. Exposes both the raw metadata # provided by the datastore and the doc payload itself. In addition, you can treat # it just like a document hash using `#[]` or `#fetch`. @@ -20,8 +25,6 @@ module DatastoreResponse # @implements Document extend Forwardable - def_delegators :payload, :[], :fetch - def self.build(raw_data, decoded_cursor_factory: DecodedCursor::Factory::Null) source = raw_data.fetch("_source") do {} # : ::Hash[::String, untyped] @@ -51,6 +54,38 @@ def id raw_data["_id"] end + def [](key) + return payload[key] if payload.key?(key) + docvalue_field(key)&.first + end + + def fetch(key, default = UNSET) + return payload[key] if payload.key?(key) + if (field_values = docvalue_field(key)) + return field_values.first + end + return yield(key) if block_given? + return default unless default.equal?(UNSET) + raise KeyError, "key not found: #{key.inspect}" + end + + def fetch_value_at(path, default_value: UNSET) + Support::HashUtil.fetch_value_at_path(payload, path) do + if (field_values = docvalue_field(path.join("."))) + next field_values.first + end + next yield(path) if block_given? + next default_value unless default_value.equal?(UNSET) + raise KeyError, "path not found: #{path.join(".")}" + end + end + + def value_at(path) + Support::HashUtil.fetch_value_at_path(payload, path) do + docvalue_field(path.join("."))&.first + end + end + def sort raw_data["sort"] end @@ -77,6 +112,14 @@ def to_s "#<#{self.class.name} #{datastore_path}>" end alias_method :inspect, :to_s + + private + + # Returns the doc_values field array for the given key, or nil if not present. + # Datastore doc_values are always returned as arrays (e.g. `{"name" => ["Bob"]}`). + def docvalue_field(key) + raw_data.dig("fields", key) + end end end end diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/search_response.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/search_response.rb index 85e838241..c71bffdc7 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/search_response.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/datastore_response/search_response.rb @@ -114,7 +114,7 @@ def filter_results(field_path, values, size) # `id` within `_source`, given it's available as `_id`. ->(hit) { values.include?(hit.fetch("_id")) } else - ->(hit) { values.intersect?(Support::HashUtil.fetch_leaf_values_at_path(hit.fetch("_source"), field_path).to_set) } + ->(hit) { values.intersect?(hit_values_at_path(hit, field_path).to_set) } end hits = raw_data.fetch("hits").fetch("hits").select(&filter).first(size) @@ -131,6 +131,28 @@ def docs_description (documents.size < 3) ? documents.inspect : "[#{documents.first}, ..., #{documents.last}]" end + # Extracts leaf values from a hit, checking `_source` first and falling back to `fields` + # (populated by `docvalue_fields` in the query). When a field is excluded from `_source` + # (e.g. `retrieved_from: :doc_values`), the datastore still returns it under the `fields` + # key because the query explicitly requested it via `docvalue_fields`. + def hit_values_at_path(hit, field_path) + source = hit["_source"] + if source + Support::HashUtil.fetch_leaf_values_at_path(source, field_path) do + docvalue_fields_for(hit, field_path) + end + else + docvalue_fields_for(hit, field_path) + end + end + + def docvalue_fields_for(hit, field_path) + fields = hit["fields"] + joined_path = field_path.join(".") + raise KeyError, "key not found: #{joined_path}" unless fields + fields.fetch(joined_path) + end + def total_document_count(default: nil) super() || default || raise(Errors::CountUnavailableError, "#{__method__} is unavailable; set `query.total_document_count_needed = true` to make it available") end diff --git a/elasticgraph-graphql/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb b/elasticgraph-graphql/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb index 19c7ece8a..60fcff44d 100644 --- a/elasticgraph-graphql/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +++ b/elasticgraph-graphql/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb @@ -19,15 +19,14 @@ def initialize(elasticgraph_graphql:, config:) end def resolve(field:, object:, args:, context:) - data = + value = case object when DatastoreResponse::Document - object.payload + object.value_at(field.path_in_index) else - object + Support::HashUtil.fetch_value_at_path(object, field.path_in_index) { nil } end - value = Support::HashUtil.fetch_value_at_path(data, field.path_in_index) { nil } value = [] if value.nil? && field.type.list? if field.type.relay_connection? diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_response/document.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_response/document.rbs index e64a73b0a..5729f52ec 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_response/document.rbs +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/datastore_response/document.rbs @@ -1,6 +1,8 @@ module ElasticGraph class GraphQL module DatastoreResponse + UNSET: ::Object + class Document extend Forwardable @@ -29,6 +31,12 @@ module ElasticGraph def []: (::String) -> untyped def fetch: (::String) -> untyped + | (::String, untyped) -> untyped + | (::String) { (::String) -> untyped } -> untyped + def fetch_value_at: (::Array[::String]) -> untyped + | (::Array[::String], default_value: untyped) -> untyped + | (::Array[::String]) { (::Array[::String]) -> untyped } -> untyped + def value_at: (::Array[::String]) -> untyped def index_name: () -> ::String def index_definition_name: () -> ::String def id: () -> ::String diff --git a/elasticgraph-graphql/sig/elastic_graph/graphql/schema/relation_join.rbs b/elasticgraph-graphql/sig/elastic_graph/graphql/schema/relation_join.rbs index e5d7939d8..c32371a49 100644 --- a/elasticgraph-graphql/sig/elastic_graph/graphql/schema/relation_join.rbs +++ b/elasticgraph-graphql/sig/elastic_graph/graphql/schema/relation_join.rbs @@ -55,8 +55,8 @@ module ElasticGraph def blank_value: () -> (nil | DatastoreResponse::SearchResponse) def extract_id_or_ids_from: ( - ::Hash[::String, untyped], - ^(document: ::Hash[::String, untyped], problem: ::String) -> void + DatastoreResponse::Document | ::Hash[::String, untyped], + ^(document: DatastoreResponse::Document | ::Hash[::String, untyped], problem: ::String) -> void ) -> (nil | ::String | ::Enumerable[::String]) def normalize_documents: ( diff --git a/elasticgraph-graphql/spec/acceptance/datastore_spec.rb b/elasticgraph-graphql/spec/acceptance/datastore_spec.rb index 50cad90ee..5505c9eb6 100644 --- a/elasticgraph-graphql/spec/acceptance/datastore_spec.rb +++ b/elasticgraph-graphql/spec/acceptance/datastore_spec.rb @@ -122,6 +122,23 @@ module ElasticGraph ]) end + it "returns direct leaf fields configured to be fetched from doc values" do + index_records( + widget = build(:widget, workspace_id: "workspace_1"), + address = build(:address, full_address: "123 Main St") + ) + + widgets = list_widgets(fields: <<~EOS) + id + #{case_correctly("workspace_id")} + EOS + + addresses = list_addresses(fields: case_correctly("full_address")) + + expect(widgets).to eq([string_hash_of(widget, :id, :workspace_id)]) + expect(addresses).to eq([string_hash_of(address, :full_address)]) + end + describe "timeout behavior" do it "raises `Errors::RequestExceededDeadlineError` if the specified timeout is exceeded by a datastore query" do expect { @@ -170,6 +187,20 @@ def list_addresses(fields:, gql: graphql, **query_args) QUERY end + def list_widgets(fields:, gql: graphql, **query_args) + call_graphql_query(<<~QUERY, gql: gql).dig("data", "widgets", "edges").map { |we| we.fetch("node") } + query { + widgets#{graphql_args(query_args)} { + edges { + node { + #{fields} + } + } + } + } + QUERY + end + def query_all_indexed_type_counts call_graphql_query(<<~QUERY).fetch("data") query { diff --git a/elasticgraph-graphql/spec/integration/elastic_graph/graphql/datastore_query/requested_fields_spec.rb b/elasticgraph-graphql/spec/integration/elastic_graph/graphql/datastore_query/requested_fields_spec.rb index 21959ffc2..3ea96e4f0 100644 --- a/elasticgraph-graphql/spec/integration/elastic_graph/graphql/datastore_query/requested_fields_spec.rb +++ b/elasticgraph-graphql/spec/integration/elastic_graph/graphql/datastore_query/requested_fields_spec.rb @@ -50,6 +50,15 @@ class GraphQL expect(results.first.payload.keys).to include("name", "id", "options", "the_opts", "tags") expect(results.first["options"].keys).to include("size", "the_sighs") end + + specify "returns docvalue-backed fields if passed `request_all_fields: true`" do + widget = build(:widget, workspace_id: "ws-1") + index_into(graphql, widget) + + results = search_datastore(request_all_fields: true) + + expect(results.first["workspace_id2"]).to eq "ws-1" + end end end end diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/requested_fields_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/requested_fields_spec.rb index 01c99b348..bd3c10714 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/requested_fields_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_query/requested_fields_spec.rb @@ -42,6 +42,88 @@ class GraphQL expect(datastore_body_of(query)[:_source]).to eq(true) end + + it "still requests doc values when requesting all fields" do + graphql = build_graphql(schema_definition: lambda do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "name", "String" + t.field "internal_code", "String", retrieved_from: :doc_values + t.index "widgets" + end + end) + query = graphql.datastore_query_builder.new_query( + search_index_definitions: graphql.datastore_core.index_definitions_by_graphql_type.fetch("Widget"), + request_all_fields: true + ) + + expect(datastore_body_of(query)[:_source]).to eq true + expect(datastore_body_of(query)[:docvalue_fields]).to contain_exactly("internal_code") + end + + it "still requests doc values when requesting all fields from mixed index definitions" do + graphql = build_graphql(schema_definition: lambda do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "internal_code", "String", retrieved_from: :doc_values + t.index "widgets" + end + end) + doc_values_index_def = graphql.datastore_core.index_definitions_by_name.fetch("widgets") + source_backed_index_def = doc_values_index_def.with( + fields_by_path: doc_values_index_def.fields_by_path.merge( + "internal_code" => doc_values_index_def.fields_by_path.fetch("internal_code").with(retrieved_from: nil) + ) + ) + query = graphql.datastore_query_builder.new_query( + search_index_definitions: [doc_values_index_def, source_backed_index_def], + request_all_fields: true + ) + + expect(datastore_body_of(query)[:_source]).to eq true + expect(datastore_body_of(query)[:docvalue_fields]).to contain_exactly("internal_code") + end + + it "requests doc values for fields marked `retrieved_from: :doc_values`" do + graphql = build_graphql(schema_definition: lambda do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "name", "String" + t.field "internal_code", "String", retrieved_from: :doc_values + t.index "widgets" + end + end) + query = graphql.datastore_query_builder.new_query( + search_index_definitions: graphql.datastore_core.index_definitions_by_graphql_type.fetch("Widget"), + requested_fields: ["internal_code", "name"] + ) + + expect(datastore_body_of(query)[:docvalue_fields]).to contain_exactly("internal_code") + expect(datastore_body_of(query)[:_source][:includes]).to contain_exactly("name") + end + + it "keeps requesting `_source` when index definitions disagree on how to retrieve a field" do + graphql = build_graphql(schema_definition: lambda do |schema| + schema.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "internal_code", "String", retrieved_from: :doc_values + t.index "widgets" + end + end) + doc_values_index_def = graphql.datastore_core.index_definitions_by_name.fetch("widgets") + source_backed_index_def = doc_values_index_def.with( + fields_by_path: doc_values_index_def.fields_by_path.merge( + "internal_code" => doc_values_index_def.fields_by_path.fetch("internal_code").with(retrieved_from: nil) + ) + ) + query = graphql.datastore_query_builder.new_query( + search_index_definitions: [doc_values_index_def, source_backed_index_def], + requested_fields: ["internal_code"] + ) + + expect(datastore_body_of(query)[:docvalue_fields]).to eq nil + expect(datastore_body_of(query)[:_source][:includes]).to contain_exactly("internal_code") + end end end end diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/document_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/document_spec.rb index ea8400577..16889ed26 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/document_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/document_spec.rb @@ -93,6 +93,14 @@ module DatastoreResponse expect(document.payload).to eq raw_data.fetch("_source") end + it "returns direct field values from docvalue fields when `_source` omits them" do + doc = build_doc(raw_data.except("_source").merge("fields" => {"name" => ["HuaweiP Smart"]})) + + expect(doc.value_at(["name"])).to eq "HuaweiP Smart" + expect(doc["name"]).to eq "HuaweiP Smart" + expect(doc.fetch("name")).to eq "HuaweiP Smart" + end + it "exposes its datastore path" do expect(document.datastore_path).to eq "/widgets/_doc/qwbfffaijhkljtfmcuwv" end @@ -134,6 +142,52 @@ module DatastoreResponse expect { document.fetch("foo") }.to raise_error(KeyError) end + it "returns the provided default when `#fetch` is called on a missing field" do + expect(document.fetch("foo", "default")).to eq "default" + end + + it "prefers the block over the default when `#fetch` is called with both" do + expect(document.fetch("amount_cents", "default") { "block" }).to eq 300 + expect(document.fetch("foo", "default") { "block" }).to eq "block" + end + + it "raises `ArgumentError` when `#fetch` is called with too many default arguments" do + expect { document.fetch("foo", "default", "extra") }.to raise_error(ArgumentError) + end + + it "allows field paths to be accessed using `#fetch_value_at`" do + expect(document.fetch_value_at(%w[options size])).to eq "MEDIUM" + expect { document.fetch_value_at(%w[options material]) }.to raise_error(KeyError) + end + + it "returns the provided default when `#fetch_value_at` is called on a missing field path" do + expect(document.fetch_value_at(%w[options material], default_value: "default")).to eq "default" + end + + it "returns field path values from docvalue fields when `_source` omits them" do + doc = build_doc(raw_data.except("_source").merge("fields" => {"name" => ["HuaweiP Smart"]})) + + expect(doc.fetch_value_at(["name"])).to eq "HuaweiP Smart" + end + + it "prefers the block over the default when `#fetch_value_at` is called with both" do + expect(document.fetch_value_at(%w[options material], default_value: "default") { "block" }).to eq "block" + end + + it "treats a `nil` field value as present when using `#fetch`" do + document = Document.with_payload({"foo" => nil}) + + expect(document.fetch("foo", "default")).to eq nil + expect(document.fetch("foo") { "default" }).to eq nil + end + + it "treats a `nil` field value as present when using `#fetch_value_at`" do + document = Document.with_payload({"foo" => nil}) + + expect(document.fetch_value_at(["foo"], default_value: "default")).to eq nil + expect(document.fetch_value_at(["foo"]) { "default" }).to eq nil + end + it "supports easy construction without the full raw data payload (such as for tests)" do doc = Document.with_payload({"foo" => 12}) expect(doc.to_s).to eq "#" diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/search_response_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/search_response_spec.rb index 3a8cd3023..204a8834c 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/search_response_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/datastore_response/search_response_spec.rb @@ -369,6 +369,18 @@ def response_of(*hits) expect(filtered.documents.map(&:payload)).to eq [bob.fetch("_source"), eileen.fetch("_source")] end + it "can filter on values returned in datastore `fields` when `_source` omits them" do + response = response_of( + bob = {"_id" => "B", "fields" => {"name" => ["Bob"]}}, + {"_id" => "J", "fields" => {"name" => ["Judy"]}}, + eileen = {"_id" => "E", "fields" => {"name" => ["Eileen"]}} + ) + + filtered = response.filter_results(["name"], ["Bob", "Eileen"].to_set, 10) + + expect(filtered.documents.map(&:id)).to eq [bob.fetch("_id"), eileen.fetch("_id")] + end + it "preserves the `decoded_cursor_factory` that was on the original documents" do response = response_of( {"_id" => "B", "_source" => {"id" => "B", "name" => "Bob", "age" => 17}}, diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/resolvers/get_record_field_value_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/resolvers/get_record_field_value_spec.rb index 89479c1f8..eef01a666 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/resolvers/get_record_field_value_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/resolvers/get_record_field_value_spec.rb @@ -58,6 +58,13 @@ module Resolvers expect(value).to eq "Napoleon" end + it "works with a `DatastoreResponse::Document` that has the field in `fields` instead of `_source`" do + doc = DatastoreResponse::Document.build({"_id" => "1", "fields" => {"name" => ["Napoleon"]}}) + value = resolve("Person", "name", doc) + + expect(value).to eq "Napoleon" + end + it "fetches a requested list field from the document" do value = resolve("Person", "nicknames", {"id" => 1, "nicknames" => %w[Napo Leon]}) @@ -109,6 +116,13 @@ module Resolvers expect(value).to eq "Napoleon" end + it "resolves a `name_in_index` field from docvalue fields" do + doc = DatastoreResponse::Document.build({"_id" => "1", "fields" => {"name" => ["Napoleon"]}}) + value = resolve("Person", "alt_name1", doc) + + expect(value).to eq "Napoleon" + end + it "returns `nil` for a scalar when the directive field name is missing" do value = resolve("Person", "alt_name1", {"id" => 1}) expect(value).to eq nil diff --git a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/schema/field_spec.rb b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/schema/field_spec.rb index e26f8d7c7..3750d856a 100644 --- a/elasticgraph-graphql/spec/unit/elastic_graph/graphql/schema/field_spec.rb +++ b/elasticgraph-graphql/spec/unit/elastic_graph/graphql/schema/field_spec.rb @@ -102,6 +102,39 @@ class Schema expect(field.relation_join).to be(nil).and be(field.relation_join) expect(RelationJoin).to have_received(:from).once end + + it "logs and returns the blank value when a datastore document is missing the relation id field" do + field = define_schema do |s| + s.object_type "Color" do |t| + t.field "id", "ID!" + t.index "colors" + end + + s.object_type "Photo" do |t| + t.field "id", "ID!" + t.field "photo_id", "ID" + t.relates_to_one "color", "Color", via: "photo_id", dir: :out + t.index "photos" + end + end.field_named("Photo", "color") + + warnings = [] + + result = field.relation_join.extract_id_or_ids_from( + DatastoreResponse::Document.build({"_id" => "p1", "_source" => {}}), + lambda do |document:, problem:| + warnings << {document: document, problem: problem} + end + ) + + expect(result).to eq nil + expect(warnings).to contain_exactly( + a_hash_including( + document: an_object_having_attributes(id: "p1"), + problem: "photo_id is missing from the document" + ) + ) + end end describe "#sort_clauses_for" do diff --git a/elasticgraph-indexer/spec/acceptance/multi_source_indexing_spec.rb b/elasticgraph-indexer/spec/acceptance/multi_source_indexing_spec.rb index 5d08b3819..3f9857542 100644 --- a/elasticgraph-indexer/spec/acceptance/multi_source_indexing_spec.rb +++ b/elasticgraph-indexer/spec/acceptance/multi_source_indexing_spec.rb @@ -165,7 +165,7 @@ module ElasticGraph expect(indexed_widget_source).to include({ "id" => "w1", "created_at" => timestamp_in_2023, - "workspace_id2" => "wid_23", + # Note: `workspace_id2` is not in `_source` because its schema field uses `retrieved_from: :doc_values`. "workspace_name" => "Garage" # the sourced_from field copied from the `WidgetWorkspace` }) end diff --git a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_field.rb b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_field.rb index 9c5d4fe18..66bacb72e 100644 --- a/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_field.rb +++ b/elasticgraph-schema_artifacts/lib/elastic_graph/schema_artifacts/runtime_metadata/index_field.rb @@ -14,20 +14,27 @@ module RuntimeMetadata # Runtime metadata related to a field on a datastore index definition. # # @private - class IndexField < ::Data.define(:source) + class IndexField < ::Data.define(:source, :retrieved_from) SOURCE = "source" + RETRIEVED_FROM = "retrieved_from" def self.from_hash(hash) new( - source: hash[SOURCE] || SELF_RELATIONSHIP_NAME + source: hash[SOURCE] || SELF_RELATIONSHIP_NAME, + retrieved_from: hash[RETRIEVED_FROM] ) end def to_dumpable_hash - { - # Keys here are ordered alphabetically; please keep them that way. - SOURCE => source - } + # Keys here are ordered alphabetically; please keep them that way. + hash = {} + hash[RETRIEVED_FROM] = retrieved_from if retrieved_from + hash[SOURCE] = source + hash + end + + def retrieved_from_doc_values? + retrieved_from == "doc_values" end end end diff --git a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_field.rbs b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_field.rbs index dc248c955..7d1c7eb2e 100644 --- a/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_field.rbs +++ b/elasticgraph-schema_artifacts/sig/elastic_graph/schema_artifacts/runtime_metadata/index_field.rbs @@ -3,14 +3,17 @@ module ElasticGraph module RuntimeMetadata class IndexFieldSupertype attr_reader source: ::String + attr_reader retrieved_from: ::String? - def initialize: (source: ::String) -> void - def with: (?source: ::String) -> IndexField + def initialize: (source: ::String, ?retrieved_from: ::String?) -> void + def with: (?source: ::String, ?retrieved_from: ::String?) -> IndexField end class IndexField < IndexFieldSupertype + RETRIEVED_FROM: "retrieved_from" SOURCE: "source" def self.from_hash: (::Hash[::String, untyped]) -> IndexField + def retrieved_from_doc_values?: () -> bool def to_dumpable_hash: () -> ::Hash[::String, untyped] end end diff --git a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/index_field_spec.rb b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/index_field_spec.rb index b08f97210..dd5d51d04 100644 --- a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/index_field_spec.rb +++ b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/index_field_spec.rb @@ -18,7 +18,19 @@ module RuntimeMetadata it "builds from a minimal hash" do field = IndexField.from_hash({}) - expect(field).to eq IndexField.new(source: SELF_RELATIONSHIP_NAME) + expect(field).to eq IndexField.new(source: SELF_RELATIONSHIP_NAME, retrieved_from: nil) + expect(field.to_dumpable_hash).to eq("source" => SELF_RELATIONSHIP_NAME) + end + + it "builds from a hash with `retrieved_from` metadata" do + field = IndexField.from_hash("retrieved_from" => "doc_values") + + expect(field).to eq IndexField.new(source: SELF_RELATIONSHIP_NAME, retrieved_from: "doc_values") + expect(field.retrieved_from_doc_values?).to eq true + expect(field.to_dumpable_hash).to eq( + "retrieved_from" => "doc_values", + "source" => SELF_RELATIONSHIP_NAME + ) end end end diff --git a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb index 1b8041cbe..97767849a 100644 --- a/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb +++ b/elasticgraph-schema_artifacts/spec/unit/elastic_graph/schema_artifacts/runtime_metadata/schema_spec.rb @@ -120,7 +120,7 @@ module RuntimeMetadata ], current_sources: [SELF_RELATIONSHIP_NAME], fields_by_path: { - "foo.bar" => IndexField.new(source: "other") + "foo.bar" => IndexField.new(source: "other", retrieved_from: nil) }, has_had_multiple_sources: false ), diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index 1e939d0a4..e1f3de9f6 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -327,10 +327,10 @@ def mappings hash["_routing"] = {"required" => true} if uses_custom_routing? hash["_size"] = {"enabled" => true} if schema_def_state.index_document_sizes? - # Exclude non-returnable fields from `_source` to save storage. These fields are still + # Exclude source-excluded fields from `_source` to save storage. These fields are still # indexed (in the inverted index and/or doc_values) for filtering, sorting, and aggregation, # but their values are not stored in the compressed `_source` blob. - source_excludes = indexed_type.source_excludes_paths + source_excludes = indexed_type.source_excluded_field_paths hash["_source"] = {"excludes" => source_excludes} if source_excludes.any? end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb index 64c51ecab..fdeae33e9 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb @@ -292,22 +292,22 @@ def fields_with_sources indexing_fields_by_name_in_index.values.reject { |f| f.source.nil? } end - # Returns the list of `_source.excludes` paths for non-returnable fields. + # Returns the list of `_source.excludes` paths for fields that should be excluded from stored `_source`. # # Uses `indexing_fields_by_name_in_index` for traversal (same as # `index_field_runtime_metadata_tuples`) to avoid infinite recursion # through interface/union subtype cycles. # # @private - def source_excludes_paths(path_prefix: "") + def source_excluded_field_paths(path_prefix: "") indexing_fields_by_name_in_index.flat_map do |name, field| path = path_prefix + name object_type = field.type.fully_unwrapped.as_object_type - if !field.returnable? + if field.source_excluded? [object_type ? "#{path}.*" : path] elsif object_type - object_type.source_excludes_paths(path_prefix: "#{path}.") + object_type.source_excluded_field_paths(path_prefix: "#{path}.") else [] end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb index b5db57997..ec5aa1462 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb @@ -59,6 +59,10 @@ def current_sources resolve_subtypes.flat_map(&:current_sources) end + def source_excluded_field_paths(path_prefix: "") + resolve_subtypes.flat_map { |t| t.source_excluded_field_paths(path_prefix: path_prefix) }.uniq + end + def index_field_runtime_metadata_tuples( path_prefix: "", parent_source: SELF_RELATIONSHIP_NAME, diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb index 54b57e567..8e468ccb1 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb @@ -93,7 +93,7 @@ class Field < Struct.new( :name, :original_type, :parent_type, :original_type_for_derived_types, :schema_def_state, :accuracy_confidence, :filter_customizations, :grouped_by_customizations, :highlights_customizations, :sub_aggregations_customizations, :aggregated_values_customizations, :sort_order_enum_value_customizations, :args, - :sortable, :filterable, :aggregatable, :groupable, :highlightable, :returnable, + :sortable, :filterable, :aggregatable, :groupable, :highlightable, :returnable, :retrieved_from, :graphql_only, :source, :runtime_field_script, :relationship, :singular_name, :computation_detail, :non_nullable_in_json_schema, :as_input, :name_in_index, :resolver @@ -108,7 +108,7 @@ def initialize( name:, type:, parent_type:, schema_def_state:, accuracy_confidence: :high, name_in_index: name, type_for_derived_types: nil, graphql_only: nil, singular: nil, - sortable: nil, filterable: nil, aggregatable: nil, groupable: nil, highlightable: nil, returnable: nil, + sortable: nil, filterable: nil, aggregatable: nil, groupable: nil, highlightable: nil, returnable: nil, retrieved_from: nil, as_input: false, resolver: nil ) type_ref = schema_def_state.type_ref(type) @@ -132,6 +132,7 @@ def initialize( groupable: groupable, highlightable: highlightable, returnable: returnable, + retrieved_from: retrieved_from&.to_s, graphql_only: graphql_only, source: nil, runtime_field_script: nil, @@ -161,6 +162,12 @@ def initialize( end end + if retrieved_from + schema_def_state.after_user_definition_complete do + validate_retrieved_from! + end + end + schema_def_state.register_user_defined_field(self) yield self if block_given? end @@ -756,6 +763,20 @@ def returnable? returnable end + # Indicates if this field should be retrieved from datastore doc values rather than `_source`. + # + # @return [Boolean] true if this field should be returned via `docvalue_fields` + def retrieved_from_doc_values? + retrieved_from == "doc_values" + end + + # Indicates if the field should be omitted from stored `_source`. + # + # @return [Boolean] + def source_excluded? + !returnable? || retrieved_from_doc_values? + end + # Defines an argument on the field. # # @note ElasticGraph takes care of defining arguments for all the query features it supports, so there is generally no need to use @@ -908,7 +929,8 @@ def to_filter_field(parent_type:, for_single_value: !type_for_derived_types.list resolver: nil, # Filter fields should always appear in their parent input type's SDL regardless # of the source field's returnability. - returnable: true + returnable: true, + retrieved_from: nil ) schema_def_state.factory.new_field(**params).tap do |f| @@ -1259,6 +1281,39 @@ def filter_field_category(for_single_value) EOS end end + + def validate_retrieved_from! + unless retrieved_from == "doc_values" + raise Errors::SchemaError, + "#{self} has an invalid `retrieved_from`: #{retrieved_from.inspect}. " \ + "The only supported value is `:doc_values`." + end + + unless parent_type.root_document_type? + raise Errors::SchemaError, + "#{self} uses `retrieved_from: :doc_values`, but that is only supported on direct fields of indexed root document types." + end + + unless type_for_derived_types.fully_unwrapped.leaf? + raise Errors::SchemaError, + "#{self} uses `retrieved_from: :doc_values`, but that is only supported on GraphQL leaf fields." + end + + if type_for_derived_types.list? + raise Errors::SchemaError, + "#{self} uses `retrieved_from: :doc_values`, but that is only supported on non-list GraphQL leaf fields because datastore doc values do not preserve list semantics." + end + + if name_in_index.include?(".") + raise Errors::SchemaError, + "#{self} uses `retrieved_from: :doc_values`, but that is only supported on direct document fields without a dotted `name_in_index`." + end + + if text? + raise Errors::SchemaError, + "#{self} uses `retrieved_from: :doc_values`, but text fields do not support datastore doc values." + end + end end end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb index c77e61917..845d1ae33 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb @@ -140,6 +140,9 @@ def name # @option options [Boolean] returnable when set to `false`, the field will not appear in the GraphQL output type and its data # will be excluded from `_source` in the datastore for storage savings. The field will still be available for filtering, # sorting, grouping, and aggregation. Defaults to `true`. + # @option options [Symbol] retrieved_from when set to `:doc_values`, the field remains returnable in GraphQL but is excluded + # from stored `_source` and instead fetched via datastore `docvalue_fields`. This is only supported on direct, non-list, + # non-text GraphQL leaf fields of indexed root document types; nested field paths are not supported. # @yield [Field] the field for further customization # @return [void] # @@ -505,7 +508,7 @@ def index_field_runtime_metadata_tuples( indexing_fields_by_name_in_index.flat_map do |name, field| path = path_prefix + name source = field.source&.relationship_name || parent_source - index_field = SchemaArtifacts::RuntimeMetadata::IndexField.new(source: source) + index_field = SchemaArtifacts::RuntimeMetadata::IndexField.new(source: source, retrieved_from: field.retrieved_from) list_count_field_tuples = field.paths_to_lists_for_count_indexing.map do |subpath| [list_counts_state.path_to_count_subfield(subpath), index_field] # : [::String, SchemaArtifacts::RuntimeMetadata::IndexField] diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_subtypes.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_subtypes.rbs index e8d1b0457..d69c63632 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_subtypes.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/mixins/has_subtypes.rbs @@ -13,6 +13,7 @@ module ElasticGraph def indexing_fields_by_name_in_index: () -> ::Hash[::String, SchemaElements::Field] def abstract?: () -> true def current_sources: () -> ::Array[::String] + def source_excluded_field_paths: (?path_prefix: ::String) -> ::Array[::String] def index_field_runtime_metadata_tuples: ( ?path_prefix: ::String, ?parent_source: ::String, diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs index be91e03a3..43a1aac0c 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/field.rbs @@ -24,6 +24,8 @@ module ElasticGraph attr_reader sort_order_enum_value_customizations: ::Array[^(SortOrderEnumValue) -> void] attr_reader non_nullable_in_json_schema: bool attr_reader source: FieldSource? + attr_reader returnable: bool? + attr_reader retrieved_from: ::String? attr_accessor relationship: Relationship? attr_reader resolver: SchemaArtifacts::RuntimeMetadata::ConfiguredGraphQLResolver? attr_reader singular_name: ::String? @@ -41,6 +43,8 @@ module ElasticGraph ?aggregatable: bool?, ?groupable: bool?, ?highlightable: bool?, + ?returnable: bool?, + ?retrieved_from: ::Symbol?, ?graphql_only: bool?, ?singular: ::String?, ?as_input: bool @@ -62,6 +66,9 @@ module ElasticGraph def filterable?: () -> bool def groupable?: () -> bool def highlightable?: () -> bool + def returnable?: () -> bool + def retrieved_from_doc_values?: () -> bool + def source_excluded?: () -> bool def list_field_groupable_by_single_values?: () -> bool def aggregatable?: () -> bool def sub_aggregatable?: () -> bool diff --git a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_with_subfields.rbs b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_with_subfields.rbs index 557e23bef..e82eeaa7b 100644 --- a/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_with_subfields.rbs +++ b/elasticgraph-schema_definition/sig/elastic_graph/schema_definition/schema_elements/type_with_subfields.rbs @@ -81,6 +81,7 @@ module ElasticGraph def generate_sdl: (name_section: ::String) ?{ (Field::argument) -> boolish } -> String def current_sources: () -> ::Array[::String] + def source_excluded_field_paths: (?path_prefix: ::String) -> ::Array[::String] def index_field_runtime_metadata_tuples: ( ?path_prefix: ::String, ?parent_source: ::String, diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/abstract_types_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/abstract_types_spec.rb index a85968631..0001466f6 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/abstract_types_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/abstract_types_spec.rb @@ -84,6 +84,29 @@ module SchemaDefinition }) end + it "includes `_source.excludes` for subtype fields fetched from doc values" do + mapping = index_mapping_for "things" do |s| + s.object_type "Widget" do |t| + t.field "id", "ID!" + t.field "internal_code", "String", retrieved_from: :doc_values + link_subtype_to_supertype(t, "Thing") + end + + s.object_type "Component" do |t| + t.field "id", "ID!" + link_subtype_to_supertype(t, "Thing") + end + + s.public_send type_def_method, "Thing" do |t| + link_supertype_to_subtypes(t, "Widget", "Component") + t.index "things" + end + end + + expect(mapping).to include("_source" => {"excludes" => ["internal_code"]}) + expect(mapping.dig("properties", "internal_code")).to eq({"type" => "keyword"}) + end + it "handles the subtypes having no fields" do mapping = index_mapping_for "things" do |s| s.object_type "Widget" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb index c5e094db4..0aae794ff 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb @@ -212,6 +212,19 @@ module SchemaDefinition expect(mapping).not_to have_key("_source") end + it "includes `_source.excludes` for fields fetched from doc values" do + mapping = index_mapping_for "my_type" do |s| + s.object_type "MyType" do |t| + t.field "id", "ID" + t.field "internal_code", "String", retrieved_from: :doc_values + t.index "my_type" + end + end + + expect(mapping).to include("_source" => {"excludes" => ["internal_code"]}) + expect(mapping.dig("properties", "internal_code")).to eq({"type" => "keyword"}) + end + it "keeps `source_from` fields in the mapping so that indexed documents support the field even though it comes from an alternate source" do mapping = index_mapping_for "components" do |s| s.object_type "Widget" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb index 83ca0f54e..2d9f60204 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb @@ -676,6 +676,127 @@ module SchemaDefinition expect(highlights_type_from(result, "Widget")).to include("internal_code: [String!]!") end + it "keeps `retrieved_from: :doc_values` fields in the output type" do + result = define_schema do |api| + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "internal_code", "String", retrieved_from: :doc_values + t.index "widgets" + end + end + + expect(type_def_from(result, "Widget")).to include("internal_code: String") + end + + it "rejects unsupported `retrieved_from` values" do + expect { + define_schema do |api| + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "internal_code", "String", retrieved_from: :stored_fields + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("invalid `retrieved_from`", "The only supported value is `:doc_values`") + end + + it "rejects `retrieved_from: :doc_values` on fields of embedded object types" do + expect { + define_schema do |api| + api.object_type "WidgetOptions" do |t| + t.field "size", "String", retrieved_from: :doc_values + end + + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "options", "WidgetOptions" + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("retrieved_from: :doc_values", "direct fields of indexed root document types") + end + + it "rejects `retrieved_from: :doc_values` on non-leaf GraphQL fields" do + expect { + define_schema do |api| + api.object_type "WidgetOptions" do |t| + t.field "size", "String" + end + + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "options", "WidgetOptions", retrieved_from: :doc_values + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("retrieved_from: :doc_values", "GraphQL leaf fields") + end + + it "rejects `retrieved_from: :doc_values` on fields with a dotted `name_in_index`" do + expect { + define_schema do |api| + api.object_type "WidgetOptions" do |t| + t.field "size", "String" + end + + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "options", "WidgetOptions" + t.field "size", "String", graphql_only: true, name_in_index: "options.size", retrieved_from: :doc_values + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("retrieved_from: :doc_values", "without a dotted `name_in_index`") + end + + it "rejects `retrieved_from: :doc_values` on fields under a nested path" do + expect { + define_schema do |api| + api.object_type "WorkspaceMembership" do |t| + t.field "workspace_id", "ID" + end + + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "memberships", "[WorkspaceMembership!]!" do |f| + f.mapping type: "nested" + end + t.field "workspace_id", "ID", + graphql_only: true, + name_in_index: "memberships.workspace_id", + retrieved_from: :doc_values + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("retrieved_from: :doc_values", "without a dotted `name_in_index`") + end + + it "rejects `retrieved_from: :doc_values` on text fields" do + expect { + define_schema do |api| + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "description", "String", retrieved_from: :doc_values do |f| + f.mapping type: "text" + end + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("retrieved_from: :doc_values", "text fields do not support datastore doc values") + end + + it "rejects `retrieved_from: :doc_values` on list fields" do + expect { + define_schema do |api| + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "component_ids", "[ID!]!", retrieved_from: :doc_values + t.index "widgets" + end + end + }.to raise_error Errors::SchemaError, a_string_including("retrieved_from: :doc_values", "non-list GraphQL leaf fields") + end + def object_type(name, *args, pre_def: nil, include_docs: true, &block) result = define_schema do |api| pre_def&.call(api) diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb index 1835088ba..c440d370b 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/runtime_metadata/index_definitions_by_name_spec.rb @@ -135,6 +135,17 @@ module SchemaDefinition }.not_to raise_error end + it "dumps field retrieval metadata in `fields_by_path`" do + widgets = index_definition_metadata_for("widgets", on_my_type: ->(t) { t.field "internal_code", "String", retrieved_from: :doc_values }) + + expect(widgets.fields_by_path.fetch("internal_code")).to eq( + SchemaArtifacts::RuntimeMetadata::IndexField.new( + source: SELF_RELATIONSHIP_NAME, + retrieved_from: "doc_values" + ) + ) + end + it "raises a clear error when a nested sort field references a type that does not exist", :dont_validate_graphql_schema do expect { index_definition_metadata_for("widgets", on_my_type: ->(t) { t.field "options", "Opts" }) do |i| diff --git a/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb b/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb index 29316874b..6969ea4b6 100644 --- a/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb +++ b/spec_support/lib/elastic_graph/spec_support/runtime_metadata_support.rb @@ -123,8 +123,8 @@ def index_definition_with(route_with: nil, rollover: nil, default_sort_fields: [ ) end - def index_field_with(source: SELF_RELATIONSHIP_NAME) - IndexField.new(source: source) + def index_field_with(source: SELF_RELATIONSHIP_NAME, retrieved_from: nil) + IndexField.new(source: source, retrieved_from: retrieved_from) end def enum_type_with(values_by_name: {})