# Patch overview (elasticgraph-schema_definition): introduces a `returnable:` option on schema fields.
#
# What the hunks below do:
#   * schema_elements/field.rb -- adds a `returnable` struct member and a `returnable:` keyword
#     (default `nil`), plus `Field#returnable?`, which returns `true` when `returnable` is `nil`
#     and the stored value otherwise. The hunk under `to_filter_field` passes `returnable: true`
#     so that filter fields are always emitted in their parent input type's SDL.
#   * schema_elements/type_with_subfields.rb -- documents the option and makes `fields_sdl`
#     emit only the fields for which `returnable?` is true.
#   * mixins/has_indices.rb -- adds `source_excludes_paths(path_prefix: "")`, which walks
#     `indexing_fields_by_name_in_index` and, per field, yields `"<path>.*"` when the field is
#     not returnable and `field.type.fully_unwrapped.as_object_type` is truthy, `"<path>"` for
#     any other non-returnable field, and otherwise recurses into object types using
#     `"<path>."` as the prefix (returnable non-object fields contribute nothing).
#   * indexing/index.rb -- the hunk under `mappings` sets `hash["_source"] = {"excludes" => ...}`
#     only when that list is non-empty.
#   * spec files -- cover the mapping output (leaf field, object field, `graphql_only` vs
#     `indexing_only`, paths under a nested mapping, and the no-excludes case) and the
#     generated GraphQL types.
#
# NOTE(review): the object_type spec expects non-returnable fields to remain in the grouped_by,
#   aggregated_values, and highlights types, but only `to_filter_field` is changed here to pass
#   `returnable: true`; how the other derived fields avoid the new `fields_sdl` filter is not
#   visible in this patch -- confirm.
# NOTE(review): values excluded from `_source` are presumably unavailable to anything that reads
#   `_source` back from the datastore (e.g. scripted/partial updates, reindexing); verify against
#   the Elasticsearch `_source` field documentation before using this on indices that are
#   updated in place.
diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb index d12d37f5e..1e939d0a4 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/index.rb @@ -326,6 +326,12 @@ def mappings # made against the wrong shard. hash["_routing"] = {"required" => true} if uses_custom_routing? hash["_size"] = {"enabled" => true} if schema_def_state.index_document_sizes? + + # Exclude non-returnable fields from `_source` to save storage. These fields are still + # indexed (in the inverted index and/or doc_values) for filtering, sorting, and aggregation, + # but their values are not stored in the compressed `_source` blob. + source_excludes = indexed_type.source_excludes_paths + hash["_source"] = {"excludes" => source_excludes} if source_excludes.any? end end diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb index e47d59257..64c51ecab 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/mixins/has_indices.rb @@ -292,6 +292,28 @@ def fields_with_sources indexing_fields_by_name_in_index.values.reject { |f| f.source.nil? } end + # Returns the list of `_source.excludes` paths for non-returnable fields. + # + # Uses `indexing_fields_by_name_in_index` for traversal (same as + # `index_field_runtime_metadata_tuples`) to avoid infinite recursion + # through interface/union subtype cycles. 
+ # + # @private + def source_excludes_paths(path_prefix: "") + indexing_fields_by_name_in_index.flat_map do |name, field| + path = path_prefix + name + object_type = field.type.fully_unwrapped.as_object_type + + if !field.returnable? + [object_type ? "#{path}.*" : path] + elsif object_type + object_type.source_excludes_paths(path_prefix: "#{path}.") + else + [] + end + end + end + private def initialize_has_indices diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb index 9bf47dab0..54b57e567 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/field.rb @@ -73,6 +73,8 @@ module SchemaElements # @private # @!attribute [rw] highlightable # @private + # @!attribute [rw] returnable + # @private # @!attribute [rw] source # @private # @!attribute [rw] runtime_field_script @@ -91,7 +93,7 @@ class Field < Struct.new( :name, :original_type, :parent_type, :original_type_for_derived_types, :schema_def_state, :accuracy_confidence, :filter_customizations, :grouped_by_customizations, :highlights_customizations, :sub_aggregations_customizations, :aggregated_values_customizations, :sort_order_enum_value_customizations, :args, - :sortable, :filterable, :aggregatable, :groupable, :highlightable, + :sortable, :filterable, :aggregatable, :groupable, :highlightable, :returnable, :graphql_only, :source, :runtime_field_script, :relationship, :singular_name, :computation_detail, :non_nullable_in_json_schema, :as_input, :name_in_index, :resolver @@ -106,7 +108,7 @@ def initialize( name:, type:, parent_type:, schema_def_state:, accuracy_confidence: :high, name_in_index: name, type_for_derived_types: nil, graphql_only: nil, singular: nil, - sortable: nil, filterable: nil, aggregatable: nil, groupable: nil, 
highlightable: nil, + sortable: nil, filterable: nil, aggregatable: nil, groupable: nil, highlightable: nil, returnable: nil, as_input: false, resolver: nil ) type_ref = schema_def_state.type_ref(type) @@ -129,6 +131,7 @@ def initialize( aggregatable: aggregatable, groupable: groupable, highlightable: highlightable, + returnable: returnable, graphql_only: graphql_only, source: nil, runtime_field_script: nil, @@ -743,6 +746,16 @@ def highlightable? type_for_derived_types.fully_unwrapped.as_object_type&.supports?(&:highlightable?) end + # Indicates if this field is returnable in GraphQL query responses. When `false`, the field will + # still be available for filtering, sorting, grouping, and aggregation, but will not appear in the + # GraphQL output type and its data will be excluded from `_source` in the datastore for storage savings. + # + # @return [Boolean] true if this field's data can be returned (default: true) + def returnable? + return true if returnable.nil? + returnable + end + # Defines an argument on the field. # # @note ElasticGraph takes care of defining arguments for all the query features it supports, so there is generally no need to use @@ -892,7 +905,10 @@ def to_filter_field(parent_type:, for_single_value: !type_for_derived_types.list parent_type: parent_type, name_in_index: name_in_index, type_for_derived_types: nil, - resolver: nil + resolver: nil, + # Filter fields should always appear in their parent input type's SDL regardless + # of the source field's returnability. 
+ returnable: true ) schema_def_state.factory.new_field(**params).tap do |f| diff --git a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb index eb45743ac..c77e61917 100644 --- a/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +++ b/elasticgraph-schema_definition/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb @@ -137,6 +137,9 @@ def name # ElasticGraph will infer field sortability based on the field's GraphQL type and mapping type. # @option options [Boolean] highlightable force-enables or disables the ability to request search highlights for this field. When # not provided, ElasticGraph will infer field highlightable based on the field's mapping type. + # @option options [Boolean] returnable when set to `false`, the field will not appear in the GraphQL output type and its data + # will be excluded from `_source` in the datastore for storage savings. The field will still be available for filtering, + # sorting, grouping, and aggregation. Defaults to `true`. # @yield [Field] the field for further customization # @return [void] # @@ -531,6 +534,7 @@ def index_field_runtime_metadata_tuples( def fields_sdl(&arg_selector) graphql_fields_by_name.values + .select(&:returnable?) 
.map { |f| f.to_sdl(&arg_selector) } .flat_map { |sdl| sdl.split("\n") } .join("\n ") diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb index c4b752659..c5e094db4 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/datastore_config/index_mappings/miscellaneous_spec.rb @@ -117,6 +117,101 @@ module SchemaDefinition }) end + it "adds `name_in_index` to `_source.excludes` for `returnable: false` fields" do + mapping = index_mapping_for "my_type" do |s| + s.object_type "MyType" do |t| + t.field "id", "ID" + t.field "name", "String" + t.field "internal_code_gql", "String", name_in_index: "internal_code", returnable: false + t.index "my_type" + end + end + + expect(mapping.dig("_source", "excludes")).to contain_exactly("internal_code") + # The field should still appear in properties (it's indexed, just not in _source) + expect(mapping.dig("properties", "internal_code")).to eq({"type" => "keyword"}) + end + + it "adds `.*` to `_source.excludes` for `returnable: false` object fields" do + mapping = index_mapping_for "my_type" do |s| + s.object_type "InternalMetadata" do |t| + t.field "internal_code", "String" + end + + s.object_type "MyType" do |t| + t.field "id", "ID" + t.field "internal_metadata", "InternalMetadata", returnable: false + t.index "my_type" + end + end + + expect(mapping).to include("_source" => {"excludes" => ["internal_metadata.*"]}) + expect(mapping.dig("properties", "internal_metadata", "properties", "internal_code")).to eq({"type" => "keyword"}) + end + + it "adds `returnable: false` indexing-only fields to `_source.excludes` but not `graphql_only` fields" do + mapping = 
index_mapping_for "my_type" do |s| + s.object_type "MyType" do |t| + t.field "id", "ID" + t.field "name", "String" + t.field "legacy_name", "String", graphql_only: true, name_in_index: "name", returnable: false + t.field "internal_code", "String", indexing_only: true, returnable: false + t.index "my_type" + end + end + + expect(mapping.dig("_source", "excludes")).to contain_exactly("internal_code") + expect(mapping.fetch("properties")).to include( + "name" => {"type" => "keyword"}, + "internal_code" => {"type" => "keyword"} + ) + expect(mapping.fetch("properties")).not_to include("legacy_name") + end + + it "adds full indexed paths to `_source.excludes` for `returnable: false` fields under nested mappings" do + mapping = index_mapping_for "my_type" do |s| + s.object_type "Parent" do |t| + t.field "child", "String", name_in_index: "child_in_index", returnable: false + end + + s.object_type "Grandparent" do |t| + t.field "parent", "Parent!", name_in_index: "parent_in_index" + end + + s.object_type "MyType" do |t| + t.field "id", "ID!" 
+ t.field "grandparents", "[Grandparent!]!", name_in_index: "grandparents_in_index" do |f| + f.mapping type: "nested" + end + t.index "my_type" + end + end + + expect(mapping.dig("_source", "excludes")).to contain_exactly("grandparents_in_index.parent_in_index.child_in_index") + expect(mapping.dig("properties", "grandparents_in_index")).to include( + "type" => "nested", + "properties" => { + "parent_in_index" => { + "properties" => { + "child_in_index" => {"type" => "keyword"} + } + } + } + ) + end + + it "does not add `_source` config when all fields are returnable" do + mapping = index_mapping_for "my_type" do |s| + s.object_type "MyType" do |t| + t.field "id", "ID" + t.field "name", "String" + t.index "my_type" + end + end + + expect(mapping).not_to have_key("_source") + end + it "keeps `source_from` fields in the mapping so that indexed documents support the field even though it comes from an alternate source" do mapping = index_mapping_for "components" do |s| s.object_type "Widget" do |t| diff --git a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb index d986b458e..83ca0f54e 100644 --- a/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb +++ b/elasticgraph-schema_definition/spec/unit/elastic_graph/schema_definition/graphql_schema/object_type_spec.rb @@ -643,6 +643,39 @@ module SchemaDefinition end end + it "excludes `returnable: false` fields from the output type but keeps them in filter, sort, grouped_by, aggregated_values, and highlights types" do + result = define_schema do |api| + api.object_type "Widget" do |t| + t.field "id", "ID" + t.field "name", "String" + t.field "internal_code", "String", returnable: false + t.index "widgets" + end + end + + expect(type_def_from(result, "Widget")).to eq(<<~EOS.strip) + type Widget { + id: ID + 
name: String + } + EOS + + # returnable: false field should still appear in filter input + expect(filter_type_from(result, "Widget")).to include("internal_code: StringFilterInput") + + # returnable: false field should still appear in sort order + expect(sort_order_type_from(result, "Widget")).to include("internal_code_ASC") + + # returnable: false field should still appear in grouped_by + expect(grouped_by_type_from(result, "Widget")).to include("internal_code: String") + + # returnable: false field should still appear in aggregated_values + expect(aggregated_values_type_from(result, "Widget")).to include("internal_code: NonNumericAggregatedValues") + + # returnable: false field should still appear in highlights + expect(highlights_type_from(result, "Widget")).to include("internal_code: [String!]!") + end + def object_type(name, *args, pre_def: nil, include_docs: true, &block) result = define_schema do |api| pre_def&.call(api)