Skip to content

Commit 3d879aa

Browse files
committed
Add JSON ingestion indexing extensions
1 parent 2561384 commit 3d879aa

13 files changed

Lines changed: 721 additions & 0 deletions

File tree

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "delegate"
10+
require "elastic_graph/constants"
11+
require "elastic_graph/errors"
12+
require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata"
13+
require "elastic_graph/schema_definition/indexing/field"
14+
require "elastic_graph/support/hash_util"
15+
16+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      # Namespace for JSON-schema-aware indexing components.
      module Indexing
        # Wraps an indexing field with JSON schema generation behavior.
        #
        # Everything not defined here is delegated to the wrapped
        # `ElasticGraph::SchemaDefinition::Indexing::Field`.
        #
        # @api private
        class Field < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::Field)
          # @dynamic __getobj__

          # JSON schema overrides that automatically apply to specific mapping types so that the JSON schema
          # validation will reject values which cannot be indexed into fields of a specific mapping type.
          #
          # Deep-frozen so that this shared lookup table cannot be mutated by accident; callers
          # always `merge` the entries into a fresh hash rather than modifying them in place.
          #
          # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/number.html Elasticsearch numeric field type documentation
          # @note We don't handle `integer` here because it's the default numeric type (handled by our definition of the `Int` scalar type).
          # @note Likewise, we don't handle `long` here because a custom scalar type must be used for that since GraphQL's `Int` type can't handle long values.
          JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE = {
            "byte" => {"minimum" => -(2**7), "maximum" => (2**7) - 1}.freeze,
            "short" => {"minimum" => -(2**15), "maximum" => (2**15) - 1}.freeze,
            "keyword" => {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}.freeze,
            "text" => {"maxLength" => DEFAULT_MAX_TEXT_LENGTH}.freeze
          }.freeze

          # @param field [ElasticGraph::SchemaDefinition::Indexing::Field] the indexing field to wrap
          # @param json_schema_layers [Array<Symbol>] JSON schema wrapper layers from the field type reference
          # @param json_schema_customizations [Hash<Symbol, Object>] user-defined JSON schema customizations
          def initialize(field, json_schema_layers:, json_schema_customizations:)
            super(field)
            @json_schema_layers = json_schema_layers
            @json_schema_customizations = json_schema_customizations
          end

          # standard:disable Style/TrivialAccessors -- Steep does not recognize `attr_reader` on `DelegateClass` subclasses.
          # @return [Array<Symbol>] JSON schema wrapper layers from the field type reference
          def json_schema_layers
            @json_schema_layers
          end

          # @return [Hash<Symbol, Object>] user-specified JSON schema customizations for this field
          def json_schema_customizations
            @json_schema_customizations
          end
          # standard:enable Style/TrivialAccessors

          # Returns the JSON schema definition for this field.
          #
          # The result is memoized after the first call.
          #
          # @return [Hash<String, Object>] the JSON schema hash
          def json_schema
            @json_schema ||=
              json_schema_layers
                .reverse # resolve layers from innermost to outermost wrappings
                .reduce(inner_json_schema) { |acc, layer| process_layer(layer, acc) }
                .merge(outer_json_schema_customizations)
                .merge({"description" => doc_comment}.compact)
                .then { |hash| Support::HashUtil.stringify_keys(hash) }
          end

          # @return [JSONSchemaFieldMetadata] metadata about this field for inclusion in the JSON schema
          def json_schema_metadata
            JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index)
          end

          # @return [Boolean] true when the JSON schema layers of this field include `:nullable`
          def nullable?
            json_schema_layers.include?(:nullable)
          end

          # Compares fields, including JSON schema metadata tracked by this wrapper.
          #
          # When `other` is not a wrapper of this class, the comparison falls back to the
          # delegator's `==`, which compares the wrapped field against `other`.
          #
          # @param other [Object] the object to compare against
          # @return [Boolean] true when the field and JSON schema metadata match
          def ==(other)
            case other
            when Field
              __getobj__ == other.__getobj__ &&
                json_schema_layers == other.json_schema_layers &&
                json_schema_customizations == other.json_schema_customizations
            else
              super
            end
          end

          # Hash-key equality. Only another wrapper of this class can be `eql?`.
          #
          # `#hash` mixes in the JSON schema layers and customizations, so an unwrapped field
          # can never produce the same hash code on purpose. Reporting such an object as `eql?`
          # (which the looser `==` fallback would do) would break the rule that `eql?` objects
          # must have equal hash codes.
          #
          # @param other [Object] the object to compare against
          # @return [Boolean] true when `other` is a wrapper of this class and is `==` to this one
          def eql?(other)
            case other
            when Field then self == other
            else false
            end
          end

          # Returns a hash code based on the wrapped field and JSON schema metadata.
          #
          # @return [Integer] the hash code
          def hash
            [__getobj__, json_schema_layers, json_schema_customizations].hash
          end

          private

          # Builds the innermost schema: a `$ref` to the field's unwrapped type, combined with any
          # customizations that apply to the value itself (as opposed to an enclosing array).
          #
          # @return [Hash<String, Object>] the innermost JSON schema
          def inner_json_schema
            user_specified_customizations =
              if user_specified_json_schema_customizations_go_on_outside?
                {} # : ::Hash[::String, untyped]
              else
                Support::HashUtil.stringify_keys(json_schema_customizations)
              end

            # User-specified customizations take precedence over the mapping-type defaults.
            customizations_from_mapping = JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE[mapping["type"]] || {}
            customizations = customizations_from_mapping.merge(user_specified_customizations)
            # @type var field_type: _JSONFieldType
            field_type = _ = indexing_field_type
            customizations = field_type.format_field_json_schema_customizations(customizations)

            ref = {"$ref" => "#/$defs/#{type.unwrapped_name}"}
            return ref if customizations.empty?

            # Combine any customizations with the type ref under an "allOf" subschema:
            # all of these properties must hold true for the type to be valid.
            #
            # Note that if we simply combine the customizations with the `$ref`
            # at the same level, it will not work, because other subschema
            # properties are ignored when they are in the same object as a `$ref`:
            # https://github.com/json-schema-org/JSON-Schema-Test-Suite/blob/2.0.0/tests/draft7/ref.json#L165-L168
            {"allOf" => [ref, customizations]}
          end

          # @return [Hash<String, Object>] the user-specified customizations when they belong on the
          #   outermost schema, or an empty hash otherwise
          def outer_json_schema_customizations
            return {} unless user_specified_json_schema_customizations_go_on_outside?
            Support::HashUtil.stringify_keys(json_schema_customizations)
          end

          # Indicates if the user-specified JSON schema customizations should go on the inside
          # (where they normally go) or on the outside. They only go on the outside when it's
          # an array field, because then they apply to the array itself instead of the items in the
          # array.
          #
          # @return [Boolean] true when the layers include `:array`
          def user_specified_json_schema_customizations_go_on_outside?
            json_schema_layers.include?(:array)
          end

          # Wraps `schema` according to a single JSON schema layer.
          #
          # @param layer [Symbol] the layer to apply (`:nullable` or `:array`)
          # @param schema [Hash<String, Object>] the schema built so far
          # @return [Hash<String, Object>] the wrapped schema
          # @raise [Errors::SchemaError] when `layer` is not a known layer
          def process_layer(layer, schema)
            case layer
            when :nullable
              # Here we use "anyOf" to ensure that JSON can either match the schema OR null.
              #
              # (Using "oneOf" would mean that if we had a schema that also allowed null,
              # null would never be allowed, since "oneOf" must match exactly one subschema).
              {
                "anyOf" => [
                  schema,
                  {"type" => "null"}
                ]
              }
            when :array
              {"type" => "array", "items" => schema}
            else
              raise Errors::SchemaError, "Unknown JSON schema layer: #{layer.inspect}"
            end
          end
        end
      end
    end
  end
end
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "delegate"
10+
require "elastic_graph/json_ingestion/schema_definition/indexing/field"
11+
require "elastic_graph/schema_definition/indexing/field_reference"
12+
13+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # Wraps an indexing field reference with JSON schema state needed when resolving fields.
        #
        # Everything not defined here is delegated to the wrapped
        # `ElasticGraph::SchemaDefinition::Indexing::FieldReference`.
        #
        # @api private
        class FieldReference < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::FieldReference)
          # @dynamic __getobj__

          # @param field_reference [ElasticGraph::SchemaDefinition::Indexing::FieldReference] the field reference to wrap
          # @param json_schema_layers [Array<Symbol>] JSON schema wrapper layers from the field type reference
          # @param json_schema_customizations [Hash<Symbol, Object>] user-defined JSON schema customizations
          def initialize(field_reference, json_schema_layers:, json_schema_customizations:)
            super(field_reference)
            @json_schema_layers = json_schema_layers
            @json_schema_customizations = json_schema_customizations
          end

          # standard:disable Style/TrivialAccessors -- Steep does not recognize `attr_reader` on `DelegateClass` subclasses.
          # @return [Array<Symbol>] JSON schema wrapper layers from the field type reference
          def json_schema_layers
            @json_schema_layers
          end

          # @return [Hash<Symbol, Object>] user-defined JSON schema customizations
          def json_schema_customizations
            @json_schema_customizations
          end
          # standard:enable Style/TrivialAccessors

          # Resolves this reference to a JSON-schema-aware indexing field.
          #
          # The JSON schema layers and customizations carried by this reference are handed on
          # to the resulting field wrapper.
          #
          # @return [Field, nil] the resolved field, or nil when the wrapped reference resolves to nothing
          def resolve
            return nil unless (resolved_field = __getobj__.resolve)

            Field.new(
              resolved_field,
              json_schema_layers: json_schema_layers,
              json_schema_customizations: json_schema_customizations
            )
          end

          # Compares field references, including JSON schema metadata tracked by this wrapper.
          #
          # When `other` is not a wrapper of this class, the comparison falls back to the
          # delegator's `==`, which compares the wrapped field reference against `other`.
          #
          # @param other [Object] the object to compare against
          # @return [Boolean] true when the field reference and JSON schema metadata match
          def ==(other)
            case other
            when FieldReference
              __getobj__ == other.__getobj__ &&
                json_schema_layers == other.json_schema_layers &&
                json_schema_customizations == other.json_schema_customizations
            else
              super
            end
          end

          # Hash-key equality. Only another wrapper of this class can be `eql?`.
          #
          # `#hash` mixes in the JSON schema layers and customizations, so an unwrapped field
          # reference can never produce the same hash code on purpose. Reporting such an object
          # as `eql?` (which the looser `==` fallback would do) would break the rule that `eql?`
          # objects must have equal hash codes.
          #
          # @param other [Object] the object to compare against
          # @return [Boolean] true when `other` is a wrapper of this class and is `==` to this one
          def eql?(other)
            case other
            when FieldReference then self == other
            else false
            end
          end

          # Returns a hash code based on the wrapped field reference and JSON schema metadata.
          #
          # @return [Integer] the hash code
          def hash
            [__getobj__, json_schema_layers, json_schema_customizations].hash
          end
        end
      end
    end
  end
end
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "delegate"
10+
require "elastic_graph/schema_definition/indexing/field_type/enum"
11+
12+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # Namespace for JSON-schema-aware indexing field types.
        module FieldType
          # Decorates an enum indexing field type so that it can be serialized to JSON schema.
          #
          # @private
          class Enum < DelegateClass(ElasticGraph::SchemaDefinition::Indexing::FieldType::Enum)
            # Enum types contribute no per-field ElasticGraph metadata to the JSON schema.
            #
            # @return [Hash<String, ::Object>] always an empty hash
            def json_schema_field_metadata_by_field_name
              {}
            end

            # Reduces the field-level JSON schema customizations to the ones that make sense on an enum field.
            #
            # An enum type already limits a field to a small, fixed set of values, so other customizations
            # add nothing (for example, the `maxLength` that EG automatically applies to `keyword` fields is
            # redundant because enum values are not allowed to exceed that length anyway).
            #
            # Dropping them is also helpful for publishers who generate code from the JSON schema with a
            # library such as https://github.com/pwall567/json-kotlin-schema-codegen: extra customizations
            # turn the schema from a direct `$ref` into an `allOf` wrapper, which changes the type of the
            # enum in the generated code.
            #
            # `enum` customizations are kept because they let a user narrow the allowed values of a single
            # field. For example, a `Currency` enum could list every currency while a particular `currency`
            # field is restricted to USD, CAD, and EUR.
            #
            # @param customizations [Hash<String, ::Object>] JSON schema customizations
            # @return [Hash<String, ::Object>] only the `"enum"` entry of `customizations`, if present
            def format_field_json_schema_customizations(customizations)
              customizations.slice("enum")
            end

            # @return [Hash<String, ::Object>] the JSON schema for this enum type: a string limited to the enum value names
            def to_json_schema
              allowed_values = enum_value_names
              {"type" => "string", "enum" => allowed_values}
            end
          end
        end
      end
    end
  end
end

0 commit comments

Comments
 (0)