Skip to content

Commit 08fa741

Browse files
committed
Add JSON ingestion indexing extensions
1 parent 2561384 commit 08fa741

13 files changed

Lines changed: 592 additions & 0 deletions

File tree

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/constants"
10+
require "elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata"
11+
require "elastic_graph/support/hash_util"
12+
13+
module ElasticGraph
14+
module JSONIngestion
15+
module SchemaDefinition
16+
# Namespace for JSON-schema-aware indexing components.
17+
module Indexing
18+
# Extends indexing fields with JSON schema generation behavior.
19+
#
20+
# @api private
21+
module FieldExtension
22+
# JSON schema overrides that automatically apply to specific mapping types so that the JSON schema
# validation will reject values which cannot be indexed into fields of a specific mapping type.
#
# The table and each per-type hash are frozen: the very same hash objects are looked up for every
# field of a given mapping type, so an accidental in-place mutation would otherwise leak into the
# schema of every other field.
#
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/number.html Elasticsearch numeric field type documentation
# @note We don't handle `integer` here because it's the default numeric type (handled by our definition of the `Int` scalar type).
# @note Likewise, we don't handle `long` here because a custom scalar type must be used for that since GraphQL's `Int` type can't handle long values.
JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE = {
  "byte" => {"minimum" => -(2**7), "maximum" => (2**7) - 1}.freeze,
  "short" => {"minimum" => -(2**15), "maximum" => (2**15) - 1}.freeze,
  "keyword" => {"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH}.freeze,
  "text" => {"maxLength" => DEFAULT_MAX_TEXT_LENGTH}.freeze
}.freeze
34+
35+
# Reader for the customizations stored by `with_json_schema`.
#
# @return [Hash<Symbol, Object>] user-specified JSON schema customizations for this field
def json_schema_customizations
  @json_schema_customizations
end
39+
40+
# Records the JSON schema state for this field and returns the field itself so calls can be chained.
#
# @private
# @param json_schema_layers [Array<Symbol>] wrapping layers for the field (`process_layer` handles `:nullable` and `:array`)
# @param json_schema_customizations [Hash<Symbol, Object>] user-specified JSON schema customizations
# @return [self]
def with_json_schema(json_schema_layers:, json_schema_customizations:)
  @json_schema_layers = json_schema_layers
  @json_schema_customizations = json_schema_customizations

  # `json_schema` memoizes its result, and that result is derived from the two values above.
  # Drop any previously computed schema so it can't be served stale after reconfiguration.
  @json_schema = nil

  self
end
46+
47+
# Builds (and memoizes) the JSON schema definition for this field.
#
# The innermost schema is wrapped once per layer, then the outer customizations and the
# field's description (when there is one) are merged on top.
#
# @return [Hash<String, Object>] the JSON schema hash
def json_schema
  @json_schema ||= begin
    # Resolve layers from innermost to outermost wrappings.
    wrapped = json_schema_layers.reverse.inject(inner_json_schema) do |schema, layer|
      process_layer(layer, schema)
    end

    with_outer_customizations = wrapped.merge(outer_json_schema_customizations)
    # `compact` drops the entry entirely when there is no doc comment.
    described = with_outer_customizations.merge({"description" => doc_comment}.compact)

    Support::HashUtil.stringify_keys(described)
  end
end
59+
60+
# Packages this field's type name and `name_in_index` into a metadata object.
#
# @return [JSONSchemaFieldMetadata] metadata about this field for inclusion in the JSON schema
def json_schema_metadata
  field_type_name = type.name
  JSONSchemaFieldMetadata.new(type: field_type_name, name_in_index: name_in_index)
end
64+
65+
# @return [Boolean] true when `:nullable` is one of this field's JSON schema layers
def nullable?
  json_schema_layers.include?(:nullable)
end
68+
69+
private
70+
71+
# Reader for the layers stored by `with_json_schema`.
#
# @return [Array<Symbol>] the JSON schema layers recorded for this field
def json_schema_layers
  @json_schema_layers
end
74+
75+
# Builds the innermost JSON schema for this field: a `$ref` to the field's unwrapped type,
# combined with whatever customizations apply at that level.
#
# @return [Hash<String, Object>] a bare `$ref` schema, or an `allOf` of the `$ref` and the customizations
def inner_json_schema
  # User customizations only belong here when they are not hoisted to the outside
  # (see `user_specified_json_schema_customizations_go_on_outside?`).
  from_user =
    if user_specified_json_schema_customizations_go_on_outside?
      {} # : ::Hash[::String, untyped]
    else
      Support::HashUtil.stringify_keys(json_schema_customizations)
    end

  # Mapping-type overrides go first so a user-specified value for the same keyword wins the merge.
  from_mapping = JSON_SCHEMA_OVERRIDES_BY_MAPPING_TYPE[mapping["type"]] || {}
  merged = from_mapping.merge(from_user)

  # @type var field_type: _JSONFieldType
  field_type = _ = indexing_field_type
  customizations = field_type.format_field_json_schema_customizations(merged)

  type_ref = {"$ref" => "#/$defs/#{type.unwrapped_name}"}

  if customizations.empty?
    type_ref
  else
    # Combine the customizations with the type ref under an "allOf" subschema:
    # all of these properties must hold true for the type to be valid.
    #
    # Simply putting the customizations next to the `$ref` at the same level would not work,
    # because other subschema properties are ignored when they are in the same object as a `$ref`:
    # https://github.com/json-schema-org/JSON-Schema-Test-Suite/blob/2.0.0/tests/draft7/ref.json#L165-L168
    {"allOf" => [type_ref, customizations]}
  end
end
101+
102+
# Customizations to merge onto the outermost schema.
#
# @return [Hash<String, Object>] the string-keyed user customizations when they go on the outside, otherwise an empty hash
def outer_json_schema_customizations
  if user_specified_json_schema_customizations_go_on_outside?
    Support::HashUtil.stringify_keys(json_schema_customizations)
  else
    {}
  end
end
106+
107+
# Decides where the user-specified JSON schema customizations are placed.
#
# Normally they go on the inside. For an array field they go on the outside instead,
# because then they apply to the array itself rather than to the items in the array.
#
# @return [Boolean] true when `:array` is one of this field's JSON schema layers
def user_specified_json_schema_customizations_go_on_outside?
  json_schema_layers.include?(:array)
end
114+
115+
# Wraps `schema` according to a single JSON schema layer.
#
# @param layer [Symbol] the layer to apply: `:nullable` or `:array`
# @param schema [Hash<String, Object>] the schema built so far (from the inner layers)
# @return [Hash<String, Object>] the wrapped schema
# @raise [ArgumentError] if `layer` is not a supported layer
def process_layer(layer, schema)
  case layer
  when :nullable
    # Here we use "anyOf" to ensure that JSON can either match the schema OR null.
    #
    # (Using "oneOf" would mean that if we had a schema that also allowed null,
    # null would never be allowed, since "oneOf" must match exactly one subschema).
    {"anyOf" => [schema, {"type" => "null"}]}
  when :array
    {"type" => "array", "items" => schema}
  else
    # Without this branch an unrecognized layer would evaluate to `nil`, which would either be
    # embedded in the generated schema or blow up later with an unrelated NoMethodError.
    raise ArgumentError, "Unsupported JSON schema layer: #{layer.inspect}"
  end
end
132+
end
133+
end
134+
end
135+
end
136+
end
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/json_ingestion/schema_definition/indexing/field"
10+
11+
module ElasticGraph
12+
module JSONIngestion
13+
module SchemaDefinition
14+
module Indexing
15+
# Extends indexing field references with JSON schema state needed when resolving fields.
16+
#
17+
# @api private
18+
module FieldReferenceExtension
19+
# Stores the JSON schema state that `resolve` later hands to the resolved field.
#
# @param json_schema_layers [Object] the JSON schema layers to pass along
# @param json_schema_customizations [Object] the JSON schema customizations to pass along
# @return [self] this reference, so the call can be chained
def with_json_schema(json_schema_layers:, json_schema_customizations:)
  @json_schema_layers = json_schema_layers
  @json_schema_customizations = json_schema_customizations

  self
end
24+
25+
# Two references are equal when both carry this extension and agree on both the
# field reference attributes and the JSON schema state.
#
# @param other [Object] the object to compare against
# @return [Boolean]
def ==(other)
  return false unless other.is_a?(FieldReferenceExtension)

  field_reference_comparison_metadata == other.field_reference_comparison_metadata &&
    json_schema_comparison_metadata == other.json_schema_comparison_metadata
end
30+
31+
# Resolves the reference via `super` and, when that yields a field, extends the field with
# `FieldExtension` and passes along the JSON schema state stored on this reference.
#
# @return [Object, nil] the extended field, or nil when `super` does not produce a field
def resolve
  resolved_field = super
  return nil unless resolved_field

  json_schema_field = resolved_field.extend(Indexing::FieldExtension) # : ElasticGraph::SchemaDefinition::Indexing::Field & FieldExtension

  json_schema_field.with_json_schema(
    json_schema_layers: @json_schema_layers,
    json_schema_customizations: @json_schema_customizations
  )
end
40+
41+
# @return [Array] the JSON schema layers and customizations, as compared by `==`
def json_schema_comparison_metadata
  [@json_schema_layers, @json_schema_customizations]
end
44+
45+
# @return [Array] the field reference attributes that `==` compares, in a fixed order
def field_reference_comparison_metadata
  [name, name_in_index, type, mapping_options, accuracy_confidence, source, runtime_field_script, doc_comment]
end
57+
end
58+
end
59+
end
60+
end
61+
end
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
module ElasticGraph
10+
module JSONIngestion
11+
module SchemaDefinition
12+
module Indexing
13+
# Namespace for indexing-field-type extensions that contribute JSON schema generation behavior.
14+
module FieldType
15+
# Extends enum indexing field types with JSON schema serialization.
16+
#
17+
# @private
18+
module EnumExtension
19+
# Always empty for this extension.
#
# @return [Hash<String, ::Object>] additional ElasticGraph metadata to put in the JSON schema for this enum type.
def json_schema_field_metadata_by_field_name
  {}
end
23+
24+
# Reduces field-level customizations down to the only one that is kept for enum fields: `enum`.
#
# @param customizations [Hash<String, ::Object>] JSON schema customizations
# @return [Hash<String, ::Object>] formatted customizations.
def format_field_json_schema_customizations(customizations)
  # An enum type already limits a field to a small set of allowed values, so other customizations
  # are unnecessary (for example the `maxLength` that EG automatically applies to fields indexed as
  # a `keyword`--we don't allow enum values to exceed that length, anyway).
  #
  # Dropping them is also desirable: when a publisher generates code from the JSON schema with a
  # library such as https://github.com/pwall567/json-kotlin-schema-codegen, we found that extra field
  # customizations get in the way of generating code the way we want (the type of the enum changes
  # because the JSON schema changes from a direct `$ref` to being wrapped in an `allOf`).
  #
  # `enum` customizations are still applied, which lets a user "narrow" the set of allowed values for
  # a field. For example, a `Currency` enum could contain every currency, and a user may want to
  # restrict a specific `currency` field to a subset of currencies (e.g. to just USD, CAD, and EUR).
  customizations.slice("enum")
end
42+
43+
# Describes the enum as a string restricted to `enum_value_names`.
#
# @return [Hash<String, ::Object>] the JSON schema for this enum type.
def to_json_schema
  allowed_values = enum_value_names
  {"type" => "string", "enum" => allowed_values}
end
47+
end
48+
end
49+
end
50+
end
51+
end
52+
end
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/support/hash_util"
10+
11+
module ElasticGraph
12+
module JSONIngestion
13+
module SchemaDefinition
14+
module Indexing
15+
module FieldType
16+
# Extends object/interface indexing field types with JSON schema serialization.
17+
#
18+
# @private
19+
module ObjectExtension
20+
# Reader for the options set via `with_json_schema_options`, lazily defaulting to an empty hash.
#
# @return [Hash] the JSON schema options for this type
def json_schema_options
  @json_schema_options ||= {}
end
23+
24+
# Sets the JSON schema options for this type and returns the type itself so calls can be chained.
#
# @param json_schema_options [Hash] the JSON schema options to use
# @return [self]
def with_json_schema_options(json_schema_options)
  @json_schema_options = json_schema_options

  # `to_json_schema` memoizes its result and that result depends on the options.
  # Drop any previously computed schema so the new options are not silently ignored.
  @to_json_schema = nil

  self
end
28+
29+
# Collects the JSON schema metadata of every subfield.
#
# @return [Hash<String, JSONSchemaFieldMetadata>] field metadata keyed by field name
def json_schema_field_metadata_by_field_name
  subfields.each_with_object({}) do |field, metadata_by_name|
    metadata_by_name[field.name] = field.json_schema_metadata
  end
end
33+
34+
# Returns the customizations as given; nothing is filtered or rewritten here.
#
# @param customizations [Hash<String, Object>] the customizations to format
# @return [Hash<String, Object>] the formatted customizations
def format_field_json_schema_customizations(customizations)
  customizations
end
39+
40+
# Builds (and memoizes) the JSON schema for this type.
#
# When `json_schema_options` is non-empty, those options (string-keyed) are the schema.
# Otherwise an object schema is generated from the subfields.
#
# @return [Hash<String, Object>] the JSON schema definition for this object type
def to_json_schema
  @to_json_schema ||= begin
    explicit_options = json_schema_options

    if explicit_options.empty?
      # Fields that are `sourced_from` an alternate type must not be included in this type's JSON schema,
      # since events of this type won't include them.
      sourced_elsewhere, candidates = subfields.partition(&:source)
      validate_sourced_fields_have_no_json_schema_overrides(sourced_elsewhere)

      schema_fields = candidates.reject(&:runtime_field_script)
      required_fields =
        if schema_def_state.allow_omitted_json_schema_fields
          schema_fields.reject(&:nullable?)
        else
          schema_fields
        end

      properties = schema_fields.to_h { |field| [field.name, field.json_schema] }
      properties = properties.merge(json_schema_typename_field)

      # Note: `__typename` is intentionally not included in the `required` list. If `__typename` is present
      # we want it validated (as we do by merging in `json_schema_typename_field`) but we only want
      # to require it in the context of a union type. The union's JSON schema requires the field.
      required_names = required_fields.map(&:name).freeze

      # A nil value here (and for a missing description) is removed by `compact` below.
      additional_properties = schema_def_state.allow_extra_json_schema_fields ? nil : false

      {
        "type" => "object",
        "properties" => properties,
        "required" => required_names,
        "additionalProperties" => additional_properties,
        "description" => doc_comment
      }.compact.freeze
    else
      Support::HashUtil.stringify_keys(explicit_options)
    end
  end
end
66+
67+
private
68+
69+
# Builds the `__typename` property, which we use for union types.
#
# The value must always be the name of the type, hence the "const".
#
# A "default" is included as well. It has no effect on validation; it helps tools
# like our Kotlin codegen so publishers don't have to set the property explicitly
# when creating events.
def json_schema_typename_field
  typename_schema = {"type" => "string", "const" => type_name, "default" => type_name}
  {"__typename" => typename_schema}
end
85+
86+
# Ensures that none of the given fields carry JSON schema customizations.
#
# @param other_source_subfields [Array] subfields that are `sourced_from` another type
# @return [nil] when no field has customizations
# @raise [Errors::SchemaError] naming (in sorted order) every field that has customizations
def validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields)
  offenders = other_source_subfields.reject { |field| field.json_schema_customizations.empty? }
  return if offenders.empty?

  quoted_names = offenders.map(&:name).sort.map { |field_name| "`#{field_name}`" }
  field_descriptions = quoted_names.join(", ")

  raise Errors::SchemaError,
    "`#{type_name}` has #{offenders.size} field(s) (#{field_descriptions}) that are `sourced_from` " \
    "another type and also have JSON schema customizations. Instead, put the JSON schema " \
    "customizations on the source type's field definitions."
end
96+
end
97+
end
98+
end
99+
end
100+
end
101+
end
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/support/hash_util"
10+
11+
module ElasticGraph
12+
module JSONIngestion
13+
module SchemaDefinition
14+
module Indexing
15+
module FieldType
16+
# Extends scalar indexing field types with JSON schema serialization.
17+
#
18+
# @private
19+
module ScalarExtension
20+
# Scalar types have no subfields, so there is never any per-field metadata.
#
# @return [Hash] an empty hash
def json_schema_field_metadata_by_field_name
  {}
end
24+
25+
# Returns the customizations as given; nothing is filtered or rewritten here.
#
# @param customizations [Hash<String, Object>] the customizations to format
# @return [Hash<String, Object>] the formatted customizations
def format_field_json_schema_customizations(customizations)
  customizations
end
30+
31+
# Uses the scalar type's own JSON schema options, with keys converted to strings.
#
# @return [Hash<String, Object>] the JSON schema definition for this scalar type
def to_json_schema
  scalar_options = scalar_type.json_schema_options
  Support::HashUtil.stringify_keys(scalar_options)
end
35+
end
36+
end
37+
end
38+
end
39+
end
40+
end

0 commit comments

Comments
 (0)