Skip to content

Commit 4f85649

Browse files
committed
Wire JSON ingestion schema extension modules
1 parent 66a50ff commit 4f85649

31 files changed

Lines changed: 1412 additions & 7 deletions

File tree

config/site/support/doctest_helper.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# frozen_string_literal: true
88

99
require "elastic_graph/apollo/schema_definition/api_extension"
10+
require "elastic_graph/json_ingestion/schema_definition/api_extension"
1011
require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names"
1112
require "elastic_graph/schema_definition/api"
1213
require "elastic_graph/schema_definition/schema_artifact_manager"
@@ -90,6 +91,21 @@ module ElasticGraph
9091
end
9192
end
9293

94+
# Hook registered for doctests under `ElasticGraph::JSONIngestion::SchemaDefinition`: builds an API
# instance with the JSON ingestion extension module loaded, sets its JSON schema version to 1, and
# stores the instance in the `:ElasticGraph_SchemaDefinition_API_instance` thread-local slot.
doctest.before "ElasticGraph::JSONIngestion::SchemaDefinition" do
  schema_element_names = SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: :camelCase, overrides: {})
  extension_modules = [JSONIngestion::SchemaDefinition::APIExtension]

  @api = SchemaDefinition::API.new(schema_element_names, true, extension_modules: extension_modules)
  @api.json_schema_version 1

  ::Thread.current[:ElasticGraph_SchemaDefinition_API_instance] = @api
end

# Matching teardown hook: resets the thread-local slot that the `before` hook above populated.
doctest.after "ElasticGraph::JSONIngestion::SchemaDefinition" do
  ::Thread.current[:ElasticGraph_SchemaDefinition_API_instance] = nil
end
108+
93109
doctest.before "ElasticGraph::SchemaDefinition::API#json_schema_version" do
94110
ElasticGraph.define_schema do |schema|
95111
# `schema.json_schema_version` raises an error when the version is set more than once.
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/constants"
10+
require "elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones"
11+
require "elastic_graph/json_ingestion/schema_definition/factory_extension"
12+
require "elastic_graph/json_ingestion/schema_definition/state_extension"
13+
14+
module ElasticGraph
15+
module JSONIngestion
16+
# Namespace for all JSON Schema schema definition support.
17+
#
18+
# {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module.
19+
module SchemaDefinition
20+
# Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance
21+
# to add JSON Schema ingestion serializer capabilities.
22+
module APIExtension
23+
# Default JSON schema options applied to ElasticGraph's built-in scalar types when this extension
24+
# is loaded. Keyed by the un-overridden type name; the lookup at runtime maps each key through
25+
# `type_name_overrides` so renamed built-ins still receive the right options.
26+
# Every per-type options hash is frozen in addition to the outer hash (and the nested arrays), so
# this shared constant cannot be mutated through one of its values. Consumers in this file only
# read the options (they are splatted via `**options`), which works with frozen hashes.
BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME = {
  "Boolean" => {type: "boolean"},
  "Float" => {type: "number"},
  "ID" => {type: "string"},
  "Int" => {type: "integer", minimum: INT_MIN, maximum: INT_MAX},
  "String" => {type: "string"},
  "Cursor" => {type: "string"},
  "Date" => {type: "string", format: "date"},
  "DateTime" => {type: "string", format: "date-time"},
  "LocalTime" => {type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN},
  "TimeZone" => {type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a.freeze},
  "Untyped" => {type: ["array", "boolean", "integer", "number", "object", "string"].freeze},
  "JsonSafeLong" => {type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX},
  "LongString" => {type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX}
}.each_value(&:freeze).freeze
41+
42+
# Wires up the JSON ingestion extensions when this module is extended onto an API instance.
43+
#
44+
# @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend
45+
# @return [void]
46+
# @api private
47+
def self.extended(api)
  api.state.extend StateExtension
  api.factory.extend FactoryExtension

  # Build a lookup from final (post-`type_name_overrides`) names to JSON schema options. We can't
  # key directly on `type.name` because users may have overridden the names of built-in scalars
  # (e.g. `Cursor` → `PreCursor`); the keys in `BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME` are
  # always the un-overridden names.
  options_by_final_name = BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME.to_h do |name, options|
    [api.state.type_ref(name).to_final_form.name, options]
  end

  # The final name of `GeoLocation` is resolved once here, alongside the scalar names above,
  # instead of being recomputed inside the callback for every built-in type that isn't a scalar.
  geo_location_final_name = api.state.type_ref("GeoLocation").to_final_form.name

  api.on_built_in_types do |type|
    if (options = options_by_final_name[type.name])
      scalar_type = type # : ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension
      scalar_type.json_schema(**options)
    elsif type.name == geo_location_final_name
      # @type var geo_location_type: ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::ObjectInterfaceExtension
      geo_location_type = _ = type
      names = api.state.schema_elements

      # We use `nullable: false` because `GeoLocation` is indexed as a single `geo_point` field,
      # and therefore can't support a `latitude` without a `longitude` or vice-versa.
      latitude = geo_location_type.graphql_fields_by_name.fetch(names.latitude) # : ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension
      longitude = geo_location_type.graphql_fields_by_name.fetch(names.longitude) # : ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension
      latitude.json_schema minimum: -90, maximum: 90, nullable: false
      longitude.json_schema minimum: -180, maximum: 180, nullable: false
    end
  end
end
77+
78+
# Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema
79+
# artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique
80+
# version number. The publisher will then include this version number in published events to identify the version of the schema it
81+
# was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync.
82+
#
83+
# @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly
84+
# have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this
85+
# on every JSON schema change with {#enforce_json_schema_version}.
86+
#
87+
# @param version [Integer] current version number of the JSON schema artifact
88+
# @return [void]
89+
# @see #enforce_json_schema_version
90+
def json_schema_version(version)
  # Only Integers greater than or equal to 1 are accepted.
  unless version.is_a?(Integer) && version >= 1
    raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}"
  end

  ingestion_state = json_ingestion_state
  previously_set_version = ingestion_state.json_schema_version

  # A second call is an error; the message reports the version that was already recorded.
  if previously_set_version
    raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{previously_set_version}"
  end

  ingestion_state.json_schema_version = version

  # Record where this method was called from: the first frame above this one (`nil` if unavailable).
  setter_locations = caller_locations(1, 1).to_a
  ingestion_state.json_schema_version_setter_location = setter_locations.first
  nil
end
105+
106+
# Configures whether JSON schema artifact dumping enforces the requirement that the JSON schema version is incremented every time
107+
# dumping the JSON schemas results in a changed artifact. Defaults to `true`.
108+
#
109+
# @note Generally speaking, you will want this to be `true` for any ElasticGraph application that is in
110+
# production as the versioning of JSON schemas is what supports safe schema evolution as it allows
111+
# ElasticGraph to identify which version of the JSON schema the publishing system was operating on
112+
# when it published an event.
113+
#
114+
# It can be useful to set it to `false` before your application is in production, as you do not want
115+
# to be forced to bump the version after every single schema change while you are building an initial
116+
# prototype.
117+
#
118+
# @param value [Boolean] whether to require `json_schema_version` to be incremented on changes that impact `json_schemas.yaml`
119+
# @return [void]
120+
# @see #json_schema_version
121+
#
122+
# @example Disable enforcement during initial prototyping
123+
# ElasticGraph.define_schema do |schema|
124+
# # TODO: remove this once we're past the prototyping stage
125+
# schema.enforce_json_schema_version false
126+
# end
127+
def enforce_json_schema_version(value)
  # Only the literal booleans are accepted; any other value (including `nil`) is rejected.
  if value != true && value != false
    raise Errors::SchemaError, "`enforce_json_schema_version` must be a boolean. Specified value: #{value.inspect}"
  end

  ingestion_state = json_ingestion_state
  ingestion_state.enforce_json_schema_version = value
  nil
end
135+
136+
# Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the
137+
# publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to
138+
# configure this behavior.
139+
#
140+
# @param allow_omitted_fields [Boolean] Whether nullable fields can be omitted from indexing events.
141+
# @param allow_extra_fields [Boolean] Whether extra fields (e.g. beyond fields defined in the schema) can be included in indexing events.
142+
# @return [void]
143+
#
144+
# @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled
145+
# field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher
146+
# accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId`
147+
# is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of
148+
# these to `true` (or none).
149+
def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true)
  booleans = [true, false]

  # Validate both options before anything is written to the state.
  raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" unless booleans.include?(allow_omitted_fields)
  raise Errors::SchemaError, "`allow_extra_fields` must be true or false" unless booleans.include?(allow_extra_fields)

  ingestion_state = json_ingestion_state
  ingestion_state.allow_omitted_json_schema_fields = allow_omitted_fields
  ingestion_state.allow_extra_json_schema_fields = allow_extra_fields
  nil
end
164+
165+
private
166+
167+
# Returns the API's `state` narrowed to include this gem's `StateExtension`. Centralizes
168+
# the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied
169+
# at runtime in `extended`.
170+
def json_ingestion_state
  # Same object as `state`; the trailing annotation is the Steep cast that narrows its type.
  narrowed_state = state # : ElasticGraph::SchemaDefinition::State & StateExtension
  narrowed_state
end
173+
end
174+
end
175+
end
176+
end
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum_extension"
10+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/object_extension"
11+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar_extension"
12+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/union_extension"
13+
require "elastic_graph/json_ingestion/schema_definition/indexing/index_extension"
14+
require "elastic_graph/json_ingestion/schema_definition/results_extension"
15+
require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension"
16+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension"
17+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension"
18+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/object_interface_extension"
19+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension"
20+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/type_reference_extension"
21+
22+
module ElasticGraph
23+
module JSONIngestion
24+
module SchemaDefinition
25+
# Extension module applied to `ElasticGraph::SchemaDefinition::Factory` to wire up
26+
# JSON Schema support on Results and SchemaArtifactManager instances.
27+
#
28+
# @api private
29+
module FactoryExtension
  # Creates an enum type extended with `SchemaElements::EnumTypeExtension` before the caller's block runs.
  #
  # @private
  def new_enum_type(name)
    super(name) do |enum_type|
      enum_type.extend(SchemaElements::EnumTypeExtension)
      yield enum_type if block_given?
    end
  end

  # Creates an enum indexing field type extended with `Indexing::FieldType::EnumExtension`.
  #
  # @private
  def new_enum_indexing_field_type(...)
    enum_field_type = super.extend(Indexing::FieldType::EnumExtension) # : ElasticGraph::SchemaDefinition::Indexing::FieldType::Enum & Indexing::FieldType::EnumExtension
    enum_field_type
  end

  # Creates a field extended with `SchemaElements::FieldExtension` before the caller's block runs.
  #
  # @private
  def new_field(**kwargs, &block)
    super(**kwargs) do |created_field|
      created_field.extend(SchemaElements::FieldExtension)
      block.call(created_field) if block
    end
  end

  # Creates an index extended with `Indexing::IndexExtension`, and calls `require_id_in_json_schema`
  # on it before the caller's block runs.
  #
  # @private
  def new_index(name, settings, type, &block)
    super(name, settings, type) do |created_index|
      created_index.extend(Indexing::IndexExtension)
      created_index.require_id_in_json_schema
      block.call(created_index) if block
    end
  end

  # Creates an interface type extended with `SchemaElements::ObjectInterfaceExtension`.
  #
  # @private
  def new_interface_type(name)
    super(name) do |interface_type|
      interface_type.extend(SchemaElements::ObjectInterfaceExtension)
      yield interface_type if block_given?
    end
  end

  # Creates an object type extended with `SchemaElements::ObjectInterfaceExtension`.
  #
  # @private
  def new_object_type(name)
    super(name) do |object_type|
      object_type.extend(SchemaElements::ObjectInterfaceExtension)
      yield object_type if block_given?
    end
  end

  # Creates an object indexing field type extended with `Indexing::FieldType::ObjectExtension` and
  # hands it the given JSON schema options.
  #
  # @private
  def new_object_indexing_field_type(type_name:, subfields:, mapping_options:, doc_comment:, json_schema_options: {})
    base_field_type = super(
      type_name: type_name,
      subfields: subfields,
      mapping_options: mapping_options,
      json_schema_options: json_schema_options,
      doc_comment: doc_comment
    )
    object_field_type = base_field_type.extend(Indexing::FieldType::ObjectExtension) # : ElasticGraph::SchemaDefinition::Indexing::FieldType::Object & Indexing::FieldType::ObjectExtension

    # NOTE(review): the return value of `with_json_schema_options` is discarded here, so this
    # presumably relies on it mutating the receiver -- confirm against `ObjectExtension`.
    object_field_type.with_json_schema_options(json_schema_options)
    object_field_type
  end

  # Creates a scalar type extended with `SchemaElements::ScalarTypeExtension`. After the caller's
  # block has run, the JSON schema configuration is validated, but only once
  # `state.initially_registered_built_in_types` is non-empty.
  #
  # @private
  def new_scalar_type(name)
    super(name) do |scalar_type|
      scalar_type.extend(SchemaElements::ScalarTypeExtension)
      yield scalar_type if block_given?

      built_ins_registered = !state.initially_registered_built_in_types.empty?
      scalar_type.validate_json_schema_configuration! if built_ins_registered
    end
  end

  # Creates a scalar indexing field type extended with `Indexing::FieldType::ScalarExtension`.
  #
  # @private
  def new_scalar_indexing_field_type(...)
    scalar_field_type = super.extend(Indexing::FieldType::ScalarExtension) # : ElasticGraph::SchemaDefinition::Indexing::FieldType::Scalar & Indexing::FieldType::ScalarExtension
    scalar_field_type
  end

  # Creates a type reference extended with `SchemaElements::TypeReferenceExtension`.
  #
  # @private
  def new_type_reference(name)
    type_reference = super(name)
    type_reference.extend(SchemaElements::TypeReferenceExtension)
  end

  # Creates a union indexing field type extended with `Indexing::FieldType::UnionExtension`.
  #
  # @private
  def new_union_indexing_field_type(...)
    union_field_type = super.extend(Indexing::FieldType::UnionExtension) # : ElasticGraph::SchemaDefinition::Indexing::FieldType::Union & Indexing::FieldType::UnionExtension
    union_field_type
  end

  # Creates a new Results instance with JSON Schema extensions.
  #
  # @return [ElasticGraph::SchemaDefinition::Results] the created results instance
  def new_results
    results = super
    results.extend(ResultsExtension)
  end

  # Creates a new SchemaArtifactManager instance with JSON Schema extensions.
  #
  # @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] the created artifact manager
  def new_schema_artifact_manager(...)
    artifact_manager = super
    artifact_manager.extend(SchemaArtifactManagerExtension)
  end
end
134+
end
135+
end
136+
end

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/field_type/object_extension.rb

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ def with_json_schema_options(json_schema_options)
2828

2929
# @return [Hash<String, JSONSchemaFieldMetadata>] field metadata keyed by field name
3030
def json_schema_field_metadata_by_field_name
31-
subfields.to_h { |field| [field.name, field.json_schema_metadata] }
31+
# @type var json_subfields: ::Array[ElasticGraph::SchemaDefinition::Indexing::Field & FieldExtension]
32+
json_subfields = _ = subfields
33+
json_subfields.to_h { |field| [field.name, field.json_schema_metadata] }
3234
end
3335

3436
# @param customizations [Hash<String, Object>] the customizations to format
@@ -39,15 +41,20 @@ def format_field_json_schema_customizations(customizations)
3941

4042
# @return [Hash<String, Object>] the JSON schema definition for this object type
4143
def to_json_schema
44+
# @type var state: ElasticGraph::SchemaDefinition::State & StateExtension
45+
state = _ = schema_def_state
46+
# @type var json_subfields: ::Array[ElasticGraph::SchemaDefinition::Indexing::Field & FieldExtension]
47+
json_subfields = _ = subfields
48+
4249
@to_json_schema ||=
4350
if json_schema_options.empty?
4451
# Fields that are `sourced_from` an alternate type must not be included in this type's JSON schema,
4552
# since events of this type won't include them.
46-
other_source_subfields, json_schema_candidate_subfields = subfields.partition(&:source)
53+
other_source_subfields, json_schema_candidate_subfields = json_subfields.partition(&:source)
4754
validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields)
4855
json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script)
4956
required_fields = json_schema_subfields
50-
required_fields = required_fields.reject(&:nullable?) if schema_def_state.allow_omitted_json_schema_fields
57+
required_fields = required_fields.reject(&:nullable?) if state.allow_omitted_json_schema_fields
5158

5259
{
5360
"type" => "object",
@@ -56,7 +63,7 @@ def to_json_schema
5663
# we want it validated (as we do by merging in `json_schema_typename_field`) but we only want
5764
# to require it in the context of a union type. The union's JSON schema requires the field.
5865
"required" => required_fields.map(&:name).freeze,
59-
"additionalProperties" => (false unless schema_def_state.allow_extra_json_schema_fields),
66+
"additionalProperties" => (false unless state.allow_extra_json_schema_fields),
6067
"description" => doc_comment
6168
}.compact.freeze
6269
else

0 commit comments

Comments
 (0)