Skip to content

Commit ea857eb

Browse files
committed
Wire JSON ingestion schema extension modules
1 parent bce83e3 commit ea857eb

52 files changed

Lines changed: 1935 additions & 6 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

config/site/support/doctest_helper.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# frozen_string_literal: true
88

99
require "elastic_graph/apollo/schema_definition/api_extension"
10+
require "elastic_graph/json_ingestion/schema_definition/api_extension"
1011
require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names"
1112
require "elastic_graph/schema_definition/api"
1213
require "elastic_graph/schema_definition/schema_artifact_manager"
@@ -90,6 +91,21 @@ module ElasticGraph
9091
end
9192
end
9293

94+
# Doctest hooks for examples under `ElasticGraph::JSONIngestion::SchemaDefinition`.
#
# Before each example: build a schema definition API with the JSON ingestion extension loaded,
# set a JSON schema version on it, and store it in the `:ElasticGraph_SchemaDefinition_API_instance`
# thread-local. After each example: clear that thread-local again.
doctest.before "ElasticGraph::JSONIngestion::SchemaDefinition" do
  schema_element_names = SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: :camelCase, overrides: {})

  # NOTE(review): the meaning of the positional `true` is not visible from this file -- confirm against `API#initialize`.
  @api = SchemaDefinition::API.new(
    schema_element_names,
    true,
    extension_modules: [JSONIngestion::SchemaDefinition::APIExtension]
  )

  @api.json_schema_version 1
  ::Thread.current[:ElasticGraph_SchemaDefinition_API_instance] = @api
end

doctest.after "ElasticGraph::JSONIngestion::SchemaDefinition" do
  ::Thread.current[:ElasticGraph_SchemaDefinition_API_instance] = nil
end
108+
93109
doctest.before "ElasticGraph::SchemaDefinition::API#json_schema_version" do
94110
ElasticGraph.define_schema do |schema|
95111
# `schema.json_schema_version` raises an error when the version is set more than once.
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/constants"
10+
require "elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones"
11+
require "elastic_graph/json_ingestion/schema_definition/factory_extension"
12+
require "elastic_graph/json_ingestion/schema_definition/state_extension"
13+
14+
module ElasticGraph
15+
module JSONIngestion
16+
# Namespace for all JSON Schema schema definition support.
17+
#
18+
# {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module.
19+
module SchemaDefinition
20+
# Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance
21+
# to add JSON Schema ingestion serializer capabilities.
22+
module APIExtension
23+
# Default JSON schema options applied to ElasticGraph's built-in scalar types when this extension
24+
# is loaded. Keyed by the un-overridden type name; the lookup at runtime maps each key through
25+
# `type_name_overrides` so renamed built-ins still receive the right options.
26+
# JSON schema options for each built-in scalar, keyed by the un-overridden type name.
#
# The constant is frozen all the way down: the outer hash, each per-type options hash, and the
# array values. `Object#freeze` is shallow, so the inner hashes are frozen explicitly via
# `each_value(&:freeze)`; otherwise a caller could mutate the shared defaults in place.
BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME = {
  # Standard GraphQL scalars.
  "Boolean" => {type: "boolean"},
  "Float" => {type: "number"},
  "ID" => {type: "string"},
  "Int" => {type: "integer", minimum: INT_MIN, maximum: INT_MAX},
  "String" => {type: "string"},
  # Additional built-in scalars.
  "Cursor" => {type: "string"},
  "Date" => {type: "string", format: "date"},
  "DateTime" => {type: "string", format: "date-time"},
  "LocalTime" => {type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN},
  "TimeZone" => {type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a.freeze},
  "Untyped" => {type: ["array", "boolean", "integer", "number", "object", "string"].freeze},
  "JsonSafeLong" => {type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX},
  # Note: the JSON type for `LongString` is `integer`, bounded by the `LONG_STRING_*` constants.
  "LongString" => {type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX}
}.each_value(&:freeze).freeze
41+
42+
# Wires up the JSON ingestion extensions when this module is extended onto an API instance.
43+
#
44+
# @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend
45+
# @return [void]
46+
# @api private
47+
def self.extended(api)
  # Layer the JSON ingestion behavior onto the API's state and factory before any lookups below,
  # so that objects they build already carry this gem's extensions.
  api.state.extend(StateExtension)
  api.factory.extend FactoryExtension

  # Build a lookup from final (post-`type_name_overrides`) names to JSON schema options. We can't
  # key directly on `type.name` because users may have overridden the names of built-in scalars
  # (e.g. `Cursor` → `PreCursor`); the keys in `BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME` are
  # always the un-overridden names.
  options_by_final_name = BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME.to_h do |name, options|
    [api.state.type_ref(name).to_final_form.name, options]
  end

  # The final name of `GeoLocation` does not vary between callback invocations, so resolve it once
  # here (alongside the scalar lookup above) instead of on every built-in type.
  geo_location_final_name = api.state.type_ref("GeoLocation").to_final_form.name

  api.on_built_in_types do |type|
    if (options = options_by_final_name[type.name])
      scalar_type = type # : ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension
      scalar_type.json_schema(**options)
    elsif type.name == geo_location_final_name
      # @type var geo_location_type: ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::ObjectInterfaceExtension
      geo_location_type = _ = type
      names = api.state.schema_elements

      # We use `nullable: false` because `GeoLocation` is indexed as a single `geo_point` field,
      # and therefore can't support a `latitude` without a `longitude` or vice-versa.
      latitude = geo_location_type.graphql_fields_by_name.fetch(names.latitude) # : ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension
      longitude = geo_location_type.graphql_fields_by_name.fetch(names.longitude) # : ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension
      latitude.json_schema minimum: -90, maximum: 90, nullable: false
      longitude.json_schema minimum: -180, maximum: 180, nullable: false
    end
  end
end
77+
78+
# Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema
79+
# artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique
80+
# version number. The publisher will then include this version number in published events to identify the version of the schema it
81+
# was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync.
82+
#
83+
# @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly
84+
# have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this
85+
# on every JSON schema change with {#enforce_json_schema_version}.
86+
#
87+
# @param version [Integer] current version number of the JSON schema artifact
88+
# @return [void]
89+
# @see #enforce_json_schema_version
90+
def json_schema_version(version)
  state = json_ingestion_state

  # Reject anything that is not an Integer >= 1. The offending value is rendered with `inspect`
  # so that non-integers are distinguishable in the message (`"1"` vs `1`, `nil` vs nothing),
  # matching how `enforce_json_schema_version` reports bad values.
  unless version.is_a?(Integer) && version >= 1
    raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version.inspect}"
  end

  # The version may only be declared once per schema definition.
  if state.json_schema_version
    raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{state.json_schema_version}"
  end

  state.json_schema_version = version
  # Record the location of the immediate caller (the line that set the version).
  state.json_schema_version_setter_location = caller_locations(1, 1).to_a.first
  nil
end
105+
106+
# Configures whether JSON schema artifact dumping enforces the requirement that the JSON schema version is incremented every time
107+
# dumping the JSON schemas results in a changed artifact. Defaults to `true`.
108+
#
109+
# @note Generally speaking, you will want this to be `true` for any ElasticGraph application that is in
110+
# production as the versioning of JSON schemas is what supports safe schema evolution as it allows
111+
# ElasticGraph to identify which version of the JSON schema the publishing system was operating on
112+
# when it published an event.
113+
#
114+
# It can be useful to set it to `false` before your application is in production, as you do not want
115+
# to be forced to bump the version after every single schema change while you are building an initial
116+
# prototype.
117+
#
118+
# @param value [Boolean] whether to require `json_schema_version` to be incremented on changes that impact `json_schemas.yaml`
119+
# @return [void]
120+
# @see #json_schema_version
121+
#
122+
# @example Disable enforcement during initial prototyping
123+
# ElasticGraph.define_schema do |schema|
124+
# # TODO: remove this once we're past the prototyping stage
125+
# schema.enforce_json_schema_version false
126+
# end
127+
def enforce_json_schema_version(value)
  # Only the literal booleans are accepted; truthy/falsy stand-ins are rejected.
  is_boolean = value == true || value == false
  raise Errors::SchemaError, "`enforce_json_schema_version` must be a boolean. Specified value: #{value.inspect}" unless is_boolean

  json_ingestion_state.enforce_json_schema_version = value
  nil
end
135+
136+
# Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the
137+
# publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to
138+
# configure this behavior.
139+
#
140+
# @param allow_omitted_fields [Boolean] Whether nullable fields can be omitted from indexing events.
141+
# @param allow_extra_fields [Boolean] Whether extra fields (e.g. beyond fields defined in the schema) can be included in indexing events.
142+
# @return [void]
143+
#
144+
# @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled
145+
# field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher
146+
# accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId`
147+
# is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of
148+
# these to `true` (or none).
149+
def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true)
  ingestion_state = json_ingestion_state
  booleans = [true, false]

  # Validate both flags before writing either, so a bad argument leaves the state untouched.
  raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" unless booleans.include?(allow_omitted_fields)
  raise Errors::SchemaError, "`allow_extra_fields` must be true or false" unless booleans.include?(allow_extra_fields)

  ingestion_state.allow_omitted_json_schema_fields = allow_omitted_fields
  ingestion_state.allow_extra_json_schema_fields = allow_extra_fields
  nil
end
164+
165+
private
166+
167+
# Returns the API's `state` narrowed to include this gem's `StateExtension`. Centralizes
168+
# the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied
169+
# at runtime in `extended`.
170+
def json_ingestion_state
171+
# Returns `state` unchanged. The trailing `# :` comment is a Steep type assertion rather than
# prose, so it must stay attached to this expression.
state # : ElasticGraph::SchemaDefinition::State & StateExtension
172+
end
173+
end
174+
end
175+
end
176+
end
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/json_ingestion/schema_definition/indexing/index_extension"
10+
require "elastic_graph/json_ingestion/schema_definition/results_extension"
11+
require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension"
12+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension"
13+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension"
14+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/object_interface_extension"
15+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension"
16+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/type_reference_extension"
17+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/union_type_extension"
18+
19+
module ElasticGraph
20+
module JSONIngestion
21+
module SchemaDefinition
22+
# Extension module applied to `ElasticGraph::SchemaDefinition::Factory` to wire up
23+
# JSON Schema support on schema elements, indices, Results, and SchemaArtifactManager instances.
24+
#
25+
# @api private
26+
module FactoryExtension
27+
# @private
28+
def new_enum_type(name)
  # Extend each new enum type with the JSON ingestion behavior before the caller's block (if any) sees it.
  super(name) do |enum_type|
    enum_type.extend(SchemaElements::EnumTypeExtension)
    next unless block_given?

    yield enum_type
  end
end
34+
35+
# @private
36+
def new_field(**kwargs, &block)
  # Extend each new field with the JSON ingestion behavior, then run the caller's block when one was given.
  super(**kwargs) do |built_field|
    built_field.extend(SchemaElements::FieldExtension)
    block.call(built_field) if block
  end
end
42+
43+
# @private
44+
def new_index(name, settings, type, &block)
  super(name, settings, type) do |built_index|
    built_index.extend(Indexing::IndexExtension)
    # Called before the caller's block runs, so the block observes the index after this call.
    built_index.require_id_in_json_schema
    block.call(built_index) if block
  end
end
51+
52+
# @private
53+
def new_interface_type(name)
  # Interface types share `ObjectInterfaceExtension` with object types.
  super(name) do |interface_type|
    interface_type.extend(SchemaElements::ObjectInterfaceExtension)
    next unless block_given?

    yield interface_type
  end
end
59+
60+
# @private
61+
def new_object_type(name)
  # Object types share `ObjectInterfaceExtension` with interface types.
  super(name) do |object_type|
    object_type.extend(SchemaElements::ObjectInterfaceExtension)
    next unless block_given?

    yield object_type
  end
end
67+
68+
# @private
69+
def new_scalar_type(name)
  super(name) do |scalar_type|
    scalar_type.extend(SchemaElements::ScalarTypeExtension)
    yield scalar_type if block_given?

    # Validation is skipped while `initially_registered_built_in_types` is still empty.
    # NOTE(review): presumably this exempts built-in scalars that are defined before their JSON
    # schema options are applied via `on_built_in_types` -- confirm against `State`.
    next if state.initially_registered_built_in_types.empty?

    scalar_type.validate_json_schema_configuration!
  end
end
76+
77+
# @private
78+
def new_type_reference(name)
  # `extend` returns its receiver, so the extended type reference is the return value.
  type_reference = super(name)
  type_reference.extend(SchemaElements::TypeReferenceExtension)
end
81+
82+
# @private
83+
def new_union_type(name)
  # Extend each new union type with the JSON ingestion behavior before the caller's block (if any) sees it.
  super(name) do |union_type|
    union_type.extend(SchemaElements::UnionTypeExtension)
    next unless block_given?

    yield union_type
  end
end
89+
90+
# Creates a new Results instance with JSON Schema extensions.
91+
#
92+
# @return [ElasticGraph::SchemaDefinition::Results] the created results instance
93+
def new_results
  # Bare `super` forwards this method's (empty) argument list; `extend` returns its receiver.
  results = super
  results.extend(ResultsExtension)
end
96+
97+
# Creates a new SchemaArtifactManager instance with JSON Schema extensions.
98+
#
99+
# @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] the created artifact manager
100+
def new_schema_artifact_manager(...)
  # Bare `super` forwards every argument captured by `(...)`; `extend` returns its receiver.
  artifact_manager = super
  artifact_manager.extend(SchemaArtifactManagerExtension)
end
103+
end
104+
end
105+
end
106+
end

0 commit comments

Comments
 (0)