Skip to content

Commit 8ea9e97

Browse files
committed
Extract JSON ingestion schema support
1 parent edb2d90 commit 8ea9e97

95 files changed

Lines changed: 2022 additions & 1007 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Gemfile.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ PATH
198198
elasticgraph-schema_definition (1.2.1.pre)
199199
elasticgraph-graphql (= 1.2.1.pre)
200200
elasticgraph-indexer (= 1.2.1.pre)
201+
elasticgraph-json_ingestion (= 1.2.1.pre)
201202
elasticgraph-schema_artifacts (= 1.2.1.pre)
202203
elasticgraph-support (= 1.2.1.pre)
203204
graphql (~> 2.6.2)

config/docker_demo/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ COPY elasticgraph-datastore_core elasticgraph-datastore_core/
1616
COPY elasticgraph-graphiql elasticgraph-graphiql/
1717
COPY elasticgraph-graphql elasticgraph-graphql/
1818
COPY elasticgraph-indexer elasticgraph-indexer/
19+
COPY elasticgraph-json_ingestion elasticgraph-json_ingestion/
1920
COPY elasticgraph-local elasticgraph-local/
2021
COPY elasticgraph-opensearch elasticgraph-opensearch/
2122
COPY elasticgraph-query_registry elasticgraph-query_registry/

config/site/support/doctest_helper.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
require "elastic_graph/apollo/schema_definition/api_extension"
1010
require "elastic_graph/schema_artifacts/runtime_metadata/schema_element_names"
1111
require "elastic_graph/schema_definition/api"
12+
require "elastic_graph/schema_definition/extension_module_support"
1213
require "elastic_graph/schema_definition/schema_artifact_manager"
1314
require "elastic_graph/warehouse/schema_definition/api_extension"
1415
require "rspec/mocks"
@@ -60,7 +61,7 @@ module ElasticGraph
6061
@api = SchemaDefinition::API.new(
6162
SchemaArtifacts::RuntimeMetadata::SchemaElementNames.new(form: :camelCase, overrides: {}),
6263
true,
63-
extension_modules: extension_modules
64+
extension_modules: SchemaDefinition::ExtensionModuleSupport.default_extension_modules + extension_modules
6465
)
6566

6667
# This is required in all schemas, but we don't want to have to put it in all our examples,
@@ -95,7 +96,8 @@ module ElasticGraph
9596
ElasticGraph.define_schema do |schema|
9697
# `schema.json_schema_version` raises an error when the version is set more than once.
9798
# By default we set it above. Here we clear it to allow our example to set it.
98-
schema.state.json_schema_version = nil
99+
schema.state.ingestion_serializer_state.delete(:json_schema_version)
100+
schema.state.ingestion_serializer_state.delete(:json_schema_version_setter_location)
99101
end
100102
end
101103

elasticgraph-apollo/apollo_tests_implementation/Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ COPY elasticgraph-elasticsearch /web/elasticgraph-elasticsearch
1414
COPY elasticgraph-graphiql /web/elasticgraph-graphiql
1515
COPY elasticgraph-graphql /web/elasticgraph-graphql
1616
COPY elasticgraph-indexer /web/elasticgraph-indexer
17+
COPY elasticgraph-json_ingestion /web/elasticgraph-json_ingestion
1718
COPY elasticgraph-rack /web/elasticgraph-rack
1819
COPY elasticgraph-schema_artifacts /web/elasticgraph-schema_artifacts
1920
COPY elasticgraph-schema_definition /web/elasticgraph-schema_definition

elasticgraph-apollo/apollo_tests_implementation/Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ source "https://rubygems.org"
1616
graphiql
1717
graphql
1818
indexer
19+
json_ingestion
1920
rack
2021
schema_artifacts
2122
schema_definition

elasticgraph-apollo/spec/unit/elastic_graph/apollo/apollo_directives_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ def self.with_both_casing_forms(&block)
552552
end
553553

554554
def define_schema(&block)
555-
extension_modules = [SchemaDefinition::APIExtension]
555+
extension_modules = ::ElasticGraph::SchemaDefinition::ExtensionModuleSupport.default_extension_modules + [SchemaDefinition::APIExtension]
556556
super(schema_element_name_form: schema_element_name_form, extension_modules: extension_modules, &block)
557557
end
558558
end

elasticgraph-apollo/spec/unit/elastic_graph/apollo/schema_definition_spec.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1431,7 +1431,9 @@ def expect_identifiable_type_tagging_of_token(&type_def_for)
14311431
end
14321432

14331433
def define_schema(with_apollo: true, &block)
1434-
extension_modules = with_apollo ? [SchemaDefinition::APIExtension] : []
1434+
# Always include the JSON ingestion default so `Results#json_schemas_for` is available in both modes.
1435+
extension_modules = ::ElasticGraph::SchemaDefinition::ExtensionModuleSupport.default_extension_modules
1436+
extension_modules += [SchemaDefinition::APIExtension] if with_apollo
14351437
super(schema_element_name_form: schema_element_name_form, extension_modules: extension_modules, &block)
14361438
end
14371439
end
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/constants"
10+
require "elastic_graph/json_ingestion/schema_definition/factory_extension"
11+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/enum_extension"
12+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/object_extension"
13+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/scalar_extension"
14+
require "elastic_graph/json_ingestion/schema_definition/indexing/field_type/union_extension"
15+
require "elastic_graph/json_ingestion/schema_definition/state_extension"
16+
require "elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones"
17+
require "elastic_graph/schema_definition/indexing/field_type/enum"
18+
require "elastic_graph/schema_definition/indexing/field_type/object"
19+
require "elastic_graph/schema_definition/indexing/field_type/scalar"
20+
require "elastic_graph/schema_definition/indexing/field_type/union"
21+
22+
module ElasticGraph
23+
module JSONIngestion
24+
# Namespace for all JSON Schema schema definition support.
25+
#
26+
# {SchemaDefinition::APIExtension} is the primary entry point and should be used as a schema definition extension module.
27+
module SchemaDefinition
28+
# Module designed to be extended onto an {ElasticGraph::SchemaDefinition::API} instance
29+
# to add JSON Schema ingestion serializer capabilities.
30+
module APIExtension
31+
# Default JSON schema options applied to ElasticGraph's built-in scalar types when this extension
32+
# is loaded. Keyed by the un-overridden type name; the lookup at runtime maps each key through
33+
# `type_name_overrides` so renamed built-ins still receive the right options.
34+
# NOTE: the outer hash AND every per-scalar options hash are frozen, so these shared defaults cannot
# be mutated through the constant. Consumers that need to customize the options should build a new
# hash (e.g. via `**options` or `merge`).
BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME = {
  "Boolean" => {type: "boolean"},
  "Float" => {type: "number"},
  "ID" => {type: "string"},
  "Int" => {type: "integer", minimum: INT_MIN, maximum: INT_MAX},
  "String" => {type: "string"},
  "Cursor" => {type: "string"},
  "Date" => {type: "string", format: "date"},
  "DateTime" => {type: "string", format: "date-time"},
  "LocalTime" => {type: "string", pattern: VALID_LOCAL_TIME_JSON_SCHEMA_PATTERN},
  "TimeZone" => {type: "string", enum: GraphQL::ScalarCoercionAdapters::VALID_TIME_ZONES.to_a.freeze},
  "Untyped" => {type: ["array", "boolean", "integer", "number", "object", "string"].freeze},
  "JsonSafeLong" => {type: "integer", minimum: JSON_SAFE_LONG_MIN, maximum: JSON_SAFE_LONG_MAX},
  "LongString" => {type: "integer", minimum: LONG_STRING_MIN, maximum: LONG_STRING_MAX}
}.each_value(&:freeze).freeze
49+
50+
# Wires up the factory extension when this module is extended onto an API instance.
51+
#
52+
# @param api [ElasticGraph::SchemaDefinition::API] the API instance to extend
53+
# @return [void]
54+
# @api private
55+
def self.extended(api)
  # Teach each core indexing field type about JSON schema by prepending the matching extension, so
  # that it participates in `to_json_schema` / `format_field_json_schema_customizations` /
  # `json_schema_field_metadata_by_field_name`. The `<` check skips a class that already has the
  # extension among its ancestors, so extending a further API instance does not prepend again.
  [
    [::ElasticGraph::SchemaDefinition::Indexing::FieldType::Enum, Indexing::FieldType::EnumExtension],
    [::ElasticGraph::SchemaDefinition::Indexing::FieldType::Object, Indexing::FieldType::ObjectExtension],
    [::ElasticGraph::SchemaDefinition::Indexing::FieldType::Scalar, Indexing::FieldType::ScalarExtension],
    [::ElasticGraph::SchemaDefinition::Indexing::FieldType::Union, Indexing::FieldType::UnionExtension]
  ].each do |core_field_type, json_schema_extension|
    core_field_type.prepend(json_schema_extension) unless core_field_type < json_schema_extension
  end

  # Extend the API's state and factory, and reserve the event envelope type name.
  extended_state = api.state.extend(StateExtension) # : ::ElasticGraph::SchemaDefinition::State & StateExtension
  extended_state.reserved_type_names << EVENT_ENVELOPE_JSON_SCHEMA_NAME
  api.factory.extend FactoryExtension

  # Re-key the built-in scalar options by each type's final (post-`type_name_overrides`) name.
  # `type.name` can't be matched against the constant's keys directly, because users may have
  # renamed built-in scalars (e.g. `Cursor` → `PreCursor`) while the keys of
  # `BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME` are always the un-overridden names.
  json_schema_options_by_final_name = BUILT_IN_SCALAR_JSON_SCHEMA_OPTIONS_BY_NAME.transform_keys do |original_name|
    api.state.type_ref(original_name).to_final_form.name
  end

  api.on_built_in_types do |built_in_type|
    if (scalar_options = json_schema_options_by_final_name[built_in_type.name])
      scalar_type = built_in_type # : ::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension
      scalar_type.json_schema(**scalar_options)
    elsif built_in_type.name == api.state.type_ref("GeoLocation").to_final_form.name
      # @type var geo_location_type: ::ElasticGraph::SchemaDefinition::SchemaElements::TypeWithSubfields & SchemaElements::ObjectInterfaceExtension
      geo_location_type = _ = built_in_type
      element_names = api.state.schema_elements
      fields_by_name = geo_location_type.graphql_fields_by_name

      # `nullable: false` is used because `GeoLocation` is indexed as a single `geo_point` field,
      # and therefore can't support a `latitude` without a `longitude` or vice-versa.
      latitude_field = fields_by_name.fetch(element_names.latitude) # : ::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension
      longitude_field = fields_by_name.fetch(element_names.longitude) # : ::ElasticGraph::SchemaDefinition::SchemaElements::Field & SchemaElements::FieldExtension
      latitude_field.json_schema minimum: -90, maximum: 90, nullable: false
      longitude_field.json_schema minimum: -180, maximum: 180, nullable: false
    end
  end
end
94+
95+
# Defines the version number of the current JSON schema. Importantly, every time a change is made that impacts the JSON schema
96+
# artifact, the version number must be incremented to ensure that each different version of the JSON schema is identified by a unique
97+
# version number. The publisher will then include this version number in published events to identify the version of the schema it
98+
# was using. This avoids the need to deploy the publisher and ElasticGraph indexer at the same time to keep them in sync.
99+
#
100+
# @note While this is an important part of how ElasticGraph is designed to support schema evolution, it can be annoying to constantly
101+
# have to increment this while rapidly changing the schema during prototyping. You can disable the requirement to increment this
102+
# on every JSON schema change by setting `enforce_json_schema_version` to `false` in your `Rakefile`.
103+
#
104+
# @param version [Integer] current version number of the JSON schema artifact
105+
# @return [void]
106+
# @see Local::RakeTasks#enforce_json_schema_version
107+
def json_schema_version(version)
  ingestion_state = json_ingestion_state

  # Only integers >= 1 are accepted as a version.
  unless version.is_a?(Integer) && version >= 1
    raise Errors::SchemaError, "`json_schema_version` must be a positive integer. Specified version: #{version}"
  end

  # The version may be set only once per schema.
  if (previous_version = ingestion_state.json_schema_version)
    raise Errors::SchemaError, "`json_schema_version` can only be set once on a schema. Previously-set version: #{previous_version}"
  end

  # `caller_locations(1, 1)` is evaluated directly in this method body (not inside a block) so that
  # the single frame it returns is the call site of `json_schema_version`.
  setter_location = caller_locations(1, 1).to_a.first

  ingestion_state.json_schema_version = version
  ingestion_state.json_schema_version_setter_location = setter_location
  nil
end
122+
123+
# Defines strictness of the JSON schema validation. By default, the JSON schema will require all fields to be provided by the
124+
# publisher (but they can be nullable) and will ignore extra fields that are not defined in the schema. Use this method to
125+
# configure this behavior.
126+
#
127+
# @param allow_omitted_fields [bool] Whether nullable fields can be omitted from indexing events.
128+
# @param allow_extra_fields [bool] Whether extra fields (e.g. beyond fields defined in the schema) can be included in indexing events.
129+
# @return [void]
130+
#
131+
# @note If you allow both omitted fields and extra fields, ElasticGraph's JSON schema validation will allow (and ignore) misspelled
132+
# field names in indexing events. For example, if the ElasticGraph schema has a nullable field named `parentId` but the publisher
133+
# accidentally provides it as `parent_id`, ElasticGraph would happily ignore the `parent_id` field entirely, because `parentId`
134+
# is allowed to be omitted and `parent_id` would be treated as an extra field. Therefore, we recommend that you only set one of
135+
# these to `true` (or none).
136+
def json_schema_strictness(allow_omitted_fields: false, allow_extra_fields: true)
  ingestion_state = json_ingestion_state
  booleans = [true, false]

  # Both flags are validated before either is stored, so an invalid call leaves the state untouched.
  raise Errors::SchemaError, "`allow_omitted_fields` must be true or false" unless booleans.include?(allow_omitted_fields)
  raise Errors::SchemaError, "`allow_extra_fields` must be true or false" unless booleans.include?(allow_extra_fields)

  ingestion_state.allow_omitted_json_schema_fields = allow_omitted_fields
  ingestion_state.allow_extra_json_schema_fields = allow_extra_fields
  nil
end
151+
152+
private
153+
154+
# Returns the API's `state` narrowed to include this gem's `StateExtension`. Centralizes
155+
# the Steep cast that's needed because Steep can't see the `extend(StateExtension)` applied
156+
# at runtime in `extended`.
157+
def json_ingestion_state
  # The trailing annotation is a Steep type assertion; at runtime this simply returns `state`.
  narrowed_state = state # : ::ElasticGraph::SchemaDefinition::State & StateExtension
  narrowed_state
end
160+
end
161+
end
162+
end
163+
end
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/json_ingestion/schema_definition/indexing/index_extension"
10+
require "elastic_graph/json_ingestion/schema_definition/results_extension"
11+
require "elastic_graph/json_ingestion/schema_definition/schema_artifact_manager_extension"
12+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/enum_type_extension"
13+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/field_extension"
14+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/object_interface_extension"
15+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/scalar_type_extension"
16+
require "elastic_graph/json_ingestion/schema_definition/schema_elements/type_reference_extension"
17+
18+
module ElasticGraph
19+
module JSONIngestion
20+
module SchemaDefinition
21+
# Extension module applied to `ElasticGraph::SchemaDefinition::Factory` to wire up
22+
# JSON Schema support on the schema elements, indexes, Results, and SchemaArtifactManager instances it creates.
23+
#
24+
# @api private
25+
module FactoryExtension
26+
# Builds an enum type via `super`, extending it with `SchemaElements::EnumTypeExtension`
# before yielding it to the caller's block (when one is given).
#
# @private
def new_enum_type(name)
  super(name) do |enum_type|
    enum_type.extend(SchemaElements::EnumTypeExtension)
    next unless block_given?

    yield enum_type
  end
end
33+
34+
# Builds a field via `super`, extending it with `SchemaElements::FieldExtension`
# before passing it to the caller's block (when one is given).
#
# @private
def new_field(**kwargs, &block)
  super(**kwargs) do |built_field|
    built_field.extend(SchemaElements::FieldExtension)
    block.call(built_field) if block
  end
end
41+
42+
# Builds an index via `super`, extending it with `Indexing::IndexExtension` and calling
# `require_id_in_json_schema` on it before passing it to the caller's block (when one is given).
#
# @private
def new_index(name, settings, type, &block)
  super(name, settings, type) do |built_index|
    built_index.extend(Indexing::IndexExtension)
    built_index.require_id_in_json_schema
    block.call(built_index) if block
  end
end
50+
51+
# Builds an interface type via `super`, extending it with `SchemaElements::ObjectInterfaceExtension`
# before yielding it to the caller's block (when one is given).
#
# @private
def new_interface_type(name)
  super(name) do |interface_type|
    interface_type.extend(SchemaElements::ObjectInterfaceExtension)
    next unless block_given?

    yield interface_type
  end
end
58+
59+
# Builds an object type via `super`, extending it with `SchemaElements::ObjectInterfaceExtension`
# before yielding it to the caller's block (when one is given).
#
# @private
def new_object_type(name)
  super(name) do |object_type|
    object_type.extend(SchemaElements::ObjectInterfaceExtension)
    next unless block_given?

    yield object_type
  end
end
66+
67+
# Builds a scalar type via `super`, extending it with `SchemaElements::ScalarTypeExtension`,
# yielding it to the caller's block (when one is given), and then validating its JSON schema
# configuration unless `state.initially_registered_built_in_types` is empty.
#
# @private
def new_scalar_type(name)
  super(name) do |scalar_type|
    scalar_type.extend(SchemaElements::ScalarTypeExtension)
    yield(scalar_type) if block_given?

    # NOTE(review): presumably the empty check defers validation of built-in scalars, whose JSON
    # schema options are applied after the type is created — confirm against the core factory.
    next if state.initially_registered_built_in_types.empty?

    scalar_type.validate_json_schema_configuration!
  end
end
75+
76+
# Builds a type reference via `super` and extends it with `SchemaElements::TypeReferenceExtension`.
#
# @private
def new_type_reference(name)
  type_reference = super(name)
  type_reference.extend(SchemaElements::TypeReferenceExtension)
end
80+
81+
# Builds the Results instance via `super` and extends it with `ResultsExtension`.
#
# @return [ElasticGraph::SchemaDefinition::Results] the created results instance
def new_results
  results = super
  results.extend(ResultsExtension)
end
87+
88+
# Builds the SchemaArtifactManager instance via `super` (forwarding all arguments unchanged)
# and extends it with `SchemaArtifactManagerExtension`.
#
# @return [ElasticGraph::SchemaDefinition::SchemaArtifactManager] the created artifact manager
def new_schema_artifact_manager(...)
  artifact_manager = super
  artifact_manager.extend(SchemaArtifactManagerExtension)
end
94+
end
95+
end
96+
end
97+
end

0 commit comments

Comments
 (0)