Skip to content

Commit 65c8468

Browse files
committed
Add JSON ingestion indexing extensions
1 parent 2561384 commit 65c8468

19 files changed

Lines changed: 480 additions & 98 deletions

File tree

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/json_ingestion/schema_definition/indexing/field"
10+
11+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # Mixin for indexing field references that carries JSON schema state (layers and customizations)
        # and hands that state to the field produced by `resolve`.
        #
        # @api private
        module FieldReferenceExtension
          # Records the JSON schema state on this reference.
          #
          # @param json_schema_layers the JSON schema layers to remember
          # @param json_schema_customizations the JSON schema customizations to remember
          # @return [self] the receiver, so the call can be chained
          def with_json_schema(json_schema_layers:, json_schema_customizations:)
            @json_schema_layers, @json_schema_customizations = json_schema_layers, json_schema_customizations
            self
          end

          # Two references are equal only when the other one also carries this extension and both the
          # field reference attributes and the JSON schema state match.
          #
          # @param other [Object] the object to compare against
          # @return [Boolean]
          def ==(other)
            return false unless other.is_a?(FieldReferenceExtension)
            return false unless field_reference_comparison_metadata == other.field_reference_comparison_metadata

            json_schema_comparison_metadata == other.json_schema_comparison_metadata
          end

          # Resolves the reference via `super` and, when that yields a field, extends the field with
          # `FieldExtension` and passes along the JSON schema state stored on this reference.
          #
          # @return the extended field, or `nil` when `super` resolves to nothing
          def resolve
            base_field = super
            return nil unless base_field

            extended_field = base_field.extend(Indexing::FieldExtension) # : ElasticGraph::SchemaDefinition::Indexing::Field & FieldExtension
            extended_field.with_json_schema(
              json_schema_layers: @json_schema_layers,
              json_schema_customizations: @json_schema_customizations
            )
          end

          # @return [Array] the JSON schema state (layers, then customizations) compared by `==`
          def json_schema_comparison_metadata
            [@json_schema_layers, @json_schema_customizations]
          end

          # @return [Array] the field reference attributes compared by `==`, in a fixed order
          def field_reference_comparison_metadata
            [
              name, name_in_index, type, mapping_options,
              accuracy_confidence, source, runtime_field_script, doc_comment
            ]
          end
        end
      end
    end
  end
end
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        # Namespace for indexing-field-type extensions that contribute JSON schema generation behavior.
        module FieldType
          # Adds JSON schema serialization to enum indexing field types.
          #
          # @private
          module EnumExtension
            # @return [Hash<String, ::Object>] always empty for an enum type
            def json_schema_field_metadata_by_field_name
              {}
            end

            # Keeps only the `enum` customization and drops everything else.
            #
            # An enum type already limits a field to a small set of allowed values, so other customizations
            # (for example the `maxLength` customization EG automatically applies to `keyword` fields) add
            # nothing. Dropping them also matters for publishers that generate code from the JSON schema with
            # a library such as https://github.com/pwall567/json-kotlin-schema-codegen: extra customizations
            # turn the direct `$ref` into an `allOf` wrapper, which changes the generated enum type.
            #
            # The `enum` customization is retained so that a field can narrow the allowed values, e.g.
            # restricting a `currency` field to USD, CAD, and EUR when `Currency` contains every currency.
            #
            # @param customizations [Hash<String, ::Object>] JSON schema customizations
            # @return [Hash<String, ::Object>] a hash containing at most the `enum` entry
            def format_field_json_schema_customizations(customizations)
              narrowing_only = customizations.slice("enum")
              narrowing_only
            end

            # @return [Hash<String, ::Object>] a string-typed JSON schema listing this type's enum value names
            def to_json_schema
              schema = {"type" => "string"}
              schema["enum"] = enum_value_names
              schema
            end
          end
        end
      end
    end
  end
end
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/support/hash_util"
10+
11+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        module FieldType
          # Extends object/interface indexing field types with JSON schema serialization.
          #
          # @private
          module ObjectExtension
            # @return [Hash] the JSON schema options configured for this type (an empty hash when none were set)
            def json_schema_options
              @json_schema_options ||= {}
            end

            # Replaces the JSON schema options of this type.
            #
            # The schema memoized by `to_json_schema` depends on these options, so the memo is discarded here.
            # Otherwise a schema computed before the options were set would keep being returned.
            #
            # @param json_schema_options [Hash] the JSON schema options to use
            # @return [self] the receiver, so the call can be chained
            def with_json_schema_options(json_schema_options)
              @json_schema_options = json_schema_options
              @to_json_schema = nil
              self
            end

            # @return [Hash<String, JSONSchemaFieldMetadata>] field metadata keyed by field name
            def json_schema_field_metadata_by_field_name
              subfields.to_h { |field| [field.name, field.json_schema_metadata] }
            end

            # Object types apply field customizations unchanged.
            #
            # @param customizations [Hash<String, Object>] the customizations to format
            # @return [Hash<String, Object>] the same customizations
            def format_field_json_schema_customizations(customizations)
              customizations
            end

            # Builds (and memoizes) the JSON schema for this type.
            #
            # When explicit `json_schema_options` are set, they are returned with stringified keys. Otherwise
            # an `object` schema is generated from the subfields.
            #
            # @return [Hash<String, Object>] the JSON schema definition for this object type
            # @raise [Errors::SchemaError] if a `sourced_from` subfield has JSON schema customizations
            def to_json_schema
              @to_json_schema ||=
                if json_schema_options.empty?
                  # Fields that are `sourced_from` an alternate type must not be included in this type's JSON schema,
                  # since events of this type won't include them.
                  other_source_subfields, json_schema_candidate_subfields = subfields.partition(&:source)
                  validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields)

                  # Fields with a `runtime_field_script` are excluded from the JSON schema as well.
                  json_schema_subfields = json_schema_candidate_subfields.reject(&:runtime_field_script)

                  # Every remaining field is required, unless omitted fields are allowed, in which case
                  # nullable fields are not required.
                  required_fields = json_schema_subfields
                  required_fields = required_fields.reject(&:nullable?) if schema_def_state.allow_omitted_json_schema_fields

                  {
                    "type" => "object",
                    "properties" => json_schema_subfields.to_h { |field| [field.name, field.json_schema] }.merge(json_schema_typename_field),
                    # Note: `__typename` is intentionally not included in the `required` list. If `__typename` is present
                    # we want it validated (as we do by merging in `json_schema_typename_field`) but we only want
                    # to require it in the context of a union type. The union's JSON schema requires the field.
                    "required" => required_fields.map(&:name).freeze,
                    # `nil` entries (extra fields allowed, or no doc comment) are dropped by `compact` below.
                    "additionalProperties" => (false unless schema_def_state.allow_extra_json_schema_fields),
                    "description" => doc_comment
                  }.compact.freeze
                else
                  Support::HashUtil.stringify_keys(json_schema_options)
                end
            end

            private

            # Returns a `__typename` property which we use for union types.
            #
            # This must always be set to the name of the type (thus the const value).
            #
            # We also add a "default" value. This does not impact validation, but rather
            # aids tools like our Kotlin codegen to save publishers from having to set the
            # property explicitly when creating events.
            #
            # @return [Hash<String, Hash>] the `__typename` property definition
            def json_schema_typename_field
              {
                "__typename" => {
                  "type" => "string",
                  "const" => type_name,
                  "default" => type_name
                }
              }
            end

            # Rejects JSON schema customizations on fields that are `sourced_from` another type.
            #
            # @param other_source_subfields [Array] the subfields that have a `source`
            # @return [void]
            # @raise [Errors::SchemaError] if any of the given fields has JSON schema customizations
            def validate_sourced_fields_have_no_json_schema_overrides(other_source_subfields)
              problem_fields = other_source_subfields.reject { |field| field.json_schema_customizations.empty? }
              return if problem_fields.empty?

              field_descriptions = problem_fields.map(&:name).sort.map { |field| "`#{field}`" }.join(", ")
              raise Errors::SchemaError,
                "`#{type_name}` has #{problem_fields.size} field(s) (#{field_descriptions}) that are `sourced_from` " \
                "another type and also have JSON schema customizations. Instead, put the JSON schema " \
                "customizations on the source type's field definitions."
            end
          end
        end
      end
    end
  end
end
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
require "elastic_graph/support/hash_util"
10+
11+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        module FieldType
          # Adds JSON schema serialization to scalar indexing field types.
          #
          # @private
          module ScalarExtension
            # Scalars have no subfields, so there is no per-field metadata to report.
            #
            # @return [Hash] an empty hash
            def json_schema_field_metadata_by_field_name
              {}
            end

            # Scalar types apply field customizations unchanged.
            #
            # @param customizations [Hash<String, Object>] the customizations to format
            # @return [Hash<String, Object>] the same customizations
            def format_field_json_schema_customizations(customizations)
              customizations
            end

            # Validates the scalar type's JSON schema configuration, then returns its JSON schema
            # options with stringified keys.
            #
            # @return [Hash<String, Object>] the JSON schema definition for this scalar type
            def to_json_schema
              configured_scalar = scalar_type # : ElasticGraph::SchemaDefinition::SchemaElements::ScalarType & SchemaElements::ScalarTypeExtension
              configured_scalar.validate_json_schema_configuration!

              raw_options = configured_scalar.json_schema_options
              Support::HashUtil.stringify_keys(raw_options)
            end
          end
        end
      end
    end
  end
end
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2024 - 2026 Block, Inc.
2+
#
3+
# Use of this source code is governed by an MIT-style
4+
# license that can be found in the LICENSE file or at
5+
# https://opensource.org/licenses/MIT.
6+
#
7+
# frozen_string_literal: true
8+
9+
module ElasticGraph
  module JSONIngestion
    module SchemaDefinition
      module Indexing
        module FieldType
          # Adds JSON schema serialization to union indexing field types.
          #
          # @private
          module UnionExtension
            # Unions have no subfields of their own, so there is no per-field metadata to report.
            #
            # @return [Hash] an empty hash
            def json_schema_field_metadata_by_field_name
              {}
            end

            # Union types apply field customizations unchanged.
            #
            # @param customizations [Hash<String, Object>] the customizations to format
            # @return [Hash<String, Object>] the same customizations
            def format_field_json_schema_customizations(customizations)
              customizations
            end

            # Builds a schema that references each subtype under `#/$defs/` and requires `__typename`.
            #
            # The subtypes are listed in the "oneOf" clause. `__typename` is required so that the event
            # indicates which subtype is being referenced (the property is pre-defined on each type as
            # a constant).
            #
            # Note: "anyOf" would validate equivalently here, because the `__typename` requirements are
            # mutually exclusive and so no object can satisfy more than one subschema. We use "oneOf"
            # because it works better with this library:
            # https://github.com/pwall567/json-kotlin-schema-codegen
            #
            # @return [Hash<String, Object>] the JSON schema definition for this union type
            def to_json_schema
              subtype_refs = subtypes_by_name.each_key.map do |subtype_name|
                {"$ref" => "#/$defs/#{subtype_name}"}
              end

              {"required" => %w[__typename], "oneOf" => subtype_refs}
            end
          end
        end
      end
    end
  end
end
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
module ElasticGraph
2+
module JSONIngestion
3+
module SchemaDefinition
4+
module Indexing
5+
# Type signature for the mixin applied to indexing field references.
# The self-type (`: ...::FieldReference`) declares that the object this module is mixed into
# is a `FieldReference`.
module FieldReferenceExtension: ::ElasticGraph::SchemaDefinition::Indexing::FieldReference
6+
# JSON schema state assigned by `with_json_schema` and forwarded by `resolve`.
@json_schema_layers: ::ElasticGraph::SchemaDefinition::jsonSchemaLayersArray
7+
@json_schema_customizations: ::Hash[::Symbol, untyped]
8+
9+
# Stores the JSON schema state and returns the receiver.
def with_json_schema: (
10+
json_schema_layers: ::ElasticGraph::SchemaDefinition::jsonSchemaLayersArray,
11+
json_schema_customizations: ::Hash[::Symbol, untyped]
12+
) -> (::ElasticGraph::SchemaDefinition::Indexing::FieldReference & FieldReferenceExtension)
13+
# Compares both metadata tuples declared below; accepts any object.
def ==: (untyped) -> bool
14+
# Returns the resolved field extended with `FieldExtension`, or `nil` when nothing resolves.
def resolve: () -> (::ElasticGraph::SchemaDefinition::Indexing::Field & FieldExtension)?
15+
16+
# Tuple of (layers, customizations).
def json_schema_comparison_metadata: () -> [
17+
::ElasticGraph::SchemaDefinition::jsonSchemaLayersArray,
18+
::Hash[::Symbol, untyped]
19+
]
# Tuple order matches the implementation: name, name_in_index, type, mapping_options,
# accuracy_confidence, source, runtime_field_script, doc_comment.
20+
def field_reference_comparison_metadata: () -> [
21+
::String,
22+
::String,
23+
::ElasticGraph::SchemaDefinition::SchemaElements::TypeReference,
24+
::Hash[::Symbol, untyped],
25+
::ElasticGraph::SchemaDefinition::Indexing::Field::accuracyConfidence,
26+
::ElasticGraph::SchemaDefinition::SchemaElements::FieldSource?,
27+
::String?,
28+
::String?
29+
]
30+
end
31+
end
32+
end
33+
end
34+
end
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
module ElasticGraph
2+
module JSONIngestion
3+
module SchemaDefinition
4+
module Indexing
5+
# Host method read by `FieldType::EnumExtension#to_json_schema` to list the allowed enum values.
interface _EnumFieldType
6+
def enum_value_names: () -> ::Array[::String]
7+
end
8+
9+
# `to_mapping` plus the three JSON schema methods that each `FieldType::*Extension` module defines.
interface _JSONFieldType
10+
def to_mapping: () -> ::Hash[::String, untyped]
11+
def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata]
12+
def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped]
13+
def to_json_schema: () -> ::Hash[::String, untyped]
14+
end
15+
16+
# Host methods for object field types. `FieldType::ObjectExtension` reads `schema_def_state`,
# `type_name`, `subfields` and `doc_comment` when building the object JSON schema.
interface _ObjectFieldType
17+
def schema_def_state: () -> ::ElasticGraph::SchemaDefinition::State
18+
def type_name: () -> ::String
19+
def subfields: () -> ::Array[::ElasticGraph::SchemaDefinition::Indexing::Field]
20+
def mapping_options: () -> ::ElasticGraph::SchemaDefinition::Mixins::HasTypeInfo::optionsHash
21+
def doc_comment: () -> ::String?
22+
end
23+
24+
# Host method read by `FieldType::ScalarExtension#to_json_schema` to obtain the scalar type.
interface _ScalarFieldType
25+
def scalar_type: () -> ::ElasticGraph::SchemaDefinition::SchemaElements::ScalarType
26+
end
27+
28+
# Host method read by `FieldType::UnionExtension#to_json_schema`; only the keys (subtype names) are used there.
interface _UnionFieldType
29+
def subtypes_by_name: () -> ::Hash[::String, ::Object]
30+
end
31+
end
32+
end
33+
end
34+
end
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
module ElasticGraph
2+
module JSONIngestion
3+
module SchemaDefinition
4+
module Indexing
5+
module FieldType
6+
# Type signature for the enum field type mixin. The self-type `_EnumFieldType` declares that
# the object this module is mixed into provides `enum_value_names`.
module EnumExtension: _EnumFieldType
7+
# The implementation always returns an empty hash.
def json_schema_field_metadata_by_field_name: () -> ::Hash[::String, JSONSchemaFieldMetadata]
8+
# The implementation keeps only the `"enum"` entry of the given customizations.
def format_field_json_schema_customizations: (::Hash[::String, untyped]) -> ::Hash[::String, untyped]
9+
# The implementation returns a `"type" => "string"` schema with an `"enum"` list of the value names.
def to_json_schema: () -> ::Hash[::String, untyped]
10+
end
11+
end
12+
end
13+
end
14+
end
15+
end

0 commit comments

Comments
 (0)