Skip to content

Commit 7bc0045

Browse files
committed
Namespace JSON schema helpers under JSON ingestion
1 parent 461ba24 commit 7bc0045

18 files changed

Lines changed: 91 additions & 101 deletions

File tree

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,20 @@
77
# frozen_string_literal: true
88

99
require "elastic_graph/constants"
10+
require "elastic_graph/json_ingestion"
1011

11-
module ElasticGraph
12+
module ElasticGraph::JSONIngestion
1213
module SchemaDefinition
1314
module Indexing
14-
# Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
15+
# Contains logic related to the JSON schema for ElasticGraph's event envelope.
1516
#
1617
# @api private
1718
module EventEnvelope
18-
# @param indexed_type_names [Array<String>] names of the indexed types
19-
# @param json_schema_version [Integer] the version of the JSON schema
20-
# @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
19+
# Builds the JSON schema definition for ElasticGraph's event envelope.
20+
#
21+
# @param indexed_type_names [Array<String>] names of indexed types
22+
# @param json_schema_version [Integer] the JSON schema version number
23+
# @return [Hash<String, Object>] the event envelope JSON schema
2124
def self.json_schema(indexed_type_names, json_schema_version)
2225
{
2326
"type" => "object",
@@ -31,13 +34,12 @@ def self.json_schema(indexed_type_names, json_schema_version)
3134
"type" => {
3235
"description" => "The type of object present in `record`.",
3336
"type" => "string",
34-
# Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
3537
"enum" => indexed_type_names.sort
3638
},
3739
"id" => {
3840
"description" => "The unique identifier of the record.",
3941
"type" => "string",
40-
"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
42+
"maxLength" => ::ElasticGraph::DEFAULT_MAX_KEYWORD_LENGTH
4143
},
4244
"version" => {
4345
"description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".',
@@ -61,7 +63,7 @@ def self.json_schema(indexed_type_names, json_schema_version)
6163
}
6264
}
6365
},
64-
JSON_SCHEMA_VERSION_KEY => {
66+
::ElasticGraph::JSON_SCHEMA_VERSION_KEY => {
6567
"description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.",
6668
"const" => json_schema_version
6769
},
@@ -71,7 +73,7 @@ def self.json_schema(indexed_type_names, json_schema_version)
7173
}
7274
},
7375
"additionalProperties" => false,
74-
"required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
76+
"required" => ["op", "type", "id", "version", ::ElasticGraph::JSON_SCHEMA_VERSION_KEY],
7577
"if" => {
7678
"properties" => {
7779
"op" => {"const" => "upsert"}

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,17 @@
66
#
77
# frozen_string_literal: true
88

9-
module ElasticGraph
9+
module ElasticGraph::JSONIngestion
1010
module SchemaDefinition
1111
module Indexing
1212
# @!parse class JSONSchemaFieldMetadata; end
1313
JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index)
1414

15-
# Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas
16-
# alongside the JSON schema fields.
17-
#
18-
# @!attribute [r] type
19-
# @return [String] name of the ElasticGraph type for this field
20-
# @!attribute [r] name_in_index
21-
# @return [String] name of the field in the index
15+
# Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas.
2216
#
2317
# @api private
2418
class JSONSchemaFieldMetadata < ::Data
25-
# @return [Hash<String, String>] hash form of the metadata that can be dumped in JSON schema
19+
# @return [Hash<String, String>] hash representation suitable for serialization
2620
def to_dumpable_hash
2721
{"type" => type, "nameInIndex" => name_in_index}
2822
end

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rb

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,23 @@
77
# frozen_string_literal: true
88

99
require "elastic_graph/constants"
10+
require "elastic_graph/json_ingestion"
1011

11-
module ElasticGraph
12+
module ElasticGraph::JSONIngestion
1213
module SchemaDefinition
1314
module Indexing
14-
# Represents the result of merging a JSON schema with metadata. The result includes both
15-
# the merged JSON schema and a list of `failed_fields` indicating which fields metadata
16-
# could not be determined for.
15+
# Represents the result of merging a JSON schema with ElasticGraph metadata.
1716
#
1817
# @private
1918
class JSONSchemaWithMetadata < ::Data.define(
20-
# The JSON schema.
2119
:json_schema,
22-
# A set of fields (in the form `Type.field`) that were needed but not found.
2320
:missing_fields,
24-
# A set of type names that were needed but not found.
2521
:missing_types,
26-
# A set of `DeprecatedElement` objects that create conflicting definitions.
2722
:definition_conflicts,
28-
# A set of fields that have been deleted but that must be retained (e.g. for custom shard routing or rollover)
2923
:missing_necessary_fields
3024
)
3125
def json_schema_version
32-
json_schema.fetch(JSON_SCHEMA_VERSION_KEY)
26+
json_schema.fetch(::ElasticGraph::JSON_SCHEMA_VERSION_KEY)
3327
end
3428

3529
# Responsible for building `JSONSchemaWithMetadata` instances.
@@ -60,10 +54,10 @@ def merge_metadata_into(json_schema)
6054
missing_fields = ::Set.new
6155
missing_types = ::Set.new
6256
definition_conflicts = ::Set.new
63-
old_type_name_by_current_name = {} # : ::Hash[String, String]
57+
old_type_name_by_current_name = {} # : ::Hash[::String, ::String]
6458

6559
defs = json_schema.fetch("$defs").to_h do |type_name, type_def|
66-
if type_name != EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"])
60+
if type_name != ::ElasticGraph::EVENT_ENVELOPE_JSON_SCHEMA_NAME && (properties = type_def["properties"])
6761
current_type_name = determine_current_type_name(
6862
type_name,
6963
missing_types: missing_types,
@@ -110,7 +104,6 @@ def merge_metadata_into(json_schema)
110104

111105
private
112106

113-
# Given a historical `type_name`, determines (and returns) the current name for that type.
114107
def determine_current_type_name(type_name, missing_types:, definition_conflicts:)
115108
exists_currently = @field_metadata_by_type_and_field_name.key?(type_name)
116109
deleted = @deleted_types_by_old_name[type_name]&.tap { |elem| @unused_deprecated_elements.delete(elem) }
@@ -128,7 +121,6 @@ def determine_current_type_name(type_name, missing_types:, definition_conflicts:
128121
nil
129122
end
130123

131-
# Given a historical `type_name` and `field_name` determines (and returns) the field metadata for it.
132124
def field_metadata_for(type_name, field_name, missing_fields:, definition_conflicts:)
133125
full_name = "#{type_name}.#{field_name}"
134126

@@ -154,27 +146,21 @@ def field_metadata_for(type_name, field_name, missing_fields:, definition_confli
154146

155147
def identify_missing_necessary_fields(json_schema, old_type_name_by_current_name)
156148
json_schema_resolver = JSONSchemaResolver.new(@state, json_schema, old_type_name_by_current_name)
157-
version = json_schema.fetch(JSON_SCHEMA_VERSION_KEY)
158149

159150
@state.object_types_by_name.values
160151
.select { |type| type.has_own_index_def? && !@derived_indexing_type_names.include?(type.name) }
161152
.flat_map do |object_type|
162-
identify_missing_necessary_fields_for_index_def(
163-
object_type,
164-
object_type.own_index_def, # : Indexing::Index
165-
json_schema_resolver, version
166-
)
153+
index_def = object_type.own_index_def # : ::ElasticGraph::SchemaDefinition::Indexing::Index
154+
identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver)
167155
end
168156
end
169157

170-
def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver, json_schema_version)
158+
def identify_missing_necessary_fields_for_index_def(object_type, index_def, json_schema_resolver)
171159
{
172160
"routing" => index_def.routing_field_path,
173161
"rollover" => index_def.rollover_config&.timestamp_field_path
174162
}.compact.filter_map do |field_type, field_path|
175163
if json_schema_resolver.necessary_path_missing?(field_path)
176-
# The JSON schema v # {json_schema_version} artifact has no field that maps to the #{field_type} path of `#{field_path.fully_qualified_path_in_index}`.
177-
178164
MissingNecessaryField.new(
179165
field_type: field_type,
180166
fully_qualified_path: field_path.fully_qualified_path_in_index
@@ -198,12 +184,6 @@ def initialize(state, json_schema, old_type_name_by_current_name)
198184
end
199185
end
200186

201-
# Indicates if the given `field_path` is (1) necessary and (2) missing from the JSON schema, indicating a problem.
202-
#
203-
# - Returns `false` is the given `field_path` is present in the JSON schema.
204-
# - Returns `false` is the parent type of `field_path` has not been retained in this JSON schema version
205-
# (in that case, the field path is not necessary).
206-
# - Otherwise, returns `true` since the field path is both necessary and missing.
207187
def necessary_path_missing?(field_path)
208188
parent_type = field_path.first_part.parent_type.name
209189

@@ -227,7 +207,13 @@ def necessary_path_part_missing?(parent_type, name_in_index)
227207
end
228208
end
229209

210+
# @!parse class MissingNecessaryField < ::Data; end
230211
MissingNecessaryField = ::Data.define(:field_type, :fully_qualified_path)
212+
213+
# @private
214+
class MissingNecessaryField < ::Data
215+
# @dynamic initialize, with, field_type, fully_qualified_path
216+
end
231217
end
232218
end
233219
end

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rb

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,30 +7,29 @@
77
# frozen_string_literal: true
88

99
require "elastic_graph/constants"
10+
require "elastic_graph/json_ingestion"
1011

11-
module ElasticGraph
12+
module ElasticGraph::JSONIngestion
1213
module SchemaDefinition
1314
# Prunes unused type definitions from a given JSON schema.
1415
#
1516
# @private
1617
class JSONSchemaPruner
1718
def self.prune(original_json_schema)
18-
initial_type_names = [EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema
19-
.dig("$defs", EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum")
19+
initial_type_names = [::ElasticGraph::EVENT_ENVELOPE_JSON_SCHEMA_NAME] + original_json_schema
20+
.dig("$defs", ::ElasticGraph::EVENT_ENVELOPE_JSON_SCHEMA_NAME, "properties", "type", "enum")
2021

2122
types_to_keep = referenced_type_names(initial_type_names, original_json_schema["$defs"])
2223

23-
# The .select will preserve the sort order of the original hash
24-
# standard:disable Style/HashSlice -- https://github.com/soutaro/steep/issues/1503
25-
pruned_defs = original_json_schema["$defs"].select { |k, _v| types_to_keep.include?(k) }
24+
# The .select will preserve the sort order of the original hash.
25+
# standard:disable Style/HashSlice -- We intentionally preserve the dumped definition order.
26+
pruned_defs = original_json_schema["$defs"].select { |type_name, _type_def| types_to_keep.include?(type_name) }
2627
# standard:enable Style/HashSlice
2728

2829
original_json_schema.merge("$defs" => pruned_defs)
2930
end
3031

31-
# Returns a list of type names indicating all types referenced from any type in source_type_names.
32-
private_class_method
33-
def self.referenced_type_names(source_type_names, original_defs)
32+
private_class_method def self.referenced_type_names(source_type_names, original_defs)
3433
return Set.new if source_type_names.empty?
3534

3635
referenced_type_defs = original_defs.slice(*source_type_names)
@@ -39,8 +38,7 @@ def self.referenced_type_names(source_type_names, original_defs)
3938
referenced_type_names(ref_names, original_defs) + source_type_names
4039
end
4140

42-
private_class_method
43-
def self.collect_ref_names(hash)
41+
private_class_method def self.collect_ref_names(hash)
4442
hash.flat_map do |key, value|
4543
case value
4644
when ::Hash

elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rbs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
module ElasticGraph
1+
module ElasticGraph::JSONIngestion
22
module SchemaDefinition
33
module Indexing
44
module EventEnvelope

elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rbs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
module ElasticGraph
1+
module ElasticGraph::JSONIngestion
22
module SchemaDefinition
33
module Indexing
44
class JSONSchemaFieldMetadata

elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_with_metadata.rbs

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,26 @@
1-
module ElasticGraph
1+
module ElasticGraph::JSONIngestion
22
module SchemaDefinition
33
module Indexing
44
class JSONSchemaWithMetadataSupertype
55
attr_reader json_schema: ::Hash[::String, untyped]
66
attr_reader missing_fields: ::Set[::String]
77
attr_reader missing_types: ::Set[::String]
8-
attr_reader definition_conflicts: ::Set[SchemaElements::DeprecatedElement]
8+
attr_reader definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]
99
attr_reader missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField]
1010

1111
def initialize: (
1212
json_schema: ::Hash[::String, untyped],
1313
missing_fields: ::Set[::String],
1414
missing_types: ::Set[::String],
15-
definition_conflicts: ::Set[SchemaElements::DeprecatedElement],
15+
definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement],
1616
missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField]
1717
) -> void
1818

1919
def with: (
2020
?json_schema: ::Hash[::String, untyped],
2121
?missing_fields: ::Set[::String],
2222
?missing_types: ::Set[::String],
23-
?definition_conflicts: ::Set[SchemaElements::DeprecatedElement],
23+
?definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement],
2424
?missing_necessary_fields: ::Array[JSONSchemaWithMetadata::MissingNecessaryField]
2525
) -> instance
2626
end
@@ -30,31 +30,31 @@ module ElasticGraph
3030

3131
class Merger
3232
@field_metadata_by_type_and_field_name: ::Hash[::String, ::Hash[::String, JSONSchemaFieldMetadata]]
33-
@renamed_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement]
34-
@deleted_types_by_old_name: ::Hash[::String, SchemaElements::DeprecatedElement]
35-
@renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]]
36-
@deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, SchemaElements::DeprecatedElement]]
37-
@state: State
33+
@renamed_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]
34+
@deleted_types_by_old_name: ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]
35+
@renamed_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]]
36+
@deleted_fields_by_type_name_and_old_field_name: ::Hash[::String, ::Hash[::String, ::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]]
37+
@state: ::ElasticGraph::SchemaDefinition::State
3838
@derived_indexing_type_names: ::Set[::String]
3939

40-
attr_reader unused_deprecated_elements: ::Set[SchemaElements::DeprecatedElement]
40+
attr_reader unused_deprecated_elements: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]
4141

42-
def initialize: (Results) -> void
42+
def initialize: (::ElasticGraph::SchemaDefinition::Results) -> void
4343
def merge_metadata_into: (::Hash[::String, untyped]) -> JSONSchemaWithMetadata
4444

4545
private
4646

4747
def determine_current_type_name: (
4848
::String,
4949
missing_types: ::Set[::String],
50-
definition_conflicts: ::Set[SchemaElements::DeprecatedElement]
50+
definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]
5151
) -> ::String?
5252

5353
def field_metadata_for: (
5454
::String,
5555
::String,
5656
missing_fields: ::Set[::String],
57-
definition_conflicts: ::Set[SchemaElements::DeprecatedElement]
57+
definition_conflicts: ::Set[::ElasticGraph::SchemaDefinition::SchemaElements::DeprecatedElement]
5858
) -> JSONSchemaFieldMetadata?
5959

6060
def identify_missing_necessary_fields: (
@@ -63,19 +63,18 @@ module ElasticGraph
6363
) -> ::Array[MissingNecessaryField]
6464

6565
def identify_missing_necessary_fields_for_index_def: (
66-
indexableType,
67-
Index,
68-
JSONSchemaResolver,
69-
::Integer
66+
::ElasticGraph::SchemaDefinition::indexableType,
67+
::ElasticGraph::SchemaDefinition::Indexing::Index,
68+
JSONSchemaResolver
7069
) -> ::Array[MissingNecessaryField]
7170

7271
class JSONSchemaResolver
73-
@state: State
72+
@state: ::ElasticGraph::SchemaDefinition::State
7473
@old_type_name_by_current_name: ::Hash[::String, ::String]
7574
@meta_by_old_type_and_name_in_index: ::Hash[::String, ::Hash[::String, ::Hash[::String, untyped]]]
7675

77-
def initialize: (State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void
78-
def necessary_path_missing?: (SchemaElements::FieldPath) -> bool
76+
def initialize: (::ElasticGraph::SchemaDefinition::State, ::Hash[::String, untyped], ::Hash[::String, ::String]) -> void
77+
def necessary_path_missing?: (::ElasticGraph::SchemaDefinition::SchemaElements::FieldPath) -> bool
7978

8079
private
8180

@@ -91,6 +90,11 @@ module ElasticGraph
9190
field_type: ::String,
9291
fully_qualified_path: ::String
9392
) -> void
93+
94+
def with: (
95+
?field_type: ::String,
96+
?fully_qualified_path: ::String
97+
) -> MissingNecessaryField
9498
end
9599
end
96100
end

elasticgraph-json_ingestion/sig/elastic_graph/json_ingestion/schema_definition/json_schema_pruner.rbs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1-
module ElasticGraph
1+
module ElasticGraph::JSONIngestion
22
module SchemaDefinition
33
class JSONSchemaPruner
44
def self.prune: (::Hash[::String, untyped]) -> ::Hash[::String, untyped]
5+
6+
private
7+
58
def self.referenced_type_names: (::Array[::String], ::Hash[::String, untyped]) -> ::Set[::String]
69
def self.collect_ref_names: (::Hash[::String, untyped]) -> ::Array[::String]
710
end

elasticgraph-schema_definition/lib/elastic_graph/schema_definition/indexing/field.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def json_schema
7777

7878
# @return [JSONSchemaFieldMetadata] additional ElasticGraph metadata to be stored in the JSON schema for this field.
7979
def json_schema_metadata
80-
JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index)
80+
::ElasticGraph::JSONIngestion::SchemaDefinition::Indexing::JSONSchemaFieldMetadata.new(type: type.name, name_in_index: name_in_index)
8181
end
8282

8383
# Builds a hash containing the mapping for the provided fields, normalizing it in the same way that the

0 commit comments

Comments
 (0)