Skip to content

Commit bf0dd3b

Browse files
committed
Namespace JSON schema helpers under JSON ingestion
1 parent: 8956645 · commit: bf0dd3b

18 files changed

Lines changed: 454 additions & 416 deletions

File tree

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb

Lines changed: 69 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -9,76 +9,79 @@
99
require "elastic_graph/constants"
1010

1111
module ElasticGraph
12-
module SchemaDefinition
13-
module Indexing
14-
# Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
15-
#
16-
# @api private
17-
module EventEnvelope
18-
# @param indexed_type_names [Array<String>] names of the indexed types
19-
# @param json_schema_version [Integer] the version of the JSON schema
20-
# @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
21-
def self.json_schema(indexed_type_names, json_schema_version)
22-
{
23-
"type" => "object",
24-
"description" => "Required by ElasticGraph to wrap every data event.",
25-
"properties" => {
26-
"op" => {
27-
"description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.",
28-
"type" => "string",
29-
"enum" => %w[upsert]
30-
},
31-
"type" => {
32-
"description" => "The type of object present in `record`.",
33-
"type" => "string",
34-
# Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
35-
"enum" => indexed_type_names.sort
36-
},
37-
"id" => {
38-
"description" => "The unique identifier of the record.",
39-
"type" => "string",
40-
"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
41-
},
42-
"version" => {
43-
"description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".',
44-
"type" => "integer",
45-
"minimum" => 0,
46-
"maximum" => (2**63) - 1
47-
},
48-
"record" => {
49-
"description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.",
50-
"type" => "object"
51-
},
52-
"latency_timestamps" => {
53-
"description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.",
54-
"type" => "object",
55-
"additionalProperties" => false,
56-
"patternProperties" => {
57-
"^\\w+_at$" => {
58-
"description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.",
59-
"type" => "string",
60-
"format" => "date-time"
12+
module JSONIngestion
13+
module SchemaDefinition
14+
# Indexing support used while generating JSON ingestion schemas.
15+
module Indexing
16+
# Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
17+
#
18+
# @api private
19+
module EventEnvelope
20+
# @param indexed_type_names [Array<String>] names of the indexed types
21+
# @param json_schema_version [Integer] the version of the JSON schema
22+
# @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
23+
def self.json_schema(indexed_type_names, json_schema_version)
24+
{
25+
"type" => "object",
26+
"description" => "Required by ElasticGraph to wrap every data event.",
27+
"properties" => {
28+
"op" => {
29+
"description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.",
30+
"type" => "string",
31+
"enum" => %w[upsert]
32+
},
33+
"type" => {
34+
"description" => "The type of object present in `record`.",
35+
"type" => "string",
36+
# Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
37+
"enum" => indexed_type_names.sort
38+
},
39+
"id" => {
40+
"description" => "The unique identifier of the record.",
41+
"type" => "string",
42+
"maxLength" => ElasticGraph::DEFAULT_MAX_KEYWORD_LENGTH
43+
},
44+
"version" => {
45+
"description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".',
46+
"type" => "integer",
47+
"minimum" => 0,
48+
"maximum" => (2**63) - 1
49+
},
50+
"record" => {
51+
"description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.",
52+
"type" => "object"
53+
},
54+
"latency_timestamps" => {
55+
"description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.",
56+
"type" => "object",
57+
"additionalProperties" => false,
58+
"patternProperties" => {
59+
"^\\w+_at$" => {
60+
"description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.",
61+
"type" => "string",
62+
"format" => "date-time"
63+
}
6164
}
65+
},
66+
ElasticGraph::JSON_SCHEMA_VERSION_KEY => {
67+
"description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.",
68+
"const" => json_schema_version
69+
},
70+
"message_id" => {
71+
"description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.",
72+
"type" => "string"
6273
}
6374
},
64-
JSON_SCHEMA_VERSION_KEY => {
65-
"description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.",
66-
"const" => json_schema_version
75+
"additionalProperties" => false,
76+
"required" => ["op", "type", "id", "version", ElasticGraph::JSON_SCHEMA_VERSION_KEY],
77+
"if" => {
78+
"properties" => {
79+
"op" => {"const" => "upsert"}
80+
}
6781
},
68-
"message_id" => {
69-
"description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.",
70-
"type" => "string"
71-
}
72-
},
73-
"additionalProperties" => false,
74-
"required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
75-
"if" => {
76-
"properties" => {
77-
"op" => {"const" => "upsert"}
78-
}
79-
},
80-
"then" => {"required" => ["record"]}
81-
}
82+
"then" => {"required" => ["record"]}
83+
}
84+
end
8285
end
8386
end
8487
end

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb

Lines changed: 21 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -7,27 +7,29 @@
77
# frozen_string_literal: true
88

99
module ElasticGraph
10-
module SchemaDefinition
11-
module Indexing
12-
# @!parse class JSONSchemaFieldMetadata; end
13-
JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index)
10+
module JSONIngestion
11+
module SchemaDefinition
12+
module Indexing
13+
# @!parse class JSONSchemaFieldMetadata; end
14+
JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index)
1415

15-
# Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas
16-
# alongside the JSON schema fields.
17-
#
18-
# @!attribute [r] type
19-
# @return [String] name of the ElasticGraph type for this field
20-
# @!attribute [r] name_in_index
21-
# @return [String] name of the field in the index
22-
#
23-
# @api private
24-
class JSONSchemaFieldMetadata < ::Data
25-
# @return [Hash<String, String>] hash form of the metadata that can be dumped in JSON schema
26-
def to_dumpable_hash
27-
{"type" => type, "nameInIndex" => name_in_index}
28-
end
16+
# Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas
17+
# alongside the JSON schema fields.
18+
#
19+
# @!attribute [r] type
20+
# @return [String] name of the ElasticGraph type for this field
21+
# @!attribute [r] name_in_index
22+
# @return [String] name of the field in the index
23+
#
24+
# @api private
25+
class JSONSchemaFieldMetadata < ::Data
26+
# @return [Hash<String, String>] hash form of the metadata that can be dumped in JSON schema
27+
def to_dumpable_hash
28+
{"type" => type, "nameInIndex" => name_in_index}
29+
end
2930

30-
# @dynamic initialize, type, name_in_index
31+
# @dynamic initialize, type, name_in_index
32+
end
3133
end
3234
end
3335
end

0 commit comments

Comments
 (0)