Skip to content

Commit eb21230

Browse files
committed
Namespace JSON schema helpers under JSON ingestion
1 parent 7d4becc commit eb21230

18 files changed

Lines changed: 457 additions & 416 deletions

File tree

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/event_envelope.rb

Lines changed: 70 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -7,78 +7,82 @@
77
# frozen_string_literal: true
88

99
require "elastic_graph/constants"
10+
require "elastic_graph/json_ingestion"
1011

1112
module ElasticGraph
12-
module SchemaDefinition
13-
module Indexing
14-
# Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
15-
#
16-
# @api private
17-
module EventEnvelope
18-
# @param indexed_type_names [Array<String>] names of the indexed types
19-
# @param json_schema_version [Integer] the version of the JSON schema
20-
# @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
21-
def self.json_schema(indexed_type_names, json_schema_version)
22-
{
23-
"type" => "object",
24-
"description" => "Required by ElasticGraph to wrap every data event.",
25-
"properties" => {
26-
"op" => {
27-
"description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.",
28-
"type" => "string",
29-
"enum" => %w[upsert]
30-
},
31-
"type" => {
32-
"description" => "The type of object present in `record`.",
33-
"type" => "string",
34-
# Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
35-
"enum" => indexed_type_names.sort
36-
},
37-
"id" => {
38-
"description" => "The unique identifier of the record.",
39-
"type" => "string",
40-
"maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
41-
},
42-
"version" => {
43-
"description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".',
44-
"type" => "integer",
45-
"minimum" => 0,
46-
"maximum" => (2**63) - 1
47-
},
48-
"record" => {
49-
"description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.",
50-
"type" => "object"
51-
},
52-
"latency_timestamps" => {
53-
"description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.",
54-
"type" => "object",
55-
"additionalProperties" => false,
56-
"patternProperties" => {
57-
"^\\w+_at$" => {
58-
"description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.",
59-
"type" => "string",
60-
"format" => "date-time"
13+
module JSONIngestion
14+
module SchemaDefinition
15+
# Indexing support used while generating JSON ingestion schemas.
16+
module Indexing
17+
# Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
18+
#
19+
# @api private
20+
module EventEnvelope
21+
# @param indexed_type_names [Array<String>] names of the indexed types
22+
# @param json_schema_version [Integer] the version of the JSON schema
23+
# @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
24+
def self.json_schema(indexed_type_names, json_schema_version)
25+
{
26+
"type" => "object",
27+
"description" => "Required by ElasticGraph to wrap every data event.",
28+
"properties" => {
29+
"op" => {
30+
"description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.",
31+
"type" => "string",
32+
"enum" => %w[upsert]
33+
},
34+
"type" => {
35+
"description" => "The type of object present in `record`.",
36+
"type" => "string",
37+
# Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
38+
"enum" => indexed_type_names.sort
39+
},
40+
"id" => {
41+
"description" => "The unique identifier of the record.",
42+
"type" => "string",
43+
"maxLength" => ElasticGraph::DEFAULT_MAX_KEYWORD_LENGTH
44+
},
45+
"version" => {
46+
"description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".',
47+
"type" => "integer",
48+
"minimum" => 0,
49+
"maximum" => (2**63) - 1
50+
},
51+
"record" => {
52+
"description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.",
53+
"type" => "object"
54+
},
55+
"latency_timestamps" => {
56+
"description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.",
57+
"type" => "object",
58+
"additionalProperties" => false,
59+
"patternProperties" => {
60+
"^\\w+_at$" => {
61+
"description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.",
62+
"type" => "string",
63+
"format" => "date-time"
64+
}
6165
}
66+
},
67+
ElasticGraph::JSON_SCHEMA_VERSION_KEY => {
68+
"description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.",
69+
"const" => json_schema_version
70+
},
71+
"message_id" => {
72+
"description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.",
73+
"type" => "string"
6274
}
6375
},
64-
JSON_SCHEMA_VERSION_KEY => {
65-
"description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.",
66-
"const" => json_schema_version
76+
"additionalProperties" => false,
77+
"required" => ["op", "type", "id", "version", ElasticGraph::JSON_SCHEMA_VERSION_KEY],
78+
"if" => {
79+
"properties" => {
80+
"op" => {"const" => "upsert"}
81+
}
6782
},
68-
"message_id" => {
69-
"description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.",
70-
"type" => "string"
71-
}
72-
},
73-
"additionalProperties" => false,
74-
"required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
75-
"if" => {
76-
"properties" => {
77-
"op" => {"const" => "upsert"}
78-
}
79-
},
80-
"then" => {"required" => ["record"]}
81-
}
83+
"then" => {"required" => ["record"]}
84+
}
85+
end
8286
end
8387
end
8488
end

elasticgraph-json_ingestion/lib/elastic_graph/json_ingestion/schema_definition/indexing/json_schema_field_metadata.rb

Lines changed: 21 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -7,27 +7,29 @@
77
# frozen_string_literal: true
88

99
module ElasticGraph
10-
module SchemaDefinition
11-
module Indexing
12-
# @!parse class JSONSchemaFieldMetadata; end
13-
JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index)
10+
module JSONIngestion
11+
module SchemaDefinition
12+
module Indexing
13+
# @!parse class JSONSchemaFieldMetadata; end
14+
JSONSchemaFieldMetadata = ::Data.define(:type, :name_in_index)
1415

15-
# Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas
16-
# alongside the JSON schema fields.
17-
#
18-
# @!attribute [r] type
19-
# @return [String] name of the ElasticGraph type for this field
20-
# @!attribute [r] name_in_index
21-
# @return [String] name of the field in the index
22-
#
23-
# @api private
24-
class JSONSchemaFieldMetadata < ::Data
25-
# @return [Hash<String, String>] hash form of the metadata that can be dumped in JSON schema
26-
def to_dumpable_hash
27-
{"type" => type, "nameInIndex" => name_in_index}
28-
end
16+
# Metadata about an ElasticGraph field that needs to be stored in our versioned JSON schemas
17+
# alongside the JSON schema fields.
18+
#
19+
# @!attribute [r] type
20+
# @return [String] name of the ElasticGraph type for this field
21+
# @!attribute [r] name_in_index
22+
# @return [String] name of the field in the index
23+
#
24+
# @api private
25+
class JSONSchemaFieldMetadata < ::Data
26+
# @return [Hash<String, String>] hash form of the metadata that can be dumped in JSON schema
27+
def to_dumpable_hash
28+
{"type" => type, "nameInIndex" => name_in_index}
29+
end
2930

30-
# @dynamic initialize, type, name_in_index
31+
# @dynamic initialize, type, name_in_index
32+
end
3133
end
3234
end
3335
end

0 commit comments

Comments
 (0)