|
9 | 9 | require "elastic_graph/constants" |
10 | 10 |
|
11 | 11 | module ElasticGraph |
12 | | - module SchemaDefinition |
13 | | - module Indexing |
14 | | - # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events. |
15 | | - # |
16 | | - # @api private |
17 | | - module EventEnvelope |
18 | | - # @param indexed_type_names [Array<String>] names of the indexed types |
19 | | - # @param json_schema_version [Integer] the version of the JSON schema |
20 | | - # @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`. |
21 | | - def self.json_schema(indexed_type_names, json_schema_version) |
22 | | - { |
23 | | - "type" => "object", |
24 | | - "description" => "Required by ElasticGraph to wrap every data event.", |
25 | | - "properties" => { |
26 | | - "op" => { |
27 | | - "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.", |
28 | | - "type" => "string", |
29 | | - "enum" => %w[upsert] |
30 | | - }, |
31 | | - "type" => { |
32 | | - "description" => "The type of object present in `record`.", |
33 | | - "type" => "string", |
34 | | - # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent. |
35 | | - "enum" => indexed_type_names.sort |
36 | | - }, |
37 | | - "id" => { |
38 | | - "description" => "The unique identifier of the record.", |
39 | | - "type" => "string", |
40 | | - "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH |
41 | | - }, |
42 | | - "version" => { |
43 | | - "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', |
44 | | - "type" => "integer", |
45 | | - "minimum" => 0, |
46 | | - "maximum" => (2**63) - 1 |
47 | | - }, |
48 | | - "record" => { |
49 | | - "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", |
50 | | - "type" => "object" |
51 | | - }, |
52 | | - "latency_timestamps" => { |
53 | | - "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", |
54 | | - "type" => "object", |
55 | | - "additionalProperties" => false, |
56 | | - "patternProperties" => { |
57 | | - "^\\w+_at$" => { |
58 | | - "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", |
59 | | - "type" => "string", |
60 | | - "format" => "date-time" |
| 12 | + module JSONIngestion |
| 13 | + module SchemaDefinition |
| 14 | + # Indexing support used while generating JSON ingestion schemas. |
| 15 | + module Indexing |
| 16 | + # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events. |
| 17 | + # |
| 18 | + # @api private |
| 19 | + module EventEnvelope |
| 20 | + # @param indexed_type_names [Array<String>] names of the indexed types |
| 21 | + # @param json_schema_version [Integer] the version of the JSON schema |
| 22 | + # @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`. |
| 23 | + def self.json_schema(indexed_type_names, json_schema_version) |
| 24 | + { |
| 25 | + "type" => "object", |
| 26 | + "description" => "Required by ElasticGraph to wrap every data event.", |
| 27 | + "properties" => { |
| 28 | + "op" => { |
| 29 | + "description" => "Indicates what type of operation the event represents. For now, only `upsert` is supported, but we plan to support other operations in the future.", |
| 30 | + "type" => "string", |
| 31 | + "enum" => %w[upsert] |
| 32 | + }, |
| 33 | + "type" => { |
| 34 | + "description" => "The type of object present in `record`.", |
| 35 | + "type" => "string", |
| 36 | + # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent. |
| 37 | + "enum" => indexed_type_names.sort |
| 38 | + }, |
| 39 | + "id" => { |
| 40 | + "description" => "The unique identifier of the record.", |
| 41 | + "type" => "string", |
| 42 | + "maxLength" => ElasticGraph::DEFAULT_MAX_KEYWORD_LENGTH |
| 43 | + }, |
| 44 | + "version" => { |
| 45 | + "description" => 'Used to handle duplicate and out-of-order events. When ElasticGraph ingests multiple events for the same `type` and `id`, the one with the largest `version` will "win".', |
| 46 | + "type" => "integer", |
| 47 | + "minimum" => 0, |
| 48 | + "maximum" => (2**63) - 1 |
| 49 | + }, |
| 50 | + "record" => { |
| 51 | + "description" => "The record of this event. The payload of this field must match the JSON schema of the named `type`.", |
| 52 | + "type" => "object" |
| 53 | + }, |
| 54 | + "latency_timestamps" => { |
| 55 | + "description" => "Timestamps from which ElasticGraph measures indexing latency. The `ElasticGraphIndexingLatencies` log message produced for each event will include a measurement from each timestamp included in this map.", |
| 56 | + "type" => "object", |
| 57 | + "additionalProperties" => false, |
| 58 | + "patternProperties" => { |
| 59 | + "^\\w+_at$" => { |
| 60 | + "description" => "A timestamp from which ElasticGraph will measure indexing latency. The timestamp name must end in `_at`.", |
| 61 | + "type" => "string", |
| 62 | + "format" => "date-time" |
| 63 | + } |
61 | 64 | } |
| 65 | + }, |
| 66 | + ElasticGraph::JSON_SCHEMA_VERSION_KEY => { |
| 67 | + "description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.", |
| 68 | + "const" => json_schema_version |
| 69 | + }, |
| 70 | + "message_id" => { |
| 71 | + "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", |
| 72 | + "type" => "string" |
62 | 73 | } |
63 | 74 | }, |
64 | | - JSON_SCHEMA_VERSION_KEY => { |
65 | | - "description" => "The version of the JSON schema the publisher was using when the event was published. ElasticGraph will use the JSON schema matching this version to process the event.", |
66 | | - "const" => json_schema_version |
| 75 | + "additionalProperties" => false, |
| 76 | + "required" => ["op", "type", "id", "version", ElasticGraph::JSON_SCHEMA_VERSION_KEY], |
| 77 | + "if" => { |
| 78 | + "properties" => { |
| 79 | + "op" => {"const" => "upsert"} |
| 80 | + } |
67 | 81 | }, |
68 | | - "message_id" => { |
69 | | - "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer.", |
70 | | - "type" => "string" |
71 | | - } |
72 | | - }, |
73 | | - "additionalProperties" => false, |
74 | | - "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY], |
75 | | - "if" => { |
76 | | - "properties" => { |
77 | | - "op" => {"const" => "upsert"} |
78 | | - } |
79 | | - }, |
80 | | - "then" => {"required" => ["record"]} |
81 | | - } |
| 82 | + "then" => {"required" => ["record"]} |
| 83 | + } |
| 84 | + end |
82 | 85 | end |
83 | 86 | end |
84 | 87 | end |
|
0 commit comments