Skip to content

Commit 8656c44

Browse files
g-talbot and claude committed
fix: update indexing service fingerprint constants and nightly fmt
Adding ParquetMergePolicyConfig and ParquetIndexingConfig to IndexingSettings changes the Hash output, which changes the pipeline params fingerprints. Updated the hardcoded test constants. Added a comment explaining how to recompute them when IndexingSettings fields change. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1fbece5 commit 8656c44

14 files changed

Lines changed: 605 additions & 470 deletions

File tree

quickwit/quickwit-config/src/index_config/mod.rs

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -147,12 +147,11 @@ pub struct ParquetIndexingConfig {
147147
/// (`__s` = string, `__i` = int64, `_secs` = uint64 timestamp).
148148
///
149149
/// The sort order determines:
150-
/// - **Query pruning**: queries that filter on leading sort columns can
151-
/// skip entire splits whose row key ranges don't match.
152-
/// - **Compression**: columns with good locality (e.g., metric_name first)
153-
/// compress better in Parquet's columnar format.
154-
/// - **Compaction scope**: splits with different sort schemas are never
155-
/// merged together.
150+
/// - **Query pruning**: queries that filter on leading sort columns can skip entire splits
151+
/// whose row key ranges don't match.
152+
/// - **Compression**: columns with good locality (e.g., metric_name first) compress better in
153+
/// Parquet's columnar format.
154+
/// - **Compaction scope**: splits with different sort schemas are never merged together.
156155
///
157156
/// When `None`, the product-type default is used (see below).
158157
///

quickwit/quickwit-indexing/src/actors/indexing_service.rs

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1250,9 +1250,13 @@ mod tests {
12501250

12511251
#[tokio::test]
12521252
async fn test_indexing_service_apply_plan() {
1253-
const PARAMS_FINGERPRINT_INGEST_API: u64 = 1637744865450232394;
1254-
const PARAMS_FINGERPRINT_SOURCE_1: u64 = 1705211905504908791;
1255-
const PARAMS_FINGERPRINT_SOURCE_2: u64 = 8706667372658059428;
1253+
// These fingerprints are hashes of IndexConfig + SourceConfig. They
1254+
// change whenever IndexingSettings fields are added/removed. Recompute
1255+
// by temporarily adding a test that prints
1256+
// `indexing_pipeline_params_fingerprint(&index_config, &source_config)`.
1257+
const PARAMS_FINGERPRINT_INGEST_API: u64 = 13743320112697107273;
1258+
const PARAMS_FINGERPRINT_SOURCE_1: u64 = 4583554739263945512;
1259+
const PARAMS_FINGERPRINT_SOURCE_2: u64 = 11653028234524633167;
12561260

12571261
quickwit_common::setup_logging_for_tests();
12581262
let transport = ChannelTransport::default();
Lines changed: 123 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -1,200 +1,211 @@
11
{
2-
"version": "0.9",
2+
"delete_tasks": [
3+
{
4+
"create_timestamp": 0,
5+
"delete_query": {
6+
"index_uid": "my-index:00000000000000000000000000",
7+
"query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}"
8+
},
9+
"opstamp": 10
10+
}
11+
],
312
"index": {
4-
"version": "0.9",
5-
"index_uid": "my-index:00000000000000000000000000",
13+
"checkpoint": {
14+
"kafka-source": {
15+
"00000000000000000000": "00000000000000000042"
16+
}
17+
},
18+
"create_timestamp": 1789,
619
"index_config": {
7-
"version": "0.9",
8-
"index_id": "my-index",
9-
"index_uri": "s3://quickwit-indexes/my-index",
1020
"doc_mapping": {
1121
"doc_mapping_uid": "00000000000000000000000000",
12-
"mode": "dynamic",
1322
"dynamic_mapping": {
14-
"indexed": true,
15-
"tokenizer": "raw",
16-
"record": "basic",
17-
"stored": true,
1823
"expand_dots": true,
1924
"fast": {
2025
"normalizer": "raw"
21-
}
26+
},
27+
"indexed": true,
28+
"record": "basic",
29+
"stored": true,
30+
"tokenizer": "raw"
2231
},
2332
"field_mappings": [
2433
{
34+
"coerce": true,
35+
"fast": true,
36+
"indexed": true,
2537
"name": "tenant_id",
26-
"type": "u64",
38+
"output_format": "number",
2739
"stored": true,
28-
"indexed": true,
29-
"fast": true,
30-
"coerce": true,
31-
"output_format": "number"
40+
"type": "u64"
3241
},
3342
{
34-
"name": "timestamp",
35-
"type": "datetime",
43+
"fast": true,
44+
"fast_precision": "seconds",
45+
"indexed": true,
3646
"input_formats": [
3747
"rfc3339",
3848
"unix_timestamp"
3949
],
50+
"name": "timestamp",
4051
"output_format": "rfc3339",
41-
"fast_precision": "seconds",
42-
"indexed": true,
4352
"stored": true,
44-
"fast": true
53+
"type": "datetime"
4554
},
4655
{
47-
"name": "log_level",
48-
"type": "text",
56+
"fast": false,
57+
"fieldnorms": false,
4958
"indexed": true,
50-
"tokenizer": "raw",
59+
"name": "log_level",
5160
"record": "basic",
52-
"fieldnorms": false,
5361
"stored": true,
54-
"fast": false
62+
"tokenizer": "raw",
63+
"type": "text"
5564
},
5665
{
57-
"name": "message",
58-
"type": "text",
66+
"fast": false,
67+
"fieldnorms": false,
5968
"indexed": true,
60-
"tokenizer": "default",
69+
"name": "message",
6170
"record": "position",
62-
"fieldnorms": false,
6371
"stored": true,
64-
"fast": false
72+
"tokenizer": "default",
73+
"type": "text"
6574
}
6675
],
67-
"timestamp_field": "timestamp",
76+
"index_field_presence": true,
77+
"max_num_partitions": 100,
78+
"mode": "dynamic",
79+
"partition_key": "tenant_id",
80+
"store_document_size": false,
81+
"store_source": true,
6882
"tag_fields": [
6983
"log_level",
7084
"tenant_id"
7185
],
72-
"partition_key": "tenant_id",
73-
"max_num_partitions": 100,
74-
"index_field_presence": true,
75-
"store_document_size": false,
76-
"store_source": true,
86+
"timestamp_field": "timestamp",
7787
"tokenizers": [
7888
{
89+
"filters": [],
7990
"name": "custom_tokenizer",
80-
"type": "regex",
8191
"pattern": "[^\\p{L}\\p{N}]+",
82-
"filters": []
92+
"type": "regex"
8393
}
8494
]
8595
},
96+
"index_id": "my-index",
97+
"index_uri": "s3://quickwit-indexes/my-index",
8698
"indexing_settings": {
8799
"commit_timeout_secs": 301,
88-
"docstore_compression_level": 8,
89100
"docstore_blocksize": 1000000,
90-
"split_num_docs_target": 10000001,
101+
"docstore_compression_level": 8,
91102
"merge_policy": {
92-
"type": "stable_log",
93-
"min_level_num_docs": 100000,
94-
"merge_factor": 9,
103+
"maturation_period": "2days",
95104
"max_merge_factor": 11,
96-
"maturation_period": "2days"
105+
"merge_factor": 9,
106+
"min_level_num_docs": 100000,
107+
"type": "stable_log"
108+
},
109+
"parquet_indexing": {
110+
"window_duration_secs": 900
111+
},
112+
"parquet_merge_policy": {
113+
"maturation_period": "2days",
114+
"max_finalize_merge_operations": 3,
115+
"max_merge_factor": 12,
116+
"max_merge_ops": 4,
117+
"merge_factor": 10,
118+
"target_split_size_bytes": 268435456
97119
},
98120
"resources": {
99121
"heap_size": 50000000
100-
}
122+
},
123+
"split_num_docs_target": 10000001
101124
},
102125
"ingest_settings": {
103126
"min_shards": 1
104127
},
128+
"retention": {
129+
"period": "90 days",
130+
"schedule": "daily"
131+
},
105132
"search_settings": {
106133
"default_search_fields": [
107134
"message"
108135
]
109136
},
110-
"retention": {
111-
"period": "90 days",
112-
"schedule": "daily"
113-
}
137+
"version": "0.9"
114138
},
115-
"checkpoint": {
116-
"kafka-source": {
117-
"00000000000000000000": "00000000000000000042"
118-
}
119-
},
120-
"create_timestamp": 1789,
139+
"index_uid": "my-index:00000000000000000000000000",
121140
"sources": [
122141
{
123-
"version": "0.9",
124-
"source_id": "kafka-source",
125-
"num_pipelines": 2,
126142
"enabled": true,
127-
"source_type": "kafka",
143+
"input_format": "json",
144+
"num_pipelines": 2,
128145
"params": {
129-
"topic": "kafka-topic",
130-
"client_params": {}
146+
"client_params": {},
147+
"topic": "kafka-topic"
131148
},
149+
"source_id": "kafka-source",
150+
"source_type": "kafka",
132151
"transform": {
133152
"script": ".message = downcase(string!(.message))",
134153
"timezone": "UTC"
135154
},
136-
"input_format": "json"
155+
"version": "0.9"
156+
}
157+
],
158+
"version": "0.9"
159+
},
160+
"shards": {
161+
"_ingest-source": [
162+
{
163+
"doc_mapping_uid": "00000000000000000000000000",
164+
"follower_id": "follower-ingester",
165+
"index_uid": "my-index:00000000000000000000000000",
166+
"leader_id": "leader-ingester",
167+
"publish_position_inclusive": "",
168+
"shard_id": "00000000000000000001",
169+
"shard_state": 1,
170+
"source_id": "_ingest-source",
171+
"update_timestamp": 1704067200
137172
}
138173
]
139174
},
140175
"splits": [
141176
{
142-
"split_state": "Published",
143-
"update_timestamp": 1789,
144-
"publish_timestamp": 1789,
145-
"version": "0.9",
146-
"split_id": "split",
147-
"index_uid": "my-index:00000000000000000000000000",
148-
"partition_id": 7,
149-
"source_id": "source",
150-
"node_id": "node",
151-
"num_docs": 12303,
152-
"uncompressed_docs_size_in_bytes": 234234,
153-
"time_range": {
154-
"start": 121000,
155-
"end": 130198
156-
},
157177
"create_timestamp": 3,
178+
"delete_opstamp": 10,
179+
"doc_mapping_uid": "00000000000000000000000000",
180+
"footer_offsets": {
181+
"end": 2000,
182+
"start": 1000
183+
},
184+
"index_uid": "my-index:00000000000000000000000000",
158185
"maturity": {
159-
"type": "immature",
160-
"maturation_period_millis": 4000
186+
"maturation_period_millis": 4000,
187+
"type": "immature"
161188
},
189+
"node_id": "node",
190+
"num_docs": 12303,
191+
"num_merge_ops": 3,
192+
"partition_id": 7,
193+
"publish_timestamp": 1789,
194+
"source_id": "source",
195+
"split_id": "split",
196+
"split_state": "Published",
162197
"tags": [
163198
"234",
164199
"aaa"
165200
],
166-
"footer_offsets": {
167-
"start": 1000,
168-
"end": 2000
201+
"time_range": {
202+
"end": 130198,
203+
"start": 121000
169204
},
170-
"delete_opstamp": 10,
171-
"num_merge_ops": 3,
172-
"doc_mapping_uid": "00000000000000000000000000"
205+
"uncompressed_docs_size_in_bytes": 234234,
206+
"update_timestamp": 1789,
207+
"version": "0.9"
173208
}
174209
],
175-
"shards": {
176-
"_ingest-source": [
177-
{
178-
"index_uid": "my-index:00000000000000000000000000",
179-
"source_id": "_ingest-source",
180-
"shard_id": "00000000000000000001",
181-
"leader_id": "leader-ingester",
182-
"follower_id": "follower-ingester",
183-
"shard_state": 1,
184-
"publish_position_inclusive": "",
185-
"doc_mapping_uid": "00000000000000000000000000",
186-
"update_timestamp": 1704067200
187-
}
188-
]
189-
},
190-
"delete_tasks": [
191-
{
192-
"create_timestamp": 0,
193-
"opstamp": 10,
194-
"delete_query": {
195-
"index_uid": "my-index:00000000000000000000000000",
196-
"query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}"
197-
}
198-
}
199-
]
210+
"version": "0.9"
200211
}

0 commit comments

Comments
 (0)