Skip to content

Commit a44b8c8

Browse files
committed
Optimize list fields
1 parent 544b50f commit a44b8c8

16 files changed

Lines changed: 1432 additions & 1117 deletions

File tree

quickwit/quickwit-indexing/src/actors/packager.rs

Lines changed: 32 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,7 @@ use quickwit_common::temp_dir::TempDirectory;
2727
use quickwit_directories::write_hotcache;
2828
use quickwit_doc_mapper::NamedField;
2929
use quickwit_doc_mapper::tag_pruning::append_to_tag_set;
30-
use quickwit_proto::search::{
31-
ListFieldType, ListFields, ListFieldsEntryResponse, serialize_split_fields,
32-
};
30+
use quickwit_proto::search::{ListFieldsEntry, ListFieldsMetadata, ListFieldsType};
3331
use tantivy::index::FieldMetadata;
3432
use tantivy::schema::{FieldType, Type};
3533
use tantivy::{InvertedIndexReader, ReloadPolicy, SegmentMeta};
@@ -314,7 +312,7 @@ fn create_packaged_split(
314312
build_hotcache(split.split_scratch_directory.path(), &mut hotcache_bytes)?;
315313
ctx.record_progress();
316314

317-
let serialized_split_fields = serialize_field_metadata(&fields_metadata);
315+
let serialized_split_fields = serialize_fields_metadata(&fields_metadata);
318316

319317
let packaged_split = PackagedSplit {
320318
serialized_split_fields,
@@ -327,37 +325,19 @@ fn create_packaged_split(
327325
Ok(packaged_split)
328326
}
329327

330-
/// Serializes the Split fields.
331-
///
332-
/// `fields_metadata` has to be sorted.
333-
fn serialize_field_metadata(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
334-
let fields = fields_metadata
328+
/// Serializes the fields metadata of a split, with entries sorted by (name, type).
329+
fn serialize_fields_metadata(fields_metadata: &[FieldMetadata]) -> Vec<u8> {
330+
let entries = fields_metadata
335331
.iter()
336-
.map(field_metadata_to_list_field_serialized)
332+
.map(field_metadata_to_list_fields_entry)
333+
.sorted_unstable_by(|left, right| left.cmp_by_name_and_type(right))
337334
.collect::<Vec<_>>();
338335

339-
serialize_split_fields(ListFields { fields })
340-
}
341-
342-
fn tantivy_type_to_list_field_type(typ: Type) -> ListFieldType {
343-
match typ {
344-
Type::Str => ListFieldType::Str,
345-
Type::U64 => ListFieldType::U64,
346-
Type::I64 => ListFieldType::I64,
347-
Type::F64 => ListFieldType::F64,
348-
Type::Bool => ListFieldType::Bool,
349-
Type::Date => ListFieldType::Date,
350-
Type::Facet => ListFieldType::Facet,
351-
Type::Bytes => ListFieldType::Bytes,
352-
Type::Json => ListFieldType::Json,
353-
Type::IpAddr => ListFieldType::IpAddr,
354-
}
336+
ListFieldsMetadata { entries }.serialize()
355337
}
356338

357-
fn field_metadata_to_list_field_serialized(
358-
field_metadata: &FieldMetadata,
359-
) -> ListFieldsEntryResponse {
360-
ListFieldsEntryResponse {
339+
fn field_metadata_to_list_fields_entry(field_metadata: &FieldMetadata) -> ListFieldsEntry {
340+
ListFieldsEntry {
361341
field_name: field_metadata.field_name.to_string(),
362342
field_type: tantivy_type_to_list_field_type(field_metadata.typ) as i32,
363343
searchable: field_metadata.is_indexed(),
@@ -368,6 +348,21 @@ fn field_metadata_to_list_field_serialized(
368348
}
369349
}
370350

351+
fn tantivy_type_to_list_field_type(typ: Type) -> ListFieldsType {
352+
match typ {
353+
Type::Bool => ListFieldsType::Bool,
354+
Type::Bytes => ListFieldsType::Bytes,
355+
Type::Date => ListFieldsType::Date,
356+
Type::F64 => ListFieldsType::F64,
357+
Type::Facet => ListFieldsType::Facet,
358+
Type::I64 => ListFieldsType::I64,
359+
Type::IpAddr => ListFieldsType::IpAddr,
360+
Type::Json => ListFieldsType::Json,
361+
Type::Str => ListFieldsType::Str,
362+
Type::U64 => ListFieldsType::U64,
363+
}
364+
}
365+
371366
/// Reads u64 from stored term data.
372367
fn u64_from_term_data(data: &[u8]) -> anyhow::Result<u64> {
373368
let u64_bytes: [u8; 8] = data[0..8]
@@ -382,7 +377,7 @@ mod tests {
382377

383378
use quickwit_actors::{ObservationType, Universe};
384379
use quickwit_metastore::checkpoint::IndexCheckpointDelta;
385-
use quickwit_proto::search::{ListFieldsEntryResponse, deserialize_split_fields};
380+
use quickwit_proto::search::{ListFieldsEntry, ListFieldsMetadata};
386381
use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId};
387382
use tantivy::directory::MmapDirectory;
388383
use tantivy::schema::{FAST, NumericOptions, STRING, Schema, TEXT, Type};
@@ -424,24 +419,24 @@ mod tests {
424419
},
425420
];
426421

427-
let out = serialize_field_metadata(&fields_metadata);
422+
let out = serialize_fields_metadata(&fields_metadata);
428423

429-
let deserialized: Vec<ListFieldsEntryResponse> =
430-
deserialize_split_fields(&mut &out[..]).unwrap().fields;
424+
let deserialized: Vec<ListFieldsEntry> =
425+
ListFieldsMetadata::deserialize(&out[..]).unwrap().entries;
431426

432427
assert_eq!(fields_metadata.len(), deserialized.len());
433428
assert_eq!(deserialized[0].field_name, "test");
434-
assert_eq!(deserialized[0].field_type, ListFieldType::Str as i32);
429+
assert_eq!(deserialized[0].field_type, ListFieldsType::Str as i32);
435430
assert!(deserialized[0].searchable);
436431
assert!(deserialized[0].aggregatable);
437432

438433
assert_eq!(deserialized[1].field_name, "test2");
439-
assert_eq!(deserialized[1].field_type, ListFieldType::Str as i32);
434+
assert_eq!(deserialized[1].field_type, ListFieldsType::Str as i32);
440435
assert!(deserialized[1].searchable);
441436
assert!(!deserialized[1].aggregatable);
442437

443438
assert_eq!(deserialized[2].field_name, "test3");
444-
assert_eq!(deserialized[2].field_type, ListFieldType::U64 as i32);
439+
assert_eq!(deserialized[2].field_type, ListFieldsType::U64 as i32);
445440
assert!(deserialized[2].searchable);
446441
assert!(deserialized[2].aggregatable);
447442
}

quickwit/quickwit-proto/protos/quickwit/search.proto

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ message ListFieldsRequest {
117117
repeated string index_id_patterns = 1;
118118
// Optionally restricts the query to a list of field patterns.
119119
// Wildcard expressions are supported.
120-
repeated string fields = 2;
120+
repeated string field_patterns = 2;
121121

122122
// Time filter, expressed in seconds since epoch.
123123
// That filter is to be interpreted as the semi-open interval:
@@ -144,16 +144,23 @@ message LeafListFieldsRequest {
144144

145145
// Optionally restricts the query to a list of field patterns.
146146
// Wildcard expressions are supported.
147-
repeated string fields = 4;
147+
repeated string field_patterns = 4;
148148
}
149149

150+
/// Message returned by leaf and root list fields requests.
150151
message ListFieldsResponse {
151-
repeated ListFieldsEntryResponse fields = 1;
152+
repeated ListFieldsEntry entries = 1;
152153
}
153154

154-
message ListFieldsEntryResponse {
155+
/// Message containing the fields metadata for a split, with entries sorted by (name, type), stored zstd-compressed in the split. Currently a duplicate of ListFieldsResponse, but kept
156+
/// distinct so they can evolve independently.
157+
message ListFieldsMetadata {
158+
repeated ListFieldsEntry entries = 1;
159+
}
160+
161+
message ListFieldsEntry {
155162
string field_name = 1;
156-
ListFieldType field_type = 2;
163+
ListFieldsType field_type = 2;
157164
// The index ids in which the field exists
158165
repeated string index_ids = 3;
159166
// True means the field is searchable (indexed) in at least some indices.
@@ -168,7 +175,7 @@ message ListFieldsEntryResponse {
168175
repeated string non_aggregatable_index_ids = 7;
169176
}
170177

171-
enum ListFieldType {
178+
enum ListFieldsType {
172179
STR = 0;
173180
U64 = 1;
174181
I64 = 2;
@@ -180,9 +187,7 @@ enum ListFieldType {
180187
IP_ADDR = 8;
181188
JSON = 9;
182189
}
183-
message ListFields {
184-
repeated ListFieldsEntryResponse fields = 1;
185-
}
190+
186191
// -- Search -------------------
187192

188193
message SearchRequest {

quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs

Lines changed: 16 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)