Skip to content

Commit bdcdd7d

Browse files
authored
Improve documentation regarding json fields and concatenated json fields (#6047)
1 parent 51243e4 commit bdcdd7d

5 files changed

Lines changed: 37 additions & 10 deletions

File tree

docs/configuration/index-config.md

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,14 @@ fast:
358358
normalizer: lowercase
359359
```
360360

361+
Stored primitive types are inferred from the JSON value types using the following rules:
362+
- a boolean value `true` or `false` is stored as `bool`
363+
- numeric values are cast to the first compatible format among `i64`, `u64` or
364+
`f64` (in this order)
365+
- for string values (surrounded with quotes), Tantivy attempts to parse a date
366+
in `rfc3339` format. If the parsing fails, the value is stored as `text` using
367+
the configured tokenization rules
368+
361369
**Parameters for JSON field**
362370

363371
| Variable | Description | Default value |
@@ -435,8 +443,13 @@ record: basic
435443
Concatenate fields don't support fast fields, and are never stored. They use their own tokenizer, independently of the
436444
tokenizer configured on the individual fields.
437445
At query time, concatenate fields don't support range queries.
438-
Only the following types are supported inside a concatenate field: text, bool, i64, u64, f64, json. Other types are rejected
439-
at index creation, or silently discarded during indexation if they are found inside a json field.
446+
Only the following types are supported inside a concatenate field: text, bool,
447+
i64, u64, f64, json. Other types are rejected at index creation, or silently
448+
discarded during indexation if they are found inside a json field. Unlike
449+
regular JSON fields, JSON fields in a concatenate field don't store RFC3339
450+
dates as Tantivy dates. This means you can still perform prefix queries,
451+
e.g. `my_default_field:"2025-12-12"*`, to work around the lack of support for range
452+
queries.
440453
Adding an object field to a concatenate field doesn't automatically add its subfields (yet).
441454
<!-- typing is made so it wouldn't be too hard to add, as well as things like params_* matching all fields whose name starts with params_, but the feature isn't implemented yet -->
442455
It isn't possible to add subfields from a json field to a concatenate field. For instance if `attributes` is a json field, it's not possible to add only `attributes.color` to a concatenate field.
@@ -552,6 +565,8 @@ src.port:53
552565
src.port:53 AND query_params.ctk:e42bb897d
553566
```
554567

568+
The stored primitive type inference is the [same as for JSON fields](#json-type).
569+
555570
### Field name validation rules
556571

557572
Currently Quickwit only accepts field names that match the following regular expression:

quickwit/quickwit-doc-mapper/src/doc_mapper/doc_mapper_impl.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use super::field_presence::populate_field_presence;
3535
use super::tantivy_val_to_json::tantivy_value_to_json;
3636
use crate::doc_mapper::mapping_tree::{
3737
JsonValueIterator, MappingNode, MappingNodeRoot, build_field_path_from_str, build_mapping_tree,
38-
map_primitive_json_to_tantivy,
38+
map_primitive_json_to_concatenate_value,
3939
};
4040
use crate::doc_mapper::{FieldMappingType, JsonObject, Partition};
4141
use crate::query_builder::build_query;
@@ -530,7 +530,7 @@ impl DocMapper {
530530
if !self.concatenate_dynamic_fields.is_empty() {
531531
let json_obj_values =
532532
JsonValueIterator::new(serde_json::Value::Object(dynamic_json_obj.clone()))
533-
.flat_map(map_primitive_json_to_tantivy);
533+
.flat_map(map_primitive_json_to_concatenate_value);
534534

535535
for value in json_obj_values {
536536
for concatenate_dynamic_field in self.concatenate_dynamic_fields.iter() {

quickwit/quickwit-doc-mapper/src/doc_mapper/mapping_tree.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,11 @@ impl<T, I: Iterator<Item = T>> Iterator for OneOrIter<T, I> {
139139
}
140140
}
141141

142-
pub(crate) fn map_primitive_json_to_tantivy(value: JsonValue) -> Option<TantivyValue> {
142+
/// Similar to the native `From<JsonValue> for TantivyValue` implementation, with a
143+
/// subtle difference: no automatic parsing to DateTime is performed when the string
144+
/// is a valid RFC3339 date. This enables some level of range querying through prefix
145+
/// queries despite concatenate fields not supporting fast fields.
146+
pub(crate) fn map_primitive_json_to_concatenate_value(value: JsonValue) -> Option<TantivyValue> {
143147
match value {
144148
JsonValue::Array(_) | JsonValue::Object(_) | JsonValue::Null => None,
145149
JsonValue::String(text) => Some(TantivyValue::Str(text)),
@@ -260,7 +264,7 @@ impl LeafType {
260264
}
261265
}
262266

263-
fn tantivy_value_from_json(
267+
fn concatenate_values_from_json(
264268
&self,
265269
json_val: JsonValue,
266270
) -> Result<impl Iterator<Item = TantivyValue>, String> {
@@ -302,7 +306,7 @@ impl LeafType {
302306
json_obj
303307
.into_iter()
304308
.flat_map(|(_key, val)| JsonValueIterator::new(val))
305-
.flat_map(map_primitive_json_to_tantivy),
309+
.flat_map(map_primitive_json_to_concatenate_value),
306310
))
307311
} else {
308312
Err(format!("expected object, got `{json_val}`"))
@@ -388,7 +392,7 @@ impl MappingLeaf {
388392
if !self.concatenate.is_empty() {
389393
let concat_values = self
390394
.typ
391-
.tantivy_value_from_json(el_json_val.clone())
395+
.concatenate_values_from_json(el_json_val.clone())
392396
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
393397
for concat_value in concat_values {
394398
for field in &self.concatenate {
@@ -408,7 +412,7 @@ impl MappingLeaf {
408412
if !self.concatenate.is_empty() {
409413
let concat_values = self
410414
.typ
411-
.tantivy_value_from_json(json_val.clone())
415+
.concatenate_values_from_json(json_val.clone())
412416
.map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?;
413417
for concat_value in concat_values {
414418
for field in &self.concatenate {

quickwit/rest-api-tests/scenarii/concat_fields/0001_concat_field.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,3 +163,11 @@ params:
163163
expected:
164164
num_hits: 1
165165
---
166+
endpoint: concat/search
167+
params:
168+
# concat date values are stored as strings to enable some level of range
169+
# querying even though they don't support fast fields
170+
query: "concat_raw:\"2024-01-01\"*"
171+
expected:
172+
num_hits: 1
173+
---

quickwit/rest-api-tests/scenarii/concat_fields/_setup.quickwit.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ ndjson:
6767
- {"other-field": "otherfieldvalue", "other-field-number": 9, "other-field-bool": false}
6868
- {"json": {"some_bool": false, "some_int": 10, "nested": {"some_string": "nestedstring"}}}
6969
- {"float": 1.5}
70-
- {"json": {"val:": 2.5}}
70+
- {"json": {"val:": 2.5, "date": "2024-01-01T00:13:00Z"}}
7171
- {"other": 3.5}
7272
# too big to be an i64, parsed as a u64
7373
- {"big": 9223372036854775808}

0 commit comments

Comments
 (0)