diff --git a/nidx/nidx_json/src/lib.rs b/nidx/nidx_json/src/lib.rs index b50b95f116..0d7ce51832 100644 --- a/nidx/nidx_json/src/lib.rs +++ b/nidx/nidx_json/src/lib.rs @@ -23,6 +23,8 @@ mod resource_indexer; mod schema; pub mod search; +pub use tantivy::DateTime; + use std::collections::HashSet; use std::path::Path; diff --git a/nidx/nidx_json/src/resource_indexer.rs b/nidx/nidx_json/src/resource_indexer.rs index 7876594cea..60df4220d7 100644 --- a/nidx/nidx_json/src/resource_indexer.rs +++ b/nidx/nidx_json/src/resource_indexer.rs @@ -41,13 +41,11 @@ pub fn index_json_fields( let resource_uuid = Uuid::parse_str(resource_id)?; let encoded = encode_rid(resource_uuid); - let json_field_type = schema.schema.get_field_entry(schema.json).field_type().clone(); - // Build a single nested object per resource: { "field_id": } let mut nested: Vec<(String, OwnedValue)> = Vec::with_capacity(resource.json_fields.len()); for (field_key, json_info) in resource.json_fields.iter() { let parsed: serde_json::Value = serde_json::from_str(&json_info.value)?; - let owned = json_field_type.value_from_json(parsed)?; + let owned = OwnedValue::from(parsed); nested.push((field_key.clone(), owned)); } diff --git a/nidx/nidx_json/src/search.rs b/nidx/nidx_json/src/search.rs index 82a7fb1397..4fef3f5e9a 100644 --- a/nidx/nidx_json/src/search.rs +++ b/nidx/nidx_json/src/search.rs @@ -35,8 +35,18 @@ pub struct JsonPathFilter { pub enum JsonPredicate { Text(String), - IntRange { lower: Option, upper: Option }, - FloatRange { lower: Option, upper: Option }, + IntRange { + lower: Option, + upper: Option, + }, + FloatRange { + lower: Option, + upper: Option, + }, + DateRange { + lower: Option, + upper: Option, + }, Boolean(bool), } @@ -63,9 +73,13 @@ fn build_leaf_query(filter: &JsonPathFilter, json_field: Field) -> Box { - let mut term = Term::from_field_json_path(json_field, &path, false); + // Exact match against the stored (columnar/fast-field) value via the degenerate [term, term] range query built below. NOTE(review): the third argument of Term::from_field_json_path is expand_dots_enabled, not a fast-field flag — it is `true` here but `false` in the int/float/date branches; confirm this asymmetry is intentional for JSON paths containing dots. + let mut term = Term::from_field_json_path(json_field, &path, true); 
term.append_type_and_str(val); - Box::new(TermQuery::new(term, IndexRecordOption::Basic)) + Box::new(FastFieldRangeQuery::new( + Bound::Included(term.clone()), + Bound::Included(term), + )) } JsonPredicate::IntRange { lower, upper } => { @@ -101,6 +115,20 @@ fn build_leaf_query(filter: &JsonPathFilter, json_field: Field) -> Box { + let build_bound = |opt: &Option| -> Bound { + match opt { + None => Bound::Unbounded, + Some(v) => { + let mut term = Term::from_field_json_path(json_field, &path, false); + term.append_type_and_fast_value(*v); + Bound::Included(term) + } + } + }; + Box::new(FastFieldRangeQuery::new(build_bound(lower), build_bound(upper))) + } } } pub(crate) fn build_tantivy_query(expr: &JsonFilterExpression, json_field: Field) -> Box { @@ -247,11 +275,18 @@ mod tests { #[test] fn test_exact_match() { let (svc, apple, _banana, _cherry) = build_test_index(); + // Full stored value matches. let results = search( &svc, - path("t/product", "name", JsonPredicate::Text("apple".to_string())), + path("t/product", "name", JsonPredicate::Text("red apple".to_string())), ); assert!(results.contains(&apple)); + // Partial token no longer matches (fast-field exact match, not token lookup). 
+ let results = search( + &svc, + path("t/product", "name", JsonPredicate::Text("apple".to_string())), + ); + assert!(!results.contains(&apple)); } #[test] @@ -382,4 +417,148 @@ mod tests { assert!(results.contains(&banana)); assert!(!results.contains(&cherry)); } + + fn build_date_index() -> (JsonReaderService, Uuid, Uuid, Uuid) { + let schema = JsonSchema::new(); + let index = Index::create_in_ram(schema.schema.clone()); + let mut writer: IndexWriter = index.writer(15_000_000).expect("writer failed"); + + let old_id = Uuid::parse_str("00000000000000000000000000000011").unwrap(); + let mid_id = Uuid::parse_str("00000000000000000000000000000012").unwrap(); + let new_id = Uuid::parse_str("00000000000000000000000000000013").unwrap(); + + let dt = |secs: i64| OwnedValue::Date(tantivy::DateTime::from_timestamp_secs(secs)); + + // old: 2020-01-01 00:00:00 UTC (1577836800) + add_doc( + &mut writer, + &schema, + old_id, + "t/event", + vec![("ts".to_string(), dt(1577836800))], + ); + // mid: 2022-06-15 00:00:00 UTC (1655251200) + add_doc( + &mut writer, + &schema, + mid_id, + "t/event", + vec![("ts".to_string(), dt(1655251200))], + ); + // new: 2024-01-01 00:00:00 UTC (1704067200) + add_doc( + &mut writer, + &schema, + new_id, + "t/event", + vec![("ts".to_string(), dt(1704067200))], + ); + + writer.commit().expect("commit failed"); + let reader = index + .reader_builder() + .reload_policy(tantivy::ReloadPolicy::Manual) + .try_into() + .expect("reader failed"); + (JsonReaderService { index, schema, reader }, old_id, mid_id, new_id) + } + + #[test] + fn test_exact_match_text_field() { + // Verifies that JsonPredicate::Text does a true exact match against the + // fast-field (columnar) value — no tokenization, case-sensitive. 
+ let schema = JsonSchema::new(); + let index = Index::create_in_ram(schema.schema.clone()); + let mut writer: IndexWriter = index.writer(15_000_000).expect("writer failed"); + + let id = Uuid::parse_str("00000000000000000000000000000099").unwrap(); + add_doc( + &mut writer, + &schema, + id, + "k/product", + vec![("color".to_string(), OwnedValue::Str("Red Apple".to_string()))], + ); + writer.commit().expect("commit failed"); + let reader = index + .reader_builder() + .reload_policy(tantivy::ReloadPolicy::Manual) + .try_into() + .expect("reader failed"); + let svc = JsonReaderService { index, schema, reader }; + + // Exact full value matches. + let results = search( + &svc, + path("k/product", "color", JsonPredicate::Text("Red Apple".to_string())), + ); + assert!(results.contains(&id), "exact full value should match"); + + // Partial token does NOT match. + let results = search(&svc, path("k/product", "color", JsonPredicate::Text("red".to_string()))); + assert!(!results.contains(&id), "partial/lowercased token should not match"); + + // Wrong case does NOT match (fast-field match is case-sensitive). + let results = search( + &svc, + path("k/product", "color", JsonPredicate::Text("red apple".to_string())), + ); + assert!(!results.contains(&id), "wrong case should not match"); + } + + #[test] + fn test_date_range_bounded() { + let (svc, _old, mid, _new) = build_date_index(); + // [2021-01-01 .. 2023-01-01] + let results = search( + &svc, + path( + "t/event", + "ts", + JsonPredicate::DateRange { + lower: Some(tantivy::DateTime::from_timestamp_secs(1609459200)), // 2021 + upper: Some(tantivy::DateTime::from_timestamp_secs(1672531200)), // 2023 + }, + ), + ); + assert_eq!(results, HashSet::from([mid])); + } + + #[test] + fn test_date_range_unbounded_upper() { + let (svc, _old, mid, new) = build_date_index(); + // [2022-01-01 .. 
] + let results = search( + &svc, + path( + "t/event", + "ts", + JsonPredicate::DateRange { + lower: Some(tantivy::DateTime::from_timestamp_secs(1640995200)), // 2022 + upper: None, + }, + ), + ); + assert!(results.contains(&mid)); + assert!(results.contains(&new)); + assert!(!results.contains(&_old)); + } + + #[test] + fn test_date_range_unbounded_lower() { + let (svc, old, _mid, _new) = build_date_index(); + // [ .. 2021-01-01] + let results = search( + &svc, + path( + "t/event", + "ts", + JsonPredicate::DateRange { + lower: None, + upper: Some(tantivy::DateTime::from_timestamp_secs(1609459200)), // 2021 + }, + ), + ); + assert_eq!(results, HashSet::from([old])); + } } diff --git a/nidx/nidx_protos/nodereader.proto b/nidx/nidx_protos/nodereader.proto index 5753fa8f2e..902d4eda6f 100644 --- a/nidx/nidx_protos/nodereader.proto +++ b/nidx/nidx_protos/nodereader.proto @@ -339,11 +339,17 @@ message JsonFieldPathFilter { optional double upper = 2; } + message DateRangePredicate { + optional google.protobuf.Timestamp lower = 1; + optional google.protobuf.Timestamp upper = 2; + } + oneof predicate { string text = 3; IntegerRangePredicate int_range = 4; FloatRangePredicate float_range = 5; bool boolean = 6; + DateRangePredicate date_range = 7; } } diff --git a/nidx/src/searcher/query_planner.rs b/nidx/src/searcher/query_planner.rs index 28ad547df5..adf4a284e7 100644 --- a/nidx/src/searcher/query_planner.rs +++ b/nidx/src/searcher/query_planner.rs @@ -249,6 +249,14 @@ fn proto_to_json_filter(expr: &nidx_protos::JsonFilterExpression) -> anyhow::Res upper: r.upper, }, Predicate::Boolean(b) => JsonPredicate::Boolean(*b), + Predicate::DateRange(r) => { + let ts_to_dt = + |ts: &nidx_protos::prost_types::Timestamp| nidx_json::DateTime::from_timestamp_secs(ts.seconds); + JsonPredicate::DateRange { + lower: r.lower.as_ref().map(ts_to_dt), + upper: r.upper.as_ref().map(ts_to_dt), + } + } }; Ok(JsonFilterExpression::Path(JsonPathFilter { field_id: path_filter.field_id.clone(), diff 
--git a/nucliadb/src/nucliadb/common/filter_expression.py b/nucliadb/src/nucliadb/common/filter_expression.py index ce87933906..e707292ec0 100644 --- a/nucliadb/src/nucliadb/common/filter_expression.py +++ b/nucliadb/src/nucliadb/common/filter_expression.py @@ -39,6 +39,7 @@ Keyword, Kind, KVBoolMatch, + KVDateRange, KVExactMatch, KVFilterExpression, KVRange, @@ -180,6 +181,16 @@ def parse_kv_filter_expression( ) path.boolean = expr.value return nodereader_pb2.JsonFilterExpression(path=path) + elif isinstance(expr, KVDateRange): + path = nodereader_pb2.JsonFieldPathFilter( + field_id=f"k/{expr.field_id}", + json_path=expr.key, + ) + if expr.gte is not None: + path.date_range.lower.FromDatetime(expr.gte) + if expr.lte is not None: + path.date_range.upper.FromDatetime(expr.lte) + return nodereader_pb2.JsonFilterExpression(path=path) else: assert_never(expr) @@ -206,7 +217,7 @@ def _parse_kv_filter_expression( result = nodereader_pb2.JsonFilterExpression() result.bool_not.CopyFrom(_parse_kv_filter_expression(expr.operand, all_schemas, kbid)) return result - elif isinstance(expr, (KVExactMatch, KVRange, KVBoolMatch)): + elif isinstance(expr, (KVExactMatch, KVRange, KVBoolMatch, KVDateRange)): schema = all_schemas.schemas.get(expr.field_id) if schema is None: raise InvalidQueryError("key_value", f"Unknown key-value schema: '{expr.field_id}'") @@ -235,6 +246,12 @@ def _parse_kv_filter_expression( f"Key '{expr.key}' in schema '{expr.field_id}' is of type '{schema_field.type}', " f"but 'bool_match' requires type 'boolean'", ) + elif isinstance(expr, KVDateRange) and schema_field.type != "date": + raise InvalidQueryError( + "key_value", + f"Key '{expr.key}' in schema '{expr.field_id}' is of type '{schema_field.type}', " + f"but 'date_range' requires type 'date'", + ) return parse_kv_filter_expression(expr) else: assert_never(expr) diff --git a/nucliadb/src/nucliadb/ingest/fields/key_value.py b/nucliadb/src/nucliadb/ingest/fields/key_value.py index 3b26b4985f..89abc0fe9a 
100644 --- a/nucliadb/src/nucliadb/ingest/fields/key_value.py +++ b/nucliadb/src/nucliadb/ingest/fields/key_value.py @@ -19,6 +19,8 @@ # from __future__ import annotations +from datetime import datetime + from typing_extensions import assert_never from nucliadb.ingest.fields.base import Field @@ -57,16 +59,41 @@ def _validate_keys(data: dict, schema: KVSchema) -> None: def check_kv_type(schema_name: str, key: str, value: object, expected: KVFieldType) -> None: ok = False if expected is KVFieldType.TEXT: + if isinstance(value, str): + try: + dt = datetime.fromisoformat(value) + # Tantivy's JSON indexer auto-parses RFC 3339 strings (time component + # plus timezone offset, Z or ±HH:MM) as DateTime, so reject those here. + # NOTE(review): fromisoformat accepts a trailing 'Z' only on Python >= 3.11 — on older interpreters this guard silently passes; confirm the minimum supported Python. + ok = dt.tzinfo is None + except ValueError: + ok = True # not parseable as a date at all, safe + else: + ok = False - ok = isinstance(value, str) elif expected is KVFieldType.INTEGER: ok = isinstance(value, int) and not isinstance(value, bool) elif expected is KVFieldType.FLOAT: ok = isinstance(value, (int, float)) and not isinstance(value, bool) elif expected is KVFieldType.BOOLEAN: ok = isinstance(value, bool) + elif expected is KVFieldType.DATE: + # Dates must be stored as ISO-8601 strings (e.g. "2024-01-15T00:00:00Z") + if isinstance(value, str): + try: + datetime.fromisoformat(value) + ok = True + except ValueError: + ok = False + else: + ok = False else: assert_never(expected) if not ok: + if expected is KVFieldType.TEXT and isinstance(value, str): + raise ValueError( + f"Key {key!r} in schema {schema_name!r} expects type 'text', but the value looks like " + f"a date. Use a 'date' field type for date values."
+ ) raise ValueError( f"Key {key!r} in schema {schema_name!r} expects type {expected.value!r}, got {type(value).__name__}" ) diff --git a/nucliadb/tests/nucliadb/integration/test_key_value_fields.py b/nucliadb/tests/nucliadb/integration/test_key_value_fields.py index 20ea0729c4..dfe5236c7f 100644 --- a/nucliadb/tests/nucliadb/integration/test_key_value_fields.py +++ b/nucliadb/tests/nucliadb/integration/test_key_value_fields.py @@ -28,6 +28,7 @@ {"key": "price", "type": "float", "required": True}, {"key": "in_stock", "type": "boolean", "required": False}, {"key": "quantity", "type": "integer", "required": False}, + {"key": "launched_at", "type": "date", "required": False}, ], } @@ -38,6 +39,7 @@ "price": 12.5, "in_stock": True, "quantity": 3, + "launched_at": "2024-01-15T00:00:00Z", }, } @@ -84,6 +86,7 @@ async def test_kv_field_crud( assert value["price"] == 12.5 assert value["in_stock"] is True assert value["quantity"] == 3 + assert value["launched_at"] == "2024-01-15T00:00:00Z" # --- Update resource via PATCH --- resp = await nucliadb_writer.patch( @@ -118,6 +121,7 @@ async def test_kv_field_crud( assert value["price"] == 12.5 assert value["in_stock"] is True assert value["quantity"] == 3 + assert value["launched_at"] == "2024-01-15T00:00:00Z" # --- Update field: only required keys; optional keys disappear --- resp = await nucliadb_writer.put( @@ -205,6 +209,23 @@ async def test_kv_field_validation( ) assert resp.status_code == 422, resp.text + # Wrong type: launched_at should be ISO date string, not an integer timestamp + resp = await nucliadb_writer.put( + f"{base_url}/product", + json={"schema_id": "product", "data": {"color": "red", "price": 1.0, "launched_at": 1234567890}}, + ) + assert resp.status_code == 422, resp.text + + # Wrong type: launched_at must be a valid ISO string, not free text + resp = await nucliadb_writer.put( + f"{base_url}/product", + json={ + "schema_id": "product", + "data": {"color": "red", "price": 1.0, "launched_at": "not-a-date"}, + 
}, + ) + assert resp.status_code == 422, resp.text + # Field name in URL must match schema_id in body resp = await nucliadb_writer.put( f"{base_url}/product", @@ -243,6 +264,7 @@ async def test_kv_field_filter( "price": 12.5, "in_stock": True, "quantity": 3, + "launched_at": "2023-06-01T00:00:00Z", }, } }, @@ -265,6 +287,7 @@ async def test_kv_field_filter( "price": 5.0, "in_stock": False, "quantity": 10, + "launched_at": "2024-06-01T00:00:00Z", }, } }, @@ -379,6 +402,34 @@ async def find_with_filter(filter_expression: dict) -> set: assert rid1 in rids, f"Expected rid1 in results for color=red AND in_stock=True, got {rids}" assert rid2 not in rids, f"Expected rid2 NOT in results for color=red AND in_stock=True, got {rids}" + # --- Date range: launched_at >= 2024-01-01 → finds resource 2 only --- + rids = await find_with_filter( + { + "key_value": { + "op": "date_range", + "field_id": "product", + "key": "launched_at", + "gte": "2024-01-01T00:00:00Z", + } + } + ) + assert rid2 in rids, f"Expected rid2 in results for launched_at>=2024, got {rids}" + assert rid1 not in rids, f"Expected rid1 NOT in results for launched_at>=2024, got {rids}" + + # --- Date range: launched_at <= 2023-12-31 → finds resource 1 only --- + rids = await find_with_filter( + { + "key_value": { + "op": "date_range", + "field_id": "product", + "key": "launched_at", + "lte": "2023-12-31T23:59:59Z", + } + } + ) + assert rid1 in rids, f"Expected rid1 in results for launched_at<=2023, got {rids}" + assert rid2 not in rids, f"Expected rid2 NOT in results for launched_at<=2023, got {rids}" + @pytest.mark.deploy_modes("standalone") async def test_kv_filter_schema_validation( @@ -459,7 +510,7 @@ async def find_with_filter(filter_expression: dict) -> int: ) assert status == 412, f"Expected 412 for bool_match on text field, got {status}" - # --- Wrong predicate type: range on a text field → 422 --- + # --- Wrong predicate type: range on a text field → 412 --- status = await find_with_filter( { "key_value": 
{ @@ -471,3 +522,16 @@ async def find_with_filter(filter_expression: dict) -> int: } ) assert status == 412, f"Expected 412 for range on text field, got {status}" + + # --- Wrong predicate type: date_range on a text field → 412 --- + status = await find_with_filter( + { + "key_value": { + "op": "date_range", + "field_id": "product", + "key": "color", + "gte": "2024-01-01T00:00:00Z", + } + } + ) + assert status == 412, f"Expected 412 for date_range on text field, got {status}" diff --git a/nucliadb/tests/nucliadb/integration/test_kv_schemas.py b/nucliadb/tests/nucliadb/integration/test_kv_schemas.py index e65ba70a82..b623cb6db9 100644 --- a/nucliadb/tests/nucliadb/integration/test_kv_schemas.py +++ b/nucliadb/tests/nucliadb/integration/test_kv_schemas.py @@ -30,6 +30,7 @@ {"key": "price", "type": "float", "description": "Product price", "required": True}, {"key": "in_stock", "type": "boolean", "required": False}, {"key": "quantity", "type": "integer", "required": False}, + {"key": "launched_at", "type": "date", "required": False}, ], } @@ -70,7 +71,7 @@ async def test_kv_schema_create( data = resp.json() assert data["name"] == "product" assert data["description"] == "A product schema" - assert len(data["fields"]) == 4 + assert len(data["fields"]) == 5 # Read it back resp = await nucliadb_reader.get(f"/kb/{kbid}/kv-schemas/product") @@ -82,6 +83,9 @@ async def test_kv_schema_create( assert data["fields"][0]["required"] is True assert data["fields"][2]["key"] == "in_stock" assert data["fields"][2]["required"] is False + assert data["fields"][4]["key"] == "launched_at" + assert data["fields"][4]["type"] == "date" + assert data["fields"][4]["required"] is False @pytest.mark.deploy_modes("standalone") @@ -180,7 +184,7 @@ async def test_kv_schema_update( data = resp.json() assert data["description"] == "Updated description" # Fields unchanged - assert len(data["fields"]) == 4 + assert len(data["fields"]) == 5 # Update fields only new_fields = [ diff --git 
a/nucliadb/tests/nucliadb/unit/test_kv_schemas.py b/nucliadb/tests/nucliadb/unit/test_kv_schemas.py index e7f235c15b..49d1bbb20a 100644 --- a/nucliadb/tests/nucliadb/unit/test_kv_schemas.py +++ b/nucliadb/tests/nucliadb/unit/test_kv_schemas.py @@ -33,6 +33,15 @@ ], ) +DATE_SCHEMA = KVSchema( + name="event", + fields=[ + KVSchemaField(key="name", type=KVFieldType.TEXT, required=True), + KVSchemaField(key="ts", type=KVFieldType.DATE, required=True), + KVSchemaField(key="end_ts", type=KVFieldType.DATE, required=False), + ], +) + class TestValidateKvData: def test_valid_data_all_fields(self): @@ -57,6 +66,23 @@ def test_wrong_type_text_field(self): with pytest.raises(ValueError, match="expects type 'text'"): validate_kv_data({"color": 123, "price": 1.0}, SCHEMA) + def test_date_string_rejected_in_text_field(self): + # Rejected: Tantivy would auto-detect these as DateTime (RFC 3339 with timezone). + with pytest.raises(ValueError, match="looks like a date"): + validate_kv_data({"color": "2024-01-15T00:00:00Z", "price": 1.0}, SCHEMA) + + def test_date_string_with_offset_rejected_in_text_field(self): + with pytest.raises(ValueError, match="looks like a date"): + validate_kv_data({"color": "2024-01-15T00:00:00+02:00", "price": 1.0}, SCHEMA) + + def test_date_only_string_allowed_in_text_field(self): + # Allowed: Tantivy keeps date-only strings as Str (not RFC 3339). + validate_kv_data({"color": "2024-01-15", "price": 1.0}, SCHEMA) + + def test_naive_datetime_string_allowed_in_text_field(self): + # Allowed: no timezone → Tantivy keeps as Str (not RFC 3339). 
+ validate_kv_data({"color": "2024-01-15T00:00:00", "price": 1.0}, SCHEMA) + def test_wrong_type_float_field(self): with pytest.raises(ValueError, match="expects type 'float'"): validate_kv_data({"color": "red", "price": "not-a-number"}, SCHEMA) @@ -85,6 +111,32 @@ def test_integer_accepted_as_float(self): # Integers are acceptable for float fields validate_kv_data({"color": "red", "price": 10}, SCHEMA) + def test_valid_date_iso_datetime_string(self): + validate_kv_data({"name": "launch", "ts": "2024-01-15T00:00:00Z"}, DATE_SCHEMA) + + def test_valid_date_iso_date_only_string(self): + # Date-only ISO strings are valid (no time component) + validate_kv_data({"name": "launch", "ts": "2024-01-15"}, DATE_SCHEMA) + + def test_valid_date_with_optional_end(self): + validate_kv_data( + {"name": "launch", "ts": "2024-01-15T00:00:00Z", "end_ts": "2024-01-20T00:00:00Z"}, + DATE_SCHEMA, + ) + + def test_invalid_date_string_rejected(self): + with pytest.raises(ValueError, match="expects type 'date'"): + validate_kv_data({"name": "launch", "ts": "not-a-date"}, DATE_SCHEMA) + + def test_non_string_rejected_for_date_field(self): + # Timestamps as integers are not accepted; must be ISO strings + with pytest.raises(ValueError, match="expects type 'date'"): + validate_kv_data({"name": "launch", "ts": 1234567890}, DATE_SCHEMA) + + def test_bool_not_accepted_as_date(self): + with pytest.raises(ValueError, match="expects type 'date'"): + validate_kv_data({"name": "launch", "ts": True}, DATE_SCHEMA) + class TestKVSchemaModel: def test_max_fields_limit(self): diff --git a/nucliadb_models/src/nucliadb_models/filters.py b/nucliadb_models/src/nucliadb_models/filters.py index 1e1a557a88..804804ddf1 100644 --- a/nucliadb_models/src/nucliadb_models/filters.py +++ b/nucliadb_models/src/nucliadb_models/filters.py @@ -342,6 +342,28 @@ class KVBoolMatch(BaseModel, extra="forbid"): value: bool = pydantic.Field(description="The boolean value to match") +class KVDateRange(BaseModel, extra="forbid"): + 
"""Matches a key-value field where the given date key falls within a date/time range""" + + op: Literal["date_range"] = "date_range" + field_id: str = pydantic.Field(description="The KV field/schema name, e.g. 'event'") + key: str = pydantic.Field(description="The key within the KV data, e.g. 'ts'") + gte: DateTime | None = pydantic.Field( + default=None, description="Greater than or equal to (inclusive lower bound)" + ) + lte: DateTime | None = pydantic.Field( + default=None, description="Less than or equal to (inclusive upper bound)" + ) + + @model_validator(mode="after") + def check_bounds(self) -> "KVDateRange": + if self.gte is None and self.lte is None: + raise ValueError("KVDateRange requires at least one bound (gte or lte)") + if self.gte is not None and self.lte is not None and self.lte < self.gte: + raise ValueError(f"KVDateRange lte ({self.lte}) must be >= gte ({self.gte})") + return self + + def kv_discriminator(v: Any) -> str | None: if isinstance(v, dict): if "and" in v: @@ -369,7 +391,8 @@ def kv_discriminator(v: Any) -> str | None: | Annotated[Not["KVFilterExpression"], Tag("not")] | Annotated[KVExactMatch, Tag("exact_match")] | Annotated[KVRange, Tag("range")] - | Annotated[KVBoolMatch, Tag("bool_match")], + | Annotated[KVBoolMatch, Tag("bool_match")] + | Annotated[KVDateRange, Tag("date_range")], Discriminator(kv_discriminator), ] diff --git a/nucliadb_models/src/nucliadb_models/kv_schemas.py b/nucliadb_models/src/nucliadb_models/kv_schemas.py index 34a1c88c31..012074aaa5 100644 --- a/nucliadb_models/src/nucliadb_models/kv_schemas.py +++ b/nucliadb_models/src/nucliadb_models/kv_schemas.py @@ -31,12 +31,13 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
# +from datetime import datetime from enum import Enum from pydantic import BaseModel, Field, model_validator # Type alias for valid KV field values -KVValue = str | int | float | bool +KVValue = str | int | float | bool | datetime MAX_KV_SCHEMAS = 20 MAX_KV_SCHEMA_FIELDS = 50 @@ -47,6 +48,7 @@ class KVFieldType(str, Enum): INTEGER = "integer" FLOAT = "float" BOOLEAN = "boolean" + DATE = "date" class KVSchemaField(BaseModel):