|
19 | 19 | ) |
20 | 20 |
|
21 | 21 |
|
def parse_date_to_timestamp(value: str) -> int | None:
    """Convert an ISO 8601 date or datetime string into a Unix timestamp.

    Accepted shapes:
        - Plain date such as '2024-01-01'; read as midnight UTC.
        - Datetime separated by 'T' or a space, such as
          '2024-01-01T12:00:00' or '2024-01-01 12:00:00'; read as UTC when
          the string carries no offset.
        - Datetime with a zone designator, such as '2024-01-01T12:00:00Z'
          or '2024-01-01T12:00:00+00:00'. A trailing offset written without
          a colon ('+0000') is rewritten to '+00:00' before parsing.

    Args:
        value: Candidate date/datetime string.

    Returns:
        Seconds since the Unix epoch truncated to an int, or None when the
        string matches neither DATE_PATTERN nor DATETIME_PATTERN or when
        parsing raises ValueError.
    """
    if DATE_PATTERN.match(value):
        # A date-shaped string is never retried as a datetime: a parse
        # failure here yields None directly.
        try:
            midnight = datetime.strptime(value, "%Y-%m-%d").replace(
                tzinfo=timezone.utc
            )
            return int(midnight.timestamp())
        except ValueError:
            return None

    if not DATETIME_PATTERN.match(value):
        return None

    # Bring the text into the form datetime.fromisoformat expects:
    # 'T' separator, explicit '+00:00' instead of 'Z', colon inside offsets.
    iso_text = value.replace(" ", "T")
    if iso_text.endswith("Z"):
        iso_text = f"{iso_text[:-1]}+00:00"
    iso_text = re.sub(r"([+-]\d{2})(\d{2})$", r"\1:\2", iso_text)

    try:
        moment = datetime.fromisoformat(iso_text)
        # Naive results are pinned to UTC so the timestamp does not depend
        # on the local timezone of the running process.
        if moment.tzinfo is None:
            moment = moment.replace(tzinfo=timezone.utc)
        return int(moment.timestamp())
    except ValueError:
        return None
| 70 | + |
| 71 | + |
22 | 72 | @dataclass |
23 | 73 | class AggregationSpec: |
24 | 74 | """Specification for an aggregation function.""" |
@@ -364,7 +414,7 @@ def _process_select_expression_inner( |
364 | 414 | field=field_name, |
365 | 415 | alias=alias or func_name_lower, |
366 | 416 | ) |
367 | | - elif func_name == "geo_distance": |
| 417 | + elif func_name_lower == "geo_distance": |
368 | 418 | # geo_distance(field, POINT(lon, lat), unit) in SELECT |
369 | 419 | self._process_geo_distance_select(expression, result, alias) |
370 | 420 | elif func_name_lower in redis_reducers: |
@@ -493,16 +543,25 @@ def _process_date_function( |
493 | 543 | field_name = None |
494 | 544 | format_string = None |
495 | 545 |
|
496 | | - if expression.expressions: |
497 | | - first_arg = expression.expressions[0] |
| 546 | + args = expression.expressions or [] |
| 547 | + |
| 548 | + if func_name == "DATE_FORMAT": |
| 549 | + # DATE_FORMAT requires exactly 2 arguments: field, format_string |
| 550 | + if len(args) != 2: |
| 551 | + raise ValueError( |
| 552 | + "DATE_FORMAT requires exactly 2 arguments: field, format_string" |
| 553 | + ) |
| 554 | + first_arg, second_arg = args |
| 555 | + if isinstance(first_arg, exp.Column): |
| 556 | + field_name = first_arg.name |
| 557 | + # Format argument must be a literal string |
| 558 | + if not isinstance(second_arg, exp.Literal) or not second_arg.is_string: |
| 559 | + raise ValueError("DATE_FORMAT format argument must be a literal string") |
| 560 | + format_string = second_arg.this |
| 561 | + elif args: |
| 562 | + first_arg = args[0] |
498 | 563 | if isinstance(first_arg, exp.Column): |
499 | 564 | field_name = first_arg.name |
500 | | - |
501 | | - # For DATE_FORMAT, extract the format string as second argument |
502 | | - if func_name == "DATE_FORMAT" and len(expression.expressions) >= 2: |
503 | | - second_arg = expression.expressions[1] |
504 | | - if isinstance(second_arg, exp.Literal): |
505 | | - format_string = second_arg.this |
506 | 565 |
|
507 | 566 | if field_name: |
508 | 567 | # Generate default alias if not provided |
@@ -822,12 +881,15 @@ def _add_function_condition( |
822 | 881 | ) |
823 | 882 | ) |
824 | 883 |
|
825 | | - def _extract_literal_value(self, expression, convert_dates: bool = True): |
| 884 | + def _extract_literal_value(self, expression, convert_dates: bool = False): |
826 | 885 | """Extract a Python value from a sqlglot Literal or Neg expression. |
827 | 886 |
|
828 | 887 | Args: |
829 | 888 | expression: The sqlglot expression to extract from. |
830 | 889 | convert_dates: If True, convert ISO 8601 date strings to Unix timestamps. |
| 890 | + Default is False to avoid changing semantics for TEXT/TAG |
| 891 | + fields. Date conversion should be handled by the translator |
| 892 | + when the field type is known to be NUMERIC. |
831 | 893 |
|
832 | 894 | Returns: |
833 | 895 | The extracted value, or None if not a literal. |
@@ -872,48 +934,6 @@ def _validate_geo_unit(self, unit_val: object) -> str: |
def _parse_date_to_timestamp(self, value: str) -> int | None:
    """Turn an ISO 8601 date/datetime string into a Unix timestamp.

    This method holds no parsing logic of its own; it forwards ``value``
    unchanged to the module-level ``parse_date_to_timestamp`` function and
    hands back whatever that function returns.

    Args:
        value: The string value to parse.

    Returns:
        The result of ``parse_date_to_timestamp(value)``: an integer Unix
        timestamp, or None if the string is not a valid date string.
    """
    return parse_date_to_timestamp(value)
0 commit comments