From e0aac184ebfac8be14bc249354727319c6d07099 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 18:54:24 +0000 Subject: [PATCH 1/6] feat: add robust datetime parsing fallback to DatetimeBasedCursor - Add ab_datetime_try_parse as fallback when expected formats fail - Maintain backward compatibility by trying expected formats first - Update schema documentation to reflect new fallback behavior - Update test to use truly unparseable string for error case - Preserve original cursor value format in state storage - Eliminate parsing errors for ISO8601/RFC3339 compliant datetimes Co-Authored-By: AJ Steers --- .../sources/declarative/declarative_component_schema.yaml | 4 +++- .../declarative/incremental/datetime_based_cursor.py | 6 ++++++ .../declarative/incremental/test_datetime_based_cursor.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index e7f8d0793..63cb1b1bd 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -944,7 +944,9 @@ definitions: items: type: string description: | - The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the Outgoing Datetime Format will be used. + The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. + If none of the specified formats match, the system will attempt to parse the value using robust datetime parsing that handles most ISO8601/RFC3339 compliant formats. + If not provided, the Outgoing Datetime Format will be used as the first attempt. 
Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available: * **%s**: Epoch unix timestamp - `1686218963` * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456` diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 4eadf68e1..83dbb6148 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -21,6 +21,7 @@ ) from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState +from airbyte_cdk.utils.datetime_helpers import ab_datetime_format, ab_datetime_try_parse from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths @@ -313,6 +314,11 @@ def parse_date(self, date: str) -> datetime.datetime: return self._parser.parse(date, datetime_format) except ValueError: pass + + parsed_dt = ab_datetime_try_parse(date) + if parsed_dt: + return parsed_dt + raise ValueError(f"No format in {self.cursor_datetime_formats} matching {date}") @classmethod diff --git a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py index b4f990ee7..344650bd0 100644 --- a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +++ b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py @@ -1021,7 +1021,7 @@ def test_given_unknown_format_when_parse_date_then_raise_error(): parameters={}, ) with pytest.raises(ValueError): - slicer.parse_date("2021-01-01T00:00:00.000000+0000") + slicer.parse_date("not-a-valid-datetime-string") @pytest.mark.parametrize( From b08ccc18c10e0ad6f3001a2b5962409b0cc68d6f Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: 
Thu, 28 Aug 2025 12:01:33 -0700 Subject: [PATCH 2/6] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../sources/declarative/incremental/datetime_based_cursor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 83dbb6148..dc5a43343 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -315,8 +315,7 @@ def parse_date(self, date: str) -> datetime.datetime: except ValueError: pass - parsed_dt = ab_datetime_try_parse(date) - if parsed_dt: + if parsed_dt is not None: return parsed_dt raise ValueError(f"No format in {self.cursor_datetime_formats} matching {date}") From 876880e39ff253670388c792ab1f998bb6c14a99 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Thu, 28 Aug 2025 12:02:45 -0700 Subject: [PATCH 3/6] Apply suggestion from @aaronsteers --- .../sources/declarative/incremental/datetime_based_cursor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index dc5a43343..4f07cf18b 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -315,6 +315,7 @@ def parse_date(self, date: str) -> datetime.datetime: except ValueError: pass + parsed_dt = ab_datetime_try_parse(date) if parsed_dt is not None: return parsed_dt From b2b8788663397563cdcce8debf40c3448ab84b6c Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Thu, 28 Aug 2025 12:04:49 -0700 Subject: [PATCH 4/6] Apply suggestion from @aaronsteers --- .../sources/declarative/incremental/datetime_based_cursor.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 4f07cf18b..c43dfefe2 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -315,6 +315,9 @@ def parse_date(self, date: str) -> datetime.datetime: except ValueError: pass + # If we have not parsed by now, use the robust parser which handles + # all common formats, including all formats supported by ISO8601 + # and RFC3306. parsed_dt = ab_datetime_try_parse(date) if parsed_dt is not None: return parsed_dt From 9b2d88bbfb076e499148eba2f93eb28052273056 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 19:06:17 +0000 Subject: [PATCH 5/6] docs: fix RFC number in explanatory comment for robust datetime parsing fallback - Correct the comment explaining that ab_datetime_try_parse handles ISO8601/RFC3339 formats (it previously said RFC3306) - Addresses PR feedback from @aaronsteers requesting documentation Co-Authored-By: AJ Steers --- .../sources/declarative/incremental/datetime_based_cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index c43dfefe2..031e9121f 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -317,7 +317,7 @@ def parse_date(self, date: str) -> datetime.datetime: # If we have not parsed by now, use the robust parser which handles # all common formats, including all formats supported by ISO8601 - # and RFC3306. + # and RFC3339. 
parsed_dt = ab_datetime_try_parse(date) if parsed_dt is not None: return parsed_dt From 81bd744893e0dfb9ca6ca88a4113e417475210e9 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 28 Aug 2025 19:30:39 +0000 Subject: [PATCH 6/6] feat: add MinMaxDatetime robust parsing and UI field hiding - Add robust datetime parsing fallback to MinMaxDatetime using ab_datetime_try_parse - Hide cursor_datetime_formats and datetime_format fields in UI with airbyte_hidden: true - Expand parametrized tests with robust fallback scenarios that now succeed - Maintain backward compatibility while simplifying configuration - Keep datetime_format for API output formatting, decouple from cursor storage Co-Authored-By: AJ Steers --- .../declarative/datetime/min_max_datetime.py | 25 +++++++---- .../declarative_component_schema.yaml | 2 + .../incremental/test_datetime_based_cursor.py | 43 +++++++++++++++++++ 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index eb407db44..7d9ac9ad4 100644 --- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -8,6 +8,7 @@ from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse @dataclass @@ -65,15 +66,23 @@ def get_datetime( if not datetime_format: datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z" - time = self._parser.parse( - str( - self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval" - config, - **additional_parameters, + datetime_str = str( + self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval" + config, + **additional_parameters, + ) + ) + + try: 
+ time = self._parser.parse(datetime_str, datetime_format) + except ValueError: + parsed_dt = ab_datetime_try_parse(datetime_str) + if parsed_dt is not None: + time = parsed_dt + else: + raise ValueError( + f"Unable to parse datetime '{datetime_str}' with format '{datetime_format}' or robust parsing" ) - ), - datetime_format, - ) # type: ignore # datetime is always cast to an interpolated string if self.min_datetime: min_time = str(self.min_datetime.eval(config, **additional_parameters)) # type: ignore # min_datetime is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 63cb1b1bd..f7bb0a6c5 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -943,6 +943,7 @@ definitions: type: array items: type: string + airbyte_hidden: true description: | The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If none of the specified formats match, the system will attempt to parse the value using robust datetime parsing that handles most ISO8601/RFC3339 compliant formats. @@ -2805,6 +2806,7 @@ definitions: - "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}" datetime_format: title: Datetime Format + airbyte_hidden: true description: | Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available: * **%s**: Epoch unix timestamp - `1686218963` diff --git a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py index 344650bd0..b45b8f40f 100644 --- a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +++ b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py @@ -997,6 +997,30 @@ def test_parse_date_legacy_merge_datetime_format_in_cursor_datetime_format( ["%Y-%m-%dT%H:%M:%S.%f%z", "%s"], datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), + ( + "test_robust_fallback_z_suffix", + "2021-01-01T00:00:00Z", + ["%Y-%m-%d"], + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), + ( + "test_robust_fallback_iso_with_colon_tz", + "2021-01-01T00:00:00+00:00", + ["%Y-%m-%d"], + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), + ( + "test_robust_fallback_date_only", + "2021-01-01", + ["%s"], + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), + ( + "test_robust_fallback_unix_timestamp_string", + "1609459200", + ["%Y-%m-%d"], + datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), + ), ], ) def test_parse_date(test_name, input_date, date_formats, expected_output_date): @@ -1024,6 +1048,25 @@ def test_given_unknown_format_when_parse_date_then_raise_error(): slicer.parse_date("not-a-valid-datetime-string") +def test_minmax_datetime_robust_fallback(): + from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime + + test_cases = [ + ("2021-01-01T00:00:00Z", "%Y-%m-%d"), + ("2021-01-01T00:00:00+00:00", "%Y-%m-%d"), + ("1609459200", "%Y-%m-%d"), + ] + + for input_date, incompatible_format in test_cases: + min_max_dt = MinMaxDatetime( + datetime=input_date, datetime_format=incompatible_format, parameters={} + ) + result = min_max_dt.get_datetime({}) + assert result.year == 2021 + assert 
result.month == 1 + assert result.day == 1 + + @pytest.mark.parametrize( "test_name, input_dt, datetimeformat, datetimeformat_granularity, expected_output", [