From f7f2420afd07cf1c4cf40289979a91cc58a915f1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 22:50:39 +0000 Subject: [PATCH 1/8] fix: use flexible datetime parsing for start_date in file-based connectors Replace strict regex pattern with ab_datetime_try_parse validator to accept any valid ISO8601/RFC3339 datetime format. This fixes issues where valid datetime strings like '2025-01-01T00:00:00Z' (without microseconds) were incorrectly rejected. Fixes: airbytehq/oncall#9390 Co-Authored-By: AJ Steers --- .../config/abstract_file_based_spec.py | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index d9b67e34c..92c4a650e 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -7,12 +7,13 @@ from typing import Any, Dict, List, Literal, Optional, Union import dpath -from pydantic.v1 import AnyUrl, BaseModel, Field +from pydantic.v1 import AnyUrl, BaseModel, Field, validator from airbyte_cdk import OneOfOptionConfig from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions from airbyte_cdk.sources.utils import schema_helpers +from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse class DeliverRecords(BaseModel): @@ -55,11 +56,27 @@ class AbstractFileBasedSpec(BaseModel): description="UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", examples=["2021-01-01T00:00:00.000000Z"], format="date-time", - pattern="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{6}Z$", - pattern_descriptor="YYYY-MM-DDTHH:mm:ss.SSSSSSZ", order=1, ) + @validator("start_date", pre=True) + def validate_start_date(cls, v: Optional[str]) -> Optional[str]: + """Validate that start_date is a parseable datetime string. + + Uses ab_datetime_try_parse which accepts any common ISO8601/RFC3339 format, + including formats with or without microseconds (e.g., both + '2021-01-01T00:00:00Z' and '2021-01-01T00:00:00.000000Z' are valid). + """ + if v is None: + return v + parsed = ab_datetime_try_parse(v) + if parsed is None: + raise ValueError( + f"'{v}' is not a valid datetime string. " + "Please use a format like '2021-01-01T00:00:00Z' or '2021-01-01T00:00:00.000000Z'." + ) + return v + streams: List[FileBasedStreamConfig] = Field( title="The list of streams to sync", description='Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to warehouse destination such as Snowflake or BigQuery, each stream is a separate table.', From 6886ab2715295bad9b8b85526815da9c00b11286 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:00:39 +0000 Subject: [PATCH 2/8] test: update expected spec to remove pattern field for start_date Co-Authored-By: AJ Steers --- unit_tests/sources/file_based/scenarios/csv_scenarios.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/unit_tests/sources/file_based/scenarios/csv_scenarios.py b/unit_tests/sources/file_based/scenarios/csv_scenarios.py index f31585412..b04a4c2bf 100644 --- a/unit_tests/sources/file_based/scenarios/csv_scenarios.py +++ b/unit_tests/sources/file_based/scenarios/csv_scenarios.py @@ -63,8 +63,6 @@ "description": "UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", "examples": ["2021-01-01T00:00:00.000000Z"], "format": "date-time", - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{6}Z$", - "pattern_descriptor": "YYYY-MM-DDTHH:mm:ss.SSSSSSZ", "order": 1, "type": "string", }, From 198de6da1ac07439afc91e4ad6ee09bc1c3997c3 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:32:11 +0000 Subject: [PATCH 3/8] test: add unit tests for start_date validation Co-Authored-By: AJ Steers --- .../config/test_abstract_file_based_spec.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py index ec37567a8..1e74470cb 100644 --- a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +++ b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py @@ -7,7 +7,9 @@ import pytest from jsonschema import ValidationError, validate from pydantic.v1 import BaseModel +from pydantic.v1 import ValidationError as PydanticValidationError +from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( AvroFormat, CsvFormat, @@ -40,3 +42,28 @@ def test_parquet_file_type_is_not_a_valid_csv_file_type( validate(instance=format_config[file_type], schema=file_format.schema()) else: validate(instance=format_config[file_type], schema=file_format.schema()) + + +@pytest.mark.parametrize( + "start_date, should_pass", + [ + pytest.param("2021-01-01T00:00:00.000000Z", True, id="with_microseconds"), + pytest.param("2021-01-01T00:00:00Z", True, id="without_microseconds"), + pytest.param("2021-01-01T00:00:00.000Z", True, id="with_milliseconds"), + pytest.param("2025-01-01T00:00:00Z", True, id="terraform_provider_format"), + pytest.param("2021-01-01T00:00:00+00:00", True, id="with_timezone_offset"), + pytest.param("2021-01-01", True, id="date_only"), + pytest.param(None, True, id="none_value"), + pytest.param("not-a-date", False, id="invalid_string"), + pytest.param("2021/01/01", True, id="slash_separator_also_accepted"), + pytest.param("", False, id="empty_string"), + ], +) +def test_start_date_validation(start_date: str, should_pass: bool) -> None: + """Test that start_date accepts various valid ISO8601/RFC3339 formats.""" + if should_pass: + result = AbstractFileBasedSpec.validate_start_date(start_date) + assert result == start_date + else: + with pytest.raises(ValueError, match="is not a valid datetime string"): + AbstractFileBasedSpec.validate_start_date(start_date) From cd5b9f032f53e8d2055983a8d731b8efdc5358bd Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:34:48 +0000 Subject: [PATCH 4/8] style: make validator signature multiline with noqa comment Co-Authored-By: AJ Steers --- .../sources/file_based/config/abstract_file_based_spec.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index 92c4a650e..26891dd02 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -60,7 +60,10 @@ class AbstractFileBasedSpec(BaseModel): ) @validator("start_date", pre=True) - def validate_start_date(cls, v: Optional[str]) -> Optional[str]: + def validate_start_date( + cls, # noqa: N805 # Pydantic validators use cls, not self + v: Optional[str], + ) -> Optional[str]: """Validate that start_date is a parseable datetime string. Uses ab_datetime_try_parse which accepts any common ISO8601/RFC3339 format, From 85262497d7daa5051489ff7dc8596ad55c5aef82 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:35:53 +0000 Subject: [PATCH 5/8] fix: remove unused PydanticValidationError import Co-Authored-By: AJ Steers --- .../sources/file_based/config/test_abstract_file_based_spec.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py index 1e74470cb..b12994e22 100644 --- a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +++ b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py @@ -7,7 +7,6 @@ import pytest from jsonschema import ValidationError, validate from pydantic.v1 import BaseModel -from pydantic.v1 import ValidationError as PydanticValidationError from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( From ef9c1dd02d91b79970ded286909972355620bc1b Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:37:32 +0000 Subject: [PATCH 6/8] feat: add flexible pattern and multiple examples for start_date field Co-Authored-By: AJ Steers --- .../file_based/config/abstract_file_based_spec.py | 9 ++++++++- unit_tests/sources/file_based/scenarios/csv_scenarios.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index 26891dd02..2da16b586 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -54,8 +54,15 @@ class AbstractFileBasedSpec(BaseModel): start_date: Optional[str] = Field( title="Start Date", description="UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", - examples=["2021-01-01T00:00:00.000000Z"], + examples=[ + "2021-01-01T00:00:00.000000Z", + "2021-01-01T00:00:00Z", + "2021-01-01T00:00:00.000Z", + "2021-01-01", + ], format="date-time", + pattern=r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?)?$", + pattern_descriptor="YYYY-MM-DD, YYYY-MM-DDTHH:mm:ssZ, or YYYY-MM-DDTHH:mm:ss.SSSSSSZ", order=1, ) diff --git a/unit_tests/sources/file_based/scenarios/csv_scenarios.py b/unit_tests/sources/file_based/scenarios/csv_scenarios.py index b04a4c2bf..13dce53e1 100644 --- a/unit_tests/sources/file_based/scenarios/csv_scenarios.py +++ b/unit_tests/sources/file_based/scenarios/csv_scenarios.py @@ -61,8 +61,15 @@ "start_date": { "title": "Start Date", "description": "UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", - "examples": ["2021-01-01T00:00:00.000000Z"], + "examples": [ + "2021-01-01T00:00:00.000000Z", + "2021-01-01T00:00:00Z", + "2021-01-01T00:00:00.000Z", + "2021-01-01", + ], "format": "date-time", + "pattern": r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?)?$", + "pattern_descriptor": "YYYY-MM-DD, YYYY-MM-DDTHH:mm:ssZ, or YYYY-MM-DDTHH:mm:ss.SSSSSSZ", "order": 1, "type": "string", }, From c80799130bbcad5bd77fffb4d3f34fce9c792290 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:39:03 +0000 Subject: [PATCH 7/8] style: reorder examples from shortest to longest Co-Authored-By: AJ Steers --- .../sources/file_based/config/abstract_file_based_spec.py | 4 ++-- unit_tests/sources/file_based/scenarios/csv_scenarios.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index 2da16b586..6c884f3e1 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -55,10 +55,10 @@ class AbstractFileBasedSpec(BaseModel): title="Start Date", description="UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", examples=[ - "2021-01-01T00:00:00.000000Z", + "2021-01-01", "2021-01-01T00:00:00Z", "2021-01-01T00:00:00.000Z", - "2021-01-01", + "2021-01-01T00:00:00.000000Z", ], format="date-time", pattern=r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?)?$", diff --git a/unit_tests/sources/file_based/scenarios/csv_scenarios.py b/unit_tests/sources/file_based/scenarios/csv_scenarios.py index 13dce53e1..f16d83e20 100644 --- a/unit_tests/sources/file_based/scenarios/csv_scenarios.py +++ b/unit_tests/sources/file_based/scenarios/csv_scenarios.py @@ -62,10 +62,10 @@ "title": "Start Date", "description": "UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.", "examples": [ - "2021-01-01T00:00:00.000000Z", + "2021-01-01", "2021-01-01T00:00:00Z", "2021-01-01T00:00:00.000Z", - "2021-01-01", + "2021-01-01T00:00:00.000000Z", ], "format": "date-time", "pattern": r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?)?$", From a29abeceb3079a414e0dae1127b1da0917e94714 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 30 Jan 2026 23:41:59 +0000 Subject: [PATCH 8/8] test: remove slash separator test case (non-standard format) Co-Authored-By: AJ Steers --- .../sources/file_based/config/test_abstract_file_based_spec.py | 1 - 1 file changed, 1 deletion(-) diff --git a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py index b12994e22..57d98da76 100644 --- a/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py +++ b/unit_tests/sources/file_based/config/test_abstract_file_based_spec.py @@ -54,7 +54,6 @@ def test_parquet_file_type_is_not_a_valid_csv_file_type( pytest.param("2021-01-01", True, id="date_only"), pytest.param(None, True, id="none_value"), pytest.param("not-a-date", False, id="invalid_string"), - pytest.param("2021/01/01", True, id="slash_separator_also_accepted"), pytest.param("", False, id="empty_string"), ], )