Skip to content

Commit 5245884

Browse files
fix(cdk): support start_date format without microseconds in file-based connectors
Co-Authored-By: Daryna Ishchenko <darina.ishchenko17@gmail.com>
1 parent f550424 commit 5245884

2 files changed

Lines changed: 52 additions & 1 deletion

File tree

airbyte_cdk/sources/file_based/file_based_stream_reader.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,22 @@ def get_matching_files(
9898
"""
9999
...
100100

101+
@staticmethod
def _parse_start_date(start_date_str: str) -> datetime:
    """Parse a ``start_date`` string, with or without fractional seconds.

    The string is tried against
    ``AbstractFileBasedStreamReader.DATE_TIME_FORMAT`` first and then against
    ``%Y-%m-%dT%H:%M:%SZ`` (the same layout without a microseconds part).

    Args:
        start_date_str: The timestamp string to parse.

    Returns:
        The ``datetime`` produced by the first format that matches.

    Raises:
        ValueError: If the string matches neither accepted format. The
            message lists every format that was tried.
    """
    accepted_formats = (
        AbstractFileBasedStreamReader.DATE_TIME_FORMAT,
        "%Y-%m-%dT%H:%M:%SZ",
    )
    for date_format in accepted_formats:
        try:
            return datetime.strptime(start_date_str, date_format)
        except ValueError:
            # Not this layout; fall through to the next candidate.
            continue
    # Raised outside the ``except`` handlers on purpose: the caller gets one
    # un-chained error that names all accepted formats, not just the last.
    raise ValueError(
        f"start_date {start_date_str!r} does not match any supported format: "
        f"{', '.join(accepted_formats)}"
    )
108+
101109
def filter_files_by_globs_and_start_date(
102110
self, files: List[RemoteFile], globs: List[str]
103111
) -> Iterable[RemoteFile]:
104112
"""
105113
Utility method for filtering files based on globs.
106114
"""
107115
start_date = (
108-
datetime.strptime(self.config.start_date, self.DATE_TIME_FORMAT)
116+
self._parse_start_date(self.config.start_date)
109117
if self.config and self.config.start_date
110118
else None
111119
)

unit_tests/sources/file_based/test_file_based_stream_reader.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,13 @@ def documentation_url(cls) -> AnyUrl:
401401
set(),
402402
id="all_csvs_modified_before_start_date",
403403
),
404+
pytest.param(
405+
["**/*.csv"],
406+
{"start_date": "2023-06-01T03:54:07Z", "streams": []},
407+
{"a.csv", "a/b.csv", "a/c.csv", "a/b/c.csv", "a/c/c.csv", "a/b/c/d.csv"},
408+
set(),
409+
id="all_csvs_start_date_without_microseconds",
410+
),
404411
pytest.param(
405412
["**/*.csv"],
406413
{"start_date": "2023-06-05T03:54:07.000Z", "streams": []},
@@ -494,6 +501,42 @@ def test_preserve_sub_directories_scenarios(
494501
assert file_paths[AbstractFileBasedStreamReader.FILE_FOLDER] == path.dirname(source_file_path)
495502

496503

504+
@pytest.mark.parametrize(
    "start_date_str, expected",
    [
        ("2025-01-01T00:00:00.000000Z", datetime(2025, 1, 1, 0, 0, 0)),
        ("2025-06-15T12:30:45.123456Z", datetime(2025, 6, 15, 12, 30, 45, 123456)),
        ("2025-01-01T00:00:00Z", datetime(2025, 1, 1, 0, 0, 0)),
        ("2025-12-31T23:59:59Z", datetime(2025, 12, 31, 23, 59, 59)),
    ],
    ids=[
        "with_microseconds_zero",
        "with_microseconds_nonzero",
        "without_microseconds",
        "without_microseconds_end_of_day",
    ],
)
def test_parse_start_date(start_date_str: str, expected: datetime) -> None:
    """Check that start dates with and without microseconds parse to the expected value."""
    stream_reader = TestStreamReader()
    parsed = stream_reader._parse_start_date(start_date_str)
    assert parsed == expected
532+
533+
534+
def test_parse_start_date_invalid_raises() -> None:
    """Check that a string matching no supported layout raises ``ValueError``."""
    stream_reader = TestStreamReader()
    unparseable = "not-a-date"
    with pytest.raises(ValueError):
        stream_reader._parse_start_date(unparseable)
538+
539+
497540
def test_upload_with_file_transfer_reader():
498541
stream_reader = TestStreamReaderWithDefaultUpload()
499542

0 commit comments

Comments
 (0)