Skip to content
Merged
35 changes: 31 additions & 4 deletions airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
from typing import Any, Dict, List, Literal, Optional, Union

import dpath
from pydantic.v1 import AnyUrl, BaseModel, Field
from pydantic.v1 import AnyUrl, BaseModel, Field, validator

from airbyte_cdk import OneOfOptionConfig
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
from airbyte_cdk.sources.specs.transfer_modes import DeliverPermissions
from airbyte_cdk.sources.utils import schema_helpers
from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse


class DeliverRecords(BaseModel):
Expand Down Expand Up @@ -53,13 +54,39 @@ class AbstractFileBasedSpec(BaseModel):
# Optional config field; per its description, files modified before this
# date are not replicated.
# NOTE(review): this span comes from a rendered diff, so `examples`, `pattern`
# and `pattern_descriptor` each appear twice below -- presumably the first
# occurrence is the removed line and the second the added one. Confirm against
# the real file before relying on either version.
start_date: Optional[str] = Field(
title="Start Date",
# NOTE(review): the description names only the microsecond format, while the
# later pattern below also admits date-only and shorter time forms.
description="UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.",
examples=["2021-01-01T00:00:00.000000Z"],
examples=[
"2021-01-01",
"2021-01-01T00:00:00Z",
"2021-01-01T00:00:00.000Z",
"2021-01-01T00:00:00.000000Z",
],
# NOTE(review): "2021-01-01" above is a date-only value while the declared
# format is "date-time" -- confirm that consumers of the generated spec do not
# enforce `format` strictly.
format="date-time",
# Earlier pattern: exactly six fractional digits and a literal trailing "Z";
# the "." before the fraction is unescaped, so it matches any character.
pattern="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{6}Z$",
pattern_descriptor="YYYY-MM-DDTHH:mm:ss.SSSSSSZ",
# Later pattern: a YYYY-MM-DD date, optionally followed by "T" and HH:MM:SS,
# then an optional fraction of one or more digits, then an optional "Z" or
# +/-HH:MM offset.
pattern=r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?)?$",
pattern_descriptor="YYYY-MM-DD, YYYY-MM-DDTHH:mm:ssZ, or YYYY-MM-DDTHH:mm:ss.SSSSSSZ",
order=1,
)

@validator("start_date", pre=True)
def validate_start_date(
    cls,  # noqa: N805  # Pydantic validators use cls, not self
    v: Optional[str],
) -> Optional[str]:
    """Reject a ``start_date`` that cannot be parsed as a datetime.

    The value is handed to ``ab_datetime_try_parse``; a ``None`` result from
    that helper is treated as "not parseable". The parsed datetime itself is
    discarded -- on success the caller's original value is returned unchanged.

    Args:
        v: The raw ``start_date`` value, or ``None`` when it was not provided.

    Returns:
        ``v`` exactly as received (``None`` is passed through without parsing).

    Raises:
        ValueError: If ``v`` is not ``None`` and ``ab_datetime_try_parse``
            returns ``None`` for it.
    """
    # Only a provided value is checked; an absent start_date is always valid.
    if v is not None and ab_datetime_try_parse(v) is None:
        raise ValueError(
            f"'{v}' is not a valid datetime string. "
            "Please use a format like '2021-01-01T00:00:00Z' or '2021-01-01T00:00:00.000000Z'."
        )
    return v

streams: List[FileBasedStreamConfig] = Field(
title="The list of streams to sync",
description='Each instance of this configuration defines a <a href="https://docs.airbyte.com/cloud/core-concepts#stream">stream</a>. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to warehouse destination such as Snowflake or BigQuery, each stream is a separate table.',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jsonschema import ValidationError, validate
from pydantic.v1 import BaseModel

from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
AvroFormat,
CsvFormat,
Expand Down Expand Up @@ -40,3 +41,27 @@ def test_parquet_file_type_is_not_a_valid_csv_file_type(
validate(instance=format_config[file_type], schema=file_format.schema())
else:
validate(instance=format_config[file_type], schema=file_format.schema())


@pytest.mark.parametrize(
    "start_date, should_pass",
    [
        pytest.param("2021-01-01T00:00:00.000000Z", True, id="with_microseconds"),
        pytest.param("2021-01-01T00:00:00Z", True, id="without_microseconds"),
        pytest.param("2021-01-01T00:00:00.000Z", True, id="with_milliseconds"),
        pytest.param("2025-01-01T00:00:00Z", True, id="terraform_provider_format"),
        pytest.param("2021-01-01T00:00:00+00:00", True, id="with_timezone_offset"),
        pytest.param("2021-01-01", True, id="date_only"),
        pytest.param(None, True, id="none_value"),
        pytest.param("not-a-date", False, id="invalid_string"),
        pytest.param("", False, id="empty_string"),
    ],
)
def test_start_date_validation(start_date: str, should_pass: bool) -> None:
    """Check which ``start_date`` values the validator accepts and rejects.

    The validator is called directly on the class, so only
    ``validate_start_date`` is exercised here, not full model construction.
    Accepted values must come back unchanged; rejected values must raise
    ``ValueError`` carrying the validator's message. Note that ``start_date``
    is ``None`` for the ``none_value`` case.
    """
    # Rejection cases first: the validator must raise with its own message.
    if not should_pass:
        with pytest.raises(ValueError, match="is not a valid datetime string"):
            AbstractFileBasedSpec.validate_start_date(start_date)
        return

    # Acceptance cases: the input is returned as-is.
    assert AbstractFileBasedSpec.validate_start_date(start_date) == start_date
11 changes: 8 additions & 3 deletions unit_tests/sources/file_based/scenarios/csv_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@
"start_date": {
"title": "Start Date",
"description": "UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.",
"examples": ["2021-01-01T00:00:00.000000Z"],
"examples": [
"2021-01-01",
"2021-01-01T00:00:00Z",
"2021-01-01T00:00:00.000Z",
"2021-01-01T00:00:00.000000Z",
],
"format": "date-time",
"pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{6}Z$",
"pattern_descriptor": "YYYY-MM-DDTHH:mm:ss.SSSSSSZ",
"pattern": r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|[+-][0-9]{2}:[0-9]{2})?)?$",
"pattern_descriptor": "YYYY-MM-DD, YYYY-MM-DDTHH:mm:ssZ, or YYYY-MM-DDTHH:mm:ss.SSSSSSZ",
"order": 1,
"type": "string",
},
Expand Down
Loading