diff --git a/airbyte_cdk/sources/declarative/datetime/datetime_parser.py b/airbyte_cdk/sources/declarative/datetime/datetime_parser.py
index 2707ffe11..d34308276 100644
--- a/airbyte_cdk/sources/declarative/datetime/datetime_parser.py
+++ b/airbyte_cdk/sources/declarative/datetime/datetime_parser.py
@@ -19,6 +19,20 @@ class DatetimeParser:
     _UNIX_EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
 
     def parse(self, date: Union[str, int], format: str) -> datetime.datetime:
+        # Reject values that can never represent a timestamp up front, so the caller gets a
+        # descriptive error instead of an opaque failure from int()/float()/strptime() below.
+        if date is None:
+            raise ValueError(
+                "Cannot parse None as a datetime. "
+                "Expected a string, integer, or float representing a timestamp."
+            )
+
+        if isinstance(date, (list, dict)):
+            raise TypeError(
+                f"Cannot parse {type(date).__name__} as a datetime. "
+                f"Expected a string, integer, or float representing a timestamp, but got: {date}"
+            )
+
         # "%s" is a valid (but unreliable) directive for formatting, but not for parsing
         # It is defined as
         # The number of seconds since the Epoch, 1970-01-01 00:00:00+0000 (UTC). https://man7.org/linux/man-pages/man3/strptime.3.html
@@ -26,13 +40,27 @@ def parse(self, date: Union[str, int], format: str) -> datetime.datetime:
         # The recommended way to parse a date from its timestamp representation is to use datetime.fromtimestamp
         # See https://stackoverflow.com/a/4974930
         if format == "%s":
-            return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
+            # fromtimestamp() may raise OSError in addition to ValueError/OverflowError for
+            # out-of-range timestamps, so all three are surfaced as a single ValueError.
+            try:
+                return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
+            except (ValueError, OverflowError, OSError) as e:
+                raise ValueError(f"Cannot parse '{date}' as a Unix timestamp: {e}") from e
         elif format == "%s_as_float":
-            return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
+            try:
+                return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
+            except (ValueError, OverflowError, OSError) as e:
+                raise ValueError(f"Cannot parse '{date}' as a float Unix timestamp: {e}") from e
         elif format == "%epoch_microseconds":
-            return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
+            try:
+                return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
+            except (ValueError, OverflowError) as e:
+                raise ValueError(f"Cannot parse '{date}' as epoch microseconds: {e}") from e
         elif format == "%ms":
-            return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
+            try:
+                return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
+            except (ValueError, OverflowError) as e:
+                raise ValueError(f"Cannot parse '{date}' as milliseconds: {e}") from e
         elif "%_ms" in format:
             format = format.replace("%_ms", "%f")
         parsed_datetime = datetime.datetime.strptime(str(date), format)
diff --git a/unit_tests/sources/declarative/datetime/test_datetime_parser.py b/unit_tests/sources/declarative/datetime/test_datetime_parser.py
index b9da9852f..d6d1e179f 100644
--- a/unit_tests/sources/declarative/datetime/test_datetime_parser.py
+++ b/unit_tests/sources/declarative/datetime/test_datetime_parser.py
@@ -125,3 +125,76 @@ def test_format_datetime(input_dt: datetime.datetime, datetimeformat: str, expec
     parser = DatetimeParser()
     output_date = parser.format(input_dt, datetimeformat)
     assert output_date == expected_output
+
+
+@pytest.mark.parametrize(
+    "input_date, date_format, expected_error_type, expected_error_message",
+    [
+        (
+            None,
+            "%s",
+            ValueError,
+            "Cannot parse None as a datetime",
+        ),
+        (
+            [1694902531, 1694902532],
+            "%s",
+            TypeError,
+            "Cannot parse list as a datetime",
+        ),
+        (
+            {"timestamp": 1694902531},
+            "%s",
+            TypeError,
+            "Cannot parse dict as a datetime",
+        ),
+        (
+            [1694902531],
+            "%s_as_float",
+            TypeError,
+            "Cannot parse list as a datetime",
+        ),
+        (
+            None,
+            "%ms",
+            ValueError,
+            "Cannot parse None as a datetime",
+        ),
+        (
+            {"states": [{"cursor": {"updated_at": "1694902531"}}]},
+            "%s",
+            TypeError,
+            "Cannot parse dict as a datetime",
+        ),
+        (
+            "not-a-timestamp",
+            "%s",
+            ValueError,
+            "Cannot parse 'not-a-timestamp' as a Unix timestamp",
+        ),
+        (
+            "not-a-timestamp",
+            "%ms",
+            ValueError,
+            "Cannot parse 'not-a-timestamp' as milliseconds",
+        ),
+    ],
+    ids=[
+        "test_parse_none_raises_value_error",
+        "test_parse_list_raises_type_error",
+        "test_parse_dict_raises_type_error",
+        "test_parse_list_float_format_raises_type_error",
+        "test_parse_none_ms_format_raises_value_error",
+        "test_parse_partitioned_state_dict_raises_type_error",
+        "test_parse_non_numeric_string_raises_value_error",
+        "test_parse_non_numeric_string_ms_format_raises_value_error",
+    ],
+)
+def test_parse_invalid_types(
+    input_date, date_format: str, expected_error_type, expected_error_message: str
+):
+    """parse() must reject unparseable input with the expected error type and message."""
+    parser = DatetimeParser()
+    with pytest.raises(expected_error_type) as exc_info:
+        parser.parse(input_date, date_format)
+    assert expected_error_message in str(exc_info.value)