-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtime_interval.py
More file actions
73 lines (62 loc) · 3.49 KB
/
Copy pathtime_interval.py
File metadata and controls
73 lines (62 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Hypothesis strategies for generating random data used in tests.
"""
from datetime import datetime, timezone
import pandas as pd
from hypothesis.strategies import DrawFn, booleans, composite, datetimes, just, sampled_from
from pandas.core.tools.datetimes import DatetimeScalar
from tilebox.datasets.query.time_interval import TimeInterval, datetime_to_us
# Whole-day bounds that lie inside the range representable by pandas.Timestamp. Datetimes between them are
# therefore supported by the pd.to_datetime function, which we are using for parsing datetime scalars.
_MIN_TIME_NANO_I64 = datetime(year=1677, month=9, day=22)
_MAX_TIME_NANO_I64 = datetime(year=2262, month=4, day=11)

# Strategy for UTC datetimes that fit into a 64 bit signed integer when converted to a nanoseconds timestamp
i64_datetimes = datetimes(
    min_value=_MIN_TIME_NANO_I64,
    max_value=_MAX_TIME_NANO_I64,
    timezones=just(timezone.utc),
)
@composite
def time_intervals(draw: DrawFn, tzinfo: timezone | None = None) -> TimeInterval:
    """
    A hypothesis strategy for generating random time intervals.

    Args:
        draw: The draw function injected by the ``composite`` decorator.
        tzinfo: Timezone to generate the datetimes in. When omitted, ``datetimes()`` is used with its
            default arguments.

    Returns:
        A TimeInterval built from the earlier and the later of two drawn datetimes, followed by two drawn
        booleans (passed positionally as ``start_exclusive`` and ``end_inclusive``).
    """
    if tzinfo is None:
        bounds = datetimes()
    else:
        bounds = datetimes(timezones=just(tzinfo))

    first = draw(bounds)
    second = draw(bounds)
    # the two draws come in arbitrary order, so pick the earlier one as start and the later one as end
    earliest = min(first, second)
    latest = max(first, second)

    start_exclusive = draw(booleans())
    end_inclusive = draw(booleans())
    return TimeInterval(earliest, latest, start_exclusive, end_inclusive)
@composite
def datetime_scalars(draw: DrawFn) -> tuple[DatetimeScalar, datetime]:
    """
    A hypothesis strategy for generating a random datetime scalar together with the datetime it represents.

    The datetime is drawn from ``i64_datetimes``; the scalar is one of the representations of that datetime
    produced by ``datetime_scalar_for_datetime``.

    Returns:
        A tuple of (scalar, datetime).
    """
    expected = draw(i64_datetimes)
    representation = draw(datetime_scalar_for_datetime(expected))
    return representation, expected
@composite
def datetime_scalar_for_datetime(draw: DrawFn, dt: datetime) -> DatetimeScalar:
    """
    A hypothesis strategy that picks one random scalar representation of the given datetime.

    Every candidate representation is a format that can be parsed by pandas.to_datetime and is therefore
    understood by the tilebox.datasets.data._convert_to_datetime function, which is the backbone of the
    TimeInterval parsing functionality in the load() function of a dataset collection.

    Args:
        draw: The draw function injected by the ``composite`` decorator.
        dt: The datetime to represent.

    Returns:
        The datetime itself, a pandas Timestamp, a numpy datetime64, an integer nanosecond timestamp or a
        formatted string. String formats that omit microseconds (or the whole time part) are only offered
        when the omitted fields of ``dt`` are zero.
    """
    # Each candidate is a converter that is only invoked for the one that ends up being drawn.
    candidates = [
        lambda value: value,  # a datetime is already a datetime scalar, so this is a no-op
        lambda value: pd.to_datetime(value),  # noqa: PLW0108 # pandas Timestamp object
        lambda value: pd.to_datetime(value).to_datetime64(),  # numpy datetime64 object
        lambda value: datetime_to_us(value) * 10**3,  # timestamp in nanoseconds
        # strings that always spell out the microseconds
        lambda value: value.strftime("%Y-%m-%dT%H:%M:%S.%f"),
        lambda value: value.strftime("%Y-%m-%dT%H:%M:%S.%f %Z"),
        lambda value: value.strftime("%Y-%m-%d %H:%M:%S.%f"),
        lambda value: value.strftime("%Y-%m-%d %H:%M:%S.%f %Z"),
    ]

    if dt.microsecond == 0:
        # no sub-second part, so string formats without microseconds lose no information
        candidates.extend(
            [
                lambda value: value.strftime("%Y-%m-%dT%H:%M:%S"),
                lambda value: value.strftime("%Y-%m-%dT%H:%M:%S %Z"),
                lambda value: value.strftime("%Y-%m-%d %H:%M:%S"),
                lambda value: value.strftime("%Y-%m-%d %H:%M:%S %Z"),
            ]
        )
        if dt.second == 0 and dt.minute == 0 and dt.hour == 0:
            # exactly midnight: a date-only string is sufficient
            candidates.append(lambda value: value.strftime("%Y-%m-%d"))

    convert = draw(sampled_from(candidates))
    return convert(dt)