|
| 1 | +import pandas as pd |
| 2 | + |
| 3 | +__all__ = ["dst_st"] |
| 4 | + |
def dst_st(ts: pd.Series,
           src_tz: str = "US/Pacific",
           target_tz: str = "Etc/GMT+8",
           *,
           ambiguous="NaT",
           nonexistent="shift_backward") -> pd.Series:
    """
    Re-express a naive, DST-observing local-time index in naive standard time.

    The index of ``ts`` is interpreted as wall-clock time in ``src_tz``
    (e.g., US/Pacific), converted to ``target_tz`` (e.g., Etc/GMT+8, written
    with POSIX sign conventions), and then stripped of its timezone again.

    Parameters:
        ts (pd.Series): Time series with a naive (timezone-unaware) DatetimeIndex.
            The input is not modified; a copy is returned.
        src_tz (str): Source timezone name (default: 'US/Pacific').
        target_tz (str): Target standard timezone name (default: 'Etc/GMT+8').
        ambiguous: Keyword-only. Passed to ``DatetimeIndex.tz_localize`` to
            resolve wall-clock times that occur twice at the fall-back
            transition. The default 'NaT' marks them as NaT, and those rows
            are then dropped. Use 'infer' (or a boolean array) to keep them
            when the series contains the repeated hour in order.
        nonexistent: Keyword-only. Passed to ``DatetimeIndex.tz_localize`` to
            resolve wall-clock times skipped at the spring-forward transition
            (default: 'shift_backward').

    Returns:
        pd.Series: Copy of ``ts`` whose index is naive but represents time in
        ``target_tz``. Rows whose index became NaT are removed. If the input
        index carried a frequency, the result is re-gridded with ``asfreq``
        on a best-effort basis, which may insert rows holding NaN for
        timestamps that are missing after the conversion.

    Raises:
        TypeError: If the index is already timezone-aware (raised by
            ``tz_localize``).

    Notes:
        - 'Etc/GMT+8' is the tz name for UTC-8 (PST); note the sign is reversed
          from what might be expected.
        - With the default ``nonexistent='shift_backward'``, several skipped
          timestamps can collapse onto the same instant, which leaves a
          non-unique index. In that case the frequency cannot be restored;
          a RuntimeWarning is emitted and the un-gridded result is returned.
    """
    # Local import so this block does not depend on edits to the module header.
    import warnings

    result = ts.copy()
    # Remember the regular spacing (if any) before the index is replaced.
    orig_freq = getattr(result.index, "freq", None)

    localized = result.index.tz_localize(
        src_tz,
        nonexistent=nonexistent,
        ambiguous=ambiguous,
    )
    # Convert to the fixed-offset zone, then drop the tz to get naive standard time.
    result.index = localized.tz_convert(target_tz).tz_localize(None)

    # Rows that could not be placed on the timeline (NaT) are discarded.
    result = result[~result.index.isna()]

    if orig_freq is not None:
        try:
            result = result.asfreq(orig_freq)
        except (ValueError, TypeError) as err:
            # Best effort only: e.g. duplicate timestamps created at a DST
            # transition make re-gridding impossible. Report instead of hiding.
            warnings.warn(
                f"dst_st could not restore frequency {orig_freq!r}: {err}",
                RuntimeWarning,
                stacklevel=2,
            )
    return result
| 48 | + |
if __name__ == "__main__":
    # Demo: half-hourly wall-clock stamps spanning the PDT -> PST transition
    # (first Sunday in November 2023).
    demo_index = pd.date_range(
        start="2023-11-05 00:00",
        end="2023-11-05 04:00",
        freq="30min",
    )
    demo_series = pd.Series(range(len(demo_index)), index=demo_index)

    # Show the input as given (naive local time).
    print("Original (naive, US/Pacific):", demo_series, sep="\n")

    # Show the same data re-expressed in fixed standard time.
    standard_series = dst_st(demo_series)
    print("\nConverted to standard time (Etc/GMT+8, naive):", standard_series, sep="\n")
0 commit comments