Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 73 additions & 78 deletions climada/hazard/tc_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1944,86 +1944,81 @@ def from_FAST(cls, folder_name: str):
if Path(file).suffix != ".nc":
continue
with xr.open_dataset(file) as dataset:
for year in dataset.year:
for i in dataset.n_trk:

# Select track
track = dataset.sel(n_trk=i, year=year)
# chunk dataset at first NaN value
lon = track.lon_trks.data
last_valid_index = np.where(np.isfinite(lon))[0][-1]
track = track.isel(time=slice(0, last_valid_index + 1))
# Select lat, lon
lat = track.lat_trks.data
lon = track.lon_trks.data
# Convert lon from 0-360 to -180 - 180
lon = ((lon + 180) % 360) - 180
# Convert time to pandas Datetime "yyyy.mm.dd"
reference_time = (
f"{track.tc_years.item()}-{int(track.tc_month.item())}-01"
)
time = pd.to_datetime(
track.time.data, unit="s", origin=reference_time
).astype("datetime64[s]")
# Define variables
ms_to_kn = 1.943844
max_wind_kn = track.vmax_trks.data * ms_to_kn
env_pressure = BASIN_ENV_PRESSURE[track.tc_basins.data.item()]
cen_pres = _estimate_pressure(
np.full(lat.shape, np.nan),
lat,
lon,
max_wind_kn,
)
for i in dataset.n_trk:

# Select track
track = dataset.sel(n_trk=i)
# chunk dataset at first NaN value
lon = track.lon_trks.data
last_valid_index = np.where(np.isfinite(lon))[0][-1]
track = track.isel(time=slice(0, last_valid_index + 1))
# Select lat, lon
lat = track.lat_trks.data
lon = track.lon_trks.data
# Convert lon from 0-360 to -180 - 180
lon = ((lon + 180) % 360) - 180
# Convert time to pandas Datetime "yyyy.mm.dd"
reference_time = (
f"{track.tc_years.item()}-{int(track.tc_month.item())}-01"
)
time = pd.to_datetime(
track.time.data, unit="s", origin=reference_time
).astype("datetime64[s]")
# Define variables
ms_to_kn = 1.943844
max_wind_kn = track.vmax_trks.data * ms_to_kn
env_pressure = BASIN_ENV_PRESSURE[track.tc_basins.data.item()]
cen_pres = _estimate_pressure(
np.full(lat.shape, np.nan),
lat,
lon,
max_wind_kn,
)

data.append(
xr.Dataset(
{
"time_step": (
"time",
np.full(time.shape[0], track.time.data[1]),
),
"max_sustained_wind": (
"time",
track.vmax_trks.data,
),
"central_pressure": ("time", cen_pres),
"radius_max_wind": (
"time",
estimate_rmw(
np.full(lat.shape, np.nan), cen_pres
),
),
"environmental_pressure": (
"time",
np.full(time.shape[0], env_pressure),
),
"basin": (
"time",
np.full(
time.shape[0], track.tc_basins.data.item()
),
),
},
coords={
"time": ("time", time),
"lat": ("time", lat),
"lon": ("time", lon),
},
attrs={
"max_sustained_wind_unit": "m/s",
"central_pressure_unit": "hPa",
"name": f"storm_{track.n_trk.item()}",
"sid": track.n_trk.item(),
"orig_event_flag": True,
"data_provider": "FAST",
"id_no": track.n_trk.item(),
"category": set_category(
max_wind_kn, wind_unit="kn", saffir_scale=None
),
},
)
data.append(
xr.Dataset(
{
"time_step": (
"time",
np.full(time.shape[0], track.time.data[1]),
),
"max_sustained_wind": (
"time",
track.vmax_trks.data,
),
"central_pressure": ("time", cen_pres),
"radius_max_wind": (
"time",
estimate_rmw(np.full(lat.shape, np.nan), cen_pres),
),
"environmental_pressure": (
"time",
np.full(time.shape[0], env_pressure),
),
"basin": (
"time",
np.full(time.shape[0], track.tc_basins.data.item()),
),
},
coords={
"time": ("time", time),
"lat": ("time", lat),
"lon": ("time", lon),
},
attrs={
"max_sustained_wind_unit": "m/s",
"central_pressure_unit": "hPa",
"name": f"storm_{track.n_trk.item()}",
"sid": track.n_trk.item(),
"orig_event_flag": True,
"data_provider": "FAST",
"id_no": track.n_trk.item(),
"category": set_category(
max_wind_kn, wind_unit="kn", saffir_scale=None
),
},
)
)

return cls(data)

Expand Down
65 changes: 65 additions & 0 deletions climada/hazard/test/test_tc_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Test tc_tracks module.
"""

import tempfile
import unittest
from datetime import datetime as dt

Expand Down Expand Up @@ -704,6 +705,70 @@ def test_from_FAST(self):
self.assertEqual(tc_track.data[0].environmental_pressure.data[0], 1010)
self.assertEqual(tc_track.data[0].basin[0], "NA")

def test_from_FAST_not_multiplied_by_year_dim(self):
"""Regression test: FAST tracks must not be repeated across `year` dimension."""
with tempfile.TemporaryDirectory() as tmpdir:
ds = xr.Dataset(
{
"lon_trks": (
("n_trk", "time"),
np.array(
[
[290.0, 291.0, 292.0],
[300.0, 301.0, 302.0],
],
dtype=float,
),
),
"lat_trks": (
("n_trk", "time"),
np.array(
[
[10.0, 10.5, 11.0],
[15.0, 15.5, 16.0],
],
dtype=float,
),
),
"vmax_trks": (
("n_trk", "time"),
np.array(
[
[20.0, 21.0, 22.0],
[25.0, 26.0, 27.0],
],
dtype=float,
),
),
"tc_month": ("n_trk", np.array([8, 9], dtype=np.int64)),
"tc_basins": ("n_trk", np.array(["NA", "NA"], dtype="<U2")),
"tc_years": ("n_trk", np.array([1998, 1999], dtype=np.int64)),
"seeds_per_month": (
("year", "basin", "month"),
np.zeros((4, 1, 12), dtype=float),
),
},
coords={
"n_trk": ("n_trk", np.array([0, 1], dtype=np.int64)),
"time": ("time", np.array([0, 10800, 21600], dtype=float)),
"year": (
"year",
np.array([1998, 1999, 2000, 2001], dtype=np.int64),
),
"basin": ("basin", np.array(["NA"], dtype="<U2")),
"month": ("month", np.arange(1, 13, dtype=np.int64)),
},
)

path = DATA_DIR.joinpath(tmpdir, "fast_regression.nc")
ds.to_netcdf(path)
Copy link
Copy Markdown
Collaborator

@NicolasColombi NicolasColombi Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since there is already a test file named FAST_test_tracks.nc with only one year (which is why the existing test did not reveal the bug you are suggesting to fix), it might be a good idea to update that file to contain 2 years, and then test the updated from_FAST function on it with only one test. I do not think this would be an issue size-wise, as the file is only 62KB at the moment. This would require either reproducing the small file for two years, or simply fabricating such a file by duplicating the existing one, concatenating the copies, and manually modifying the second year number. Lastly, you will need to update the original test to capture this.

This way, I think we can have a single file and a single test, which reduces the amount of code, since the majority of the code in your test is there only to create a temporary file.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion, this makes a lot of sense.

I’ve updated the existing fixture file (FAST_test_tracks.nc) to include two years (year = [2025, 2026]) by duplicating the seeds_per_month data along the year dimension. The track-related variables (tc_years, lon_trks, etc.) remain unchanged, so the file structure stays consistent with the original intent.

With this update, I removed the separate regression test and its temporary-file setup. The existing assertion in test_from_FAST:

self.assertEqual(len(tc_track.data), 5)

now ensures that tracks are not duplicated when a year dimension is present. With two years present, the previous buggy implementation would have returned 10 tracks instead of 5.

This keeps the test setup simpler and avoids duplicating logic for temporary file creation.


tc_track = tc.TCTracks.from_FAST(DATA_DIR.joinpath(tmpdir))

self.assertEqual(tc_track.size, 2)
self.assertEqual(tc_track.data[0].time.dt.year.values[0], 1998)
self.assertEqual(tc_track.data[1].time.dt.year.values[0], 1999)

def test_to_geodataframe_points(self):
"""Conversion of TCTracks to GeoDataFrame using Points."""
tc_track = tc.TCTracks.from_processed_ibtracs_csv(TEST_TRACK)
Expand Down
Loading