Skip to content

Commit 5ae3f7f

Browse files
simonameilerSimona MeilerChahan Kropf
authored
Fix TCTracks.from_FAST duplicate loading from year loop (#1269)
* Fix TCTracks.from_FAST duplicate loading from year loop * Refactor test_from_FAST: extend fixture to 2 years, remove separate regression test The test fixture FAST_test_tracks.nc now has year=[2025,2026] (only seeds_per_month is extended; track variables retain their n_trk dim). The existing len(tc_track.data)==5 assertion now acts as the regression check: the buggy year-loop code would return 5x2=10 tracks. The separate test_from_FAST_not_multiplied_by_year_dim (with its temporary-file scaffolding) is removed. * Update changelog --------- Co-authored-by: Simona Meiler <simona.meiler@usys.ethz.ch> Co-authored-by: Chahan Kropf <chahan.kropf@posteo.com>
1 parent 3133447 commit 5ae3f7f

4 files changed

Lines changed: 81 additions & 80 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Code freeze date: YYYY-MM-DD
2323

2424
- Fixed asset count in impact logging message [#1195](https://github.com/CLIMADA-project/climada_python/pull/1195).
2525
- `Hazard.from_raster_xarray` now returns a sparse matrix instead of a sparse array [#1261](https://github.com/CLIMADA-project/climada_python/pull/1261).
26+
- Fix TCTracks.from_FAST duplicate loading from year loop [#1269](https://github.com/CLIMADA-project/climada_python/pull/1269).
2627

2728
### Deprecated
2829
- `Impact.calc_freq_curve()` should not be given the parameter `return_per`. Use the parameter `return_periods` in `Impact.calc_freq_curve().interpolate()` instead.

climada/hazard/tc_tracks.py

Lines changed: 73 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,86 +1944,81 @@ def from_FAST(cls, folder_name: str):
19441944
if Path(file).suffix != ".nc":
19451945
continue
19461946
with xr.open_dataset(file) as dataset:
1947-
for year in dataset.year:
1948-
for i in dataset.n_trk:
1949-
1950-
# Select track
1951-
track = dataset.sel(n_trk=i, year=year)
1952-
# chunk dataset at first NaN value
1953-
lon = track.lon_trks.data
1954-
last_valid_index = np.where(np.isfinite(lon))[0][-1]
1955-
track = track.isel(time=slice(0, last_valid_index + 1))
1956-
# Select lat, lon
1957-
lat = track.lat_trks.data
1958-
lon = track.lon_trks.data
1959-
# Convert lon from 0-360 to -180 - 180
1960-
lon = ((lon + 180) % 360) - 180
1961-
# Convert time to pandas Datetime "yyyy.mm.dd"
1962-
reference_time = (
1963-
f"{track.tc_years.item()}-{int(track.tc_month.item())}-01"
1964-
)
1965-
time = pd.to_datetime(
1966-
track.time.data, unit="s", origin=reference_time
1967-
).astype("datetime64[s]")
1968-
# Define variables
1969-
ms_to_kn = 1.943844
1970-
max_wind_kn = track.vmax_trks.data * ms_to_kn
1971-
env_pressure = BASIN_ENV_PRESSURE[track.tc_basins.data.item()]
1972-
cen_pres = _estimate_pressure(
1973-
np.full(lat.shape, np.nan),
1974-
lat,
1975-
lon,
1976-
max_wind_kn,
1977-
)
1947+
for i in dataset.n_trk:
1948+
1949+
# Select track
1950+
track = dataset.sel(n_trk=i)
1951+
# chunk dataset at first NaN value
1952+
lon = track.lon_trks.data
1953+
last_valid_index = np.where(np.isfinite(lon))[0][-1]
1954+
track = track.isel(time=slice(0, last_valid_index + 1))
1955+
# Select lat, lon
1956+
lat = track.lat_trks.data
1957+
lon = track.lon_trks.data
1958+
# Convert lon from 0-360 to -180 - 180
1959+
lon = ((lon + 180) % 360) - 180
1960+
# Convert time to pandas Datetime "yyyy.mm.dd"
1961+
reference_time = (
1962+
f"{track.tc_years.item()}-{int(track.tc_month.item())}-01"
1963+
)
1964+
time = pd.to_datetime(
1965+
track.time.data, unit="s", origin=reference_time
1966+
).astype("datetime64[s]")
1967+
# Define variables
1968+
ms_to_kn = 1.943844
1969+
max_wind_kn = track.vmax_trks.data * ms_to_kn
1970+
env_pressure = BASIN_ENV_PRESSURE[track.tc_basins.data.item()]
1971+
cen_pres = _estimate_pressure(
1972+
np.full(lat.shape, np.nan),
1973+
lat,
1974+
lon,
1975+
max_wind_kn,
1976+
)
19781977

1979-
data.append(
1980-
xr.Dataset(
1981-
{
1982-
"time_step": (
1983-
"time",
1984-
np.full(time.shape[0], track.time.data[1]),
1985-
),
1986-
"max_sustained_wind": (
1987-
"time",
1988-
track.vmax_trks.data,
1989-
),
1990-
"central_pressure": ("time", cen_pres),
1991-
"radius_max_wind": (
1992-
"time",
1993-
estimate_rmw(
1994-
np.full(lat.shape, np.nan), cen_pres
1995-
),
1996-
),
1997-
"environmental_pressure": (
1998-
"time",
1999-
np.full(time.shape[0], env_pressure),
2000-
),
2001-
"basin": (
2002-
"time",
2003-
np.full(
2004-
time.shape[0], track.tc_basins.data.item()
2005-
),
2006-
),
2007-
},
2008-
coords={
2009-
"time": ("time", time),
2010-
"lat": ("time", lat),
2011-
"lon": ("time", lon),
2012-
},
2013-
attrs={
2014-
"max_sustained_wind_unit": "m/s",
2015-
"central_pressure_unit": "hPa",
2016-
"name": f"storm_{track.n_trk.item()}",
2017-
"sid": track.n_trk.item(),
2018-
"orig_event_flag": True,
2019-
"data_provider": "FAST",
2020-
"id_no": track.n_trk.item(),
2021-
"category": set_category(
2022-
max_wind_kn, wind_unit="kn", saffir_scale=None
2023-
),
2024-
},
2025-
)
1978+
data.append(
1979+
xr.Dataset(
1980+
{
1981+
"time_step": (
1982+
"time",
1983+
np.full(time.shape[0], track.time.data[1]),
1984+
),
1985+
"max_sustained_wind": (
1986+
"time",
1987+
track.vmax_trks.data,
1988+
),
1989+
"central_pressure": ("time", cen_pres),
1990+
"radius_max_wind": (
1991+
"time",
1992+
estimate_rmw(np.full(lat.shape, np.nan), cen_pres),
1993+
),
1994+
"environmental_pressure": (
1995+
"time",
1996+
np.full(time.shape[0], env_pressure),
1997+
),
1998+
"basin": (
1999+
"time",
2000+
np.full(time.shape[0], track.tc_basins.data.item()),
2001+
),
2002+
},
2003+
coords={
2004+
"time": ("time", time),
2005+
"lat": ("time", lat),
2006+
"lon": ("time", lon),
2007+
},
2008+
attrs={
2009+
"max_sustained_wind_unit": "m/s",
2010+
"central_pressure_unit": "hPa",
2011+
"name": f"storm_{track.n_trk.item()}",
2012+
"sid": track.n_trk.item(),
2013+
"orig_event_flag": True,
2014+
"data_provider": "FAST",
2015+
"id_no": track.n_trk.item(),
2016+
"category": set_category(
2017+
max_wind_kn, wind_unit="kn", saffir_scale=None
2018+
),
2019+
},
20262020
)
2021+
)
20272022

20282023
return cls(data)
20292024

1.59 KB
Binary file not shown.

climada/hazard/test/test_tc_tracks.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -660,8 +660,13 @@ def test_from_simulations_storm(self):
660660
self.assertEqual(len(tc_track.data), 0)
661661

662662
def test_from_FAST(self):
663-
"""test the correct import of netcdf files from FAST model and the conversion to a
664-
different xr.array structure compatible with CLIMADA."""
663+
"""Test the correct import of netcdf files from FAST model and the conversion to a
664+
different xr.array structure compatible with CLIMADA.
665+
666+
The test fixture contains 5 tracks across 2 years. The regression check
667+
``len(tc_track.data) == 5`` ensures that tracks are not multiplied by the
668+
``year`` dimension (the bug would produce 5 × 2 = 10 tracks instead).
669+
"""
665670

666671
tc_track = tc.TCTracks.from_FAST(TEST_TRACK_FAST)
667672

0 commit comments

Comments
 (0)