Fix TCTracks.from_FAST duplicate loading from year loop (#1269)

simonameiler · Simona Meiler · Chahan Kropf · web-flow · commit 5ae3f7fbd40f · 2026-05-15T12:58:37.000+02:00
* Fix TCTracks.from_FAST duplicate loading from year loop

* Refactor test_from_FAST: extend fixture to 2 years, remove separate regression test

The test fixture FAST_test_tracks.nc now has year=[2025,2026] (only
seeds_per_month is extended; track variables retain their n_trk dim).
The existing len(tc_track.data)==5 assertion now acts as the regression
check: the buggy year-loop code would return 5x2=10 tracks.

The separate test_from_FAST_not_multiplied_by_year_dim (with its
temporary-file scaffolding) is removed.

* Update changelog

---------

Co-authored-by: Simona Meiler <simona.meiler@usys.ethz.ch>
Co-authored-by: Chahan Kropf <chahan.kropf@posteo.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,7 @@ Code freeze date: YYYY-MM-DD
 
 - Fixed asset count in impact logging message [#1195](https://github.com/CLIMADA-project/climada_python/pull/1195).
 - `Hazard.from_raster_xarray` now returns a sparse matrix instead of a sparse array [#1261](https://github.com/CLIMADA-project/climada_python/pull/1261).
+- Fix `TCTracks.from_FAST` duplicate loading from year loop [#1269](https://github.com/CLIMADA-project/climada_python/pull/1269).
 
 ### Deprecated
 - `Impact.calc_freq_curve()` should not be given the parameter `return_per`. Use the parameter `return_periods` in `Impact.calc_freq_curve().interpolate()` instead.
diff --git a/climada/hazard/tc_tracks.py b/climada/hazard/tc_tracks.py
@@ -1944,86 +1944,81 @@ def from_FAST(cls, folder_name: str):
             if Path(file).suffix != ".nc":
                 continue
             with xr.open_dataset(file) as dataset:
-                for year in dataset.year:
-                    for i in dataset.n_trk:
-
-                        # Select track
-                        track = dataset.sel(n_trk=i, year=year)
-                        # chunk dataset at first NaN value
-                        lon = track.lon_trks.data
-                        last_valid_index = np.where(np.isfinite(lon))[0][-1]
-                        track = track.isel(time=slice(0, last_valid_index + 1))
-                        # Select lat, lon
-                        lat = track.lat_trks.data
-                        lon = track.lon_trks.data
-                        # Convert lon from 0-360 to -180 - 180
-                        lon = ((lon + 180) % 360) - 180
-                        # Convert time to pandas Datetime "yyyy.mm.dd"
-                        reference_time = (
-                            f"{track.tc_years.item()}-{int(track.tc_month.item())}-01"
-                        )
-                        time = pd.to_datetime(
-                            track.time.data, unit="s", origin=reference_time
-                        ).astype("datetime64[s]")
-                        # Define variables
-                        ms_to_kn = 1.943844
-                        max_wind_kn = track.vmax_trks.data * ms_to_kn
-                        env_pressure = BASIN_ENV_PRESSURE[track.tc_basins.data.item()]
-                        cen_pres = _estimate_pressure(
-                            np.full(lat.shape, np.nan),
-                            lat,
-                            lon,
-                            max_wind_kn,
-                        )
+                for i in dataset.n_trk:
+
+                    # Select track
+                    track = dataset.sel(n_trk=i)
+                    # chunk dataset at first NaN value
+                    lon = track.lon_trks.data
+                    last_valid_index = np.where(np.isfinite(lon))[0][-1]
+                    track = track.isel(time=slice(0, last_valid_index + 1))
+                    # Select lat, lon
+                    lat = track.lat_trks.data
+                    lon = track.lon_trks.data
+                    # Convert lon from 0-360 to -180 - 180
+                    lon = ((lon + 180) % 360) - 180
+                    # Convert time to pandas Datetime "yyyy.mm.dd"
+                    reference_time = (
+                        f"{track.tc_years.item()}-{int(track.tc_month.item())}-01"
+                    )
+                    time = pd.to_datetime(
+                        track.time.data, unit="s", origin=reference_time
+                    ).astype("datetime64[s]")
+                    # Define variables
+                    ms_to_kn = 1.943844
+                    max_wind_kn = track.vmax_trks.data * ms_to_kn
+                    env_pressure = BASIN_ENV_PRESSURE[track.tc_basins.data.item()]
+                    cen_pres = _estimate_pressure(
+                        np.full(lat.shape, np.nan),
+                        lat,
+                        lon,
+                        max_wind_kn,
+                    )
 
-                        data.append(
-                            xr.Dataset(
-                                {
-                                    "time_step": (
-                                        "time",
-                                        np.full(time.shape[0], track.time.data[1]),
-                                    ),
-                                    "max_sustained_wind": (
-                                        "time",
-                                        track.vmax_trks.data,
-                                    ),
-                                    "central_pressure": ("time", cen_pres),
-                                    "radius_max_wind": (
-                                        "time",
-                                        estimate_rmw(
-                                            np.full(lat.shape, np.nan), cen_pres
-                                        ),
-                                    ),
-                                    "environmental_pressure": (
-                                        "time",
-                                        np.full(time.shape[0], env_pressure),
-                                    ),
-                                    "basin": (
-                                        "time",
-                                        np.full(
-                                            time.shape[0], track.tc_basins.data.item()
-                                        ),
-                                    ),
-                                },
-                                coords={
-                                    "time": ("time", time),
-                                    "lat": ("time", lat),
-                                    "lon": ("time", lon),
-                                },
-                                attrs={
-                                    "max_sustained_wind_unit": "m/s",
-                                    "central_pressure_unit": "hPa",
-                                    "name": f"storm_{track.n_trk.item()}",
-                                    "sid": track.n_trk.item(),
-                                    "orig_event_flag": True,
-                                    "data_provider": "FAST",
-                                    "id_no": track.n_trk.item(),
-                                    "category": set_category(
-                                        max_wind_kn, wind_unit="kn", saffir_scale=None
-                                    ),
-                                },
-                            )
+                    data.append(
+                        xr.Dataset(
+                            {
+                                "time_step": (
+                                    "time",
+                                    np.full(time.shape[0], track.time.data[1]),
+                                ),
+                                "max_sustained_wind": (
+                                    "time",
+                                    track.vmax_trks.data,
+                                ),
+                                "central_pressure": ("time", cen_pres),
+                                "radius_max_wind": (
+                                    "time",
+                                    estimate_rmw(np.full(lat.shape, np.nan), cen_pres),
+                                ),
+                                "environmental_pressure": (
+                                    "time",
+                                    np.full(time.shape[0], env_pressure),
+                                ),
+                                "basin": (
+                                    "time",
+                                    np.full(time.shape[0], track.tc_basins.data.item()),
+                                ),
+                            },
+                            coords={
+                                "time": ("time", time),
+                                "lat": ("time", lat),
+                                "lon": ("time", lon),
+                            },
+                            attrs={
+                                "max_sustained_wind_unit": "m/s",
+                                "central_pressure_unit": "hPa",
+                                "name": f"storm_{track.n_trk.item()}",
+                                "sid": track.n_trk.item(),
+                                "orig_event_flag": True,
+                                "data_provider": "FAST",
+                                "id_no": track.n_trk.item(),
+                                "category": set_category(
+                                    max_wind_kn, wind_unit="kn", saffir_scale=None
+                                ),
+                            },
                         )
+                    )
 
         return cls(data)
 
diff --git a/climada/hazard/test/data/FAST_test_tracks.nc b/climada/hazard/test/data/FAST_test_tracks.nc
diff --git a/climada/hazard/test/test_tc_tracks.py b/climada/hazard/test/test_tc_tracks.py
@@ -660,8 +660,13 @@ def test_from_simulations_storm(self):
         self.assertEqual(len(tc_track.data), 0)
 
     def test_from_FAST(self):
-        """test the correct import of netcdf files from FAST model and the conversion to a
-        different xr.array structure compatible with CLIMADA."""
+        """Test the correct import of netcdf files from FAST model and the conversion to a
+        different xr.array structure compatible with CLIMADA.
+
+        The test fixture contains 5 tracks across 2 years. The regression check
+        ``len(tc_track.data) == 5`` ensures that tracks are not multiplied by the
+        ``year`` dimension (the bug would produce 5 × 2 = 10 tracks instead).
+        """
 
         tc_track = tc.TCTracks.from_FAST(TEST_TRACK_FAST)