From ffe0ebf7b4a31e1867945507db9c3504e3865403 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:15:04 +0200 Subject: [PATCH 01/34] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 45b79435b..6f07f7d82 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ out-* *.pyc **/*.zarr/* .DS_Store +*.parquet .vscode .env From 9ebb653cca1164009a7ad03de71cc727d3fc2c12 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:17:27 +0200 Subject: [PATCH 02/34] Disable zarr writing --- pyproject.toml | 1 + src/parcels/_core/particlefile.py | 27 ++++++------- tests/test_advection.py | 1 + tests/test_fieldset.py | 1 + tests/test_particlefile.py | 63 +++++++++++++++++++++++++++++++ tests/test_uxadvection.py | 1 + 6 files changed, 81 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 85aba3a67..0d4d56784 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ markers = [ # can be skipped by doing `pytest -m "not slow"` etc. 
"v4alpha: failing tests that should work for v4alpha", "v4future: failing tests that should work for a future release of v4", "v4remove: failing tests that should probably be removed later", + "uses_old_zarr: tests that need to be migrated to the new particleset format" ] filterwarnings = [ diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index 788c6e572..0d4e0d53b 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -222,21 +222,22 @@ def _write_particle_data(self, *, particle_data, pclass, time_interval, time, in dims = ["trajectory", "obs"] ds[var.name] = xr.DataArray(data=data, dims=dims, attrs=attrs[var.name]) ds[var.name].encoding["chunks"] = self.chunks[0] if var.to_write == "once" else self.chunks - ds.to_zarr(store, mode="w") + # ds.to_zarr(store, mode="w") self._create_new_zarrfile = False else: - Z = zarr.group(store=store, overwrite=False) - obs = particle_data["obs_written"][indices_to_write] - for var in vars_to_write: - if self._maxids > Z[var.name].shape[0]: - self._extend_zarr_dims(Z[var.name], store, dtype=var.dtype, axis=0) - if var.to_write == "once": - if len(once_ids) > 0: - Z[var.name].vindex[ids_once] = particle_data[var.name][indices_to_write_once] - else: - if max(obs) >= Z[var.name].shape[1]: - self._extend_zarr_dims(Z[var.name], store, dtype=var.dtype, axis=1) - Z[var.name].vindex[ids, obs] = particle_data[var.name][indices_to_write] + pass + # Z = zarr.group(store=store, overwrite=False) + # obs = particle_data["obs_written"][indices_to_write] + # for var in vars_to_write: + # if self._maxids > Z[var.name].shape[0]: + # self._extend_zarr_dims(Z[var.name], store, dtype=var.dtype, axis=0) + # if var.to_write == "once": + # if len(once_ids) > 0: + # Z[var.name].vindex[ids_once] = particle_data[var.name][indices_to_write_once] + # else: + # if max(obs) >= Z[var.name].shape[1]: + # self._extend_zarr_dims(Z[var.name], store, dtype=var.dtype, axis=1) + # 
Z[var.name].vindex[ids, obs] = particle_data[var.name][indices_to_write] particle_data["obs_written"][indices_to_write] = obs + 1 diff --git a/tests/test_advection.py b/tests/test_advection.py index d8c6d2a45..06e46e231 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -60,6 +60,7 @@ def test_advection_zonal(mesh, npart=10): np.testing.assert_allclose(pset.lat, startlat, atol=1e-5) +@pytest.mark.uses_old_zarr def test_advection_zonal_with_particlefile(tmp_store): """Particles at high latitude move geographically faster due to the pole correction.""" npart = 10 diff --git a/tests/test_fieldset.py b/tests/test_fieldset.py index b2b05d33f..e51d13f38 100644 --- a/tests/test_fieldset.py +++ b/tests/test_fieldset.py @@ -95,6 +95,7 @@ def test_fieldset_gridset(fieldset): assert len(fieldset.gridset) == 2 +@pytest.mark.uses_old_zarr def test_fieldset_no_UV(tmp_zarrfile): grid = XGrid.from_dataset(ds, mesh="flat") fieldset = FieldSet([Field("P", ds["U_A_grid"], grid, interp_method=XLinear)]) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 84cb90ffa..5c5f6d566 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -4,6 +4,7 @@ from datetime import datetime, timedelta import numpy as np +import pyarrow as pa import pytest import xarray as xr from zarr.storage import MemoryStore @@ -21,6 +22,7 @@ XGrid, ) from parcels._core.particle import Particle, create_particle_data, get_default_particle +from parcels._core.particlefile import _get_schema from parcels._core.utils.time import TimeInterval, timedelta_to_float from parcels._datasets.structured.generated import peninsula_dataset from parcels._datasets.structured.generic import datasets @@ -44,6 +46,7 @@ def fieldset() -> FieldSet: # TODO v4: Move into a `conftest.py` file and remov ) +@pytest.mark.uses_old_zarr def test_metadata(fieldset, tmp_zarrfile): pset = ParticleSet(fieldset, pclass=Particle, lon=0, lat=0) @@ -54,6 +57,7 @@ def test_metadata(fieldset, 
tmp_zarrfile): assert ds.attrs["parcels_kernels"].lower() == "DoNothing".lower() +@pytest.mark.uses_old_zarr def test_pfile_array_write_zarr_memorystore(fieldset): """Check that writing to a Zarr MemoryStore works.""" npart = 10 @@ -72,6 +76,7 @@ def test_pfile_array_write_zarr_memorystore(fieldset): assert ds.sizes["trajectory"] == npart +@pytest.mark.uses_old_zarr def test_write_fieldset_without_time(tmp_zarrfile): ds = peninsula_dataset() # DataSet without time assert "time" not in ds.dims @@ -87,6 +92,7 @@ def test_write_fieldset_without_time(tmp_zarrfile): assert ds.time.values[0, 1] == np.timedelta64(1, "s") +@pytest.mark.uses_old_zarr def test_pfile_array_remove_particles(fieldset, tmp_zarrfile): npart = 10 pset = ParticleSet( @@ -108,6 +114,7 @@ def test_pfile_array_remove_particles(fieldset, tmp_zarrfile): assert (np.isnat(timearr[3, 1])) and (np.isfinite(timearr[3, 0])) +@pytest.mark.uses_old_zarr @pytest.mark.parametrize("chunks_obs", [1, None]) def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_zarrfile): npart = 10 @@ -135,6 +142,7 @@ def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_zarrfile): assert np.all(np.isnan(ds["time"][:, 1:])) +@pytest.mark.uses_old_zarr def test_variable_write_double(fieldset, tmp_zarrfile): def Update_lon(particles, fieldset): # pragma: no cover particles.dlon += 0.1 @@ -150,6 +158,7 @@ def Update_lon(particles, fieldset): # pragma: no cover assert isinstance(lons.values[0, 0], np.float64) +@pytest.mark.uses_old_zarr def test_write_dtypes_pfile(fieldset, tmp_zarrfile): dtypes = [ np.float32, @@ -226,6 +235,7 @@ def IncrLon(particles, fieldset): # pragma: no cover assert filesize < 1024 * 65 # test that chunking leads to filesize less than 65KB +@pytest.mark.uses_old_zarr def test_file_warnings(fieldset, tmp_zarrfile): pset = ParticleSet(fieldset, lon=[0, 0], lat=[0, 0], time=[np.timedelta64(0, "s"), np.timedelta64(1, "s")]) pfile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(2, 
"s")) @@ -250,6 +260,7 @@ def test_outputdt_types(outputdt, expectation, tmp_zarrfile): assert pfile.outputdt == timedelta_to_float(outputdt) +@pytest.mark.uses_old_zarr def test_write_timebackward(fieldset, tmp_zarrfile): release_time = fieldset.time_interval.left + [np.timedelta64(i + 1, "s") for i in range(3)] pset = ParticleSet(fieldset, lat=[0, 1, 2], lon=[0, 0, 0], time=release_time) @@ -322,6 +333,7 @@ def SampleP(particles, fieldset): # pragma: no cover assert fieldset.U.grid.lat[yi] <= lat < fieldset.U.grid.lat[yi + 1] +@pytest.mark.uses_old_zarr @pytest.mark.parametrize("outputdt", [np.timedelta64(1, "s"), np.timedelta64(2, "s"), np.timedelta64(3, "s")]) def test_time_is_age(fieldset, tmp_zarrfile, outputdt): # Test that particle age is same as time - initial_time @@ -346,6 +358,7 @@ def IncreaseAge(particles, fieldset): # pragma: no cover np.testing.assert_equal(age, ds_timediff) +@pytest.mark.uses_old_zarr def test_reset_dt(fieldset, tmp_zarrfile): # Assert that p.dt gets reset when a write_time is not a multiple of dt # for p.dt=0.02 to reach outputdt=0.05 and endtime=0.1, the steps should be [0.2, 0.2, 0.1, 0.2, 0.2, 0.1], resulting in 6 kernel executions @@ -362,6 +375,7 @@ def Update_lon(particles, fieldset): # pragma: no cover assert np.allclose(pset.lon, 0.6) +@pytest.mark.uses_old_zarr def test_correct_misaligned_outputdt_dt(fieldset, tmp_zarrfile): """Testing that outputdt does not need to be a multiple of dt.""" @@ -398,6 +412,7 @@ def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwarg return ds +@pytest.mark.uses_old_zarr def test_pset_execute_outputdt_forwards(fieldset): """Testing output data dt matches outputdt in forward time.""" outputdt = timedelta(hours=1) @@ -409,6 +424,7 @@ def test_pset_execute_outputdt_forwards(fieldset): assert np.all(ds.isel(trajectory=0).time.diff(dim="obs").values == np.timedelta64(outputdt)) +@pytest.mark.uses_old_zarr def test_pset_execute_output_time_forwards(fieldset): """Testing 
output times start at initial time and end at initial time + runtime.""" outputdt = np.timedelta64(1, "h") @@ -423,6 +439,7 @@ def test_pset_execute_output_time_forwards(fieldset): ) +@pytest.mark.uses_old_zarr def test_pset_execute_outputdt_backwards(fieldset): """Testing output data dt matches outputdt in backwards time.""" outputdt = timedelta(hours=1) @@ -434,6 +451,7 @@ def test_pset_execute_outputdt_backwards(fieldset): assert np.all(file_outputdt == np.timedelta64(-outputdt)) +@pytest.mark.uses_old_zarr def test_pset_execute_outputdt_backwards_fieldset_timevarying(): """test_pset_execute_outputdt_backwards() still passed despite #1722 as it doesn't account for time-varying fields, which for some reason #1722 @@ -469,6 +487,7 @@ def test_particlefile_init_invalid(tmp_store): # TODO: Add test for read only s ParticleFile(tmp_store, outputdt=np.timedelta64(1, "s"), chunks=1) +@pytest.mark.uses_old_zarr def test_particlefile_write_particle_data(tmp_store): nparticles = 100 @@ -535,3 +554,47 @@ def Update_lon(particles, fieldset): # pragma: no cover # For pytest purposes, we need to reset to original status pset.set_variable_write_status("z", True) pset.set_variable_write_status("lat", True) + + +@pytest.mark.parametrize( + "particle", + [ + Particle, + parcels.ParticleClass( + variables=[ + Variable( + "lon", + dtype=np.float32, + attrs={"standard_name": "longitude", "units": "degrees_east", "axis": "X"}, + ), + Variable( + "lat", + dtype=np.float32, + attrs={"standard_name": "latitude", "units": "degrees_north", "axis": "Y"}, + ), + Variable( + "z", + dtype=np.float32, + attrs={"standard_name": "vertical coordinate", "units": "m", "positive": "down"}, + ), + ] + ), + ], +) +def test_particle_schema(particle): + s = _get_schema(particle, {}) + + written_variables = [v for v in particle.variables if v.to_write] + + assert len(s.names) == len(written_variables), ( + "Number of particles in the output schema should be the same as the writable variables in the 
ParticleClass object." + ) + + for variable, pyarrow_field in zip( + written_variables, + s, + strict=False, + ): + assert variable.name == pyarrow_field.name + assert variable.attrs == {k.decode(): v.decode() for k, v in pyarrow_field.metadata.items()} + assert pa.from_numpy_dtype(variable.dtype) == pyarrow_field.type diff --git a/tests/test_uxadvection.py b/tests/test_uxadvection.py index 3f27536f8..95e517140 100644 --- a/tests/test_uxadvection.py +++ b/tests/test_uxadvection.py @@ -11,6 +11,7 @@ ) +@pytest.mark.uses_old_zarr @pytest.mark.parametrize("integrator", [AdvectionEE, AdvectionRK2, AdvectionRK4]) def test_ux_constant_flow_face_centered_2D(integrator, tmp_zarrfile): ds = datasets_unstructured["ux_constant_flow_face_centered_2D"] From 0218c28f4d8f13af0ce9946c52915693322ab3d7 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:41:28 +0200 Subject: [PATCH 03/34] Fix parquet writing --- pyproject.toml | 2 +- src/parcels/_core/particlefile.py | 225 ++++++------------------------ src/parcels/_core/particleset.py | 9 +- src/parcels/_reprs.py | 7 +- tests/conftest.py | 5 + tests/test_advection.py | 11 +- 6 files changed, 64 insertions(+), 195 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0d4d56784..d0e6f7ba1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ markers = [ # can be skipped by doing `pytest -m "not slow"` etc. 
"v4alpha: failing tests that should work for v4alpha", "v4future: failing tests that should work for a future release of v4", "v4remove: failing tests that should probably be removed later", - "uses_old_zarr: tests that need to be migrated to the new particleset format" + "uses_old_zarr: tests that need to be migrated to the new particleset format", ] filterwarnings = [ diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index 0d4e0d53b..125a925c7 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -2,16 +2,14 @@ from __future__ import annotations -import os from datetime import datetime, timedelta from pathlib import Path -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal import cftime import numpy as np -import xarray as xr -import zarr -from zarr.storage import DirectoryStore +import pyarrow as pa +import pyarrow.parquet as pq import parcels from parcels._core.particle import ParticleClass @@ -25,20 +23,19 @@ __all__ = ["ParticleFile"] -_DATATYPES_TO_FILL_VALUES = { - np.dtype(np.float16): np.nan, - np.dtype(np.float32): np.nan, - np.dtype(np.float64): np.nan, - np.dtype(np.bool_): np.iinfo(np.int8).max, - np.dtype(np.int8): np.iinfo(np.int8).max, - np.dtype(np.int16): np.iinfo(np.int16).max, - np.dtype(np.int32): np.iinfo(np.int32).max, - np.dtype(np.int64): np.iinfo(np.int64).min, - np.dtype(np.uint8): np.iinfo(np.uint8).max, - np.dtype(np.uint16): np.iinfo(np.uint16).max, - np.dtype(np.uint32): np.iinfo(np.uint32).max, - np.dtype(np.uint64): np.iinfo(np.uint64).max, -} + +def _get_schema(particle: parcels.ParticleClass, file_metadata: dict[Any, Any]) -> pa.Schema: + return pa.schema( + [ + pa.field( + v.name, + pa.from_numpy_dtype(v.dtype), + metadata=v.attrs, + ) + for v in _get_vars_to_write(particle) + ], + metadata=file_metadata.copy(), + ) class ParticleFile: @@ -54,10 +51,6 @@ class ParticleFile: Interval which dictates the update frequency of 
file output while ParticleFile is given as an argument of ParticleSet.execute() It is either a numpy.timedelta64, a datimetime.timedelta object or a positive float (in seconds). - chunks : - Tuple (trajs, obs) to control the size of chunks in the zarr output. - create_new_zarrfile : bool - Whether to create a new file. Default is True Returns ------- @@ -65,7 +58,7 @@ class ParticleFile: ParticleFile object that can be used to write particle data to file """ - def __init__(self, store, outputdt, chunks=None, create_new_zarrfile=True): + def __init__(self, path: Path, outputdt): if not isinstance(outputdt, (np.timedelta64, timedelta, float)): raise ValueError( f"Expected outputdt to be a np.timedelta64, datetime.timedelta or float (in seconds), got {type(outputdt)}" @@ -78,21 +71,19 @@ def __init__(self, store, outputdt, chunks=None, create_new_zarrfile=True): self._outputdt = outputdt - _assert_valid_chunks_tuple(chunks) - self._chunks = chunks + self._path = Path( + path + ) # TODO v4: Consider https://arrow.apache.org/docs/python/getstarted.html#working-with-large-data - though a significant question becomes how to partition, perhaps using a particle variable "partition"? + self._writer: pq.ParquetWriter | None = None + if path.exists(): + # TODO: Add logic for recovering/appending to existing parquet file + raise ValueError(f"{path=!r} already exists. Either delete this file or use a path that doesn't exist.") + if not path.parent.exists(): + raise ValueError(f"Folder location for {path=!r} does not exist. Create the folder location first.") + self._maxids = 0 self._pids_written = {} - self.metadata = {} - self._create_new_zarrfile = create_new_zarrfile - - if not isinstance(store, zarr.storage.Store): - store = _get_store_from_pathlike(store) - - self._store = store - - # TODO v4: Enable once updating to zarr v3 - # if store.read_only: - # raise ValueError(f"Store {store} is read-only. 
Please provide a writable store.") + self.extra_metadata = {} # TODO v4: Add check that if create_new_zarrfile is False, the store already exists @@ -100,7 +91,7 @@ def __repr__(self) -> str: return particlefile_repr(self) def set_metadata(self, parcels_grid_mesh: Literal["spherical", "flat"]): - self.metadata.update( + self.extra_metadata.update( { "feature_type": "trajectory", "Conventions": "CF-1.6/CF-1.7", @@ -115,31 +106,8 @@ def outputdt(self): return self._outputdt @property - def chunks(self): - return self._chunks - - @property - def store(self): - return self._store - - @property - def create_new_zarrfile(self): - return self._create_new_zarrfile - - def _extend_zarr_dims(self, Z, store, dtype, axis): # noqa: N803 - if axis == 1: - a = np.full((Z.shape[0], self.chunks[1]), _DATATYPES_TO_FILL_VALUES[dtype], dtype=dtype) - obs = zarr.group(store=store, overwrite=False)["obs"] - if len(obs) == Z.shape[1]: - obs.append(np.arange(self.chunks[1]) + obs[-1] + 1) - else: - extra_trajs = self._maxids - Z.shape[0] - if len(Z.shape) == 2: - a = np.full((extra_trajs, Z.shape[1]), _DATATYPES_TO_FILL_VALUES[dtype], dtype=dtype) - else: - a = np.full((extra_trajs,), _DATATYPES_TO_FILL_VALUES[dtype], dtype=dtype) - Z.append(a, axis=axis) - zarr.consolidate_metadata(store) + def path(self): + return self._path def write(self, pset: ParticleSet, time, indices=None): """Write all data from one time step to the zarr file, @@ -156,125 +124,35 @@ def write(self, pset: ParticleSet, time, indices=None): time_interval = pset.fieldset.time_interval particle_data = pset._data - self._write_particle_data( - particle_data=particle_data, pclass=pclass, time_interval=time_interval, time=time, indices=indices - ) + if self._writer is None: + assert not self.path.exists(), "If the file exists, the writer should already be set" + self._writer = pq.ParquetWriter(self.path, _get_schema(pclass, self.extra_metadata)) - def _write_particle_data(self, *, particle_data, pclass, time_interval, 
time, indices=None): - # if pset._data._ncount == 0: - # warnings.warn( - # f"ParticleSet is empty on writing as array at time {time:g}", - # RuntimeWarning, - # stacklevel=2, - # ) - # return if isinstance(time, (np.timedelta64, np.datetime64)): time = timedelta_to_float(time - time_interval.left) - nparticles = len(particle_data["trajectory"]) vars_to_write = _get_vars_to_write(pclass) if indices is None: indices_to_write = _to_write_particles(particle_data, time) else: indices_to_write = indices + + self._writer.write_table( + pa.table({v.name: pa.array(particle_data[v.name][indices_to_write]) for v in vars_to_write}), + ) - if len(indices_to_write) == 0: - return - - pids = particle_data["trajectory"][indices_to_write] - to_add = sorted(set(pids) - set(self._pids_written.keys())) - for i, pid in enumerate(to_add): - self._pids_written[pid] = self._maxids + i - ids = np.array([self._pids_written[p] for p in pids], dtype=int) - self._maxids = len(self._pids_written) - - once_ids = np.where(particle_data["obs_written"][indices_to_write] == 0)[0] - if len(once_ids) > 0: - ids_once = ids[once_ids] - indices_to_write_once = indices_to_write[once_ids] - - store = self.store - if self.create_new_zarrfile: - if self.chunks is None: - self._chunks = (nparticles, 1) - if (self._maxids > len(ids)) or (self._maxids > self.chunks[0]): - arrsize = (self._maxids, self.chunks[1]) - else: - arrsize = (len(ids), self.chunks[1]) - ds = xr.Dataset( - attrs=self.metadata, - coords={"trajectory": ("trajectory", pids), "obs": ("obs", np.arange(arrsize[1], dtype=np.int32))}, - ) - attrs = _create_variables_attribute_dict(pclass, time_interval) - obs = np.zeros((self._maxids), dtype=np.int32) - for var in vars_to_write: - if var.name not in ["trajectory"]: # because 'trajectory' is written as coordinate - if var.to_write == "once": - data = np.full( - (arrsize[0],), - _DATATYPES_TO_FILL_VALUES[var.dtype], - dtype=var.dtype, - ) - data[ids_once] = 
particle_data[var.name][indices_to_write_once] - dims = ["trajectory"] - else: - data = np.full(arrsize, _DATATYPES_TO_FILL_VALUES[var.dtype], dtype=var.dtype) - data[ids, 0] = particle_data[var.name][indices_to_write] - dims = ["trajectory", "obs"] - ds[var.name] = xr.DataArray(data=data, dims=dims, attrs=attrs[var.name]) - ds[var.name].encoding["chunks"] = self.chunks[0] if var.to_write == "once" else self.chunks - # ds.to_zarr(store, mode="w") - self._create_new_zarrfile = False - else: - pass - # Z = zarr.group(store=store, overwrite=False) - # obs = particle_data["obs_written"][indices_to_write] - # for var in vars_to_write: - # if self._maxids > Z[var.name].shape[0]: - # self._extend_zarr_dims(Z[var.name], store, dtype=var.dtype, axis=0) - # if var.to_write == "once": - # if len(once_ids) > 0: - # Z[var.name].vindex[ids_once] = particle_data[var.name][indices_to_write_once] - # else: - # if max(obs) >= Z[var.name].shape[1]: - # self._extend_zarr_dims(Z[var.name], store, dtype=var.dtype, axis=1) - # Z[var.name].vindex[ids, obs] = particle_data[var.name][indices_to_write] - - particle_data["obs_written"][indices_to_write] = obs + 1 - - -def _get_store_from_pathlike(path: Path | str) -> DirectoryStore: - path = str(Path(path)) # Ensure valid path, and convert to string - extension = os.path.splitext(path)[1] - if extension != ".zarr": - raise ValueError(f"ParticleFile name must end with '.zarr' extension. Got path {path!r}.") + # if len(indices_to_write) == 0: # TODO: Remove this? + # return - return DirectoryStore(path) + def close(self): + if self._writer is not None: + self._writer.close() + self._writer = None def _get_vars_to_write(particle: ParticleClass) -> list[Variable]: return [v for v in particle.variables if v.to_write is not False] -def _create_variables_attribute_dict(particle: ParticleClass, time_interval: TimeInterval) -> dict: - """Creates the dictionary with variable attributes. 
- - Notes - ----- - For ParticleSet structures other than SoA, and structures where ID != index, this has to be overridden. - """ - attrs = {} - - vars = [var for var in particle.variables if var.to_write is not False] - for var in vars: - fill_value = {"_FillValue": _DATATYPES_TO_FILL_VALUES[var.dtype]} - - attrs[var.name] = {**var.attrs, **fill_value} - - attrs["time"].update(_get_calendar_and_units(time_interval)) - - return attrs - - def _to_write_particles(particle_data, time): """Return the Particles that need to be written at time: if particle.time is between time-dt/2 and time+dt (/2)""" return np.where( @@ -299,7 +177,7 @@ def _to_write_particles(particle_data, time): )[0] -def _get_calendar_and_units(time_interval: TimeInterval) -> dict[str, str]: +def _get_calendar_and_units(time_interval: TimeInterval) -> dict[str, str]: # TODO: Remove? calendar = None units = "seconds" if time_interval: @@ -316,16 +194,3 @@ def _get_calendar_and_units(time_interval: TimeInterval) -> dict[str, str]: attrs["calendar"] = calendar return attrs - - -def _assert_valid_chunks_tuple(chunks): - e = ValueError(f"chunks must be a tuple of integers with length 2, got {chunks=!r} instead.") - if chunks is None: - return - - if not isinstance(chunks, tuple): - raise e - if len(chunks) != 2: - raise e - if not all(isinstance(c, int) for c in chunks): - raise e diff --git a/src/parcels/_core/particleset.py b/src/parcels/_core/particleset.py index 5483ffbe4..e4ecb252b 100644 --- a/src/parcels/_core/particleset.py +++ b/src/parcels/_core/particleset.py @@ -20,7 +20,7 @@ ) from parcels._core.warnings import ParticleSetWarning from parcels._logger import logger -from parcels._reprs import _format_zarr_output_location, particleset_repr +from parcels._reprs import particleset_repr __all__ = ["ParticleSet"] @@ -395,7 +395,7 @@ def execute( if output_file is not None: output_file.set_metadata(self.fieldset.gridset[0]._mesh) - output_file.metadata["parcels_kernels"] = self._kernel.funcname + 
output_file.extra_metadata["parcels_kernels"] = self._kernel.funcname dt, sign_dt = _convert_dt_to_float(dt) self._data["dt"][:] = dt @@ -415,7 +415,7 @@ def execute( # Set up pbar if output_file: - logger.info(f"Output files are stored in {_format_zarr_output_location(output_file.store)}") + logger.info(f"Output files are stored in {output_file.path}") if verbose_progress: pbar = tqdm(total=end_time - start_time, file=sys.stdout) @@ -451,6 +451,9 @@ def execute( time = next_time + if output_file is not None: + output_file.close() + if verbose_progress: pbar.close() diff --git a/src/parcels/_reprs.py b/src/parcels/_reprs.py index ad6d0cca2..34b6814a0 100644 --- a/src/parcels/_reprs.py +++ b/src/parcels/_reprs.py @@ -128,7 +128,7 @@ def timeinterval_repr(ti: Any) -> str: def particlefile_repr(pfile: Any) -> str: """Return a pretty repr for ParticleFile""" out = f"""<{type(pfile).__name__}> - store : {_format_zarr_output_location(pfile.store)} + path : {pfile.path} outputdt : {pfile.outputdt!r} chunks : {pfile.chunks!r} create_new_zarrfile : {pfile.create_new_zarrfile!r} @@ -178,11 +178,6 @@ def _format_list_items_multiline(items: list[str] | dict, level: int = 1, with_b return "\n".join([textwrap.indent(e, indentation_str) for e in entries]) -def _format_zarr_output_location(zarr_obj): - if isinstance(zarr_obj, DirectoryStore): - return zarr_obj.path - return repr(zarr_obj) - def is_builtin_object(obj): return obj.__class__.__module__ == "builtins" diff --git a/tests/conftest.py b/tests/conftest.py index 82020c37e..3308d7e3e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,3 +11,8 @@ def tmp_zarrfile(tmp_path, request): @pytest.fixture def tmp_store(): return MemoryStore() + +@pytest.fixture +def tmp_parquet(tmp_path): + return tmp_path / 'tmp.parquet' + diff --git a/tests/test_advection.py b/tests/test_advection.py index 06e46e231..365e9e6f4 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -4,6 +4,7 @@ import parcels import 
parcels.tutorial +import pandas as pd from parcels import ( Field, FieldSet, @@ -60,8 +61,7 @@ def test_advection_zonal(mesh, npart=10): np.testing.assert_allclose(pset.lat, startlat, atol=1e-5) -@pytest.mark.uses_old_zarr -def test_advection_zonal_with_particlefile(tmp_store): +def test_advection_zonal_with_particlefile(tmp_parquet): """Particles at high latitude move geographically faster due to the pole correction.""" npart = 10 ds = simple_UV_dataset(mesh="flat") @@ -69,12 +69,13 @@ def test_advection_zonal_with_particlefile(tmp_store): fieldset = FieldSet.from_sgrid_conventions(ds, mesh="flat") pset = ParticleSet(fieldset, lon=np.zeros(npart) + 20.0, lat=np.linspace(0, 80, npart)) - pfile = ParticleFile(tmp_store, outputdt=np.timedelta64(30, "m")) + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(30, "m")) pset.execute(AdvectionRK4, runtime=np.timedelta64(2, "h"), dt=np.timedelta64(15, "m"), output_file=pfile) assert (np.diff(pset.lon) < 1.0e-4).all() - ds = xr.open_zarr(tmp_store) - np.testing.assert_allclose(ds.isel(obs=-1).lon.values, pset.lon) + df = pd.read_parquet(tmp_parquet) + final_time = df["time"].max() + np.testing.assert_allclose(df[df["time"] == final_time]["lon"].values, pset.lon, atol=1e-5) def periodicBC(particles, fieldset): From 4e7de3eab2af85c3c33e2d38fc1872606db5d601 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 19:48:53 +0200 Subject: [PATCH 04/34] Remove test_vriable_write_double From bc653f1ec5ac2691572aa22d2c802a4885224a71 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 19:08:16 +0200 Subject: [PATCH 05/34] Fix all "uses_old_zarr" tests --- tests-v3/test_advection.py | 6 +- tests-v3/test_fieldset_sampling.py | 6 +- tests-v3/test_particlesets.py | 6 +- tests/conftest.py | 2 +- tests/test_fieldset.py | 10 +-- tests/test_particlefile.py | 112 ++++++++++++++--------------- tests/test_uxadvection.py | 10 
+-- 7 files changed, 76 insertions(+), 76 deletions(-) diff --git a/tests-v3/test_advection.py b/tests-v3/test_advection.py index 3d8f06bac..77abee900 100644 --- a/tests-v3/test_advection.py +++ b/tests-v3/test_advection.py @@ -79,7 +79,7 @@ def test_analyticalAgrid(): @pytest.mark.parametrize("v", [1, -0.3, 0, -1]) @pytest.mark.parametrize("w", [None, 1, -0.3, 0, -1]) @pytest.mark.parametrize("direction", [1, -1]) -def test_uniform_analytical(u, v, w, direction, tmp_zarrfile): +def test_uniform_analytical(u, v, w, direction, tmp_parquet): lon = np.arange(0, 15, dtype=np.float32) lat = np.arange(0, 15, dtype=np.float32) if w is not None: @@ -99,14 +99,14 @@ def test_uniform_analytical(u, v, w, direction, tmp_zarrfile): x0, y0, z0 = 6.1, 6.2, 20 pset = ParticleSet(fieldset, pclass=Particle, lon=x0, lat=y0, depth=z0) - outfile = pset.ParticleFile(name=tmp_zarrfile, outputdt=1, chunks=(1, 1)) + outfile = pset.ParticleFile(name=tmp_parquet, outputdt=1, chunks=(1, 1)) pset.execute(AdvectionAnalytical, runtime=4, dt=direction, output_file=outfile) assert np.abs(pset.lon - x0 - pset.time * u) < 1e-6 assert np.abs(pset.lat - y0 - pset.time * v) < 1e-6 if w is not None: assert np.abs(pset.depth - z0 - pset.time * w) < 1e-4 - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) times = (direction * ds["time"][:]).values.astype("timedelta64[s]")[0] timeref = np.arange(1, 5).astype("timedelta64[s]") assert np.allclose(times, timeref, atol=np.timedelta64(1, "ms")) diff --git a/tests-v3/test_fieldset_sampling.py b/tests-v3/test_fieldset_sampling.py index 291c27b88..5f25e355c 100644 --- a/tests-v3/test_fieldset_sampling.py +++ b/tests-v3/test_fieldset_sampling.py @@ -773,7 +773,7 @@ def test_multiple_grid_addlater_error(): assert fail -def test_fieldset_sampling_updating_order(tmp_zarrfile): +def test_fieldset_sampling_updating_order(tmp_parquet): def calc_p(t, y, x): return 10 * t + x + 0.2 * y @@ -805,10 +805,10 @@ def SampleP(particle, fieldset, time): # pragma: no 
cover kernels = [AdvectionRK4, SampleP] - pfile = pset.ParticleFile(tmp_zarrfile, outputdt=1) + pfile = pset.ParticleFile(tmp_parquet, outputdt=1) pset.execute(kernels, endtime=1, dt=1, output_file=pfile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) for t in range(len(ds["obs"])): for i in range(len(ds["trajectory"])): assert np.isclose( diff --git a/tests-v3/test_particlesets.py b/tests-v3/test_particlesets.py index ed884f595..5c0f2495f 100644 --- a/tests-v3/test_particlesets.py +++ b/tests-v3/test_particlesets.py @@ -39,7 +39,7 @@ def test_pset_create_list_with_customvariable(fieldset): @pytest.mark.parametrize("restart", [True, False]) -def test_pset_create_fromparticlefile(fieldset, restart, tmp_zarrfile): +def test_pset_create_fromparticlefile(fieldset, restart, tmp_parquet): lon = np.linspace(0, 1, 10, dtype=np.float32) lat = np.linspace(1, 0, 10, dtype=np.float32) @@ -48,7 +48,7 @@ def test_pset_create_fromparticlefile(fieldset, restart, tmp_zarrfile): TestParticle = TestParticle.add_variable("p3", np.float64, to_write="once") pset = ParticleSet(fieldset, lon=lon, lat=lat, depth=[4] * len(lon), pclass=TestParticle, p3=np.arange(len(lon))) - pfile = pset.ParticleFile(tmp_zarrfile, outputdt=1) + pfile = pset.ParticleFile(tmp_parquet, outputdt=1) def Kernel(particle, fieldset, time): # pragma: no cover particle.p = 2.0 @@ -58,7 +58,7 @@ def Kernel(particle, fieldset, time): # pragma: no cover pset.execute(Kernel, runtime=2, dt=1, output_file=pfile) pset_new = ParticleSet.from_particlefile( - fieldset, pclass=TestParticle, filename=tmp_zarrfile, restart=restart, repeatdt=1 + fieldset, pclass=TestParticle, filename=tmp_parquet, restart=restart, repeatdt=1 ) for var in ["lon", "lat", "depth", "time", "p", "p2", "p3"]: diff --git a/tests/conftest.py b/tests/conftest.py index 3308d7e3e..56bbfa480 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,7 @@ @pytest.fixture() -def tmp_zarrfile(tmp_path, request): +def 
tmp_parquet(tmp_path, request): test_name = request.node.name yield tmp_path / f"{test_name}-output.zarr" diff --git a/tests/test_fieldset.py b/tests/test_fieldset.py index e51d13f38..d9b8d6000 100644 --- a/tests/test_fieldset.py +++ b/tests/test_fieldset.py @@ -95,8 +95,8 @@ def test_fieldset_gridset(fieldset): assert len(fieldset.gridset) == 2 -@pytest.mark.uses_old_zarr -def test_fieldset_no_UV(tmp_zarrfile): + +def test_fieldset_no_UV(tmp_parquet): grid = XGrid.from_dataset(ds, mesh="flat") fieldset = FieldSet([Field("P", ds["U_A_grid"], grid, interp_method=XLinear)]) @@ -104,11 +104,11 @@ def SampleP(particles, fieldset): particles.dlon += fieldset.P[particles] pset = ParticleSet(fieldset, lon=0, lat=0) - ofile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(1, "s")) + ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(SampleP, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - ds_out = xr.open_zarr(tmp_zarrfile) - assert ds_out["lon"].shape == (1, 2) + df = xr.open_zarr(tmp_parquet) + assert len(df["lon"]) == 2 @pytest.mark.parametrize("ds", [pytest.param(ds, id=k) for k, ds in datasets_structured.items()]) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 5c5f6d566..67775fa30 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -46,18 +46,18 @@ def fieldset() -> FieldSet: # TODO v4: Move into a `conftest.py` file and remov ) -@pytest.mark.uses_old_zarr -def test_metadata(fieldset, tmp_zarrfile): + +def test_metadata(fieldset, tmp_parquet): pset = ParticleSet(fieldset, pclass=Particle, lon=0, lat=0) - ofile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(1, "s")) + ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) assert ds.attrs["parcels_kernels"].lower() == 
"DoNothing".lower() -@pytest.mark.uses_old_zarr + def test_pfile_array_write_zarr_memorystore(fieldset): """Check that writing to a Zarr MemoryStore works.""" npart = 10 @@ -76,8 +76,8 @@ def test_pfile_array_write_zarr_memorystore(fieldset): assert ds.sizes["trajectory"] == npart -@pytest.mark.uses_old_zarr -def test_write_fieldset_without_time(tmp_zarrfile): + +def test_write_fieldset_without_time(tmp_parquet): ds = peninsula_dataset() # DataSet without time assert "time" not in ds.dims grid = XGrid.from_dataset(ds, mesh="flat") @@ -85,15 +85,15 @@ def test_write_fieldset_without_time(tmp_zarrfile): pset = ParticleSet(fieldset, pclass=Particle, lon=0, lat=0) - ofile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(1, "s")) + ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) assert ds.time.values[0, 1] == np.timedelta64(1, "s") -@pytest.mark.uses_old_zarr -def test_pfile_array_remove_particles(fieldset, tmp_zarrfile): + +def test_pfile_array_remove_particles(fieldset, tmp_parquet): npart = 10 pset = ParticleSet( fieldset, @@ -102,21 +102,21 @@ def test_pfile_array_remove_particles(fieldset, tmp_zarrfile): lat=0.5 * np.ones(npart), time=fieldset.time_interval.left, ) - pfile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(1, "s")) + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset._data["time"][:] = 0 pfile.write(pset, time=fieldset.time_interval.left) pset.remove_indices(3) new_time = 86400 # s in a day pset._data["time"][:] = new_time pfile.write(pset, new_time) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) timearr = ds["time"][:] assert (np.isnat(timearr[3, 1])) and (np.isfinite(timearr[3, 0])) -@pytest.mark.uses_old_zarr + @pytest.mark.parametrize("chunks_obs", [1, None]) -def test_pfile_array_remove_all_particles(fieldset, 
chunks_obs, tmp_zarrfile): +def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_parquet): npart = 10 pset = ParticleSet( fieldset, @@ -126,14 +126,14 @@ def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_zarrfile): time=fieldset.time_interval.left, ) chunks = (npart, chunks_obs) if chunks_obs else None - pfile = ParticleFile(tmp_zarrfile, chunks=chunks, outputdt=np.timedelta64(1, "s")) + pfile = ParticleFile(tmp_parquet, chunks=chunks, outputdt=np.timedelta64(1, "s")) pfile.write(pset, time=0) for _ in range(npart): pset.remove_indices(-1) pfile.write(pset, fieldset.time_interval.left + np.timedelta64(1, "D")) pfile.write(pset, fieldset.time_interval.left + np.timedelta64(2, "D")) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) np.testing.assert_allclose(ds["time"][:, 0] - fieldset.time_interval.left, np.timedelta64(0, "s")) if chunks_obs is not None: assert ds["time"][:].shape == chunks @@ -158,8 +158,8 @@ def Update_lon(particles, fieldset): # pragma: no cover assert isinstance(lons.values[0, 0], np.float64) -@pytest.mark.uses_old_zarr -def test_write_dtypes_pfile(fieldset, tmp_zarrfile): + +def test_write_dtypes_pfile(fieldset, tmp_parquet): dtypes = [ np.float32, np.float64, @@ -178,11 +178,11 @@ def test_write_dtypes_pfile(fieldset, tmp_zarrfile): MyParticle = Particle.add_variable(extra_vars) pset = ParticleSet(fieldset, pclass=MyParticle, lon=0, lat=0, time=fieldset.time_interval.left) - pfile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(1, "s")) + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pfile.write(pset, time=fieldset.time_interval.left) ds = xr.open_zarr( - tmp_zarrfile, mask_and_scale=False + tmp_parquet, mask_and_scale=False ) # Note masking issue at https://stackoverflow.com/questions/68460507/xarray-loading-int-data-as-float for d in dtypes: assert ds[f"v_{d.__name__}"].dtype == d @@ -196,7 +196,7 @@ def test_variable_written_once(): @pytest.mark.skip(reason="Pending 
ParticleFile refactor; see issue #2386") @pytest.mark.parametrize("dt", [-np.timedelta64(1, "s"), np.timedelta64(1, "s")]) @pytest.mark.parametrize("maxvar", [2, 4, 10]) -def test_pset_repeated_release_delayed_adding_deleting(fieldset, tmp_zarrfile, dt, maxvar): +def test_pset_repeated_release_delayed_adding_deleting(fieldset, tmp_parquet, dt, maxvar): """Tests that if particles are released and deleted based on age that resulting output file is correct.""" npart = 10 fieldset.add_constant("maxvar", maxvar) @@ -212,7 +212,7 @@ def test_pset_repeated_release_delayed_adding_deleting(fieldset, tmp_zarrfile, d pclass=MyParticle, time=fieldset.time_interval.left + [np.timedelta64(i + 1, "s") for i in range(npart)], ) - pfile = ParticleFile(tmp_zarrfile, outputdt=abs(dt), chunks=(1, 1)) + pfile = ParticleFile(tmp_parquet, outputdt=abs(dt), chunks=(1, 1)) def IncrLon(particles, fieldset): # pragma: no cover particles.sample_var += 1.0 @@ -225,20 +225,20 @@ def IncrLon(particles, fieldset): # pragma: no cover for _ in range(npart): pset.execute(IncrLon, dt=dt, runtime=np.timedelta64(1, "s"), output_file=pfile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) samplevar = ds["sample_var"][:] assert samplevar.shape == (npart, min(maxvar, npart + 1)) # test whether samplevar[:, k] = k for k in range(samplevar.shape[1]): assert np.allclose([p for p in samplevar[:, k] if np.isfinite(p)], k + 1) - filesize = os.path.getsize(str(tmp_zarrfile)) + filesize = os.path.getsize(str(tmp_parquet)) assert filesize < 1024 * 65 # test that chunking leads to filesize less than 65KB -@pytest.mark.uses_old_zarr -def test_file_warnings(fieldset, tmp_zarrfile): + +def test_file_warnings(fieldset, tmp_parquet): pset = ParticleSet(fieldset, lon=[0, 0], lat=[0, 0], time=[np.timedelta64(0, "s"), np.timedelta64(1, "s")]) - pfile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(2, "s")) + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(2, "s")) with 
pytest.warns(ParticleSetWarning, match="Some of the particles have a start time difference.*"): pset.execute(AdvectionRK4, runtime=3, dt=1, output_file=pfile) @@ -254,20 +254,20 @@ def test_file_warnings(fieldset, tmp_zarrfile): (-np.timedelta64(5, "s"), pytest.raises(ValueError)), ], ) -def test_outputdt_types(outputdt, expectation, tmp_zarrfile): +def test_outputdt_types(outputdt, expectation, tmp_parquet): with expectation: - pfile = ParticleFile(tmp_zarrfile, outputdt=outputdt) + pfile = ParticleFile(tmp_parquet, outputdt=outputdt) assert pfile.outputdt == timedelta_to_float(outputdt) -@pytest.mark.uses_old_zarr -def test_write_timebackward(fieldset, tmp_zarrfile): + +def test_write_timebackward(fieldset, tmp_parquet): release_time = fieldset.time_interval.left + [np.timedelta64(i + 1, "s") for i in range(3)] pset = ParticleSet(fieldset, lat=[0, 1, 2], lon=[0, 0, 0], time=release_time) - pfile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(1, "s")) + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(3, "s"), dt=-np.timedelta64(1, "s"), output_file=pfile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) trajs = ds["trajectory"][:] output_time = ds["time"][:].values @@ -280,7 +280,7 @@ def test_write_timebackward(fieldset, tmp_zarrfile): @pytest.mark.xfail @pytest.mark.v4alpha -def test_write_xiyi(fieldset, tmp_zarrfile): +def test_write_xiyi(fieldset, tmp_parquet): fieldset.U.data[:] = 1 # set a non-zero zonal velocity fieldset.add_field( Field(name="P", data=np.zeros((3, 20)), lon=np.linspace(0, 1, 20), lat=[-2, 0, 2], interp_method=XLinear) @@ -311,10 +311,10 @@ def SampleP(particles, fieldset): # pragma: no cover _ = fieldset.P[particles] # To trigger sampling of the P field pset = ParticleSet(fieldset, pclass=XiYiParticle, lon=[0, 0.2], lat=[0.2, 1]) - pfile = ParticleFile(tmp_zarrfile, outputdt=dt) + pfile = ParticleFile(tmp_parquet, outputdt=dt) pset.execute([SampleP, 
Get_XiYi, AdvectionRK4], endtime=10 * dt, dt=dt, output_file=pfile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) pxi0 = ds["pxi0"][:].values.astype(np.int32) pxi1 = ds["pxi1"][:].values.astype(np.int32) lons = ds["lon"][:].values @@ -333,9 +333,9 @@ def SampleP(particles, fieldset): # pragma: no cover assert fieldset.U.grid.lat[yi] <= lat < fieldset.U.grid.lat[yi + 1] -@pytest.mark.uses_old_zarr + @pytest.mark.parametrize("outputdt", [np.timedelta64(1, "s"), np.timedelta64(2, "s"), np.timedelta64(3, "s")]) -def test_time_is_age(fieldset, tmp_zarrfile, outputdt): +def test_time_is_age(fieldset, tmp_parquet, outputdt): # Test that particle age is same as time - initial_time npart = 10 @@ -346,11 +346,11 @@ def IncreaseAge(particles, fieldset): # pragma: no cover time = fieldset.time_interval.left + np.arange(npart) * np.timedelta64(1, "s") pset = ParticleSet(fieldset, pclass=AgeParticle, lon=npart * [0], lat=npart * [0], time=time) - ofile = ParticleFile(tmp_zarrfile, outputdt=outputdt) + ofile = ParticleFile(tmp_parquet, outputdt=outputdt) pset.execute(IncreaseAge, runtime=np.timedelta64(npart * 2, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) age = ds["age"][:].values.astype("timedelta64[s]") ds_timediff = np.zeros_like(age) for i in range(npart): @@ -358,8 +358,8 @@ def IncreaseAge(particles, fieldset): # pragma: no cover np.testing.assert_equal(age, ds_timediff) -@pytest.mark.uses_old_zarr -def test_reset_dt(fieldset, tmp_zarrfile): + +def test_reset_dt(fieldset, tmp_parquet): # Assert that p.dt gets reset when a write_time is not a multiple of dt # for p.dt=0.02 to reach outputdt=0.05 and endtime=0.1, the steps should be [0.2, 0.2, 0.1, 0.2, 0.2, 0.1], resulting in 6 kernel executions dt = np.timedelta64(20, "s") @@ -369,14 +369,14 @@ def Update_lon(particles, fieldset): # pragma: no cover particle = get_default_particle(np.float64) pset = ParticleSet(fieldset, 
pclass=particle, lon=[0], lat=[0]) - ofile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(50, "s")) + ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(50, "s")) pset.execute(Update_lon, runtime=5 * dt, dt=dt, output_file=ofile) assert np.allclose(pset.lon, 0.6) -@pytest.mark.uses_old_zarr -def test_correct_misaligned_outputdt_dt(fieldset, tmp_zarrfile): + +def test_correct_misaligned_outputdt_dt(fieldset, tmp_parquet): """Testing that outputdt does not need to be a multiple of dt.""" def Update_lon(particles, fieldset): # pragma: no cover @@ -384,10 +384,10 @@ def Update_lon(particles, fieldset): # pragma: no cover particle = get_default_particle(np.float64) pset = ParticleSet(fieldset, pclass=particle, lon=[0], lat=[0]) - ofile = ParticleFile(tmp_zarrfile, outputdt=np.timedelta64(3, "s")) + ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(3, "s")) pset.execute(Update_lon, runtime=np.timedelta64(11, "s"), dt=np.timedelta64(2, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) assert np.allclose(ds.lon.values, [0, 3, 6, 9]) assert np.allclose(timedelta_to_float(ds.time.values - ds.time.values[0, 0]), [0, 3, 6, 9]) @@ -412,7 +412,7 @@ def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwarg return ds -@pytest.mark.uses_old_zarr + def test_pset_execute_outputdt_forwards(fieldset): """Testing output data dt matches outputdt in forward time.""" outputdt = timedelta(hours=1) @@ -424,7 +424,7 @@ def test_pset_execute_outputdt_forwards(fieldset): assert np.all(ds.isel(trajectory=0).time.diff(dim="obs").values == np.timedelta64(outputdt)) -@pytest.mark.uses_old_zarr + def test_pset_execute_output_time_forwards(fieldset): """Testing output times start at initial time and end at initial time + runtime.""" outputdt = np.timedelta64(1, "h") @@ -439,7 +439,7 @@ def test_pset_execute_output_time_forwards(fieldset): ) -@pytest.mark.uses_old_zarr + def 
test_pset_execute_outputdt_backwards(fieldset): """Testing output data dt matches outputdt in backwards time.""" outputdt = timedelta(hours=1) @@ -451,7 +451,7 @@ def test_pset_execute_outputdt_backwards(fieldset): assert np.all(file_outputdt == np.timedelta64(-outputdt)) -@pytest.mark.uses_old_zarr + def test_pset_execute_outputdt_backwards_fieldset_timevarying(): """test_pset_execute_outputdt_backwards() still passed despite #1722 as it doesn't account for time-varying fields, which for some reason #1722 @@ -487,7 +487,7 @@ def test_particlefile_init_invalid(tmp_store): # TODO: Add test for read only s ParticleFile(tmp_store, outputdt=np.timedelta64(1, "s"), chunks=1) -@pytest.mark.uses_old_zarr + def test_particlefile_write_particle_data(tmp_store): nparticles = 100 @@ -533,19 +533,19 @@ def test_pfile_write_custom_particle(): @pytest.mark.xfail( reason="set_variable_write_status should be removed - with Particle writing defined on the particle level. GH2186" ) -def test_pfile_set_towrite_False(fieldset, tmp_zarrfile): +def test_pfile_set_towrite_False(fieldset, tmp_parquet): npart = 10 pset = ParticleSet(fieldset, pclass=Particle, lon=np.linspace(0, 1, npart), lat=0.5 * np.ones(npart)) pset.set_variable_write_status("z", False) pset.set_variable_write_status("lat", False) - pfile = pset.ParticleFile(tmp_zarrfile, outputdt=1) + pfile = pset.ParticleFile(tmp_parquet, outputdt=1) def Update_lon(particles, fieldset): # pragma: no cover particles.dlon += 0.1 pset.execute(Update_lon, runtime=10, output_file=pfile) - ds = xr.open_zarr(tmp_zarrfile) + ds = xr.open_zarr(tmp_parquet) assert "time" in ds assert "z" not in ds assert "lat" not in ds diff --git a/tests/test_uxadvection.py b/tests/test_uxadvection.py index 95e517140..04591019b 100644 --- a/tests/test_uxadvection.py +++ b/tests/test_uxadvection.py @@ -11,19 +11,19 @@ ) -@pytest.mark.uses_old_zarr + @pytest.mark.parametrize("integrator", [AdvectionEE, AdvectionRK2, AdvectionRK4]) -def 
test_ux_constant_flow_face_centered_2D(integrator, tmp_zarrfile): +def test_ux_constant_flow_face_centered_2D(integrator, tmp_parquet): ds = datasets_unstructured["ux_constant_flow_face_centered_2D"] T = np.timedelta64(3600, "s") dt = np.timedelta64(300, "s") fieldset = parcels.FieldSet.from_ugrid_conventions(ds, mesh="flat") pset = parcels.ParticleSet(fieldset, lon=[5.0], lat=[5.0]) - pfile = parcels.ParticleFile(store=tmp_zarrfile, outputdt=dt) + pfile = parcels.ParticleFile(store=tmp_parquet, outputdt=dt) pset.execute(integrator, runtime=T, dt=dt, output_file=pfile, verbose_progress=False) expected_lon = 8.6 np.testing.assert_allclose(pset.lon, expected_lon, atol=1e-5) - ds_out = xr.open_zarr(tmp_zarrfile) - np.testing.assert_allclose(ds_out["lon"][:, -1], expected_lon, atol=1e-5) + df = xr.open_zarr(tmp_parquet) + np.testing.assert_allclose(df["lon"][:, -1], expected_lon, atol=1e-5) From 5daec3bc131dfb1f2ee43e299c0d2b40b52cdb8a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 19:33:49 +0200 Subject: [PATCH 06/34] Remove test_variable_write_double Covered by test_write_dtypes_pfile --- tests/test_particlefile.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 67775fa30..eeade646f 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -142,22 +142,6 @@ def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_parquet): assert np.all(np.isnan(ds["time"][:, 1:])) -@pytest.mark.uses_old_zarr -def test_variable_write_double(fieldset, tmp_zarrfile): - def Update_lon(particles, fieldset): # pragma: no cover - particles.dlon += 0.1 - - dt = np.timedelta64(1, "s") - particle = get_default_particle(np.float64) - pset = ParticleSet(fieldset, pclass=particle, lon=[0], lat=[0]) - ofile = ParticleFile(tmp_zarrfile, outputdt=dt) - pset.execute(Update_lon, runtime=np.timedelta64(10, "s"), dt=dt, 
output_file=ofile) - - ds = xr.open_zarr(tmp_zarrfile) - lons = ds["lon"][:] - assert isinstance(lons.values[0, 0], np.float64) - - def test_write_dtypes_pfile(fieldset, tmp_parquet): dtypes = [ From 2a07ced6996d5966a68ab005ca403128a8b6d0e5 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 17 Apr 2026 19:25:35 +0200 Subject: [PATCH 07/34] Fixing tests --- src/parcels/_core/particlefile.py | 11 ++- tests-v3/test_advection.py | 1 + tests-v3/test_fieldset_sampling.py | 1 + tests/test_fieldset.py | 1 + tests/test_particlefile.py | 116 ++++++++--------------------- 5 files changed, 39 insertions(+), 91 deletions(-) diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index 125a925c7..8d121e55d 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -9,6 +9,7 @@ import cftime import numpy as np import pyarrow as pa +from parcels._typing import PathLike import pyarrow.parquet as pq import parcels @@ -58,22 +59,24 @@ class ParticleFile: ParticleFile object that can be used to write particle data to file """ - def __init__(self, path: Path, outputdt): + def __init__(self, path: PathLike, outputdt): if not isinstance(outputdt, (np.timedelta64, timedelta, float)): raise ValueError( f"Expected outputdt to be a np.timedelta64, datetime.timedelta or float (in seconds), got {type(outputdt)}" ) outputdt = timedelta_to_float(outputdt) + path = Path(path) + + if path.suffix != ".parquet": + raise ValueError(f"ParticleFile data is stored in Parquet files - file extension must be '.parquet'. Got {path.suffix=!r}.") if outputdt <= 0: raise ValueError(f"outputdt must be positive/non-zero. Got {outputdt=!r}") self._outputdt = outputdt - self._path = Path( - path - ) # TODO v4: Consider https://arrow.apache.org/docs/python/getstarted.html#working-with-large-data - though a significant question becomes how to partition, perhaps using a particle variable "partition"? 
+ self._path = path # TODO v4: Consider https://arrow.apache.org/docs/python/getstarted.html#working-with-large-data - though a significant question becomes how to partition, perhaps using a particle variable "partition"? self._writer: pq.ParquetWriter | None = None if path.exists(): # TODO: Add logic for recovering/appending to existing parquet file diff --git a/tests-v3/test_advection.py b/tests-v3/test_advection.py index 77abee900..bdd7a4221 100644 --- a/tests-v3/test_advection.py +++ b/tests-v3/test_advection.py @@ -1,5 +1,6 @@ import numpy as np import pytest +import pandas as pd import xarray as xr from parcels import ( diff --git a/tests-v3/test_fieldset_sampling.py b/tests-v3/test_fieldset_sampling.py index 5f25e355c..176eedab1 100644 --- a/tests-v3/test_fieldset_sampling.py +++ b/tests-v3/test_fieldset_sampling.py @@ -3,6 +3,7 @@ from math import cos, pi import numpy as np +import pandas as pd import pytest import xarray as xr diff --git a/tests/test_fieldset.py b/tests/test_fieldset.py index d9b8d6000..8a64546f8 100644 --- a/tests/test_fieldset.py +++ b/tests/test_fieldset.py @@ -4,6 +4,7 @@ import cftime import numpy as np import pytest +import pandas as pd import xarray as xr from parcels import Field, ParticleFile, ParticleSet, VectorField, XGrid, convert diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index eeade646f..397ec6218 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -2,7 +2,8 @@ import tempfile from contextlib import nullcontext as does_not_raise from datetime import datetime, timedelta - +import pyarrow.parquet as pq +import pandas as pd import numpy as np import pyarrow as pa import pytest @@ -53,28 +54,8 @@ def test_metadata(fieldset, tmp_parquet): ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_parquet) - assert ds.attrs["parcels_kernels"].lower() 
== "DoNothing".lower() - - - -def test_pfile_array_write_zarr_memorystore(fieldset): - """Check that writing to a Zarr MemoryStore works.""" - npart = 10 - zarr_store = MemoryStore() - pset = ParticleSet( - fieldset, - pclass=Particle, - lon=np.linspace(0, 1, npart), - lat=0.5 * np.ones(npart), - time=fieldset.time_interval.left, - ) - pfile = ParticleFile(zarr_store, outputdt=np.timedelta64(1, "s")) - pfile.write(pset, time=fieldset.time_interval.left) - - ds = xr.open_zarr(zarr_store) - assert ds.sizes["trajectory"] == npart - + tab = pq.read_table(tmp_parquet) + assert tab.schema.metadata[b"parcels_kernels"].decode().lower() == "DoNothing".lower() def test_write_fieldset_without_time(tmp_parquet): @@ -88,12 +69,14 @@ def test_write_fieldset_without_time(tmp_parquet): ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_parquet) - assert ds.time.values[0, 1] == np.timedelta64(1, "s") - + df = pd.read_parquet(tmp_parquet) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") + assert df['time'][1] == np.timedelta64(1, "s") +@pytest.mark.xfail("Keep or remove? 
Introduced in 5d7dd6bba800baa0fe4bd38edfc17ca3e310062b ") def test_pfile_array_remove_particles(fieldset, tmp_parquet): + """If a particle from the middle of a particleset is removed, that writing doesn't crash""" npart = 10 pset = ParticleSet( fieldset, @@ -115,8 +98,7 @@ def test_pfile_array_remove_particles(fieldset, tmp_parquet): -@pytest.mark.parametrize("chunks_obs", [1, None]) -def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_parquet): +def test_pfile_array_remove_all_particles(fieldset, tmp_parquet): npart = 10 pset = ParticleSet( fieldset, @@ -125,21 +107,17 @@ def test_pfile_array_remove_all_particles(fieldset, chunks_obs, tmp_parquet): lat=0.5 * np.ones(npart), time=fieldset.time_interval.left, ) - chunks = (npart, chunks_obs) if chunks_obs else None - pfile = ParticleFile(tmp_parquet, chunks=chunks, outputdt=np.timedelta64(1, "s")) + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pfile.write(pset, time=0) for _ in range(npart): pset.remove_indices(-1) pfile.write(pset, fieldset.time_interval.left + np.timedelta64(1, "D")) pfile.write(pset, fieldset.time_interval.left + np.timedelta64(2, "D")) + pfile.close() - ds = xr.open_zarr(tmp_parquet) - np.testing.assert_allclose(ds["time"][:, 0] - fieldset.time_interval.left, np.timedelta64(0, "s")) - if chunks_obs is not None: - assert ds["time"][:].shape == chunks - else: - assert ds["time"][:].shape[0] == npart - assert np.all(np.isnan(ds["time"][:, 1:])) + df = pd.read_parquet(tmp_parquet) + # np.testing.assert_allclose(ds["time"][:, 0] - fieldset.time_interval.left, np.timedelta64(0, "s")) # TODO: Need to figure out how times work with parquet output (#2386) + assert df['trajectory'].nunique() == npart @@ -164,12 +142,11 @@ def test_write_dtypes_pfile(fieldset, tmp_parquet): pset = ParticleSet(fieldset, pclass=MyParticle, lon=0, lat=0, time=fieldset.time_interval.left) pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pfile.write(pset, 
time=fieldset.time_interval.left) + pfile.close() - ds = xr.open_zarr( - tmp_parquet, mask_and_scale=False - ) # Note masking issue at https://stackoverflow.com/questions/68460507/xarray-loading-int-data-as-float + tab = pq.read_table(tmp_parquet) for d in dtypes: - assert ds[f"v_{d.__name__}"].dtype == d + assert tab[f"v_{d.__name__}"].type == pa.from_numpy_dtype(d) def test_variable_written_once(): @@ -196,7 +173,7 @@ def test_pset_repeated_release_delayed_adding_deleting(fieldset, tmp_parquet, dt pclass=MyParticle, time=fieldset.time_interval.left + [np.timedelta64(i + 1, "s") for i in range(npart)], ) - pfile = ParticleFile(tmp_parquet, outputdt=abs(dt), chunks=(1, 1)) + pfile = ParticleFile(tmp_parquet, outputdt=abs(dt)) def IncrLon(particles, fieldset): # pragma: no cover particles.sample_var += 1.0 @@ -387,7 +364,7 @@ def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwarg ) with tempfile.TemporaryDirectory() as dir: - name = f"{dir}/test.zarr" + name = f"{dir}/tmp.parquet" output_file = ParticleFile(name, outputdt=outputdt) pset.execute(DoNothing, output_file=output_file, **execute_kwargs) @@ -455,59 +432,24 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): assert np.all(file_outputdt == np.timedelta64(-outputdt)), (file_outputdt, np.timedelta64(-outputdt)) -def test_particlefile_init(tmp_store): - ParticleFile(tmp_store, outputdt=np.timedelta64(1, "s"), chunks=(1, 3)) +def test_particlefile_init(tmp_parquet): + ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) -@pytest.mark.parametrize("name", ["store", "outputdt", "chunks", "create_new_zarrfile"]) -def test_particlefile_readonly_attrs(tmp_store, name): - pfile = ParticleFile(tmp_store, outputdt=np.timedelta64(1, "s"), chunks=(1, 3)) +@pytest.mark.parametrize("name", ["path", "outputdt"]) +def test_particlefile_readonly_attrs(tmp_parquet, name): + pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) with pytest.raises(AttributeError, 
match="property .* of 'ParticleFile' object has no setter"): setattr(pfile, name, "something") -def test_particlefile_init_invalid(tmp_store): # TODO: Add test for read only store - with pytest.raises(ValueError, match="chunks must be a tuple"): - ParticleFile(tmp_store, outputdt=np.timedelta64(1, "s"), chunks=1) +def test_particlefile_init_invalid(tmp_path): + path = tmp_path / 'file.not-parquet' + with pytest.raises(ValueError, match="file extension must be '.parquet'"): + ParticleFile(path, outputdt=np.timedelta64(1, "s")) -def test_particlefile_write_particle_data(tmp_store): - nparticles = 100 - - pfile = ParticleFile(tmp_store, outputdt=np.timedelta64(1, "s"), chunks=(nparticles, 40)) - pclass = Particle - - left, right = np.datetime64("2019-05-30T12:00:00.000000000", "ns"), np.datetime64("2020-01-02", "ns") - time_interval = TimeInterval(left=left, right=right) - - initial_lon = np.linspace(0, 1, nparticles) - data = create_particle_data( - pclass=pclass, - nparticles=nparticles, - ngrids=4, - time_interval=time_interval, - initial={ - "time": np.full(nparticles, fill_value=0), - "lon": initial_lon, - "dt": np.full(nparticles, fill_value=1.0), - "trajectory": np.arange(nparticles), - }, - ) - np.testing.assert_array_equal(data["time"], 0) - pfile._write_particle_data( - particle_data=data, - pclass=pclass, - time_interval=time_interval, - time=left, - ) - ds = xr.open_zarr(tmp_store) - assert ds.time.dtype == "datetime64[ns]" - np.testing.assert_equal(ds["time"].isel(obs=0).values, left) - assert ds.sizes["trajectory"] == nparticles - np.testing.assert_allclose(ds["lon"].isel(obs=0).values, initial_lon) - - def test_pfile_write_custom_particle(): # Test the writing of a custom particle with variables that are to_write, some to_write once, and some not to_write # ? This is more of an integration test... Should it be housed here? 
From b8c547761a729ce0503a5986b7f5307ac472d737 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:11:50 +0200 Subject: [PATCH 08/34] More test fixing --- tests/test_particlefile.py | 64 +++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 397ec6218..a3fe47b00 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -47,7 +47,6 @@ def fieldset() -> FieldSet: # TODO v4: Move into a `conftest.py` file and remov ) - def test_metadata(fieldset, tmp_parquet): pset = ParticleSet(fieldset, pclass=Particle, lon=0, lat=0) @@ -71,10 +70,10 @@ def test_write_fieldset_without_time(tmp_parquet): df = pd.read_parquet(tmp_parquet) pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - assert df['time'][1] == np.timedelta64(1, "s") + assert df["time"][1] == np.timedelta64(1, "s") -@pytest.mark.xfail("Keep or remove? Introduced in 5d7dd6bba800baa0fe4bd38edfc17ca3e310062b ") +@pytest.mark.skip("Keep or remove? 
Introduced in 5d7dd6bba800baa0fe4bd38edfc17ca3e310062b ") def test_pfile_array_remove_particles(fieldset, tmp_parquet): """If a particle from the middle of a particleset is removed, that writing doesn't crash""" npart = 10 @@ -97,7 +96,6 @@ def test_pfile_array_remove_particles(fieldset, tmp_parquet): assert (np.isnat(timearr[3, 1])) and (np.isfinite(timearr[3, 0])) - def test_pfile_array_remove_all_particles(fieldset, tmp_parquet): npart = 10 pset = ParticleSet( @@ -117,8 +115,7 @@ def test_pfile_array_remove_all_particles(fieldset, tmp_parquet): df = pd.read_parquet(tmp_parquet) # np.testing.assert_allclose(ds["time"][:, 0] - fieldset.time_interval.left, np.timedelta64(0, "s")) # TODO: Need to figure out how times work with parquet output (#2386) - assert df['trajectory'].nunique() == npart - + assert df["trajectory"].nunique() == npart def test_write_dtypes_pfile(fieldset, tmp_parquet): @@ -196,7 +193,6 @@ def IncrLon(particles, fieldset): # pragma: no cover assert filesize < 1024 * 65 # test that chunking leads to filesize less than 65KB - def test_file_warnings(fieldset, tmp_parquet): pset = ParticleSet(fieldset, lon=[0, 0], lat=[0, 0], time=[np.timedelta64(0, "s"), np.timedelta64(1, "s")]) pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(2, "s")) @@ -221,22 +217,22 @@ def test_outputdt_types(outputdt, expectation, tmp_parquet): assert pfile.outputdt == timedelta_to_float(outputdt) - def test_write_timebackward(fieldset, tmp_parquet): release_time = fieldset.time_interval.left + [np.timedelta64(i + 1, "s") for i in range(3)] pset = ParticleSet(fieldset, lat=[0, 1, 2], lon=[0, 0, 0], time=release_time) pfile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(3, "s"), dt=-np.timedelta64(1, "s"), output_file=pfile) - ds = xr.open_zarr(tmp_parquet) - trajs = ds["trajectory"][:] - - output_time = ds["time"][:].values + df = pd.read_parquet(tmp_parquet) - assert trajs.values.dtype == "int64" - assert 
np.all(np.diff(trajs.values) < 0) # all particles written in order of release - doutput_time = np.diff(output_time, axis=1) - assert np.all(doutput_time[~np.isnan(doutput_time)] < 0) # all times written in decreasing order + assert df["trajectory"].dtype == "int64" + assert bool( + df.groupby("trajectory") + .apply( + lambda x: (np.diff(x["time"]) < 0).all() # for each particle - set True if it has decreasing time + ) + .all() # ensure for all particles + ) @pytest.mark.xfail @@ -294,7 +290,6 @@ def SampleP(particles, fieldset): # pragma: no cover assert fieldset.U.grid.lat[yi] <= lat < fieldset.U.grid.lat[yi + 1] - @pytest.mark.parametrize("outputdt", [np.timedelta64(1, "s"), np.timedelta64(2, "s"), np.timedelta64(3, "s")]) def test_time_is_age(fieldset, tmp_parquet, outputdt): # Test that particle age is same as time - initial_time @@ -311,6 +306,7 @@ def IncreaseAge(particles, fieldset): # pragma: no cover pset.execute(IncreaseAge, runtime=np.timedelta64(npart * 2, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") ds = xr.open_zarr(tmp_parquet) age = ds["age"][:].values.astype("timedelta64[s]") ds_timediff = np.zeros_like(age) @@ -319,7 +315,6 @@ def IncreaseAge(particles, fieldset): # pragma: no cover np.testing.assert_equal(age, ds_timediff) - def test_reset_dt(fieldset, tmp_parquet): # Assert that p.dt gets reset when a write_time is not a multiple of dt # for p.dt=0.02 to reach outputdt=0.05 and endtime=0.1, the steps should be [0.2, 0.2, 0.1, 0.2, 0.2, 0.1], resulting in 6 kernel executions @@ -336,7 +331,6 @@ def Update_lon(particles, fieldset): # pragma: no cover assert np.allclose(pset.lon, 0.6) - def test_correct_misaligned_outputdt_dt(fieldset, tmp_parquet): """Testing that outputdt does not need to be a multiple of dt.""" @@ -348,9 +342,10 @@ def Update_lon(particles, fieldset): # pragma: no cover ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(3, "s")) 
pset.execute(Update_lon, runtime=np.timedelta64(11, "s"), dt=np.timedelta64(2, "s"), output_file=ofile) - ds = xr.open_zarr(tmp_parquet) - assert np.allclose(ds.lon.values, [0, 3, 6, 9]) - assert np.allclose(timedelta_to_float(ds.time.values - ds.time.values[0, 0]), [0, 3, 6, 9]) + df = pd.read_parquet(tmp_parquet) + assert np.allclose(df['lon'].values, [0, 3, 6, 9]) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") + assert np.allclose(timedelta_to_float(df.time.values - df.time.values[0, 0]), [0, 3, 6, 9]) def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwargs, particle_class=Particle): @@ -368,10 +363,9 @@ def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwarg output_file = ParticleFile(name, outputdt=outputdt) pset.execute(DoNothing, output_file=output_file, **execute_kwargs) - ds = xr.open_zarr(name).load() - - return ds + df = pd.read_parquet(name) + return df def test_pset_execute_outputdt_forwards(fieldset): @@ -380,39 +374,37 @@ def test_pset_execute_outputdt_forwards(fieldset): runtime = timedelta(hours=5) dt = timedelta(minutes=5) - ds = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) - + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") assert np.all(ds.isel(trajectory=0).time.diff(dim="obs").values == np.timedelta64(outputdt)) - def test_pset_execute_output_time_forwards(fieldset): """Testing output times start at initial time and end at initial time + runtime.""" outputdt = np.timedelta64(1, "h") runtime = np.timedelta64(5, "h") dt = np.timedelta64(5, "m") - ds = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) - + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, 
dt=dt)) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") assert ( ds.time[0, 0].values == fieldset.time_interval.left and ds.time[0, -1].values == fieldset.time_interval.left + runtime ) - def test_pset_execute_outputdt_backwards(fieldset): """Testing output data dt matches outputdt in backwards time.""" outputdt = timedelta(hours=1) runtime = timedelta(days=2) dt = -timedelta(minutes=5) - ds = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values assert np.all(file_outputdt == np.timedelta64(-outputdt)) - def test_pset_execute_outputdt_backwards_fieldset_timevarying(): """test_pset_execute_outputdt_backwards() still passed despite #1722 as it doesn't account for time-varying fields, which for some reason #1722 @@ -427,7 +419,8 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): ds_fset = copernicusmarine_to_sgrid(fields=fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) - ds = setup_pset_execute(outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt), fieldset=fieldset) + df = setup_pset_execute(outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt), fieldset=fieldset) + pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values assert np.all(file_outputdt == np.timedelta64(-outputdt)), (file_outputdt, np.timedelta64(-outputdt)) @@ -444,12 +437,11 @@ def test_particlefile_readonly_attrs(tmp_parquet, name): def test_particlefile_init_invalid(tmp_path): - path = tmp_path / 'file.not-parquet' + path = tmp_path / "file.not-parquet" with pytest.raises(ValueError, match="file extension must be 
'.parquet'"): ParticleFile(path, outputdt=np.timedelta64(1, "s")) - def test_pfile_write_custom_particle(): # Test the writing of a custom particle with variables that are to_write, some to_write once, and some not to_write # ? This is more of an integration test... Should it be housed here? From 2d438c5f7b9cad3e27264b51f30334d571328af3 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:41:07 +0200 Subject: [PATCH 09/34] Fix last tests --- tests/test_fieldset.py | 2 +- tests/test_uxadvection.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_fieldset.py b/tests/test_fieldset.py index 8a64546f8..c7131608d 100644 --- a/tests/test_fieldset.py +++ b/tests/test_fieldset.py @@ -108,7 +108,7 @@ def SampleP(particles, fieldset): ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(SampleP, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - df = xr.open_zarr(tmp_parquet) + df = pd.read_parquet(tmp_parquet) assert len(df["lon"]) == 2 diff --git a/tests/test_uxadvection.py b/tests/test_uxadvection.py index 04591019b..52509ba28 100644 --- a/tests/test_uxadvection.py +++ b/tests/test_uxadvection.py @@ -9,6 +9,7 @@ AdvectionRK2, AdvectionRK4, ) +import pandas as pd @@ -20,10 +21,10 @@ def test_ux_constant_flow_face_centered_2D(integrator, tmp_parquet): fieldset = parcels.FieldSet.from_ugrid_conventions(ds, mesh="flat") pset = parcels.ParticleSet(fieldset, lon=[5.0], lat=[5.0]) - pfile = parcels.ParticleFile(store=tmp_parquet, outputdt=dt) + pfile = parcels.ParticleFile(path=tmp_parquet, outputdt=dt) pset.execute(integrator, runtime=T, dt=dt, output_file=pfile, verbose_progress=False) expected_lon = 8.6 np.testing.assert_allclose(pset.lon, expected_lon, atol=1e-5) - df = xr.open_zarr(tmp_parquet) - np.testing.assert_allclose(df["lon"][:, -1], expected_lon, atol=1e-5) + df = pd.read_parquet(tmp_parquet) + 
np.testing.assert_allclose(df["lon"].iloc[-1], expected_lon, atol=1e-5) From 32a82fac4236a3824a7190d65b31b39f78cbbc00 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 09:48:43 +0000 Subject: [PATCH 10/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/parcels/_core/particlefile.py | 10 ++++++---- src/parcels/_core/particleset.py | 2 +- src/parcels/_reprs.py | 2 -- tests/conftest.py | 4 ++-- tests/test_advection.py | 2 +- tests/test_fieldset.py | 3 +-- tests/test_particlefile.py | 12 ++++++------ tests/test_uxadvection.py | 4 +--- 8 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index 8d121e55d..c9cdfa62a 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -9,13 +9,13 @@ import cftime import numpy as np import pyarrow as pa -from parcels._typing import PathLike import pyarrow.parquet as pq import parcels from parcels._core.particle import ParticleClass from parcels._core.utils.time import timedelta_to_float from parcels._reprs import particlefile_repr +from parcels._typing import PathLike if TYPE_CHECKING: from parcels._core.particle import Variable @@ -69,14 +69,16 @@ def __init__(self, path: PathLike, outputdt): path = Path(path) if path.suffix != ".parquet": - raise ValueError(f"ParticleFile data is stored in Parquet files - file extension must be '.parquet'. Got {path.suffix=!r}.") + raise ValueError( + f"ParticleFile data is stored in Parquet files - file extension must be '.parquet'. Got {path.suffix=!r}." + ) if outputdt <= 0: raise ValueError(f"outputdt must be positive/non-zero. 
Got {outputdt=!r}") self._outputdt = outputdt - self._path = path # TODO v4: Consider https://arrow.apache.org/docs/python/getstarted.html#working-with-large-data - though a significant question becomes how to partition, perhaps using a particle variable "partition"? + self._path = path # TODO v4: Consider https://arrow.apache.org/docs/python/getstarted.html#working-with-large-data - though a significant question becomes how to partition, perhaps using a particle variable "partition"? self._writer: pq.ParquetWriter | None = None if path.exists(): # TODO: Add logic for recovering/appending to existing parquet file @@ -138,7 +140,7 @@ def write(self, pset: ParticleSet, time, indices=None): indices_to_write = _to_write_particles(particle_data, time) else: indices_to_write = indices - + self._writer.write_table( pa.table({v.name: pa.array(particle_data[v.name][indices_to_write]) for v in vars_to_write}), ) diff --git a/src/parcels/_core/particleset.py b/src/parcels/_core/particleset.py index e4ecb252b..f2e74112f 100644 --- a/src/parcels/_core/particleset.py +++ b/src/parcels/_core/particleset.py @@ -453,7 +453,7 @@ def execute( if output_file is not None: output_file.close() - + if verbose_progress: pbar.close() diff --git a/src/parcels/_reprs.py b/src/parcels/_reprs.py index 34b6814a0..d27eee379 100644 --- a/src/parcels/_reprs.py +++ b/src/parcels/_reprs.py @@ -7,7 +7,6 @@ import numpy as np import xarray as xr -from zarr.storage import DirectoryStore if TYPE_CHECKING: from parcels import Field, FieldSet, ParticleSet @@ -178,6 +177,5 @@ def _format_list_items_multiline(items: list[str] | dict, level: int = 1, with_b return "\n".join([textwrap.indent(e, indentation_str) for e in entries]) - def is_builtin_object(obj): return obj.__class__.__module__ == "builtins" diff --git a/tests/conftest.py b/tests/conftest.py index 56bbfa480..1853f9774 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ def tmp_parquet(tmp_path, request): def tmp_store(): 
return MemoryStore() + @pytest.fixture def tmp_parquet(tmp_path): - return tmp_path / 'tmp.parquet' - + return tmp_path / "tmp.parquet" diff --git a/tests/test_advection.py b/tests/test_advection.py index 365e9e6f4..95eca30f3 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -1,10 +1,10 @@ import numpy as np +import pandas as pd import pytest import xarray as xr import parcels import parcels.tutorial -import pandas as pd from parcels import ( Field, FieldSet, diff --git a/tests/test_fieldset.py b/tests/test_fieldset.py index c7131608d..6eeef20a6 100644 --- a/tests/test_fieldset.py +++ b/tests/test_fieldset.py @@ -3,8 +3,8 @@ import cf_xarray # noqa: F401 import cftime import numpy as np -import pytest import pandas as pd +import pytest import xarray as xr from parcels import Field, ParticleFile, ParticleSet, VectorField, XGrid, convert @@ -96,7 +96,6 @@ def test_fieldset_gridset(fieldset): assert len(fieldset.gridset) == 2 - def test_fieldset_no_UV(tmp_parquet): grid = XGrid.from_dataset(ds, mesh="flat") fieldset = FieldSet([Field("P", ds["U_A_grid"], grid, interp_method=XLinear)]) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index a3fe47b00..70ef5b505 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -2,13 +2,13 @@ import tempfile from contextlib import nullcontext as does_not_raise from datetime import datetime, timedelta -import pyarrow.parquet as pq -import pandas as pd + import numpy as np +import pandas as pd import pyarrow as pa +import pyarrow.parquet as pq import pytest import xarray as xr -from zarr.storage import MemoryStore import parcels.tutorial from parcels import ( @@ -22,9 +22,9 @@ VectorField, XGrid, ) -from parcels._core.particle import Particle, create_particle_data, get_default_particle +from parcels._core.particle import Particle, get_default_particle from parcels._core.particlefile import _get_schema -from parcels._core.utils.time import TimeInterval, timedelta_to_float 
+from parcels._core.utils.time import timedelta_to_float from parcels._datasets.structured.generated import peninsula_dataset from parcels._datasets.structured.generic import datasets from parcels.convert import copernicusmarine_to_sgrid @@ -343,7 +343,7 @@ def Update_lon(particles, fieldset): # pragma: no cover pset.execute(Update_lon, runtime=np.timedelta64(11, "s"), dt=np.timedelta64(2, "s"), output_file=ofile) df = pd.read_parquet(tmp_parquet) - assert np.allclose(df['lon'].values, [0, 3, 6, 9]) + assert np.allclose(df["lon"].values, [0, 3, 6, 9]) pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") assert np.allclose(timedelta_to_float(df.time.values - df.time.values[0, 0]), [0, 3, 6, 9]) diff --git a/tests/test_uxadvection.py b/tests/test_uxadvection.py index 52509ba28..d3db9aecd 100644 --- a/tests/test_uxadvection.py +++ b/tests/test_uxadvection.py @@ -1,6 +1,6 @@ import numpy as np +import pandas as pd import pytest -import xarray as xr import parcels from parcels._datasets.unstructured.generic import datasets as datasets_unstructured @@ -9,8 +9,6 @@ AdvectionRK2, AdvectionRK4, ) -import pandas as pd - @pytest.mark.parametrize("integrator", [AdvectionEE, AdvectionRK2, AdvectionRK4]) From 19fbd8dddac5272915a4d9a1d343ecb47a779b41 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:51:15 +0200 Subject: [PATCH 11/34] Remove old fixtures --- tests/conftest.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1853f9774..0fd949880 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,16 +1,4 @@ import pytest -from zarr.storage import MemoryStore - - -@pytest.fixture() -def tmp_parquet(tmp_path, request): - test_name = request.node.name - yield tmp_path / f"{test_name}-output.zarr" - - -@pytest.fixture -def tmp_store(): - return MemoryStore() @pytest.fixture From e74672fefe40d7e75d67b256748df0ea47746f22 Mon Sep 
17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 11:52:04 +0200 Subject: [PATCH 12/34] Fix pre-commit errors --- tests/test_particlefile.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 70ef5b505..f2a3c4553 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -374,9 +374,9 @@ def test_pset_execute_outputdt_forwards(fieldset): runtime = timedelta(hours=5) dt = timedelta(minutes=5) - df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) # noqa: F841 pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - assert np.all(ds.isel(trajectory=0).time.diff(dim="obs").values == np.timedelta64(outputdt)) + assert np.all(ds.isel(trajectory=0).time.diff(dim="obs").values == np.timedelta64(outputdt)) # noqa: F821 def test_pset_execute_output_time_forwards(fieldset): @@ -385,11 +385,11 @@ def test_pset_execute_output_time_forwards(fieldset): runtime = np.timedelta64(5, "h") dt = np.timedelta64(5, "m") - df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) # noqa: F841 pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") assert ( - ds.time[0, 0].values == fieldset.time_interval.left - and ds.time[0, -1].values == fieldset.time_interval.left + runtime + ds.time[0, 0].values == fieldset.time_interval.left # noqa: F821 + and ds.time[0, -1].values == fieldset.time_interval.left + runtime # noqa: F821 ) @@ -399,9 +399,9 @@ def test_pset_execute_outputdt_backwards(fieldset): runtime = timedelta(days=2) dt = -timedelta(minutes=5) - 
df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) # noqa: F841 pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values + file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values # noqa: F821 assert np.all(file_outputdt == np.timedelta64(-outputdt)) @@ -419,9 +419,9 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): ds_fset = copernicusmarine_to_sgrid(fields=fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) - df = setup_pset_execute(outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt), fieldset=fieldset) + df = setup_pset_execute(outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt), fieldset=fieldset) # noqa: F841 pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values + file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values # noqa: F821 assert np.all(file_outputdt == np.timedelta64(-outputdt)), (file_outputdt, np.timedelta64(-outputdt)) From db9f9834296dfab2804a782a0ad28e936cfe0188 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:08:54 +0200 Subject: [PATCH 13/34] Cleanup This mark was only introduced during refactoring --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d0e6f7ba1..85aba3a67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,6 @@ markers = [ # can be skipped by doing `pytest -m "not slow"` etc. 
"v4alpha: failing tests that should work for v4alpha", "v4future: failing tests that should work for a future release of v4", "v4remove: failing tests that should probably be removed later", - "uses_old_zarr: tests that need to be migrated to the new particleset format", ] filterwarnings = [ From ac2a8309f197f4a98121d62b3ea8447791685086 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 13:56:37 +0200 Subject: [PATCH 14/34] Add pandas and pyarrow as explicit dependencies --- .github/ci/recipe.yaml | 2 ++ pixi.toml | 4 ++++ pyproject.toml | 2 ++ 3 files changed, 8 insertions(+) diff --git a/.github/ci/recipe.yaml b/.github/ci/recipe.yaml index 2a887e8b0..388c60b2d 100644 --- a/.github/ci/recipe.yaml +++ b/.github/ci/recipe.yaml @@ -36,6 +36,8 @@ requirements: - numpy >=2.1.0 - tqdm >=4.50.0 - xarray >=2025.8.0,<2026.4.0 # TODO: remove upper pin when https://github.com/UXARRAY/uxarray/issues/1490 is resolved + - pandas >=2.2 + - pyarrow >=20.0.0 - cf_xarray >=0.8.6 - xgcm >=0.9.0 - zarr >=2.15.0,!=2.18.0,<3 diff --git a/pixi.toml b/pixi.toml index a71be98ff..286d7f28c 100644 --- a/pixi.toml +++ b/pixi.toml @@ -24,6 +24,8 @@ netcdf4 = ">=1.6.0" numpy = ">=2.1.0" tqdm = ">=4.50.0" xarray = ">=2024.5.0,<2026.4.0" # TODO: remove upper pin when https://github.com/UXARRAY/uxarray/issues/1490 is resolved +pandas = ">=2.2" +pyarrow = ">=20.0.0" holoviews = ">=1.22.0" # https://github.com/prefix-dev/rattler-build/issues/2326 uxarray = ">=2025.3.0" dask = ">=2024.5.1" @@ -51,6 +53,8 @@ netcdf4 = "1.6.*" numpy = "2.1.*" tqdm = "4.50.*" xarray = "2025.8.*" +pandas = "2.2.*" +pyarrow = "20.0.*" uxarray = "2025.3.*" dask = "2024.6.*" zarr = "2.18.*" diff --git a/pyproject.toml b/pyproject.toml index 85aba3a67..072da3b77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,8 @@ dependencies = [ "zarr >=2.15.0,!=2.18.0,<3", "tqdm >=4.50.0", "xarray >=2024.5.0,<2026.4.0", # TODO: remove upper pin when 
https://github.com/UXARRAY/uxarray/issues/1490 is resolved + "pandas >= 2.2", + "pyarrow >=20.0.0", "uxarray >=2025.3.0", "pooch >=1.8.0", "xgcm >=0.9.0", From de464e561ca97627b0003cb54a1a112e44ffb707 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 Apr 2026 13:47:35 +0200 Subject: [PATCH 15/34] Add assert_cftime_like_particlefile Remove temporary test_cftime.py file --- tests/test_utils.py | 29 +++++++++++++++++++++++++++++ tests/utils.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index b42e13330..65ce4ea7a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,9 @@ +import json + import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +import pytest from tests import utils @@ -17,3 +22,27 @@ def test_round_and_hash_float_array(): arr_test = arr + 0.51 * delta h3 = utils.round_and_hash_float_array(arr_test, decimals=decimals) assert h3 != h + + +@pytest.mark.parametrize("cal", ["julian", "proleptic_gregorian", "365_day", "366_day", "360_day"]) +def test_assert_cftime_like_particlefile(tmp_path, cal): + path = tmp_path / "test.parquet" + attrs = {"units": "seconds since 2000-01-01 17:00:00", "calendar": cal} + field = pa.field("time", pa.float64(), metadata={"attrs": json.dumps(attrs)}) + schema = pa.schema([field]) + table = pa.table({"time": pa.array([-20.0, 1.0])}, schema=schema) + pq.write_table(table, path) + + utils.assert_cftime_like_particlefile(path) + + +def test_assert_cftime_like_particlefile_broken_parquet(tmp_path): + path = tmp_path / "test.parquet" + attrs = {"units": "broken-units", "calendar": "365_day"} + field = pa.field("time", pa.float64(), metadata={"attrs": json.dumps(attrs)}) + schema = pa.schema([field]) + table = pa.table({"time": pa.array([-20.0, 1.0])}, schema=schema) + pq.write_table(table, path) + + with pytest.raises(Exception, match="CF-time values in Parquet did not get 
properly decoded"): + utils.assert_cftime_like_particlefile(path) diff --git a/tests/utils.py b/tests/utils.py index 3213abd31..d20eaf845 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -151,3 +151,34 @@ def round_and_hash_float_array(arr, decimals=6): # Mimic Java's HashMap hash transformation h ^= (h >> 20) ^ (h >> 12) return h ^ (h >> 7) ^ (h >> 4) + + +def assert_cftime_like_particlefile(parquet_path: Path) -> None: + import json + + import cftime + import pyarrow as pa + import pyarrow.parquet as pq + + assert parquet_path.suffix == ".parquet", "Path must be a parquet file" + + table = pq.read_table(parquet_path) + assert "time" in table.schema.names, "Parquet file must have a 'time' column" + + time_field = table.schema.field("time") + assert pa.types.is_floating(time_field.type) or pa.types.is_integer(time_field.type), ( + f"'time' column must be numeric, got {time_field.type}" + ) + + raw_meta = time_field.metadata + attrs = json.loads(raw_meta[b"attrs"]) + + values = table.column("time").to_pylist() + v = xr.Variable(("time",), values, attrs) + decoded = xr.coders.CFDatetimeCoder(time_unit="s").decode(v) + + # check first value (and hence rest of array) is what we expect + assert isinstance(decoded.values[0], (cftime.datetime, np.datetime64)), ( + "CF-time values in Parquet did not get properly decoded. Are the attributes correct?" 
+ ) + return From 57ccf6f9b49d20417dfe4972589b1ba2d2df6995 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Mon, 20 Apr 2026 14:31:47 +0200 Subject: [PATCH 16/34] MAINT: Cleanup create_particle_data This function is now independent of the time_interval as time is now stored as float --- src/parcels/_core/particle.py | 19 +++++-------------- src/parcels/_core/particleset.py | 1 - tests/test_particle.py | 5 +---- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/src/parcels/_core/particle.py b/src/parcels/_core/particle.py index dc39a063c..0e1be1792 100644 --- a/src/parcels/_core/particle.py +++ b/src/parcels/_core/particle.py @@ -8,7 +8,6 @@ from parcels._compat import _attrgetter_helper from parcels._core.statuscodes import StatusCode from parcels._core.utils.string import _assert_str_and_python_varname -from parcels._core.utils.time import TimeInterval from parcels._reprs import particleclass_repr, variable_repr __all__ = ["Particle", "ParticleClass", "Variable"] @@ -176,7 +175,6 @@ def create_particle_data( pclass: ParticleClass, nparticles: int, ngrids: int, - time_interval: TimeInterval, initial: dict[str, np.ndarray] | None = None, ): if initial is None: @@ -207,16 +205,9 @@ def create_particle_data( name_to_copy = var.initial(_attrgetter_helper) data[var.name] = data[name_to_copy].copy() else: - data[var.name] = _create_array_for_variable(var, nparticles, time_interval) + data[var.name] = np.full( + shape=(nparticles,), + fill_value=var.initial, + dtype=var.dtype, + ) return data - - -def _create_array_for_variable(variable: Variable, nparticles: int, time_interval: TimeInterval): - assert not isinstance(variable.initial, operator.attrgetter), ( - "This function cannot handle attrgetter initial values." 
- ) - return np.full( - shape=(nparticles,), - fill_value=variable.initial, - dtype=variable.dtype, - ) diff --git a/src/parcels/_core/particleset.py b/src/parcels/_core/particleset.py index f2e74112f..e59e2860b 100644 --- a/src/parcels/_core/particleset.py +++ b/src/parcels/_core/particleset.py @@ -111,7 +111,6 @@ def __init__( pclass=pclass, nparticles=lon.size, ngrids=len(fieldset.gridset), - time_interval=fieldset.time_interval, initial=dict( lon=lon, lat=lat, diff --git a/tests/test_particle.py b/tests/test_particle.py index dabe6944c..62eb65cff 100644 --- a/tests/test_particle.py +++ b/tests/test_particle.py @@ -7,8 +7,6 @@ Variable, create_particle_data, ) -from parcels._core.utils.time import TimeInterval -from parcels._datasets.structured.generic import TIME def test_variable_init(): @@ -140,9 +138,8 @@ def test_particleclass_add_variable_collision(): ) @pytest.mark.parametrize("nparticles", [5, 10]) def test_create_particle_data(particle, nparticles): - time_interval = TimeInterval(TIME[0], TIME[-1]) ngrids = 4 - data = create_particle_data(pclass=particle, nparticles=nparticles, ngrids=ngrids, time_interval=time_interval) + data = create_particle_data(pclass=particle, nparticles=nparticles, ngrids=ngrids) assert isinstance(data, dict) assert len(data) == len(particle.variables) + 1 # ei variable is separate From b2bde50c568a985d1c954e7da9a533d5a6f6e48a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 Apr 2026 15:05:18 +0200 Subject: [PATCH 17/34] Add cftime metadata serialization --- src/parcels/_core/particle.py | 6 +++++- src/parcels/_core/particlefile.py | 26 ++++++++++++++++--------- src/parcels/_core/utils/time.py | 32 ++++++++++++++++++++++++++++++- tests/utils/test_time.py | 12 +++++++++++- 4 files changed, 64 insertions(+), 12 deletions(-) diff --git a/src/parcels/_core/particle.py b/src/parcels/_core/particle.py index 0e1be1792..5152b1130 100644 --- a/src/parcels/_core/particle.py +++ 
b/src/parcels/_core/particle.py @@ -148,7 +148,11 @@ def get_default_particle(spatial_dtype: type[np.float32] | type[np.float64]) -> Variable( "time", dtype=np.float64, - attrs={"standard_name": "time", "units": "seconds", "axis": "T"}, + attrs={ + "standard_name": "time", + "units": "seconds", + "axis": "T", + }, # "units" and "calendar" gets updated/set if working with cftime time domain ), Variable( "trajectory", diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index c9cdfa62a..439db2668 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -25,18 +25,24 @@ __all__ = ["ParticleFile"] -def _get_schema(particle: parcels.ParticleClass, file_metadata: dict[Any, Any]) -> pa.Schema: - return pa.schema( - [ +def _get_schema( + particle: parcels.ParticleClass, file_metadata: dict[Any, Any], fset_time_interval: TimeInterval | None +) -> pa.Schema: + + fields = [] + for v in _get_vars_to_write(particle): + attrs = v.attrs.copy() + if v.name == "time": + if fset_time_interval is not None: + attrs.update(fset_time_interval.get_cf_attrs()) + fields.append( pa.field( v.name, pa.from_numpy_dtype(v.dtype), - metadata=v.attrs, + metadata=attrs, ) - for v in _get_vars_to_write(particle) - ], - metadata=file_metadata.copy(), - ) + ) + return pa.schema(fields, metadata=file_metadata.copy()) class ParticleFile: @@ -131,7 +137,9 @@ def write(self, pset: ParticleSet, time, indices=None): if self._writer is None: assert not self.path.exists(), "If the file exists, the writer should already be set" - self._writer = pq.ParquetWriter(self.path, _get_schema(pclass, self.extra_metadata)) + self._writer = pq.ParquetWriter( + self.path, _get_schema(pclass, self.extra_metadata, pset.fieldset.time_interval) + ) if isinstance(time, (np.timedelta64, np.datetime64)): time = timedelta_to_float(time - time_interval.left) diff --git a/src/parcels/_core/utils/time.py b/src/parcels/_core/utils/time.py index b76473a3f..7b9299f3a 
100644 --- a/src/parcels/_core/utils/time.py +++ b/src/parcels/_core/utils/time.py @@ -1,7 +1,7 @@ from __future__ import annotations from datetime import datetime, timedelta -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, Literal, TypeVar, cast import cftime import numpy as np @@ -85,6 +85,36 @@ def intersection(self, other: TimeInterval) -> TimeInterval | None: return TimeInterval(start, end) if start <= end else None + def get_cf_attrs(self) -> dict[Literal["units", "calendar"], str]: + """Return the cf-attrs that would correspond to x seconds from the left edge.""" + return _get_cf_attrs(self.left) + + +def _get_cf_attrs(dt: TimeLike) -> dict[Literal["units", "calendar"], str]: + if isinstance(dt, cftime.datetime): + dt = cast(cftime.datetime, dt) + return {"units": f"seconds since {dt.strftime(dt.format)}", "calendar": dt.calendar} + + from pandas import Timestamp + + if isinstance(dt, np.datetime64): + dt = Timestamp(dt) + + if isinstance(dt, (Timestamp, datetime)): + dt_cf = cftime.datetime( + year=dt.year, + month=dt.month, + day=dt.day, + hour=dt.hour, + minute=dt.minute, + second=dt.second, + microsecond=dt.microsecond, + calendar="gregorian", # What is the cftime proleptic_gregorian calendar? is that relevant here? 
+ ) + return _get_cf_attrs(dt_cf) + + raise NotImplementedError(f"Not implemented for time object {type(dt)=!r}") + def is_compatible( t1: datetime | cftime.datetime | np.timedelta64, t2: datetime | cftime.datetime | np.timedelta64 diff --git a/tests/utils/test_time.py b/tests/utils/test_time.py index ef1f39346..2734b3e91 100644 --- a/tests/utils/test_time.py +++ b/tests/utils/test_time.py @@ -8,7 +8,12 @@ from hypothesis import given from hypothesis import strategies as st -from parcels._core.utils.time import TimeInterval, maybe_convert_python_timedelta_to_numpy, timedelta_to_float +from parcels._core.utils.time import ( + TimeInterval, + _get_cf_attrs, + maybe_convert_python_timedelta_to_numpy, + timedelta_to_float, +) calendar_strategy = st.sampled_from( [ @@ -215,3 +220,8 @@ def test_timedelta_to_float(input, expected): def test_timedelta_to_float_exceptions(): with pytest.raises((ValueError, TypeError)): timedelta_to_float("invalid_type") + + +@given(datetime_strategy()) +def test_datetime_get_cf_attrs(dt): + _get_cf_attrs(dt) From 55493a9d8b5d4353ac90a3fc068a0b843bb76b3d Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 Apr 2026 15:58:26 +0200 Subject: [PATCH 18/34] Add np.timedelta64 support --- src/parcels/_core/utils/time.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parcels/_core/utils/time.py b/src/parcels/_core/utils/time.py index 7b9299f3a..bdce7fc13 100644 --- a/src/parcels/_core/utils/time.py +++ b/src/parcels/_core/utils/time.py @@ -95,6 +95,9 @@ def _get_cf_attrs(dt: TimeLike) -> dict[Literal["units", "calendar"], str]: dt = cast(cftime.datetime, dt) return {"units": f"seconds since {dt.strftime(dt.format)}", "calendar": dt.calendar} + if isinstance(dt, np.timedelta64): + return {"units": "seconds"} + from pandas import Timestamp if isinstance(dt, np.datetime64): From bab4d5d7fc6f56d569b9a64513eb962f4ed243c6 Mon Sep 17 00:00:00 2001 From: Vecko 
<36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 Apr 2026 16:04:20 +0200 Subject: [PATCH 19/34] Fix assert_cftime_like_particlefile Remove nested key - save on root instead --- tests/test_advection.py | 3 ++- tests/test_utils.py | 6 ++---- tests/utils.py | 4 +--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/test_advection.py b/tests/test_advection.py index 95eca30f3..e5c000132 100644 --- a/tests/test_advection.py +++ b/tests/test_advection.py @@ -37,7 +37,7 @@ AdvectionRK4_3D, AdvectionRK45, ) -from tests.utils import DEFAULT_PARTICLES +from tests.utils import DEFAULT_PARTICLES, assert_cftime_like_particlefile @pytest.mark.parametrize("mesh", ["spherical", "flat"]) @@ -76,6 +76,7 @@ def test_advection_zonal_with_particlefile(tmp_parquet): df = pd.read_parquet(tmp_parquet) final_time = df["time"].max() np.testing.assert_allclose(df[df["time"] == final_time]["lon"].values, pset.lon, atol=1e-5) + assert_cftime_like_particlefile(tmp_parquet) def periodicBC(particles, fieldset): diff --git a/tests/test_utils.py b/tests/test_utils.py index 65ce4ea7a..d8d695c18 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,3 @@ -import json - import numpy as np import pyarrow as pa import pyarrow.parquet as pq @@ -28,7 +26,7 @@ def test_round_and_hash_float_array(): def test_assert_cftime_like_particlefile(tmp_path, cal): path = tmp_path / "test.parquet" attrs = {"units": "seconds since 2000-01-01 17:00:00", "calendar": cal} - field = pa.field("time", pa.float64(), metadata={"attrs": json.dumps(attrs)}) + field = pa.field("time", pa.float64(), metadata=attrs) schema = pa.schema([field]) table = pa.table({"time": pa.array([-20.0, 1.0])}, schema=schema) pq.write_table(table, path) @@ -39,7 +37,7 @@ def test_assert_cftime_like_particlefile(tmp_path, cal): def test_assert_cftime_like_particlefile_broken_parquet(tmp_path): path = tmp_path / "test.parquet" attrs = {"units": "broken-units", "calendar": "365_day"} - field = 
pa.field("time", pa.float64(), metadata={"attrs": json.dumps(attrs)}) + field = pa.field("time", pa.float64(), metadata=attrs) schema = pa.schema([field]) table = pa.table({"time": pa.array([-20.0, 1.0])}, schema=schema) pq.write_table(table, path) diff --git a/tests/utils.py b/tests/utils.py index d20eaf845..f474aca2f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -154,7 +154,6 @@ def round_and_hash_float_array(arr, decimals=6): def assert_cftime_like_particlefile(parquet_path: Path) -> None: - import json import cftime import pyarrow as pa @@ -170,8 +169,7 @@ def assert_cftime_like_particlefile(parquet_path: Path) -> None: f"'time' column must be numeric, got {time_field.type}" ) - raw_meta = time_field.metadata - attrs = json.loads(raw_meta[b"attrs"]) + attrs = {k.decode(): v.decode() for k, v in time_field.metadata.items()} values = table.column("time").to_pylist() v = xr.Variable(("time",), values, attrs) From b28665c4d8375c9390c42330e42d7f4c2284115a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 Apr 2026 17:27:04 +0200 Subject: [PATCH 20/34] Move imports --- tests/utils.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index f474aca2f..374c8ecc3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,7 +6,10 @@ from collections import defaultdict from pathlib import Path +import cftime import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq import xarray as xr import parcels @@ -154,11 +157,6 @@ def round_and_hash_float_array(arr, decimals=6): def assert_cftime_like_particlefile(parquet_path: Path) -> None: - - import cftime - import pyarrow as pa - import pyarrow.parquet as pq - assert parquet_path.suffix == ".parquet", "Path must be a parquet file" table = pq.read_table(parquet_path) From 7184e1f3c7b541c68e598fda71895b8e152a8c5a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 
Apr 2026 17:29:23 +0200 Subject: [PATCH 21/34] Fixing tests --- tests/test_particlefile.py | 41 +++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index f2a3c4553..0a9534e05 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -68,9 +68,10 @@ def test_write_fieldset_without_time(tmp_parquet): ofile = ParticleFile(tmp_parquet, outputdt=np.timedelta64(1, "s")) pset.execute(DoNothing, runtime=np.timedelta64(1, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - df = pd.read_parquet(tmp_parquet) - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - assert df["time"][1] == np.timedelta64(1, "s") + table = pq.read_table(tmp_parquet) + assert table.schema.field("time").metadata[b"units"] == b"seconds" + assert b"calendar" not in table.schema.field("time").metadata + assert table["time"].to_numpy()[1] == 1.0 @pytest.mark.skip("Keep or remove? 
Introduced in 5d7dd6bba800baa0fe4bd38edfc17ca3e310062b ") @@ -114,7 +115,6 @@ def test_pfile_array_remove_all_particles(fieldset, tmp_parquet): pfile.close() df = pd.read_parquet(tmp_parquet) - # np.testing.assert_allclose(ds["time"][:, 0] - fieldset.time_interval.left, np.timedelta64(0, "s")) # TODO: Need to figure out how times work with parquet output (#2386) assert df["trajectory"].nunique() == npart @@ -344,8 +344,7 @@ def Update_lon(particles, fieldset): # pragma: no cover df = pd.read_parquet(tmp_parquet) assert np.allclose(df["lon"].values, [0, 3, 6, 9]) - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - assert np.allclose(timedelta_to_float(df.time.values - df.time.values[0, 0]), [0, 3, 6, 9]) + assert np.allclose(df.time - df.time.min(), [0, 3, 6, 9]) def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwargs, particle_class=Particle): @@ -374,9 +373,10 @@ def test_pset_execute_outputdt_forwards(fieldset): runtime = timedelta(hours=5) dt = timedelta(minutes=5) - df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) # noqa: F841 - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - assert np.all(ds.isel(trajectory=0).time.diff(dim="obs").values == np.timedelta64(outputdt)) # noqa: F821 + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + particle_0_times = df[df.trajectory == 0].time.values + + np.testing.assert_equal(np.diff(particle_0_times), outputdt.seconds) def test_pset_execute_output_time_forwards(fieldset): @@ -385,12 +385,9 @@ def test_pset_execute_output_time_forwards(fieldset): runtime = np.timedelta64(5, "h") dt = np.timedelta64(5, "m") - df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) # noqa: F841 - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") 
- assert ( - ds.time[0, 0].values == fieldset.time_interval.left # noqa: F821 - and ds.time[0, -1].values == fieldset.time_interval.left + runtime # noqa: F821 - ) + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + assert df.time.min() == 0.0 + assert df.time.max() == runtime / np.timedelta64(1, "s") def test_pset_execute_outputdt_backwards(fieldset): @@ -399,10 +396,9 @@ def test_pset_execute_outputdt_backwards(fieldset): runtime = timedelta(days=2) dt = -timedelta(minutes=5) - df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) # noqa: F841 - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values # noqa: F821 - assert np.all(file_outputdt == np.timedelta64(-outputdt)) + df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) + particle_0_times = df[df.trajectory == 0].time.values + np.testing.assert_equal(np.diff(particle_0_times), -outputdt.seconds) def test_pset_execute_outputdt_backwards_fieldset_timevarying(): @@ -419,10 +415,9 @@ def test_pset_execute_outputdt_backwards_fieldset_timevarying(): ds_fset = copernicusmarine_to_sgrid(fields=fields) fieldset = FieldSet.from_sgrid_conventions(ds_fset) - df = setup_pset_execute(outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt), fieldset=fieldset) # noqa: F841 - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - file_outputdt = ds.isel(trajectory=0).time.diff(dim="obs").values # noqa: F821 - assert np.all(file_outputdt == np.timedelta64(-outputdt)), (file_outputdt, np.timedelta64(-outputdt)) + df = setup_pset_execute(outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt), fieldset=fieldset) + particle_0_times = df[df.trajectory == 0].time.values + np.testing.assert_equal(np.diff(particle_0_times), 
-outputdt.seconds) def test_particlefile_init(tmp_parquet): From e7e37ef3a81228e6280787c39d98b9389df0189f Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Wed, 22 Apr 2026 18:31:39 +0200 Subject: [PATCH 22/34] Fix test_time_is_age test --- tests/test_particlefile.py | 15 ++++++++------- tests/utils.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 0a9534e05..0afc81f44 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -30,6 +30,7 @@ from parcels.convert import copernicusmarine_to_sgrid from parcels.interpolators import XLinear, XLinear_Velocity from parcels.kernels import AdvectionRK4 +from tests import utils from tests.common_kernels import DoNothing @@ -306,13 +307,13 @@ def IncreaseAge(particles, fieldset): # pragma: no cover pset.execute(IncreaseAge, runtime=np.timedelta64(npart * 2, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - pytest.skip("# TODO: Need to figure out how times work with parquet output (#2386)") - ds = xr.open_zarr(tmp_parquet) - age = ds["age"][:].values.astype("timedelta64[s]") - ds_timediff = np.zeros_like(age) - for i in range(npart): - ds_timediff[i, :] = ds.time.values[i, :] - time[i] - np.testing.assert_equal(age, ds_timediff) + # df = pd.read_parquet(tmp_parquet) + df = utils.read_particlefile(tmp_parquet) + + # Map sorted trajectory IDs to release times (0, 1, ..., npart-1 seconds) + for index, df_traj in df.groupby("trajectory"): + release_time = time[index] + np.testing.assert_equal(df_traj["age"].astype("timedelta64[s]").values, (df_traj["time"] - release_time).values) def test_reset_dt(fieldset, tmp_parquet): diff --git a/tests/utils.py b/tests/utils.py index 374c8ecc3..f2d88919f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,6 +8,7 @@ import cftime import numpy as np +import pandas as pd import pyarrow as pa import pyarrow.parquet 
as pq import xarray as xr @@ -178,3 +179,35 @@ def assert_cftime_like_particlefile(parquet_path: Path) -> None: "CF-time values in Parquet did not get properly decoded. Are the attributes correct?" ) return + + +def read_particlefile(path: Path, decode_times: bool = True) -> pd.DataFrame: + assert path.suffix == ".parquet", "Only Parquet files are supported" + + table = pq.read_table(path) + + try: + time_field = table.field("time") + except KeyError as e: + raise ValueError( + f"Could not find 'time' column in parquet file. Are you sure {path=!r} is a particlefile?" + ) from e + + try: + assert b"units" in time_field.metadata + except AssertionError as e: + raise ValueError(f"Could not find 'units' in the 'time' column metadata for parquet {path=!r}.") from e + + attrs = {k.decode(): v.decode() for k, v in time_field.metadata.items()} + + df = pd.read_parquet(path) + if not decode_times: + return df + + values = table.column("time").to_numpy() + var = xr.Variable(("time",), values, attrs) + values = xr.coders.CFDatetimeCoder(time_unit="s").decode(var).values + + df["time"] = values + + return df From 54c829a7dd04584827b4395da33b61741c4d9dae Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:03:09 +0200 Subject: [PATCH 23/34] Refactor assert_cftime_like_particlefile --- tests/utils.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index f2d88919f..166710bec 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -160,22 +160,10 @@ def round_and_hash_float_array(arr, decimals=6): def assert_cftime_like_particlefile(parquet_path: Path) -> None: assert parquet_path.suffix == ".parquet", "Path must be a parquet file" - table = pq.read_table(parquet_path) - assert "time" in table.schema.names, "Parquet file must have a 'time' column" - - time_field = table.schema.field("time") - assert pa.types.is_floating(time_field.type) or 
pa.types.is_integer(time_field.type), ( - f"'time' column must be numeric, got {time_field.type}" - ) - - attrs = {k.decode(): v.decode() for k, v in time_field.metadata.items()} - - values = table.column("time").to_pylist() - v = xr.Variable(("time",), values, attrs) - decoded = xr.coders.CFDatetimeCoder(time_unit="s").decode(v) + df = read_particlefile(parquet_path, decode_times=True) # check first value (and hence rest of array) is what we expect - assert isinstance(decoded.values[0], (cftime.datetime, np.datetime64)), ( + assert isinstance(df["time"].values[0], (cftime.datetime, np.datetime64)), ( "CF-time values in Parquet did not get properly decoded. Are the attributes correct?" ) return @@ -193,6 +181,10 @@ def read_particlefile(path: Path, decode_times: bool = True) -> pd.DataFrame: f"Could not find 'time' column in parquet file. Are you sure {path=!r} is a particlefile?" ) from e + assert pa.types.is_floating(time_field.type) or pa.types.is_integer(time_field.type), ( + f"'time' column must be numeric, got {time_field.type}" + ) + try: assert b"units" in time_field.metadata except AssertionError as e: From 8626d486cbceaf2f79cc5c51b2941cba133268fc Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:37:45 +0200 Subject: [PATCH 24/34] Self-review feedback --- tests/test_particlefile.py | 6 +++--- tests/utils.py | 5 ++++- tests/utils/test_time.py | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index 0afc81f44..d8cd8fd1f 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -363,7 +363,7 @@ def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwarg output_file = ParticleFile(name, outputdt=outputdt) pset.execute(DoNothing, output_file=output_file, **execute_kwargs) - df = pd.read_parquet(name) + df = utils.read_particlefile(name) return df @@ -387,8 +387,8 @@ def 
test_pset_execute_output_time_forwards(fieldset): dt = np.timedelta64(5, "m") df = setup_pset_execute(fieldset=fieldset, outputdt=outputdt, execute_kwargs=dict(runtime=runtime, dt=dt)) - assert df.time.min() == 0.0 - assert df.time.max() == runtime / np.timedelta64(1, "s") + assert df.time.min() == pd.Timestamp(fieldset.time_interval.left) + assert df.time.max() - df.time.min() == runtime def test_pset_execute_outputdt_backwards(fieldset): diff --git a/tests/utils.py b/tests/utils.py index 166710bec..93214848d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -17,6 +17,7 @@ from parcels import FieldSet, Particle, Variable from parcels._core.xgrid import _FIELD_DATA_ORDERING, XGrid, get_axis_from_dim_name from parcels._datasets.structured.generated import simple_UV_dataset +from parcels._typing import PathLike PROJECT_ROOT = Path(__file__).resolve().parents[1] TEST_ROOT = PROJECT_ROOT / "tests" @@ -169,7 +170,9 @@ def assert_cftime_like_particlefile(parquet_path: Path) -> None: return -def read_particlefile(path: Path, decode_times: bool = True) -> pd.DataFrame: +def read_particlefile(path: PathLike, decode_times: bool = True) -> pd.DataFrame: + path = Path(path) + assert path.suffix == ".parquet", "Only Parquet files are supported" table = pq.read_table(path) diff --git a/tests/utils/test_time.py b/tests/utils/test_time.py index 2734b3e91..26cc39c1c 100644 --- a/tests/utils/test_time.py +++ b/tests/utils/test_time.py @@ -224,4 +224,5 @@ def test_timedelta_to_float_exceptions(): @given(datetime_strategy()) def test_datetime_get_cf_attrs(dt): - _get_cf_attrs(dt) + attrs = _get_cf_attrs(dt) + assert "seconds" in attrs["units"] From 3693329fcf6fcb0bc44a499d19cddbfbf1b15b8d Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:34:53 +0200 Subject: [PATCH 25/34] Fix test_particle_schema --- tests/test_particlefile.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git 
a/tests/test_particlefile.py b/tests/test_particlefile.py index d8cd8fd1f..fb5b34bf7 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -24,7 +24,7 @@ ) from parcels._core.particle import Particle, get_default_particle from parcels._core.particlefile import _get_schema -from parcels._core.utils.time import timedelta_to_float +from parcels._core.utils.time import TimeInterval, timedelta_to_float from parcels._datasets.structured.generated import peninsula_dataset from parcels._datasets.structured.generic import datasets from parcels.convert import copernicusmarine_to_sgrid @@ -496,7 +496,7 @@ def Update_lon(particles, fieldset): # pragma: no cover ], ) def test_particle_schema(particle): - s = _get_schema(particle, {}) + s = _get_schema(particle, {}, TimeInterval(datetime(2023, 1, 1, 12, 0), datetime(2023, 1, 2, 12, 0))) written_variables = [v for v in particle.variables if v.to_write] @@ -510,5 +510,9 @@ def test_particle_schema(particle): strict=False, ): assert variable.name == pyarrow_field.name - assert variable.attrs == {k.decode(): v.decode() for k, v in pyarrow_field.metadata.items()} + if variable.name != "time": + assert variable.attrs == {k.decode(): v.decode() for k, v in pyarrow_field.metadata.items()} + else: + assert b"units" in pyarrow_field.metadata + assert b"calendar" in pyarrow_field.metadata assert pa.from_numpy_dtype(variable.dtype) == pyarrow_field.type From 81f127bdb4d6d696b49d6b4e39752d77f46b6e20 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:49:08 +0200 Subject: [PATCH 26/34] Make read_particlefile public --- src/parcels/__init__.py | 3 ++- src/parcels/_core/particlefile.py | 40 ++++++++++++++++++++++++++++ tests/test_particlefile.py | 5 ++-- tests/utils.py | 44 +------------------------------ 4 files changed, 45 insertions(+), 47 deletions(-) diff --git a/src/parcels/__init__.py b/src/parcels/__init__.py index 2a7854cde..7ae1f6928 100644 --- 
a/src/parcels/__init__.py +++ b/src/parcels/__init__.py @@ -11,7 +11,7 @@ from parcels._core.fieldset import FieldSet from parcels._core.particleset import ParticleSet -from parcels._core.particlefile import ParticleFile +from parcels._core.particlefile import ParticleFile, read_particlefile from parcels._core.particle import ( Variable, Particle, @@ -67,6 +67,7 @@ "ParticleSetWarning", # Utilities "logger", + "read_particlefile", ] _stdlib_warnings.warn( diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index 439db2668..b43d4fad2 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -8,8 +8,10 @@ import cftime import numpy as np +import pandas as pd import pyarrow as pa import pyarrow.parquet as pq +import xarray as xr import parcels from parcels._core.particle import ParticleClass @@ -207,3 +209,41 @@ def _get_calendar_and_units(time_interval: TimeInterval) -> dict[str, str]: # T attrs["calendar"] = calendar return attrs + + +def read_particlefile(path: PathLike, decode_times: bool = True) -> pd.DataFrame: + path = Path(path) + + assert path.suffix == ".parquet", "Only Parquet files are supported" + + table = pq.read_table(path) + + try: + time_field = table.field("time") + except KeyError as e: + raise ValueError( + f"Could not find 'time' column in parquet file. Are you sure {path=!r} is a particlefile?" 
+ ) from e + + assert pa.types.is_floating(time_field.type) or pa.types.is_integer(time_field.type), ( + f"'time' column must be numeric, got {time_field.type}" + ) + + try: + assert b"units" in time_field.metadata + except AssertionError as e: + raise ValueError(f"Could not find 'units' in the 'time' column metadata for parquet {path=!r}.") from e + + attrs = {k.decode(): v.decode() for k, v in time_field.metadata.items()} + + df = pd.read_parquet(path) + if not decode_times: + return df + + values = table.column("time").to_numpy() + var = xr.Variable(("time",), values, attrs) + values = xr.coders.CFDatetimeCoder(time_unit="s").decode(var).values + + df["time"] = values + + return df diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index fb5b34bf7..4fac1a809 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -30,7 +30,6 @@ from parcels.convert import copernicusmarine_to_sgrid from parcels.interpolators import XLinear, XLinear_Velocity from parcels.kernels import AdvectionRK4 -from tests import utils from tests.common_kernels import DoNothing @@ -308,7 +307,7 @@ def IncreaseAge(particles, fieldset): # pragma: no cover pset.execute(IncreaseAge, runtime=np.timedelta64(npart * 2, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) # df = pd.read_parquet(tmp_parquet) - df = utils.read_particlefile(tmp_parquet) + df = parcels.read_particlefile(tmp_parquet) # Map sorted trajectory IDs to release times (0, 1, ..., npart-1 seconds) for index, df_traj in df.groupby("trajectory"): @@ -363,7 +362,7 @@ def setup_pset_execute(*, fieldset: FieldSet, outputdt: timedelta, execute_kwarg output_file = ParticleFile(name, outputdt=outputdt) pset.execute(DoNothing, output_file=output_file, **execute_kwargs) - df = utils.read_particlefile(name) + df = parcels.read_particlefile(name) return df diff --git a/tests/utils.py b/tests/utils.py index 93214848d..33d6e0012 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,16 +8,12 @@ import cftime 
import numpy as np -import pandas as pd -import pyarrow as pa -import pyarrow.parquet as pq import xarray as xr import parcels from parcels import FieldSet, Particle, Variable from parcels._core.xgrid import _FIELD_DATA_ORDERING, XGrid, get_axis_from_dim_name from parcels._datasets.structured.generated import simple_UV_dataset -from parcels._typing import PathLike PROJECT_ROOT = Path(__file__).resolve().parents[1] TEST_ROOT = PROJECT_ROOT / "tests" @@ -161,48 +157,10 @@ def round_and_hash_float_array(arr, decimals=6): def assert_cftime_like_particlefile(parquet_path: Path) -> None: assert parquet_path.suffix == ".parquet", "Path must be a parquet file" - df = read_particlefile(parquet_path, decode_times=True) + df = parcels.read_particlefile(parquet_path, decode_times=True) # check first value (and hence rest of array) is what we expect assert isinstance(df["time"].values[0], (cftime.datetime, np.datetime64)), ( "CF-time values in Parquet did not get properly decoded. Are the attributes correct?" ) return - - -def read_particlefile(path: PathLike, decode_times: bool = True) -> pd.DataFrame: - path = Path(path) - - assert path.suffix == ".parquet", "Only Parquet files are supported" - - table = pq.read_table(path) - - try: - time_field = table.field("time") - except KeyError as e: - raise ValueError( - f"Could not find 'time' column in parquet file. Are you sure {path=!r} is a particlefile?" 
- ) from e - - assert pa.types.is_floating(time_field.type) or pa.types.is_integer(time_field.type), ( - f"'time' column must be numeric, got {time_field.type}" - ) - - try: - assert b"units" in time_field.metadata - except AssertionError as e: - raise ValueError(f"Could not find 'units' in the 'time' column metadata for parquet {path=!r}.") from e - - attrs = {k.decode(): v.decode() for k, v in time_field.metadata.items()} - - df = pd.read_parquet(path) - if not decode_times: - return df - - values = table.column("time").to_numpy() - var = xr.Variable(("time",), values, attrs) - values = xr.coders.CFDatetimeCoder(time_unit="s").decode(var).values - - df["time"] = values - - return df From 9fcb5bfbc8f89534d4191d17b332929a14034942 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:52:55 +0200 Subject: [PATCH 27/34] Add docstring to read_particlefile --- src/parcels/_core/particlefile.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index b43d4fad2..e5b7ee5af 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -212,6 +212,31 @@ def _get_calendar_and_units(time_interval: TimeInterval) -> dict[str, str]: # T def read_particlefile(path: PathLike, decode_times: bool = True) -> pd.DataFrame: + """Read a Parcels particlefile (Parquet format) into a pandas DataFrame. + + Parameters + ---------- + path : PathLike + Path to the ``.parquet`` particlefile. + decode_times : bool, optional + If ``True`` (default), use Xarray to decode the numeric ``time`` column from CF + conventions into ``datetime`` or ``cftime.datetime`` values using the units stored in + the column metadata. If ``False``, the raw numeric values are + returned unchanged. + + Returns + ------- + pd.DataFrame + DataFrame containing the particle data. 
When *decode_times* is + ``True``, the ``time`` column contains datetime-like values; + otherwise it contains the original numeric representation. + + Notes + ----- + For larger datasets, consider using `Polars `_ directly, + e.g. ``polars.read_parquet(path)``, which offers better performance and lower + memory usage than pandas for large Parquet files. + """ path = Path(path) assert path.suffix == ".parquet", "Only Parquet files are supported" From 04d8676f113bea63dd85a2229b9f2e5574fbc852 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:30:17 +0200 Subject: [PATCH 28/34] Review feedback --- src/parcels/_core/particlefile.py | 35 +++--------------------------- tests-v3/test_advection.py | 7 +++--- tests-v3/test_fieldset_sampling.py | 7 +++--- tests-v3/test_particlesets.py | 6 ++--- tests/test_particlefile.py | 1 - 5 files changed, 12 insertions(+), 44 deletions(-) diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index e5b7ee5af..58483572d 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -2,11 +2,10 @@ from __future__ import annotations -from datetime import datetime, timedelta +from datetime import timedelta from pathlib import Path from typing import TYPE_CHECKING, Any, Literal -import cftime import numpy as np import pandas as pd import pyarrow as pa @@ -52,10 +51,8 @@ class ParticleFile: Parameters ---------- - name : str - Basename of the output file. This can also be a Zarr store object. - particleset : - ParticleSet to output + path : PathLike + Path of the output Parquet file. outputdt : Interval which dictates the update frequency of file output while ParticleFile is given as an argument of ParticleSet.execute() @@ -94,12 +91,8 @@ def __init__(self, path: PathLike, outputdt): if not path.parent.exists(): raise ValueError(f"Folder location for {path=!r} does not exist. 
Create the folder location first.") - self._maxids = 0 - self._pids_written = {} self.extra_metadata = {} - # TODO v4: Add check that if create_new_zarrfile is False, the store already exists - def __repr__(self) -> str: return particlefile_repr(self) @@ -155,9 +148,6 @@ def write(self, pset: ParticleSet, time, indices=None): pa.table({v.name: pa.array(particle_data[v.name][indices_to_write]) for v in vars_to_write}), ) - # if len(indices_to_write) == 0: # TODO: Remove this? - # return - def close(self): if self._writer is not None: self._writer.close() @@ -192,25 +182,6 @@ def _to_write_particles(particle_data, time): )[0] -def _get_calendar_and_units(time_interval: TimeInterval) -> dict[str, str]: # TODO: Remove? - calendar = None - units = "seconds" - if time_interval: - if isinstance(time_interval.left, (np.datetime64, datetime)): - calendar = "standard" - elif isinstance(time_interval.left, cftime.datetime): - calendar = time_interval.left.calendar - - if calendar is not None: - units += f" since {time_interval.left}" - - attrs = {"units": units} - if calendar is not None: - attrs["calendar"] = calendar - - return attrs - - def read_particlefile(path: PathLike, decode_times: bool = True) -> pd.DataFrame: """Read a Parcels particlefile (Parquet format) into a pandas DataFrame. 
diff --git a/tests-v3/test_advection.py b/tests-v3/test_advection.py index bdd7a4221..3d8f06bac 100644 --- a/tests-v3/test_advection.py +++ b/tests-v3/test_advection.py @@ -1,6 +1,5 @@ import numpy as np import pytest -import pandas as pd import xarray as xr from parcels import ( @@ -80,7 +79,7 @@ def test_analyticalAgrid(): @pytest.mark.parametrize("v", [1, -0.3, 0, -1]) @pytest.mark.parametrize("w", [None, 1, -0.3, 0, -1]) @pytest.mark.parametrize("direction", [1, -1]) -def test_uniform_analytical(u, v, w, direction, tmp_parquet): +def test_uniform_analytical(u, v, w, direction, tmp_zarrfile): lon = np.arange(0, 15, dtype=np.float32) lat = np.arange(0, 15, dtype=np.float32) if w is not None: @@ -100,14 +99,14 @@ def test_uniform_analytical(u, v, w, direction, tmp_parquet): x0, y0, z0 = 6.1, 6.2, 20 pset = ParticleSet(fieldset, pclass=Particle, lon=x0, lat=y0, depth=z0) - outfile = pset.ParticleFile(name=tmp_parquet, outputdt=1, chunks=(1, 1)) + outfile = pset.ParticleFile(name=tmp_zarrfile, outputdt=1, chunks=(1, 1)) pset.execute(AdvectionAnalytical, runtime=4, dt=direction, output_file=outfile) assert np.abs(pset.lon - x0 - pset.time * u) < 1e-6 assert np.abs(pset.lat - y0 - pset.time * v) < 1e-6 if w is not None: assert np.abs(pset.depth - z0 - pset.time * w) < 1e-4 - ds = xr.open_zarr(tmp_parquet) + ds = xr.open_zarr(tmp_zarrfile) times = (direction * ds["time"][:]).values.astype("timedelta64[s]")[0] timeref = np.arange(1, 5).astype("timedelta64[s]") assert np.allclose(times, timeref, atol=np.timedelta64(1, "ms")) diff --git a/tests-v3/test_fieldset_sampling.py b/tests-v3/test_fieldset_sampling.py index 176eedab1..291c27b88 100644 --- a/tests-v3/test_fieldset_sampling.py +++ b/tests-v3/test_fieldset_sampling.py @@ -3,7 +3,6 @@ from math import cos, pi import numpy as np -import pandas as pd import pytest import xarray as xr @@ -774,7 +773,7 @@ def test_multiple_grid_addlater_error(): assert fail -def test_fieldset_sampling_updating_order(tmp_parquet): +def 
test_fieldset_sampling_updating_order(tmp_zarrfile): def calc_p(t, y, x): return 10 * t + x + 0.2 * y @@ -806,10 +805,10 @@ def SampleP(particle, fieldset, time): # pragma: no cover kernels = [AdvectionRK4, SampleP] - pfile = pset.ParticleFile(tmp_parquet, outputdt=1) + pfile = pset.ParticleFile(tmp_zarrfile, outputdt=1) pset.execute(kernels, endtime=1, dt=1, output_file=pfile) - ds = xr.open_zarr(tmp_parquet) + ds = xr.open_zarr(tmp_zarrfile) for t in range(len(ds["obs"])): for i in range(len(ds["trajectory"])): assert np.isclose( diff --git a/tests-v3/test_particlesets.py b/tests-v3/test_particlesets.py index 5c0f2495f..ed884f595 100644 --- a/tests-v3/test_particlesets.py +++ b/tests-v3/test_particlesets.py @@ -39,7 +39,7 @@ def test_pset_create_list_with_customvariable(fieldset): @pytest.mark.parametrize("restart", [True, False]) -def test_pset_create_fromparticlefile(fieldset, restart, tmp_parquet): +def test_pset_create_fromparticlefile(fieldset, restart, tmp_zarrfile): lon = np.linspace(0, 1, 10, dtype=np.float32) lat = np.linspace(1, 0, 10, dtype=np.float32) @@ -48,7 +48,7 @@ def test_pset_create_fromparticlefile(fieldset, restart, tmp_parquet): TestParticle = TestParticle.add_variable("p3", np.float64, to_write="once") pset = ParticleSet(fieldset, lon=lon, lat=lat, depth=[4] * len(lon), pclass=TestParticle, p3=np.arange(len(lon))) - pfile = pset.ParticleFile(tmp_parquet, outputdt=1) + pfile = pset.ParticleFile(tmp_zarrfile, outputdt=1) def Kernel(particle, fieldset, time): # pragma: no cover particle.p = 2.0 @@ -58,7 +58,7 @@ def Kernel(particle, fieldset, time): # pragma: no cover pset.execute(Kernel, runtime=2, dt=1, output_file=pfile) pset_new = ParticleSet.from_particlefile( - fieldset, pclass=TestParticle, filename=tmp_parquet, restart=restart, repeatdt=1 + fieldset, pclass=TestParticle, filename=tmp_zarrfile, restart=restart, repeatdt=1 ) for var in ["lon", "lat", "depth", "time", "p", "p2", "p3"]: diff --git a/tests/test_particlefile.py 
b/tests/test_particlefile.py index 4fac1a809..d53ff6aca 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -306,7 +306,6 @@ def IncreaseAge(particles, fieldset): # pragma: no cover pset.execute(IncreaseAge, runtime=np.timedelta64(npart * 2, "s"), dt=np.timedelta64(1, "s"), output_file=ofile) - # df = pd.read_parquet(tmp_parquet) df = parcels.read_particlefile(tmp_parquet) # Map sorted trajectory IDs to release times (0, 1, ..., npart-1 seconds) From b4e221412749cf0d3532bbd6383a51c02fec63ff Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:54:48 +0200 Subject: [PATCH 29/34] Remove obs_written --- src/parcels/_core/particle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parcels/_core/particle.py b/src/parcels/_core/particle.py index 5152b1130..15c55a519 100644 --- a/src/parcels/_core/particle.py +++ b/src/parcels/_core/particle.py @@ -163,7 +163,6 @@ def get_default_particle(spatial_dtype: type[np.float32] | type[np.float64]) -> "cf_role": "trajectory_id", }, ), - Variable("obs_written", dtype=np.int32, initial=0, to_write=False), Variable("dt", dtype=np.float64, initial=1.0, to_write=False), Variable("state", dtype=np.int32, initial=StatusCode.Evaluate, to_write=False), ] From 663f80ca14d8297443f7289eb2456aec316c38d8 Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:56:16 +0200 Subject: [PATCH 30/34] Update migration guide --- docs/user_guide/v4-migration.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/v4-migration.md b/docs/user_guide/v4-migration.md index 7e33be83c..655f1e9c1 100644 --- a/docs/user_guide/v4-migration.md +++ b/docs/user_guide/v4-migration.md @@ -36,7 +36,10 @@ Version 4 of Parcels is unreleased at the moment. 
The information in this migrat ## ParticleFile - Particlefiles should be created by `ParticleFile(...)` instead of `pset.ParticleFile(...)` -- The `name` argument in `ParticleFile` has been replaced by `store` and can now be a string, a Path or a zarr store. +- `ParticleFile` output is now in Parquet format +- `ParticleFile` writing behaviour now errors out if there's existing output (this is being further discussed in https://github.com/Parcels-code/Parcels/issues/2593 ) +- A utility to read in ParticleFile output is now available. `parcels.read_particlefile()` + ## Field From 5be22aa1adf4fb7386dd101bb5708fcddd1fbe94 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 24 Apr 2026 09:15:56 +0000 Subject: [PATCH 31/34] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/user_guide/v4-migration.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/user_guide/v4-migration.md b/docs/user_guide/v4-migration.md index 655f1e9c1..0e06bbc7b 100644 --- a/docs/user_guide/v4-migration.md +++ b/docs/user_guide/v4-migration.md @@ -40,7 +40,6 @@ Version 4 of Parcels is unreleased at the moment. The information in this migrat - `ParticleFile` writing behaviour now errors out if there's existing output (this is being further discussed in https://github.com/Parcels-code/Parcels/issues/2593 ) - A utility to read in ParticleFile output is now available.
`parcels.read_particlefile()` - ## Field - `Field.eval()` returns an array of floats instead of a single float (related to the vectorization) From 002b8f2a97e73a7baa059b01cabdc6dd719e8a9a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:18:52 +0200 Subject: [PATCH 32/34] Revert from extra_metadata to metadata --- src/parcels/_core/particlefile.py | 8 +++----- src/parcels/_core/particleset.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index 58483572d..cb0bf4050 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -91,13 +91,13 @@ def __init__(self, path: PathLike, outputdt): if not path.parent.exists(): raise ValueError(f"Folder location for {path=!r} does not exist. Create the folder location first.") - self.extra_metadata = {} + self.metadata = {} def __repr__(self) -> str: return particlefile_repr(self) def set_metadata(self, parcels_grid_mesh: Literal["spherical", "flat"]): - self.extra_metadata.update( + self.metadata.update( { "feature_type": "trajectory", "Conventions": "CF-1.6/CF-1.7", @@ -132,9 +132,7 @@ def write(self, pset: ParticleSet, time, indices=None): if self._writer is None: assert not self.path.exists(), "If the file exists, the writer should already be set" - self._writer = pq.ParquetWriter( - self.path, _get_schema(pclass, self.extra_metadata, pset.fieldset.time_interval) - ) + self._writer = pq.ParquetWriter(self.path, _get_schema(pclass, self.metadata, pset.fieldset.time_interval)) if isinstance(time, (np.timedelta64, np.datetime64)): time = timedelta_to_float(time - time_interval.left) diff --git a/src/parcels/_core/particleset.py b/src/parcels/_core/particleset.py index e59e2860b..b25c269a6 100644 --- a/src/parcels/_core/particleset.py +++ b/src/parcels/_core/particleset.py @@ -394,7 +394,7 @@ def execute( if output_file is not None: 
output_file.set_metadata(self.fieldset.gridset[0]._mesh) - output_file.extra_metadata["parcels_kernels"] = self._kernel.funcname + output_file.metadata["parcels_kernels"] = self._kernel.funcname dt, sign_dt = _convert_dt_to_float(dt) self._data["dt"][:] = dt From 3c5264798044135b7f603a0a1b8a1ff529fc7f7d Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 24 Apr 2026 11:28:20 +0200 Subject: [PATCH 33/34] Fix test_pfile_array_remove_particles --- tests/test_particlefile.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/test_particlefile.py b/tests/test_particlefile.py index d53ff6aca..d782fb171 100755 --- a/tests/test_particlefile.py +++ b/tests/test_particlefile.py @@ -1,4 +1,3 @@ -import os import tempfile from contextlib import nullcontext as does_not_raise from datetime import datetime, timedelta @@ -74,7 +73,6 @@ def test_write_fieldset_without_time(tmp_parquet): assert table["time"].to_numpy()[1] == 1.0 -@pytest.mark.skip("Keep or remove? 
Introduced in 5d7dd6bba800baa0fe4bd38edfc17ca3e310062b ") def test_pfile_array_remove_particles(fieldset, tmp_parquet): """If a particle from the middle of a particleset is removed, that writing doesn't crash""" npart = 10 @@ -92,9 +90,7 @@ def test_pfile_array_remove_particles(fieldset, tmp_parquet): new_time = 86400 # s in a day pset._data["time"][:] = new_time pfile.write(pset, new_time) - ds = xr.open_zarr(tmp_parquet) - timearr = ds["time"][:] - assert (np.isnat(timearr[3, 1])) and (np.isfinite(timearr[3, 0])) + pfile.close() def test_pfile_array_remove_all_particles(fieldset, tmp_parquet): @@ -189,8 +185,6 @@ def IncrLon(particles, fieldset): # pragma: no cover # test whether samplevar[:, k] = k for k in range(samplevar.shape[1]): assert np.allclose([p for p in samplevar[:, k] if np.isfinite(p)], k + 1) - filesize = os.path.getsize(str(tmp_parquet)) - assert filesize < 1024 * 65 # test that chunking leads to filesize less than 65KB def test_file_warnings(fieldset, tmp_parquet): From ae375b372a7d332a67dbbfe132174c022a4a973a Mon Sep 17 00:00:00 2001 From: Vecko <36369090+VeckoTheGecko@users.noreply.github.com> Date: Fri, 24 Apr 2026 13:44:24 +0200 Subject: [PATCH 34/34] Fix numpy warning https://github.com/Parcels-code/Parcels/pull/2583#discussion_r3131150923 --- src/parcels/_core/particlefile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parcels/_core/particlefile.py b/src/parcels/_core/particlefile.py index cb0bf4050..90384f356 100644 --- a/src/parcels/_core/particlefile.py +++ b/src/parcels/_core/particlefile.py @@ -164,15 +164,17 @@ def _to_write_particles(particle_data, time): time - np.abs(particle_data["dt"] / 2), particle_data["time"], where=np.isfinite(particle_data["time"]), + out=None, ) & np.greater_equal( time + np.abs(particle_data["dt"] / 2), particle_data["time"], where=np.isfinite(particle_data["time"]), + out=None, ) # check time - dt/2 <= particle_data["time"] <= time + dt/2 | ( (np.isnan(particle_data["dt"])) - 
& np.equal(time, particle_data["time"], where=np.isfinite(particle_data["time"])) + & np.equal(time, particle_data["time"], where=np.isfinite(particle_data["time"]), out=None) ) # or dt is NaN and time matches particle_data["time"] ) & (np.isfinite(particle_data["trajectory"]))