Skip to content

Commit 439ea29

Browse files
Base implementation of Parquet file writing (#2583)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent eb0ab4b commit 439ea29

20 files changed

Lines changed: 389 additions & 422 deletions

.github/ci/recipe.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ requirements:
3636
- numpy >=2.1.0
3737
- tqdm >=4.50.0
3838
- xarray >=2025.8.0,<2026.4.0 # TODO: remove upper pin when https://github.com/UXARRAY/uxarray/issues/1490 is resolved
39+
- pandas >=2.2
40+
- pyarrow >=20.0.0
3941
- cf_xarray >=0.8.6
4042
- xgcm >=0.9.0
4143
- zarr >=2.15.0,!=2.18.0,<3

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ out-*
1414
*.pyc
1515
**/*.zarr/*
1616
.DS_Store
17+
*.parquet
1718

1819
.vscode
1920
.env

docs/user_guide/v4-migration.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ Version 4 of Parcels is unreleased at the moment. The information in this migrat
3636
## ParticleFile
3737

3838
- Particlefiles should be created by `ParticleFile(...)` instead of `pset.ParticleFile(...)`
39-
- The `name` argument in `ParticleFile` has been replaced by `store` and can now be a string, a Path or a zarr store.
39+
- `ParticleFile` output is now in Parquet format
40+
- `ParticleFile` writing behaviour now errors out if there's existing output (this is being further discussed in https://github.com/Parcels-code/Parcels/issues/2593 )
41+
- A utility to read in ParticleFile output is now available: `parcels.read_particlefile()`
4042

4143
## Field
4244

pixi.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ netcdf4 = ">=1.6.0"
2424
numpy = ">=2.1.0"
2525
tqdm = ">=4.50.0"
2626
xarray = ">=2024.5.0,<2026.4.0" # TODO: remove upper pin when https://github.com/UXARRAY/uxarray/issues/1490 is resolved
27+
pandas = ">=2.2"
28+
pyarrow = ">=20.0.0"
2729
holoviews = ">=1.22.0" # https://github.com/prefix-dev/rattler-build/issues/2326
2830
uxarray = ">=2025.3.0"
2931
dask = ">=2024.5.1"
@@ -51,6 +53,8 @@ netcdf4 = "1.6.*"
5153
numpy = "2.1.*"
5254
tqdm = "4.50.*"
5355
xarray = "2025.8.*"
56+
pandas = "2.2.*"
57+
pyarrow = "20.0.*"
5458
uxarray = "2025.3.*"
5559
dask = "2024.6.*"
5660
zarr = "2.18.*"

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ dependencies = [
2727
"zarr >=2.15.0,!=2.18.0,<3",
2828
"tqdm >=4.50.0",
2929
"xarray >=2024.5.0,<2026.4.0", # TODO: remove upper pin when https://github.com/UXARRAY/uxarray/issues/1490 is resolved
30+
"pandas >= 2.2",
31+
"pyarrow >=20.0.0",
3032
"uxarray >=2025.3.0",
3133
"pooch >=1.8.0",
3234
"xgcm >=0.9.0",

src/parcels/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from parcels._core.fieldset import FieldSet
1313
from parcels._core.particleset import ParticleSet
14-
from parcels._core.particlefile import ParticleFile
14+
from parcels._core.particlefile import ParticleFile, read_particlefile
1515
from parcels._core.particle import (
1616
Variable,
1717
Particle,
@@ -67,6 +67,7 @@
6767
"ParticleSetWarning",
6868
# Utilities
6969
"logger",
70+
"read_particlefile",
7071
]
7172

7273
_stdlib_warnings.warn(

src/parcels/_core/particle.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from parcels._compat import _attrgetter_helper
99
from parcels._core.statuscodes import StatusCode
1010
from parcels._core.utils.string import _assert_str_and_python_varname
11-
from parcels._core.utils.time import TimeInterval
1211
from parcels._reprs import particleclass_repr, variable_repr
1312

1413
__all__ = ["Particle", "ParticleClass", "Variable"]
@@ -149,7 +148,11 @@ def get_default_particle(spatial_dtype: type[np.float32] | type[np.float64]) ->
149148
Variable(
150149
"time",
151150
dtype=np.float64,
152-
attrs={"standard_name": "time", "units": "seconds", "axis": "T"},
151+
attrs={
152+
"standard_name": "time",
153+
"units": "seconds",
154+
"axis": "T",
155+
}, # "units" and "calendar" get updated/set when working with a cftime time domain
153156
),
154157
Variable(
155158
"trajectory",
@@ -160,7 +163,6 @@ def get_default_particle(spatial_dtype: type[np.float32] | type[np.float64]) ->
160163
"cf_role": "trajectory_id",
161164
},
162165
),
163-
Variable("obs_written", dtype=np.int32, initial=0, to_write=False),
164166
Variable("dt", dtype=np.float64, initial=1.0, to_write=False),
165167
Variable("state", dtype=np.int32, initial=StatusCode.Evaluate, to_write=False),
166168
]
@@ -176,7 +178,6 @@ def create_particle_data(
176178
pclass: ParticleClass,
177179
nparticles: int,
178180
ngrids: int,
179-
time_interval: TimeInterval,
180181
initial: dict[str, np.ndarray] | None = None,
181182
):
182183
if initial is None:
@@ -207,16 +208,9 @@ def create_particle_data(
207208
name_to_copy = var.initial(_attrgetter_helper)
208209
data[var.name] = data[name_to_copy].copy()
209210
else:
210-
data[var.name] = _create_array_for_variable(var, nparticles, time_interval)
211+
data[var.name] = np.full(
212+
shape=(nparticles,),
213+
fill_value=var.initial,
214+
dtype=var.dtype,
215+
)
211216
return data
212-
213-
214-
def _create_array_for_variable(variable: Variable, nparticles: int, time_interval: TimeInterval):
215-
assert not isinstance(variable.initial, operator.attrgetter), (
216-
"This function cannot handle attrgetter initial values."
217-
)
218-
return np.full(
219-
shape=(nparticles,),
220-
fill_value=variable.initial,
221-
dtype=variable.dtype,
222-
)

0 commit comments

Comments
 (0)