Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .binder/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ channels:
- conda-forge
dependencies:
- parcels
- pooch
Comment thread
VeckoTheGecko marked this conversation as resolved.
- trajan
1 change: 0 additions & 1 deletion .github/ci/min-core-deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ dependencies:
# (see https://github.com/Unidata/netcdf4-python/issues/1090)
- netcdf4=1.6
- numpy=1.23
- platformdirs=2.5
- psutil=5.9
- pymbolic=2022.1
- pytest=7.1
Expand Down
2 changes: 1 addition & 1 deletion docs/v4/TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
List of tasks that are important to do before the release of version 4 (but can't be done now via code changes in `v4-dev`).

- [ ] Make migration guide for v3 to v4
- [ ] Just prior to release: Update conda feedstock recipe dependencies (remove cgen and compiler dependencies). Make sure that recipe is up-to-date.
- [ ] Just prior to release: Update conda feedstock recipe dependencies (remove cgen and compiler dependencies, add pooch as dependency and remove platformdirs). Make sure that recipe is up-to-date.
- [ ] Revamp the oceanparcels.org landing page, and perhaps also consider new logo/branding?
- [ ] Rerun all the tutorials so that their output is in line with new v4 print statements etc
- Documentation
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ dependencies: #! Keep in sync with [tool.pixi.dependencies] in pyproject.toml
- matplotlib-base>=2.0.2
- netcdf4>=1.1.9
- numpy>=1.9.1
- platformdirs
- psutil
- pymbolic
- scipy>=0.16.0
Expand All @@ -18,6 +17,7 @@ dependencies: #! Keep in sync with [tool.pixi.dependencies] in pyproject.toml
- dask>=2.0
- scikit-learn
- zarr>=2.11.0,!=2.18.0,<3
- pooch

# Notebooks
- trajan
Expand Down
97 changes: 63 additions & 34 deletions parcels/tools/exampledata_utils.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
import os
from datetime import datetime, timedelta
from pathlib import Path
from urllib.request import urlretrieve

import platformdirs
import pooch
import xarray as xr

from parcels.tools._v3to4 import patch_dataset_v4_compat

__all__ = ["download_example_dataset", "get_data_home", "list_example_datasets"]

example_data_files = {
__all__ = ["download_example_dataset", "list_example_datasets"]

# When modifying existing datasets in a backwards incompatible way,
# make a new release in the repo and update the DATA_REPO_TAG to the new tag
DATA_REPO_TAG = "main"

DATA_URL = f"https://github.com/OceanParcels/parcels-data/raw/{DATA_REPO_TAG}/data"

# Keys are the dataset names. Values are the filenames in the dataset folder. Note that
# you can place files in subfolders of the dataset folder by putting slashes in the filenames.
# e.g.,
# "my_dataset": ["file0.nc", "folder1/file1.nc", "folder2/file2.nc"]
# my_dataset/
# ├── file0.nc
# ├── folder1/
# │ └── file1.nc
# └── folder2/
# └── file2.nc
#
# See instructions at https://github.com/OceanParcels/parcels-data for adding new datasets
Comment on lines +18 to +29
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fluidnumerics-joe are these instructions clear? Do you think you can add your datasets now?

EXAMPLE_DATA_FILES: dict[str, list[str]] = {
"MovingEddies_data": [
"moving_eddiesP.nc",
"moving_eddiesU.nc",
Expand Down Expand Up @@ -79,24 +96,32 @@
}


example_data_url = "http://oceanparcels.org/examples-data"
def _create_pooch_registry() -> dict[str, None]:
    """Build the flat pooch registry from ``EXAMPLE_DATA_FILES``.

    Every file is registered under the key ``"<dataset>/<filename>"``.
    All values are ``None``, i.e. no hash is recorded for any file.
    """
    return {
        f"{dataset_name}/{file_name}": None
        for dataset_name, file_names in EXAMPLE_DATA_FILES.items()
        for file_name in file_names
    }

def get_data_home(data_home=None):
"""Return a path to the cache directory for example datasets.

This directory is used by :func:`load_dataset`.
POOCH_REGISTRY = _create_pooch_registry()

If the ``data_home`` argument is not provided, it will use a directory
specified by the ``PARCELS_EXAMPLE_DATA`` environment variable (if it exists)
or otherwise default to an OS-appropriate user cache location.
"""

def _get_pooch(data_home=None):
if data_home is None:
data_home = os.environ.get("PARCELS_EXAMPLE_DATA")
if data_home is None:
Comment thread
erikvansebille marked this conversation as resolved.
data_home = os.environ.get("PARCELS_EXAMPLE_DATA", platformdirs.user_cache_dir("parcels"))
data_home = os.path.expanduser(data_home)
if not os.path.exists(data_home):
os.makedirs(data_home)
return data_home
data_home = pooch.os_cache("parcels")

return pooch.create(
path=data_home,
base_url=DATA_URL,
registry=POOCH_REGISTRY,
)


def list_example_datasets() -> list[str]:
Expand All @@ -109,7 +134,7 @@
datasets : list of str
The names of the available example datasets.
"""
return list(example_data_files.keys())
return list(EXAMPLE_DATA_FILES.keys())


def download_example_dataset(dataset: str, data_home=None):
Expand All @@ -133,26 +158,30 @@
Path to the folder containing the downloaded dataset files.
"""
# Dev note: `dataset` is assumed to be a folder name with netcdf files
if dataset not in example_data_files:
if dataset not in EXAMPLE_DATA_FILES:
raise ValueError(
f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(example_data_files.keys())
f"Dataset {dataset!r} not found. Available datasets are: " + ", ".join(EXAMPLE_DATA_FILES.keys())
)
odie = _get_pooch(data_home=data_home)

cache_folder = get_data_home(data_home)
dataset_folder = Path(cache_folder) / dataset
cache_folder = Path(odie.path)
dataset_folder = cache_folder / dataset

if not dataset_folder.exists():
dataset_folder.mkdir(parents=True)
for file_name in odie.registry:
if file_name.startswith(dataset):
should_patch = dataset == "GlobCurrent_example_data"
Comment thread
erikvansebille marked this conversation as resolved.
odie.fetch(file_name, processor=v4_compat_patch if should_patch else None)

for filename in example_data_files[dataset]:
filepath = dataset_folder / filename
if not filepath.exists():
url = f"{example_data_url}/{dataset}/{filename}"
urlretrieve(url, str(filepath))
return dataset_folder

should_patch = dataset == "GlobCurrent_example_data"

if should_patch:
xr.load_dataset(filepath).pipe(patch_dataset_v4_compat).to_netcdf(filepath)
def v4_compat_patch(fname, action, pup):
    """
    Patch the GlobCurrent example dataset to be compatible with v4.

    Written as a pooch processor. When ``action`` is ``"fetch"`` the file is
    returned untouched; for any other action the dataset is loaded with xarray,
    passed through ``patch_dataset_v4_compat`` and written back to the same path.
    ``pup`` is accepted to match the processor call signature but is not used.

    See https://www.fatiando.org/pooch/latest/processors.html#creating-your-own-processors

    Returns
    -------
    fname
        The same path that was passed in.
    """
    needs_patch = action != "fetch"
    if needs_patch:
        # load_dataset reads the data into memory, so the file can be overwritten in place.
        patched = xr.load_dataset(fname).pipe(patch_dataset_v4_compat)
        patched.to_netcdf(fname)
    return fname

Check warning on line 187 in parcels/tools/exampledata_utils.py

View check run for this annotation

Codecov / codecov/patch

parcels/tools/exampledata_utils.py#L184-L187

Added lines #L184 - L187 were not covered by tests
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies = [
"pytest",
"scipy",
"xarray",
"pooch",
]

[project.urls]
Expand Down Expand Up @@ -63,7 +64,6 @@ jupyter = "*"
matplotlib-base = ">=2.0.2"
netcdf4 = ">=1.1.9"
numpy = ">=1.9.1"
platformdirs = "*"
psutil = "*"
pymbolic = "*"
scipy = ">=0.16.0"
Expand Down Expand Up @@ -222,5 +222,6 @@ module = [
"cftime",
"pykdtree.kdtree",
"netCDF4",
"pooch",
]
ignore_missing_imports = true
28 changes: 7 additions & 21 deletions tests/tools/test_exampledata_utils.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,21 @@
from pathlib import Path

import pytest
import requests

from parcels.tools.exampledata_utils import (
_get_pooch,
download_example_dataset,
list_example_datasets,
)


@pytest.fixture
def mock_download(monkeypatch):
    """Replace ``urlretrieve`` with a stub that skips the real download.

    The stub issues a HEAD request for the URL, raises if the status code is
    in the 4xx/5xx range, and otherwise creates an empty file at the target path.
    """

    def _head_then_touch(url, filename):
        response = requests.head(url)
        reachable = not (400 <= response.status_code < 600)
        if not reachable:
            raise Exception(f"Failed to access URL: {url}. Status code: {response.status_code}")
        # Stand-in for the downloaded file: an empty placeholder is enough for the tests.
        Path(filename).touch()

    monkeypatch.setattr("parcels.tools.exampledata_utils.urlretrieve", _head_then_touch)

@pytest.mark.parametrize("url", [_get_pooch().get_url(filename) for filename in _get_pooch().registry.keys()])
def test_pooch_registry_url_reponse(url):
    """Check that a registry file URL does not answer with a 4xx/5xx status.

    Only a HEAD request is sent, so no file content is downloaded.
    """
    # ``requests.head`` does not follow redirects by default, so a 3xx answer
    # would pass the check below without the final resource ever being looked
    # at. Follow redirects so the status tested is that of the real target.
    # The timeout makes an unresponsive server fail the test instead of
    # hanging the test run indefinitely.
    response = requests.head(url, allow_redirects=True, timeout=30)
    assert not (400 <= response.status_code < 600), (
        f"Failed to access URL: {url}. Status code: {response.status_code}"
    )

@pytest.mark.usefixtures("mock_download")
@pytest.mark.parametrize("dataset", list_example_datasets())
def test_download_example_dataset(tmp_path, dataset):
if dataset == "GlobCurrent_example_data":
pytest.skip(f"{dataset} too time consuming.")

@pytest.mark.parametrize("dataset", list_example_datasets()[:1])
Comment thread
erikvansebille marked this conversation as resolved.
def test_download_example_dataset_folder_creation(tmp_path, dataset):
dataset_folder_path = download_example_dataset(dataset, data_home=tmp_path)

assert dataset_folder_path.exists()
Expand Down
Loading