Skip to content

Commit 2370d59

Browse files
authored
refactor(models): introduce model registry classes (#210)
Introduce a ModelRegistry base class with LocalRegistry and PoochRegistry variants. Addresses an open topic from #134: One question is whether the last two use cases can be merged somehow, i.e. how to give the models API the ability to access local models. It is often necessary to modify a local model during development, and it is inconvenient to have to switch between the models API and the fixtures for this. The aim here is for an mf6 developer to be able to transparently swap in a local registry in order to access local models, where otherwise the "official" registry would be the (user- and developer-facing) default. This changeset should not break any existing usages of the preexisting model registry, fingers crossed.
1 parent 7116292 commit 2370d59

4 files changed

Lines changed: 520 additions & 254 deletions

File tree

autotest/test_models.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,23 @@
99

1010
TAKE = 5 if is_in_ci() else None
1111
PROJ_ROOT = Path(__file__).parents[1]
12-
MODELMAP_PATH = PROJ_ROOT / "modflow_devtools" / "registry" / models.MODELMAP_FILE_NAME
13-
with MODELMAP_PATH.open("rb") as f:
14-
MODELMAP = tomli.load(f)
12+
MODELS_PATH = PROJ_ROOT / "modflow_devtools" / "registry" / "models.toml"
13+
MODELS = tomli.load(MODELS_PATH.open("rb"))
14+
REGISTRY = models.DEFAULT_REGISTRY
1515

1616

17-
def test_registry():
18-
registry = models.get_registry()
19-
assert registry is not None, "Registry was not loaded"
20-
assert registry is models.POOCH.registry
21-
assert any(registry), "Registry is empty"
17+
def test_files():
18+
files = models.get_files()
19+
assert files is not None, "Files not loaded"
20+
assert files is REGISTRY.files
21+
assert any(files), "Registry is empty"
2222

2323

24-
@pytest.mark.parametrize(
25-
"model_name, files", MODELMAP.items(), ids=list(MODELMAP.keys())
26-
)
24+
@pytest.mark.parametrize("model_name, files", MODELS.items(), ids=list(MODELS.keys()))
2725
def test_models(model_name, files):
2826
model_names = list(models.get_models().keys())
2927
assert model_name in model_names, f"Model {model_name} not found in model map"
30-
assert files == models.MODELMAP[model_name], (
28+
assert files == REGISTRY.models[model_name], (
3129
f"Files for model {model_name} do not match"
3230
)
3331
if "mf6" in model_name:
@@ -39,16 +37,16 @@ def test_models(model_name, files):
3937
models.get_examples().items(),
4038
ids=list(models.get_examples().keys()),
4139
)
42-
def test_get_examples(example_name, model_names):
43-
assert example_name in models.EXAMPLES
40+
def test_examples(example_name, model_names):
41+
assert example_name in models.get_examples()
4442
for model_name in model_names:
45-
assert model_name in models.MODELMAP
43+
assert model_name in REGISTRY.models
4644

4745

4846
@pytest.mark.parametrize(
4947
"model_name, files",
50-
list(islice(MODELMAP.items(), TAKE)),
51-
ids=list(MODELMAP.keys())[:TAKE],
48+
list(islice(MODELS.items(), TAKE)),
49+
ids=list(MODELS.keys())[:TAKE],
5250
)
5351
def test_copy_to(model_name, files, tmp_path):
5452
workspace = models.copy_to(tmp_path, model_name, verbose=True)

docs/md/models.md

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
# Models API
22

3-
The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via [Pooch](https://www.fatiando.org/pooch/latest/index.html).
3+
The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via a `ModelRegistry`. There is one "official" `PoochRegistry`, aimed at both users and developers; developers may additionally create `LocalRegistry` instances to load models from the local filesystem.
4+
5+
This module leans heavily on [Pooch](https://www.fatiando.org/pooch/latest/index.html), but it is an independent layer on top with strong opinions about how to train (configure) the fetch-happy friend.
6+
7+
## `ModelRegistry`
8+
9+
Registries expose the following properties:
10+
11+
- `path`: the data path
12+
- `files`: a map of files to file info
13+
- `models`: a map of models to files
14+
- `examples`: a map of example scenarios to models
15+
16+
An *example* is a set of models which run in a particular order.
17+
18+
The default `PoochRegistry` is available at `modflow_devtools.models.DEFAULT_REGISTRY`. Its `path` is the pooch cache. Values in `files` are dictionaries that include a hash and a URL. Configuring the default registry is a developer task — see the instructions on [creating a registry](#creating-a-registry) below.
419

520
## Listing models
621

@@ -51,7 +66,19 @@ with TemporaryDirectory() as td:
5166

5267
If the target directory doesn't exist, it will be created.
5368

54-
## Developers
69+
## Creating a registry
70+
71+
### Local registries
72+
73+
A `LocalRegistry` accepts a `path` on initialization. This must be a directory containing model subdirectories at arbitrary depth. Model subdirectories are identified by the presence of a namefile matching `namefile_pattern`. By default `namefile_pattern="mfsim.nam"`, causing only MODFLOW 6 models to be returned.
74+
75+
For instance, to load all MODFLOW models (including pre-MF6 models):
76+
77+
```python
78+
registry = LocalRegistry("path/to/models", namefile_pattern="*.nam")
79+
```
80+
81+
### Pooch registry
5582

5683
The `make_registry.py` script is responsible for generating a registry text file and a mapping between files and models. This script should be run in the CI pipeline at release time before the package is built. The generated registry file and model mapping are used to create a pooch instance for fetching model files, and should be distributed with the package.
5784

modflow_devtools/make_registry.py

Lines changed: 7 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,124 +1,6 @@
11
import argparse
2-
import hashlib
3-
from os import PathLike
4-
from pathlib import Path
5-
6-
import tomli_w as tomli
7-
from boltons.iterutils import remap
8-
9-
from modflow_devtools.misc import get_model_paths
10-
from modflow_devtools.models import BASE_URL
11-
12-
REGISTRY_DIR = Path(__file__).parent / "registry"
13-
REGISTRY_PATH = REGISTRY_DIR / "registry.toml"
14-
MODELMAP_PATH = REGISTRY_DIR / "models.toml"
15-
EXAMPLES_PATH = REGISTRY_DIR / "examples.toml"
16-
17-
18-
def _sha256(path: Path) -> str:
19-
"""
20-
Compute the SHA256 hash of the given file.
21-
Reference: https://stackoverflow.com/a/44873382/6514033
22-
"""
23-
h = hashlib.sha256()
24-
b = bytearray(128 * 1024)
25-
mv = memoryview(b)
26-
with path.open("rb", buffering=0) as f:
27-
for n in iter(lambda: f.readinto(mv), 0):
28-
h.update(mv[:n])
29-
return h.hexdigest()
30-
31-
32-
def write_registry(
33-
path: str | PathLike,
34-
url: str,
35-
prefix: str = "",
36-
append: bool = False,
37-
namefile: str = "mfsim.nam",
38-
):
39-
"""
40-
Make registry files for a directory of models.
41-
42-
The directory may contain model subdirectories
43-
at arbitrary depth. Model input subdirectories
44-
are identified by the presence of a namefile
45-
matching the provided pattern. A prefix may be
46-
specified for model names to avoid collisions.
47-
The registry files are written to the registry
48-
folder alongside this script. Typically, this
49-
function will run once or more in append mode
50-
to iteratively create a registry.
51-
52-
Parameters
53-
----------
54-
path : str | PathLike
55-
Path to the directory containing the models.
56-
url : str
57-
Base URL for the models.
58-
prefix : str
59-
Prefix to add to model names.
60-
append : bool
61-
Append to the registry files instead of overwriting them.
62-
namefile : str
63-
Namefile pattern to look for in the model directories.
64-
"""
65-
path = Path(path).expanduser().absolute()
66-
if not path.is_dir():
67-
raise NotADirectoryError(f"Path {path} is not a directory.")
68-
69-
registry: dict[str, dict[str, str | None]] = {}
70-
modelmap: dict[str, list[str]] = {}
71-
examples: dict[str, list[str]] = {}
72-
exclude = [".DS_Store", "compare"]
73-
if is_zip := url.endswith((".zip", ".tar")):
74-
registry[url.rpartition("/")[2]] = {"hash": None, "url": url}
75-
76-
model_paths = get_model_paths(path, namefile=namefile)
77-
for model_path in model_paths:
78-
model_path = model_path.expanduser().absolute()
79-
rel_path = model_path.relative_to(path)
80-
parts = [prefix, *list(rel_path.parts)] if prefix else list(rel_path.parts)
81-
model_name = "/".join(parts)
82-
modelmap[model_name] = []
83-
if is_zip:
84-
name = rel_path.parts[0]
85-
if name not in examples:
86-
examples[name] = []
87-
examples[name].append(model_name)
88-
for p in model_path.rglob("*"):
89-
if not p.is_file() or any(e in p.name for e in exclude):
90-
continue
91-
if is_zip:
92-
relpath = p.expanduser().absolute().relative_to(path)
93-
name = "/".join(relpath.parts)
94-
url_ = url
95-
hash = None
96-
else:
97-
relpath = p.expanduser().absolute().relative_to(path)
98-
name = "/".join(relpath.parts)
99-
url_ = f"{url}/{relpath!s}"
100-
hash = _sha256(p)
101-
registry[name] = {"hash": hash, "url": url_}
102-
modelmap[model_name].append(name)
103-
104-
def drop_none_or_empty(path, key, value):
105-
if value is None or value == "":
106-
return False
107-
return True
108-
109-
REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
110-
with REGISTRY_PATH.open("ab+" if append else "wb") as registry_file:
111-
tomli.dump(
112-
remap(dict(sorted(registry.items())), visit=drop_none_or_empty),
113-
registry_file,
114-
)
115-
116-
with MODELMAP_PATH.open("ab+" if append else "wb") as modelmap_file:
117-
tomli.dump(dict(sorted(modelmap.items())), modelmap_file)
118-
119-
with EXAMPLES_PATH.open("ab+" if append else "wb") as examples_file:
120-
tomli.dump(dict(sorted(examples.items())), examples_file)
1212

3+
import modflow_devtools.models as models
1224

1235
if __name__ == "__main__":
1246
parser = argparse.ArgumentParser(description="Make a registry of models.")
@@ -137,7 +19,7 @@ def drop_none_or_empty(path, key, value):
13719
"-u",
13820
type=str,
13921
help="Base URL for models.",
140-
default=BASE_URL,
22+
default=models._DEFAULT_BASE_URL,
14123
)
14224
parser.add_argument(
14325
"--namefile",
@@ -147,10 +29,13 @@ def drop_none_or_empty(path, key, value):
14729
default="mfsim.nam",
14830
)
14931
args = parser.parse_args()
150-
write_registry(
32+
if not args.append:
33+
models.DEFAULT_REGISTRY = models.PoochRegistry(
34+
base_url=args.url, env=models._DEFAULT_ENV
35+
)
36+
models.DEFAULT_REGISTRY.index(
15137
path=args.path,
15238
url=args.url,
15339
prefix=args.prefix,
154-
append=args.append,
15540
namefile=args.namefile,
15641
)

0 commit comments

Comments
 (0)