Skip to content

Commit 60e2edc

Browse files
committed
refactor(models): introduce model registry class
1 parent 7116292 commit 60e2edc

5 files changed

Lines changed: 492 additions & 252 deletions

File tree

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,25 @@
99

1010
TAKE = 5 if is_in_ci() else None
1111
PROJ_ROOT = Path(__file__).parents[1]
12-
MODELMAP_PATH = PROJ_ROOT / "modflow_devtools" / "registry" / models.MODELMAP_FILE_NAME
13-
with MODELMAP_PATH.open("rb") as f:
14-
MODELMAP = tomli.load(f)
12+
MODELS_PATH = PROJ_ROOT / "modflow_devtools" / "registry" / "models.toml"
13+
MODELS = tomli.load(MODELS_PATH.open("rb"))
14+
REGISTRY = models._DEFAULT_REGISTRY
1515

1616

17-
def test_registry():
18-
registry = models.get_registry()
19-
assert registry is not None, "Registry was not loaded"
20-
assert registry is models.POOCH.registry
21-
assert any(registry), "Registry is empty"
17+
def test_files():
18+
files = models.get_files()
19+
assert files is not None, "Files not loaded"
20+
assert files is REGISTRY.files
21+
assert any(files), "Registry is empty"
2222

2323

2424
@pytest.mark.parametrize(
25-
"model_name, files", MODELMAP.items(), ids=list(MODELMAP.keys())
25+
"model_name, files", MODELS.items(), ids=list(MODELS.keys())
2626
)
2727
def test_models(model_name, files):
2828
model_names = list(models.get_models().keys())
2929
assert model_name in model_names, f"Model {model_name} not found in model map"
30-
assert files == models.MODELMAP[model_name], (
30+
assert files == REGISTRY.models[model_name], (
3131
f"Files for model {model_name} do not match"
3232
)
3333
if "mf6" in model_name:
@@ -39,16 +39,16 @@ def test_models(model_name, files):
3939
models.get_examples().items(),
4040
ids=list(models.get_examples().keys()),
4141
)
42-
def test_get_examples(example_name, model_names):
43-
assert example_name in models.EXAMPLES
42+
def test_examples(example_name, model_names):
43+
assert example_name in models.get_examples()
4444
for model_name in model_names:
45-
assert model_name in models.MODELMAP
45+
assert model_name in REGISTRY.models
4646

4747

4848
@pytest.mark.parametrize(
4949
"model_name, files",
50-
list(islice(MODELMAP.items(), TAKE)),
51-
ids=list(MODELMAP.keys())[:TAKE],
50+
list(islice(MODELS.items(), TAKE)),
51+
ids=list(MODELS.keys())[:TAKE],
5252
)
5353
def test_copy_to(model_name, files, tmp_path):
5454
workspace = models.copy_to(tmp_path, model_name, verbose=True)

docs/md/models.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# Models API
22

3-
The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via [Pooch](https://www.fatiando.org/pooch/latest/index.html).
3+
The `modflow_devtools.models` module provides programmatic access to MODFLOW 6 example models via a `Models` registry class. There is one "official" registry, aimed at users and developers. Developers may also create additional instances, e.g., to load models from the local filesystem.
4+
5+
This module leans heavily on [Pooch](https://www.fatiando.org/pooch/latest/index.html), but it has strong opinions about how to train (configure) it.
46

57
## Listing models
68

@@ -51,7 +53,7 @@ with TemporaryDirectory() as td:
5153

5254
If the target directory doesn't exist, it will be created.
5355

54-
## Developers
56+
## Creating a registry
5557

5658
The `make_registry.py` script is responsible for generating a registry text file and a mapping between files and models. This script should be run in the CI pipeline at release time before the package is built. The generated registry file and model mapping are used to create a pooch instance for fetching model files, and should be distributed with the package.
5759

modflow_devtools/make_registry.py

Lines changed: 7 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1,124 +1,6 @@
11
import argparse
2-
import hashlib
3-
from os import PathLike
4-
from pathlib import Path
5-
6-
import tomli_w as tomli
7-
from boltons.iterutils import remap
8-
9-
from modflow_devtools.misc import get_model_paths
10-
from modflow_devtools.models import BASE_URL
11-
12-
REGISTRY_DIR = Path(__file__).parent / "registry"
13-
REGISTRY_PATH = REGISTRY_DIR / "registry.toml"
14-
MODELMAP_PATH = REGISTRY_DIR / "models.toml"
15-
EXAMPLES_PATH = REGISTRY_DIR / "examples.toml"
16-
17-
18-
def _sha256(path: Path) -> str:
19-
"""
20-
Compute the SHA256 hash of the given file.
21-
Reference: https://stackoverflow.com/a/44873382/6514033
22-
"""
23-
h = hashlib.sha256()
24-
b = bytearray(128 * 1024)
25-
mv = memoryview(b)
26-
with path.open("rb", buffering=0) as f:
27-
for n in iter(lambda: f.readinto(mv), 0):
28-
h.update(mv[:n])
29-
return h.hexdigest()
30-
31-
32-
def write_registry(
33-
path: str | PathLike,
34-
url: str,
35-
prefix: str = "",
36-
append: bool = False,
37-
namefile: str = "mfsim.nam",
38-
):
39-
"""
40-
Make registry files for a directory of models.
41-
42-
The directory may contain model subdirectories
43-
at arbitrary depth. Model input subdirectories
44-
are identified by the presence of a namefile
45-
matching the provided pattern. A prefix may be
46-
specified for model names to avoid collisions.
47-
The registry files are written to the registry
48-
folder alongside this script. Typically, this
49-
function will run once or more in append mode
50-
to iteratively create a registry.
51-
52-
Parameters
53-
----------
54-
path : str | PathLike
55-
Path to the directory containing the models.
56-
url : str
57-
Base URL for the models.
58-
prefix : str
59-
Prefix to add to model names.
60-
append : bool
61-
Append to the registry files instead of overwriting them.
62-
namefile : str
63-
Namefile pattern to look for in the model directories.
64-
"""
65-
path = Path(path).expanduser().absolute()
66-
if not path.is_dir():
67-
raise NotADirectoryError(f"Path {path} is not a directory.")
68-
69-
registry: dict[str, dict[str, str | None]] = {}
70-
modelmap: dict[str, list[str]] = {}
71-
examples: dict[str, list[str]] = {}
72-
exclude = [".DS_Store", "compare"]
73-
if is_zip := url.endswith((".zip", ".tar")):
74-
registry[url.rpartition("/")[2]] = {"hash": None, "url": url}
75-
76-
model_paths = get_model_paths(path, namefile=namefile)
77-
for model_path in model_paths:
78-
model_path = model_path.expanduser().absolute()
79-
rel_path = model_path.relative_to(path)
80-
parts = [prefix, *list(rel_path.parts)] if prefix else list(rel_path.parts)
81-
model_name = "/".join(parts)
82-
modelmap[model_name] = []
83-
if is_zip:
84-
name = rel_path.parts[0]
85-
if name not in examples:
86-
examples[name] = []
87-
examples[name].append(model_name)
88-
for p in model_path.rglob("*"):
89-
if not p.is_file() or any(e in p.name for e in exclude):
90-
continue
91-
if is_zip:
92-
relpath = p.expanduser().absolute().relative_to(path)
93-
name = "/".join(relpath.parts)
94-
url_ = url
95-
hash = None
96-
else:
97-
relpath = p.expanduser().absolute().relative_to(path)
98-
name = "/".join(relpath.parts)
99-
url_ = f"{url}/{relpath!s}"
100-
hash = _sha256(p)
101-
registry[name] = {"hash": hash, "url": url_}
102-
modelmap[model_name].append(name)
103-
104-
def drop_none_or_empty(path, key, value):
105-
if value is None or value == "":
106-
return False
107-
return True
108-
109-
REGISTRY_DIR.mkdir(parents=True, exist_ok=True)
110-
with REGISTRY_PATH.open("ab+" if append else "wb") as registry_file:
111-
tomli.dump(
112-
remap(dict(sorted(registry.items())), visit=drop_none_or_empty),
113-
registry_file,
114-
)
115-
116-
with MODELMAP_PATH.open("ab+" if append else "wb") as modelmap_file:
117-
tomli.dump(dict(sorted(modelmap.items())), modelmap_file)
118-
119-
with EXAMPLES_PATH.open("ab+" if append else "wb") as examples_file:
120-
tomli.dump(dict(sorted(examples.items())), examples_file)
1212

3+
import modflow_devtools.models as models
1224

1235
if __name__ == "__main__":
1246
parser = argparse.ArgumentParser(description="Make a registry of models.")
@@ -137,7 +19,7 @@ def drop_none_or_empty(path, key, value):
13719
"-u",
13820
type=str,
13921
help="Base URL for models.",
140-
default=BASE_URL,
22+
default=models._DEFAULT_BASE_URL,
14123
)
14224
parser.add_argument(
14325
"--namefile",
@@ -147,10 +29,13 @@ def drop_none_or_empty(path, key, value):
14729
default="mfsim.nam",
14830
)
14931
args = parser.parse_args()
150-
write_registry(
32+
if not args.append:
33+
models._DEFAULT_REGISTRY = models.PoochRegistry(
34+
base_url=args.url, env=models._DEFAULT_ENV
35+
)
36+
models._DEFAULT_REGISTRY.index(
15137
path=args.path,
15238
url=args.url,
15339
prefix=args.prefix,
154-
append=args.append,
15540
namefile=args.namefile,
15641
)

0 commit comments

Comments
 (0)