Skip to content

Commit 1a4a309

Browse files
committed
consolidate
1 parent d047c34 commit 1a4a309

2 files changed

Lines changed: 132 additions & 126 deletions

File tree

modflow_devtools/models/__init__.py

Lines changed: 78 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
import hashlib
66
import importlib.resources as pkg_resources
7-
from abc import ABC, abstractmethod
87
from collections.abc import Callable
98
from functools import partial
109
from os import PathLike
@@ -19,9 +18,11 @@
1918
from boltons.iterutils import remap
2019
from filelock import FileLock
2120
from pooch import Pooch
21+
from pydantic import Field
2222

2323
import modflow_devtools
2424
from modflow_devtools.misc import drop_none_or_empty, get_model_paths
25+
from modflow_devtools.models.schema import FileEntry, Registry, RegistryMetadata
2526

2627
# Best-effort sync flag (to avoid multiple sync attempts)
2728
_SYNC_ATTEMPTED = False
@@ -47,43 +48,7 @@ def _sha256(path: Path) -> str:
4748
return h.hexdigest()
4849

4950

50-
class ModelRegistry(ABC):
51-
@property
52-
@abstractmethod
53-
def files(self) -> dict:
54-
"""
55-
A map of file name to file-scoped information. Note that
56-
this map contains no information on which files belong to
57-
which model; that info is in the `models` dictionary.
58-
"""
59-
...
60-
61-
@property
62-
@abstractmethod
63-
def models(self) -> dict:
64-
"""
65-
A map of model name to the model's input files.
66-
"""
67-
...
68-
69-
@property
70-
@abstractmethod
71-
def examples(self) -> dict:
72-
"""
73-
A map of example name to model names in the example.
74-
An *example* is an ordered set of models/simulations.
75-
"""
76-
...
77-
78-
@abstractmethod
79-
def copy_to(
80-
self, workspace: str | PathLike, model_name: str, verbose: bool = False
81-
) -> Path | None:
82-
"""Copy a model's input files to the given workspace."""
83-
...
84-
85-
86-
class LocalRegistry(ModelRegistry):
51+
class LocalRegistry(Registry):
8752
"""
8853
A registry of models in one or more local directories.
8954
@@ -95,11 +60,14 @@ class LocalRegistry(ModelRegistry):
9560

9661
exclude: ClassVar = [".DS_Store", "compare"]
9762

63+
# Non-Pydantic instance variable for tracking indexed paths
64+
_paths: set[Path]
65+
9866
def __init__(self) -> None:
99-
self._paths: set[Path] = set()
100-
self._files: dict[str, Path] = {}
101-
self._models: dict[str, list[Path]] = {}
102-
self._examples: dict[str, list[str]] = {}
67+
# Initialize Pydantic parent with empty data (no meta for local registries)
68+
super().__init__(meta=None, files={}, models={}, examples={})
69+
# Initialize non-Pydantic tracking variable
70+
self._paths = set()
10371

10472
def index(
10573
self,
@@ -145,19 +113,20 @@ def index(
145113
else list(rel_path.parts)
146114
)
147115
model_name = "/".join(parts)
148-
self._models[model_name] = []
116+
self.models[model_name] = []
149117
if len(rel_path.parts) > 1:
150118
name = rel_path.parts[0]
151-
if name not in self._examples:
152-
self._examples[name] = []
153-
self._examples[name].append(model_name)
119+
if name not in self.examples:
120+
self.examples[name] = []
121+
self.examples[name].append(model_name)
154122
for p in model_path.rglob("*"):
155123
if not p.is_file() or any(e in p.name for e in LocalRegistry.exclude):
156124
continue
157125
relpath = p.expanduser().absolute().relative_to(path)
158126
name = "/".join(relpath.parts)
159-
self._files[name] = p
160-
self._models[model_name].append(p)
127+
# Create FileEntry with local path
128+
self.files[name] = FileEntry(path=p, url=None, hash=None)
129+
self.models[model_name].append(name)
161130

162131
def copy_to(
163132
self, workspace: str | PathLike, model_name: str, verbose: bool = False
@@ -167,8 +136,12 @@ def copy_to(
167136
The workspace will be created if it does not exist.
168137
"""
169138

170-
if not any(file_paths := self.models.get(model_name, [])):
139+
if not any(file_names := self.models.get(model_name, [])):
171140
return None
141+
142+
# Get actual file paths from FileEntry objects
143+
file_paths = [self.files[name].path for name in file_names]
144+
172145
# create the workspace if needed
173146
workspace = Path(workspace).expanduser().absolute()
174147
if verbose:
@@ -177,7 +150,7 @@ def copy_to(
177150
# copy the files. some might be in nested folders,
178151
# but the first is guaranteed not to be, so use it
179152
# to determine relative path in the new workspace.
180-
base = Path(file_paths[0]).parent
153+
base = file_paths[0].parent
181154
for file_path in file_paths:
182155
if verbose:
183156
print(f"Copying {file_path} to workspace")
@@ -188,22 +161,11 @@ def copy_to(
188161

189162
@property
190163
def paths(self) -> set[Path]:
164+
"""Set of paths that have been indexed."""
191165
return self._paths
192166

193-
@property
194-
def files(self) -> dict:
195-
return self._files
196-
197-
@property
198-
def models(self) -> dict:
199-
return self._models
200-
201-
@property
202-
def examples(self) -> dict:
203-
return self._examples
204-
205167

206-
class PoochRegistry(ModelRegistry):
168+
class PoochRegistry(Registry):
207169
"""
208170
A registry of models living in one or more GitHub repositories, accessible via
209171
URLs. The registry uses Pooch to fetch models from the remote(s) where needed.
@@ -224,13 +186,27 @@ class PoochRegistry(ModelRegistry):
224186
models_file_name: ClassVar = "models.toml"
225187
examples_file_name: ClassVar = "examples.toml"
226188

189+
# Non-Pydantic instance variables
190+
_registry_path: Path
191+
_registry_file_path: Path
192+
_models_file_path: Path
193+
_examples_file_path: Path
194+
_path: Path
195+
_pooch: Pooch
196+
_fetchers: dict
197+
_urls: dict
198+
227199
def __init__(
228200
self,
229201
path: str | PathLike | None = None,
230202
base_url: str | None = None,
231203
env: str | None = None,
232204
retries: int = 3,
233205
):
206+
# Initialize Pydantic parent with empty data (will be populated by _load())
207+
super().__init__(meta=None, files={}, models={}, examples={})
208+
209+
# Initialize non-Pydantic instance variables
234210
self._registry_path = Path(__file__).parent.parent / "registry"
235211
self._registry_path.mkdir(parents=True, exist_ok=True)
236212
self._registry_file_path = (
@@ -252,8 +228,8 @@ def __init__(
252228
env=env,
253229
retry_if_failed=retries,
254230
)
255-
self._fetchers: dict = {}
256-
self._urls: dict = {}
231+
self._fetchers = {}
232+
self._urls = {}
257233
self._load()
258234

259235
def _fetcher(self, model_name, file_names) -> Callable:
@@ -307,48 +283,41 @@ def _try_load_from_cache(self) -> bool:
307283
if not cached:
308284
return False
309285

310-
# Merge all cached registries
311-
all_files = {}
312-
all_models = {}
313-
all_examples = {}
314-
286+
# Merge all cached registries into Pydantic fields
315287
for source, ref in cached:
316288
registry = load_cached_registry(source, ref)
317289
if registry:
318-
# Merge files
290+
# Merge files - create FileEntry with both url and cached path
319291
for fname, file_entry in registry.files.items():
320-
all_files[fname] = {
321-
"path": self.pooch.path / fname,
322-
"hash": file_entry.hash,
323-
"url": file_entry.url,
324-
}
292+
self.files[fname] = FileEntry(
293+
url=file_entry.url,
294+
path=self.pooch.path / fname,
295+
hash=file_entry.hash,
296+
)
325297

326-
# Merge models
327-
all_models.update(registry.models)
298+
# Merge models and examples
299+
self.models.update(registry.models)
300+
self.examples.update(registry.examples)
328301

329-
# Merge examples
330-
all_examples.update(registry.examples)
302+
# Keep metadata from the first cached registry that provides any
303+
if not self.meta and registry.meta:
304+
self.meta = registry.meta
331305

332-
if not all_files:
306+
if not self.files:
333307
return False
334308

335-
# Set registry data
336-
self._files = all_files
337-
self._models = all_models
338-
self._examples = all_examples
339-
340309
# Configure Pooch
341-
self.urls = {
342-
k: v["url"] for k, v in all_files.items() if v.get("url")
310+
self._urls = {
311+
name: entry.url for name, entry in self.files.items() if entry.url
343312
}
344313
self.pooch.registry = {
345-
k: v.get("hash") for k, v in all_files.items()
314+
name: entry.hash for name, entry in self.files.items()
346315
}
347-
self.pooch.urls = self.urls
316+
self.pooch.urls = self._urls
348317

349318
# Set up fetchers
350319
self._fetchers = {}
351-
for model_name, file_list in self._models.items():
320+
for model_name, file_list in self.models.items():
352321
self._fetchers[model_name] = self._fetcher(model_name, file_list)
353322

354323
return True
@@ -376,21 +345,25 @@ def _load_from_bundled(self):
376345
PoochRegistry.anchor, PoochRegistry.registry_file_name
377346
) as registry_file:
378347
registry = tomli.load(registry_file)
379-
self._files = {
380-
k: {"path": self.pooch.path / k, "hash": v.get("hash", None)}
381-
for k, v in registry.items()
382-
}
383-
# extract urls then drop them. registry directly maps file name to hash
384-
self.urls = {
385-
k: v["url"] for k, v in registry.items() if v.get("url", None)
348+
# Create FileEntry objects for each file
349+
for fname, entry in registry.items():
350+
self.files[fname] = FileEntry(
351+
url=entry.get("url"),
352+
path=self.pooch.path / fname,
353+
hash=entry.get("hash"),
354+
)
355+
# Extract URLs and configure Pooch
356+
self._urls = {
357+
fname: entry.url
358+
for fname, entry in self.files.items()
359+
if entry.url
386360
}
387361
self.pooch.registry = {
388-
k: v.get("hash", None) for k, v in registry.items()
362+
fname: entry.hash for fname, entry in self.files.items()
389363
}
390-
self.pooch.urls = self.urls
364+
self.pooch.urls = self._urls
391365
except: # noqa: E722
392366
self._urls = {}
393-
self._files = {}
394367
self.pooch.registry = {}
395368
warn(
396369
f"No registry file '{PoochRegistry.registry_file_name}' "
@@ -401,11 +374,10 @@ def _load_from_bundled(self):
401374
with pkg_resources.open_binary(
402375
PoochRegistry.anchor, PoochRegistry.models_file_name
403376
) as models_file:
404-
self._models = tomli.load(models_file)
405-
for model_name, registry in self.models.items():
406-
self._fetchers[model_name] = self._fetcher(model_name, registry)
377+
self.models.update(tomli.load(models_file))
378+
for model_name, file_list in self.models.items():
379+
self._fetchers[model_name] = self._fetcher(model_name, file_list)
407380
except: # noqa: E722
408-
self._models = {}
409381
warn(
410382
f"No model mapping file '{PoochRegistry.models_file_name}' "
411383
f"in module '{PoochRegistry.anchor}' resources"
@@ -415,9 +387,8 @@ def _load_from_bundled(self):
415387
with pkg_resources.open_binary(
416388
PoochRegistry.anchor, PoochRegistry.examples_file_name
417389
) as examples_file:
418-
self._examples = tomli.load(examples_file)
390+
self.examples.update(tomli.load(examples_file))
419391
except: # noqa: E722
420-
self._examples = {}
421392
warn(
422393
f"No examples file '{PoochRegistry.examples_file_name}' "
423394
f"in module '{PoochRegistry.anchor}' resources"
@@ -612,18 +583,6 @@ def pooch(self) -> Pooch:
612583
def path(self) -> Path:
613584
return self.pooch.path
614585

615-
@property
616-
def files(self) -> dict:
617-
return self._files
618-
619-
@property
620-
def models(self) -> dict:
621-
return self._models
622-
623-
@property
624-
def examples(self) -> dict:
625-
return self._examples
626-
627586

628587
_DEFAULT_ENV = "MFMODELS_PATH"
629588
_DEFAULT_BASE_URL = (

0 commit comments

Comments
 (0)