44
55import hashlib
66import importlib .resources as pkg_resources
7- from abc import ABC , abstractmethod
87from collections .abc import Callable
98from functools import partial
109from os import PathLike
1918from boltons .iterutils import remap
2019from filelock import FileLock
2120from pooch import Pooch
21+ from pydantic import Field
2222
2323import modflow_devtools
2424from modflow_devtools .misc import drop_none_or_empty , get_model_paths
25+ from modflow_devtools .models .schema import FileEntry , Registry , RegistryMetadata
2526
2627# Best-effort sync flag (to avoid multiple sync attempts)
2728_SYNC_ATTEMPTED = False
@@ -47,43 +48,7 @@ def _sha256(path: Path) -> str:
4748 return h .hexdigest ()
4849
4950
50- class ModelRegistry (ABC ):
51- @property
52- @abstractmethod
53- def files (self ) -> dict :
54- """
55- A map of file name to file-scoped information. Note that
56- this map contains no information on which files belong to
57- which model; that info is in the `models` dictionary.
58- """
59- ...
60-
61- @property
62- @abstractmethod
63- def models (self ) -> dict :
64- """
65- A map of model name to the model's input files.
66- """
67- ...
68-
69- @property
70- @abstractmethod
71- def examples (self ) -> dict :
72- """
73- A map of example name to model names in the example.
74- An *example* is an ordered set of models/simulations.
75- """
76- ...
77-
78- @abstractmethod
79- def copy_to (
80- self , workspace : str | PathLike , model_name : str , verbose : bool = False
81- ) -> Path | None :
82- """Copy a model's input files to the given workspace."""
83- ...
84-
85-
86- class LocalRegistry (ModelRegistry ):
51+ class LocalRegistry (Registry ):
8752 """
8853 A registry of models in one or more local directories.
8954
@@ -95,11 +60,14 @@ class LocalRegistry(ModelRegistry):
9560
9661 exclude : ClassVar = [".DS_Store" , "compare" ]
9762
63+ # Non-Pydantic instance variable for tracking indexed paths
64+ _paths : set [Path ]
65+
9866 def __init__ (self ) -> None :
99- self . _paths : set [ Path ] = set ( )
100- self . _files : dict [ str , Path ] = {}
101- self . _models : dict [ str , list [ Path ]] = {}
102- self ._examples : dict [ str , list [ str ]] = {}
67+ # Initialize Pydantic parent with empty data (no meta for local registries)
68+ super (). __init__ ( meta = None , files = {}, models = {}, examples = {})
69+ # Initialize non-Pydantic tracking variable
70+ self ._paths = set ()
10371
10472 def index (
10573 self ,
@@ -145,19 +113,20 @@ def index(
145113 else list (rel_path .parts )
146114 )
147115 model_name = "/" .join (parts )
148- self ._models [model_name ] = []
116+ self .models [model_name ] = []
149117 if len (rel_path .parts ) > 1 :
150118 name = rel_path .parts [0 ]
151- if name not in self ._examples :
152- self ._examples [name ] = []
153- self ._examples [name ].append (model_name )
119+ if name not in self .examples :
120+ self .examples [name ] = []
121+ self .examples [name ].append (model_name )
154122 for p in model_path .rglob ("*" ):
155123 if not p .is_file () or any (e in p .name for e in LocalRegistry .exclude ):
156124 continue
157125 relpath = p .expanduser ().absolute ().relative_to (path )
158126 name = "/" .join (relpath .parts )
159- self ._files [name ] = p
160- self ._models [model_name ].append (p )
127+ # Create FileEntry with local path
128+ self .files [name ] = FileEntry (path = p , url = None , hash = None )
129+ self .models [model_name ].append (name )
161130
162131 def copy_to (
163132 self , workspace : str | PathLike , model_name : str , verbose : bool = False
@@ -167,8 +136,12 @@ def copy_to(
167136 The workspace will be created if it does not exist.
168137 """
169138
170- if not any (file_paths := self .models .get (model_name , [])):
139+ if not any (file_names := self .models .get (model_name , [])):
171140 return None
141+
142+ # Get actual file paths from FileEntry objects
143+ file_paths = [self .files [name ].path for name in file_names ]
144+
172145 # create the workspace if needed
173146 workspace = Path (workspace ).expanduser ().absolute ()
174147 if verbose :
@@ -177,7 +150,7 @@ def copy_to(
177150 # copy the files. some might be in nested folders,
178151 # but the first is guaranteed not to be, so use it
179152 # to determine relative path in the new workspace.
180- base = Path ( file_paths [0 ]) .parent
153+ base = file_paths [0 ].parent
181154 for file_path in file_paths :
182155 if verbose :
183156 print (f"Copying { file_path } to workspace" )
@@ -188,22 +161,11 @@ def copy_to(
188161
189162 @property
190163 def paths (self ) -> set [Path ]:
164+ """Set of paths that have been indexed."""
191165 return self ._paths
192166
193- @property
194- def files (self ) -> dict :
195- return self ._files
196-
197- @property
198- def models (self ) -> dict :
199- return self ._models
200-
201- @property
202- def examples (self ) -> dict :
203- return self ._examples
204-
205167
206- class PoochRegistry (ModelRegistry ):
168+ class PoochRegistry (Registry ):
207169 """
208170 A registry of models living in one or more GitHub repositories, accessible via
209171 URLs. The registry uses Pooch to fetch models from the remote(s) where needed.
@@ -224,13 +186,27 @@ class PoochRegistry(ModelRegistry):
224186 models_file_name : ClassVar = "models.toml"
225187 examples_file_name : ClassVar = "examples.toml"
226188
189+ # Non-Pydantic instance variables
190+ _registry_path : Path
191+ _registry_file_path : Path
192+ _models_file_path : Path
193+ _examples_file_path : Path
194+ _path : Path
195+ _pooch : Pooch
196+ _fetchers : dict
197+ _urls : dict
198+
227199 def __init__ (
228200 self ,
229201 path : str | PathLike | None = None ,
230202 base_url : str | None = None ,
231203 env : str | None = None ,
232204 retries : int = 3 ,
233205 ):
206+ # Initialize Pydantic parent with empty data (will be populated by _load())
207+ super ().__init__ (meta = None , files = {}, models = {}, examples = {})
208+
209+ # Initialize non-Pydantic instance variables
234210 self ._registry_path = Path (__file__ ).parent .parent / "registry"
235211 self ._registry_path .mkdir (parents = True , exist_ok = True )
236212 self ._registry_file_path = (
@@ -252,8 +228,8 @@ def __init__(
252228 env = env ,
253229 retry_if_failed = retries ,
254230 )
255- self ._fetchers : dict = {}
256- self ._urls : dict = {}
231+ self ._fetchers = {}
232+ self ._urls = {}
257233 self ._load ()
258234
259235 def _fetcher (self , model_name , file_names ) -> Callable :
@@ -307,48 +283,41 @@ def _try_load_from_cache(self) -> bool:
307283 if not cached :
308284 return False
309285
310- # Merge all cached registries
311- all_files = {}
312- all_models = {}
313- all_examples = {}
314-
286+ # Merge all cached registries into Pydantic fields
315287 for source , ref in cached :
316288 registry = load_cached_registry (source , ref )
317289 if registry :
318- # Merge files
290+ # Merge files - create FileEntry with both url and cached path
319291 for fname , file_entry in registry .files .items ():
320- all_files [fname ] = {
321- "path" : self . pooch . path / fname ,
322- "hash" : file_entry . hash ,
323- "url" : file_entry .url ,
324- }
292+ self . files [fname ] = FileEntry (
293+ url = file_entry . url ,
294+ path = self . pooch . path / fname ,
295+ hash = file_entry .hash ,
296+ )
325297
326- # Merge models
327- all_models .update (registry .models )
298+ # Merge models and examples
299+ self .models .update (registry .models )
300+ self .examples .update (registry .examples )
328301
329- # Merge examples
330- all_examples .update (registry .examples )
302+ # Store metadata from first registry
303+ if not self .meta and registry .meta :
304+ self .meta = registry .meta
331305
332- if not all_files :
306+ if not self . files :
333307 return False
334308
335- # Set registry data
336- self ._files = all_files
337- self ._models = all_models
338- self ._examples = all_examples
339-
340309 # Configure Pooch
341- self .urls = {
342- k : v [ " url" ] for k , v in all_files . items () if v . get ( " url" )
310+ self ._urls = {
311+ name : entry . url for name , entry in self . files . items () if entry . url
343312 }
344313 self .pooch .registry = {
345- k : v . get ( " hash" ) for k , v in all_files .items ()
314+ name : entry . hash for name , entry in self . files .items ()
346315 }
347- self .pooch .urls = self .urls
316+ self .pooch .urls = self ._urls
348317
349318 # Set up fetchers
350319 self ._fetchers = {}
351- for model_name , file_list in self ._models .items ():
320+ for model_name , file_list in self .models .items ():
352321 self ._fetchers [model_name ] = self ._fetcher (model_name , file_list )
353322
354323 return True
@@ -376,21 +345,25 @@ def _load_from_bundled(self):
376345 PoochRegistry .anchor , PoochRegistry .registry_file_name
377346 ) as registry_file :
378347 registry = tomli .load (registry_file )
379- self ._files = {
380- k : {"path" : self .pooch .path / k , "hash" : v .get ("hash" , None )}
381- for k , v in registry .items ()
382- }
383- # extract urls then drop them. registry directly maps file name to hash
384- self .urls = {
385- k : v ["url" ] for k , v in registry .items () if v .get ("url" , None )
348+ # Create FileEntry objects for each file
349+ for fname , entry in registry .items ():
350+ self .files [fname ] = FileEntry (
351+ url = entry .get ("url" ),
352+ path = self .pooch .path / fname ,
353+ hash = entry .get ("hash" ),
354+ )
355+ # Extract URLs and configure Pooch
356+ self ._urls = {
357+ fname : entry .url
358+ for fname , entry in self .files .items ()
359+ if entry .url
386360 }
387361 self .pooch .registry = {
388- k : v . get ( " hash" , None ) for k , v in registry .items ()
362+ fname : entry . hash for fname , entry in self . files .items ()
389363 }
390- self .pooch .urls = self .urls
364+ self .pooch .urls = self ._urls
391365 except : # noqa: E722
392366 self ._urls = {}
393- self ._files = {}
394367 self .pooch .registry = {}
395368 warn (
396369 f"No registry file '{ PoochRegistry .registry_file_name } ' "
@@ -401,11 +374,10 @@ def _load_from_bundled(self):
401374 with pkg_resources .open_binary (
402375 PoochRegistry .anchor , PoochRegistry .models_file_name
403376 ) as models_file :
404- self ._models = tomli .load (models_file )
405- for model_name , registry in self .models .items ():
406- self ._fetchers [model_name ] = self ._fetcher (model_name , registry )
377+ self .models . update ( tomli .load (models_file ) )
378+ for model_name , file_list in self .models .items ():
379+ self ._fetchers [model_name ] = self ._fetcher (model_name , file_list )
407380 except : # noqa: E722
408- self ._models = {}
409381 warn (
410382 f"No model mapping file '{ PoochRegistry .models_file_name } ' "
411383 f"in module '{ PoochRegistry .anchor } ' resources"
@@ -415,9 +387,8 @@ def _load_from_bundled(self):
415387 with pkg_resources .open_binary (
416388 PoochRegistry .anchor , PoochRegistry .examples_file_name
417389 ) as examples_file :
418- self ._examples = tomli .load (examples_file )
390+ self .examples . update ( tomli .load (examples_file ) )
419391 except : # noqa: E722
420- self ._examples = {}
421392 warn (
422393 f"No examples file '{ PoochRegistry .examples_file_name } ' "
423394 f"in module '{ PoochRegistry .anchor } ' resources"
@@ -612,18 +583,6 @@ def pooch(self) -> Pooch:
612583 def path (self ) -> Path :
613584 return self .pooch .path
614585
615- @property
616- def files (self ) -> dict :
617- return self ._files
618-
619- @property
620- def models (self ) -> dict :
621- return self ._models
622-
623- @property
624- def examples (self ) -> dict :
625- return self ._examples
626-
627586
628587_DEFAULT_ENV = "MFMODELS_PATH"
629588_DEFAULT_BASE_URL = (
0 commit comments