diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 20292bec..8831d690 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,10 +51,10 @@ jobs: steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v7 with: cache-dependency-glob: "**/pyproject.toml" @@ -80,9 +80,10 @@ jobs: fail-fast: false matrix: os: [ ubuntu-22.04, macos-14, windows-2022 ] - python: [ "3.10", "3.11", "3.12", "3.13" ] + python: [ "3.11", "3.12", "3.13", "3.14" ] env: GCC_V: 11 + UV_PROJECT_ENVIRONMENT: ${{ github.workspace }}/.venv steps: - name: Checkout repo @@ -97,10 +98,11 @@ jobs: path: modflow6 - name: Setup uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v7 with: cache-dependency-glob: "**/pyproject.toml" python-version: ${{ matrix.python }} + working-directory: modflow-devtools - name: Setup Fortran if: runner.os != 'Windows' @@ -124,16 +126,16 @@ jobs: working-directory: modflow-devtools/autotest env: REPOS_PATH: ${{ github.workspace }} + DFNS_PATH: ${{ github.workspace }}/modflow6/doc/mf6io/mf6ivar/dfn MODFLOW_DEVTOOLS_AUTO_SYNC: 0 - TEST_DFN_PATH: ${{ github.workspace }}/modflow6/doc/mf6io/mf6ivar/dfn # use --dist loadfile to so tests requiring pytest-virtualenv run on the same worker - run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py --ignore test_dfns_registry.py + run: uv run pytest -v -n auto --dist loadfile --durations 0 --ignore test_download.py --ignore test_models.py --ignore dfns/test_dfns_registry.py - name: Run network-dependent tests # only invoke the GH API on one OS and Python version # to avoid rate limits (1000 rqs / hour / repository) # https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration#usage-limits - if: runner.os == 'Linux' && matrix.python == '3.10' + if: runner.os == 'Linux' && matrix.python == '3.11' 
working-directory: modflow-devtools/autotest env: REPOS_PATH: ${{ github.workspace }} @@ -151,7 +153,7 @@ jobs: TEST_PROGRAMS_REPO: MODFLOW-ORG/modflow6 TEST_PROGRAMS_REF: develop TEST_PROGRAMS_SOURCE: modflow6 - run: uv run pytest -v -n auto --dist loadgroup --durations 0 test_download.py test_models.py test_dfns_registry.py + run: uv run pytest -v -n auto --dist loadgroup --durations 0 test_download.py test_models.py dfns/test_dfns_registry.py rtd: name: Docs diff --git a/DEVELOPER.md b/DEVELOPER.md index b05fb98a..8d91d627 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -19,7 +19,7 @@ This document provides guidance to set up a development environment and discusse ## Requirements -Python3.10+ is currently required. This project has historically aimed to support several recent versions of Python, loosely following [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html#implementation). In current and future development this window may narrow to follow [SPEC 0](https://scientific-python.org/specs/spec-0000/#support-window) instead. +Python3.11+ is currently required. This project has historically aimed to support several recent versions of Python, loosely following [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html#implementation). In current and future development this window may narrow to follow [SPEC 0](https://scientific-python.org/specs/spec-0000/#support-window) instead. ## Installation diff --git a/README.md b/README.md index 9c2dad64..15159ef5 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Python development tools for MODFLOW 6 and related projects. ## Requirements -Python3.10+, dependency-free by default. +Python3.11+, dependency-free by default. 
Two main dependency groups are available, oriented around specific use cases: diff --git a/autotest/dfn/__init__.py b/autotest/dfn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/autotest/dfn/conftest.py b/autotest/dfn/conftest.py new file mode 100644 index 00000000..b49e5d71 --- /dev/null +++ b/autotest/dfn/conftest.py @@ -0,0 +1,16 @@ +import pytest + +from modflow_devtools.dfn import fetch_dfns + +MF6_OWNER = "MODFLOW-ORG" +MF6_REPO = "modflow6" +MF6_REF = "develop" + + +@pytest.fixture(scope="module") +def dfn_dir(module_tmpdir): + pytest.importorskip("boltons") + path = module_tmpdir / "dfn" + path.mkdir() + fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, path, verbose=True) + return path diff --git a/autotest/dfn/test_dfn.py b/autotest/dfn/test_dfn.py new file mode 100644 index 00000000..2bf363c4 --- /dev/null +++ b/autotest/dfn/test_dfn.py @@ -0,0 +1,23 @@ +from modflow_devtools.dfn import Dfn +from modflow_devtools.markers import requires_pkg + + +@requires_pkg("boltons") +def test_load_v1(dfn_dir): + common = {} + common_path = dfn_dir / "common.dfn" + if common_path.exists(): + with common_path.open() as f: + common, _ = Dfn._load_v1_flat(f) + names = [p.stem for p in dfn_dir.glob("*.dfn") if p.stem not in ("common", "flopy")] + assert names + for name in names: + with (dfn_dir / f"{name}.dfn").open() as f: + dfn = Dfn.load(f, name=name, common=common) + assert any(dfn) + + +@requires_pkg("boltons") +def test_load_all(dfn_dir): + dfns = Dfn.load_all(dfn_dir) + assert any(dfns) diff --git a/autotest/dfn/test_mapper.py b/autotest/dfn/test_mapper.py new file mode 100644 index 00000000..ccc96a8f --- /dev/null +++ b/autotest/dfn/test_mapper.py @@ -0,0 +1,91 @@ +from modflow_devtools.dfn.mapper import map as map_v1_1 +from modflow_devtools.dfn.mapper import map_field +from modflow_devtools.dfn.schema import Dfn, Field + + +def _field(**kwargs) -> Field: + """Build a complete v1 Field dict for testing.""" + base: dict = { + "name": "test_field", 
+ "type": "keyword", + "block": "options", + "default": None, + "longname": None, + "description": None, + "optional": False, + "developmode": False, + "shape": None, + "valid": None, + "netcdf": False, + "tagged": False, + } + base.update(kwargs) + return Field(**base) + + +def _dfn(**kwargs) -> Dfn: + """Build a minimal v1 Dfn dict for testing.""" + base: dict = { + "schema_version": "1", + "name": "test-dfn", + "parent": None, + "blocks": None, + "advanced": False, + "multi": False, + } + base.update(kwargs) + return Dfn(**base) + + +def test_map_field_preserves_base_attrs(): + field = _field( + name="save_flows", + type="keyword", + description="save calculated flows", + optional=True, + tagged=True, + longname="save flows flag", + ) + result = map_field(field) + assert result["name"] == "save_flows" + assert result["type"] == "keyword" + assert result["description"] == "save calculated flows" + assert result["optional"] is True + assert result["tagged"] is True + assert result["longname"] == "save flows flag" + + +def test_map_field_strips_v1_specific_attrs(): + field = _field(in_record=True, reader="urword") + result = map_field(field) + assert "in_record" not in result + assert "reader" not in result + + +def test_map_sets_schema_version(): + dfn = _dfn() + result = map_v1_1(dfn) + assert result["schema_version"] == "1.1" + + +def test_map_preserves_metadata(): + dfn = _dfn(name="gwf-chd", parent="gwf-nam") + result = map_v1_1(dfn) + assert result["name"] == "gwf-chd" + assert result["schema_version"] == "1.1" + + +def test_map_empty_blocks(): + dfn = _dfn(blocks=None) + result = map_v1_1(dfn) + assert result["blocks"] is None + + +def test_map_maps_block_fields(): + field = _field(name="maxbound", type="integer", block="dimensions") + dfn = _dfn(blocks={"dimensions": {"maxbound": field}}) + result = map_v1_1(dfn) + assert result["blocks"] is not None + assert "maxbound" in result["blocks"]["dimensions"] + assert 
result["blocks"]["dimensions"]["maxbound"]["name"] == "maxbound" + assert "in_record" not in result["blocks"]["dimensions"]["maxbound"] diff --git a/autotest/dfns/__init__.py b/autotest/dfns/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/autotest/dfns/conftest.py b/autotest/dfns/conftest.py new file mode 100644 index 00000000..5f8f2900 --- /dev/null +++ b/autotest/dfns/conftest.py @@ -0,0 +1,34 @@ +import os +from pathlib import Path + +import pytest + +from modflow_devtools.dfns import fetch_dfns + +PROJ_ROOT = Path(__file__).parents[2]  # autotest/dfns/conftest.py -> repository root + +DFNS_REPO = os.getenv("TEST_DFNS_REPO", "MODFLOW-ORG/modflow6") +DFNS_REF = os.getenv("TEST_DFNS_REF", "develop") +DFNS_SOURCE = os.getenv("TEST_DFNS_SOURCE", "modflow6") +DFNS_VERSION = os.getenv("TEST_DFNS_VERSION", "6.6.0") + + +@pytest.fixture(scope="module") +def dfn_dir(module_tmpdir): + """ + Path to DFN files: $DFNS_PATH if set (must exist and contain *.dfn), otherwise + fetched from DFNS_REPO at DFNS_REF into a "dfns" directory under module_tmpdir. + """ + env_var = "DFNS_PATH" + if dfns_path := os.getenv(env_var): + dfn_path = Path(dfns_path).expanduser().resolve() + assert dfn_path.exists(), f"{env_var}={dfns_path} does not exist" + assert any(dfn_path.glob("*.dfn")), f"{env_var}={dfns_path} empty" + return dfn_path + + dfns_path = module_tmpdir / "dfns" + dfns_path.mkdir() + owner = DFNS_REPO.split("/")[0] + repo = DFNS_REPO.split("/")[1] + fetch_dfns(owner, repo, DFNS_REF, dfns_path, verbose=True) + return dfns_path diff --git a/autotest/dfns/test_dfns.py b/autotest/dfns/test_dfns.py new file mode 100644 index 00000000..6a41c4a4 --- /dev/null +++ b/autotest/dfns/test_dfns.py @@ -0,0 +1,56 @@ +from modflow_devtools.dfns import Dfns +from modflow_devtools.markers import requires_pkg + + +def test_load(dfn_dir): + spec = Dfns.load(dfn_dir) + assert spec.schema_version == "2" + assert spec.root is not None + assert spec.root.name == "sim-nam" + assert len(spec.components) > 100 + assert "sim-nam" in spec.components + assert "gwf-nam" 
in spec.components + assert "gwf-chd" in spec.components + assert "gwf-wel" in spec.components.keys() + assert "garbage" not in spec.components + + gwf_chd = spec.components["gwf-chd"] + assert gwf_chd.name == "gwf-chd" + assert gwf_chd.parent == "gwf-nam" + + sim_children = spec.children("sim-nam") + assert "gwf-nam" in sim_children + + gwf_children = spec.children("gwf-nam") + assert "gwf-chd" in gwf_children + + +def test_load_empty_directory(function_tmpdir): + spec = Dfns.load(function_tmpdir) + assert len(spec.components) == 0 + + +# ============================================================================= +# CLI +# ============================================================================= + + +@requires_pkg("pydantic") +class TestCLI: + def test_main_help(self): + from modflow_devtools.dfns.__main__ import main + + result = main([]) + assert result == 0 + + def test_info_command(self): + from modflow_devtools.dfns.__main__ import main + + result = main(["info"]) + assert result == 0 + + def test_clean_command(self): + from modflow_devtools.dfns.__main__ import main + + result = main(["clean"]) + assert result == 0 diff --git a/autotest/dfns/test_dfns_registry.py b/autotest/dfns/test_dfns_registry.py new file mode 100644 index 00000000..d4bae0db --- /dev/null +++ b/autotest/dfns/test_dfns_registry.py @@ -0,0 +1,202 @@ +import json +from unittest.mock import MagicMock, patch + +import pytest +from flaky import flaky + +from modflow_devtools.dfns.registry import LocalDfnRegistry, RemoteDfnRegistry + + +def test_local_dfn_registry(dfn_dir): + registry = LocalDfnRegistry(path=dfn_dir) + assert registry.path == dfn_dir.resolve() + + spec = registry.spec + assert spec.schema_version == "2" + assert len(spec.components) > 100 + assert "gwf-chd" in spec.components + assert "sim-nam" in spec.components + + dfn = spec.components["gwf-chd"] + assert dfn.name == "gwf-chd" + assert dfn.parent == "gwf-nam" + + path = registry.get_path("gwf-chd") + assert 
path.exists() + assert path.name == "gwf-chd.dfn" + + with pytest.raises(FileNotFoundError, match="nonexistent"): + registry.get_path("nonexistent") + + +def test_remote_dfn_registry_init(): + release_id = "MODFLOW-ORG/modflow6@6.6.0" + registry = RemoteDfnRegistry(release_id=release_id) + assert registry.release_id == release_id + + cache_dir = registry.cache_path + assert "modflow6" in str(cache_dir) + assert "6.6.0" in str(cache_dir) + + +def test_latest_tag_exact_tag(): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@6.6.0") + assert registry.latest_tag() == "6.6.0" + assert registry._latest is None # no network call, nothing cached + + +def test_latest_tag_resolves_via_api(): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps({"tag_name": "v6.6.1"}).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch( + "modflow_devtools.dfns.registry.urllib.request.urlopen", + return_value=mock_response, + ) as mock_open: + tag = registry.latest_tag() + + assert tag == "v6.6.1" + assert registry._latest == "v6.6.1" + mock_open.assert_called_once_with( + "https://api.github.com/repos/MODFLOW-ORG/modflow6/releases/latest" + ) + + +def test_latest_tag_cached(): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") + + mock_response = MagicMock() + mock_response.read.return_value = json.dumps({"tag_name": "v6.6.1"}).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch( + "modflow_devtools.dfns.registry.urllib.request.urlopen", + return_value=mock_response, + ) as mock_open: + registry.latest_tag() + registry.latest_tag() + + mock_open.assert_called_once() # second call uses cached _latest + + +def test_cache_path_latest_uses_resolved_tag(): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") 
+ + mock_response = MagicMock() + mock_response.read.return_value = json.dumps({"tag_name": "v6.6.1"}).encode() + mock_response.__enter__ = lambda s: s + mock_response.__exit__ = MagicMock(return_value=False) + + with patch("modflow_devtools.dfns.registry.urllib.request.urlopen", return_value=mock_response): + cache_dir = registry.cache_path + + assert "latest" not in str(cache_dir) + assert "v6.6.1" in str(cache_dir) + assert "modflow6" in str(cache_dir) + + +def test_cached_tag_exact_not_cached(tmp_path): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@6.6.0") + with patch.object( + type(registry), + "cache_path", + new_callable=lambda: property(lambda self: tmp_path / "empty"), + ): + assert registry.cached_tag() is None + + +def test_cached_tag_exact_cached(tmp_path): + cache_dir = tmp_path / "populated" + cache_dir.mkdir() + (cache_dir / "gwf-chd.toml").write_text("name = 'gwf-chd'") + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@6.6.0") + with patch.object( + type(registry), + "cache_path", + new_callable=lambda: property(lambda self: cache_dir), + ): + assert registry.cached_tag() == "6.6.0" + + +def test_cached_tag_latest_not_cached(tmp_path): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") + with patch.object(RemoteDfnRegistry, "base_cache_path", return_value=tmp_path): + assert registry.cached_tag() is None + + +def test_cached_tag_latest_cached(tmp_path): + repo_cache = tmp_path / "MODFLOW-ORG" / "modflow6" + tag_dir = repo_cache / "6.7.0" + tag_dir.mkdir(parents=True) + (tag_dir / "gwf-chd.toml").write_text("name = 'gwf-chd'") + + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") + with patch.object(RemoteDfnRegistry, "base_cache_path", return_value=tmp_path): + assert registry.cached_tag() == "6.7.0" + + +def test_cmd_info_no_network(tmp_path, capsys): + from modflow_devtools.dfns.__main__ import cmd_info + + repo_cache = tmp_path / "MODFLOW-ORG" / "modflow6" + tag_dir = 
repo_cache / "6.7.0" + tag_dir.mkdir(parents=True) + (tag_dir / "gwf-chd.toml").write_text("name = 'gwf-chd'") + + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") + + with ( + patch.object( + RemoteDfnRegistry, + "load_default", + return_value={"MODFLOW-ORG/modflow6@latest": registry}, + ), + patch.object(RemoteDfnRegistry, "base_cache_path", return_value=tmp_path), + patch("modflow_devtools.dfns.registry.urllib.request.urlopen") as mock_open, + ): + import argparse + + result = cmd_info(argparse.Namespace()) + + mock_open.assert_not_called() + assert result == 0 + out = capsys.readouterr().out + assert "Cached" in out + assert "6.7.0" in out + assert "latest" in out + + +@pytest.mark.skip(reason="Requires network access to GitHub API") +@flaky(max_runs=3, min_passes=1) +def test_latest_tag_live(): + registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@latest") + tag = registry.latest_tag() + assert tag.startswith("v") or tag[0].isdigit() + assert registry._latest == tag + + +@pytest.mark.skip(reason="Requires dfns.zip release asset on GitHub") +@flaky(max_runs=3, min_passes=1) +def test_remote_dfn_registry_sync(): + release_id = "MODFLOW-ORG/modflow6@6.6.0" + registry = RemoteDfnRegistry(release_id=release_id) + registry.sync(force=True) + + cache_dir = registry.cache_path + assert cache_dir.exists() + assert any(cache_dir.iterdir()) + + path = registry.get_path("gwf-chd") + assert path.exists() + + spec = registry.spec + assert "gwf-chd" in spec.components + assert "sim-nam" in spec.components + + dfn = spec.components["gwf-chd"] + assert dfn.name == "gwf-chd" diff --git a/autotest/dfns/test_dfns_schema.py b/autotest/dfns/test_dfns_schema.py new file mode 100644 index 00000000..be7cea81 --- /dev/null +++ b/autotest/dfns/test_dfns_schema.py @@ -0,0 +1,1328 @@ +import ast + +import pytest + +from modflow_devtools.dfn import schema as v1 +from modflow_devtools.dfns import Dfns +from modflow_devtools.dfns.mapper import map as map_v2 
+from modflow_devtools.dfns.schema import ( + Array, + Block, + Dim, + Double, + FieldBase, + Integer, + Keyword, + List, + Model, + Package, + Record, + Simulation, + String, + _names_in_expr, + _resolve_derived_dims, + _validate_fk_fields, + _validate_shape_element, + _validate_sum_call, +) + + +def _v1_field(**kwargs) -> v1.Field: + base: dict = { + "name": "test_field", + "type": "keyword", + "block": "options", + "in_record": False, + "default": None, + "longname": None, + "description": None, + "optional": False, + "developmode": False, + "shape": None, + "valid": None, + "netcdf": False, + "tagged": False, + } + base.update(kwargs) + return v1.Field(**base) + + +def _v1_dfn(**kwargs) -> v1.Dfn: + base: dict = { + "schema_version": "1", + "name": "test-dfn", + "parent": None, + "blocks": None, + "advanced": False, + "multi": False, + "subcomponents": None, + } + base.update(kwargs) + return v1.Dfn(**base) + + +def _dim_block(*names: str) -> Block: + return Block( + name="dimensions", + fields={n: Integer(name=n) for n in names}, + ) + + +def _pkg(name: str, blocks=None, dims=None, parent=None, **kw) -> Package: + return Package(name=name, blocks=blocks, dims=dims, parent=parent, **kw) + + +def test_fieldv2_from_dict(): + d = { + "name": "test_field", + "type": "keyword", + "extra_key": "should be allowed", + "another_extra": 123, + } + f = FieldBase.from_dict(d) + assert f.name == "test_field" + assert f.type == "keyword" + assert isinstance(f, Keyword) + + +def test_fieldv2_from_dict_strict(): + d = { + "name": "test_field", + "type": "keyword", + "extra_key": "should cause error", + } + with pytest.raises(ValueError, match="Unrecognized keys in field data"): + FieldBase.from_dict(d, strict=True) + + +def test_fieldv2_from_dict_roundtrip(): + i = Integer( + name="nper", + description="number of stress periods", + optional=False, + ) + d = i.model_dump() + f = FieldBase.from_dict(d) + assert isinstance(f, Integer) + assert f.name == i.name + assert f.type == 
i.type + assert f.description == i.description + assert f.optional == i.optional + + +def test_map_v2(): + dfn = _v1_dfn(name="sim-nam") + result = map_v2(dfn) + assert isinstance(result, Simulation) + + dfn = _v1_dfn(name="gwf-nam") + result = map_v2(dfn) + assert isinstance(result, Model) + + dfn = _v1_dfn(name="sln-ims") + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "solution" + + dfn = _v1_dfn(name="exg-gwfgwf") + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "exchange" + + dfn = _v1_dfn(name="utl-obs") + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "utility" + + dfn = _v1_dfn(name="gwf-sfr", advanced=True) + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "advanced" + + +def test_map_v2_field_conversion(): + dfn = _v1_dfn( + name="test-dfn", + blocks={ + "options": { + "save_flows": _v1_field( + name="save_flows", + type="keyword", + block="options", + description="save calculated flows", + tagged=True, + in_record=False, + ), + "some_float": _v1_field( + name="some_float", + type="double precision", + block="options", + description="a floating point value", + ), + } + }, + ) + + component = map_v2(dfn) + assert component.blocks is not None + assert "options" in component.blocks + + options = component.blocks["options"].fields + assert "save_flows" in options + + save_flows = options["save_flows"] + assert isinstance(save_flows, Keyword) + assert isinstance(save_flows, FieldBase) + assert save_flows.name == "save_flows" + assert save_flows.type == "keyword" + assert save_flows.description == "save calculated flows" + assert not hasattr(save_flows, "in_record") + assert not hasattr(save_flows, "reader") + + some_float = options["some_float"] + assert isinstance(some_float, Double) + assert some_float.name == "some_float" + assert some_float.type == "double" + assert some_float.description == "a floating 
point value" + + +def test_map_v2_period_block_conversion(): + dfn = _v1_dfn( + name="test-pkg", + blocks={ + "period": { + "stress_period_data": _v1_field( + name="stress_period_data", + type="recarray cellid q", + block="period", + description="stress period data", + shape="(maxbound)", + ), + "cellid": _v1_field( + name="cellid", + type="integer", + block="period", + shape="(ncelldim)", + in_record=True, + ), + "q": _v1_field( + name="q", + type="double precision", + block="period", + in_record=True, + ), + } + }, + ) + + component = map_v2(dfn) + assert component.blocks is not None + for block in component.blocks.values(): + for f in block.fields.values(): + assert isinstance(f, FieldBase) + if f.children: + for child in f.children.values(): + assert isinstance(child, FieldBase) + + period_fields = component.blocks["period"].fields + assert "stress_period_data" in period_fields + spd = period_fields["stress_period_data"] + assert isinstance(spd, List) + assert isinstance(spd.item, Record) + item_fields = spd.item.fields + assert "cellid" in item_fields + assert "q" in item_fields + + +def test_map_v2_record_conversion(): + """Record type with multiple scalar fields.""" + dfn = _v1_dfn( + name="test-dfn", + blocks={ + "options": { + "auxrecord": _v1_field( + name="auxrecord", + type="record auxiliary auxname", + block="options", + in_record=False, + ), + "auxiliary": _v1_field( + name="auxiliary", + type="keyword", + block="options", + in_record=True, + ), + "auxname": _v1_field( + name="auxname", + type="string", + block="options", + in_record=True, + ), + } + }, + ) + + component = map_v2(dfn) + auxrecord = component.blocks["options"].fields["auxrecord"] + assert isinstance(auxrecord, Record) + assert auxrecord.type == "record" + assert auxrecord.children is not None + assert "auxiliary" in auxrecord.children + assert "auxname" in auxrecord.children + assert isinstance(auxrecord.children["auxiliary"], Keyword) + assert isinstance(auxrecord.children["auxname"], 
String) + + +def test_keystring_type_conversion(): + """Filerecord (two keywords + a filename string) maps to a Record of fields.""" + dfn = _v1_dfn( + name="test-dfn", + blocks={ + "options": { + "obs_filerecord": _v1_field( + name="obs_filerecord", + type="record obs6 filein obs6_filename", + block="options", + tagged=True, + ), + "obs6": _v1_field( + name="obs6", + type="keyword", + block="options", + in_record=True, + ), + "filein": _v1_field( + name="filein", + type="keyword", + block="options", + in_record=True, + ), + "obs6_filename": _v1_field( + name="obs6_filename", + type="string", + block="options", + in_record=True, + preserve_case=True, + ), + } + }, + ) + + component = map_v2(dfn) + obs_rec = component.blocks["options"].fields["obs_filerecord"] + assert isinstance(obs_rec, Record) + assert obs_rec.type == "record" + assert obs_rec.children is not None + assert all(isinstance(child, FieldBase) for child in obs_rec.children.values()) + + +def test_local_dims(): + # dims section populated → local_dims returns those names + block = _dim_block("nlay", "nrow", "ncol") + pkg = Package( + name="gwf-dis", + blocks={"dimensions": block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + }, + ) + spec = Dfns(components={"gwf-dis": pkg}) + assert spec.local_dims("gwf-dis") == {"nlay", "nrow", "ncol"} + + # no dims section → empty + pkg2 = Package(name="gwf-chd", blocks=None, dims=None) + spec2 = Dfns(components={"gwf-chd": pkg2}) + assert spec2.local_dims("gwf-chd") == set() + + # derived dims also included + pkg3 = Package( + name="test", + blocks=None, + dims={"nodes": Dim(expr="42", scope="component")}, + ) + spec3 = Dfns(components={"test": pkg3}) + assert spec3.local_dims("test") == {"nodes"} + + +def test_names_in_expr_simple_arithmetic(): + assert _names_in_expr("nlay * nrow * ncol") == {"nlay", "nrow", "ncol"} + + +def test_names_in_expr_single_name(): + assert _names_in_expr("nodes") == {"nodes"} + + 
+def test_names_in_expr_excludes_sum_internals(): + names = _names_in_expr("sum(packagedata.nlakeconn)") + assert "packagedata" not in names + assert "nlakeconn" not in names + + +def test_names_in_expr_mixed_sum_and_arithmetic(): + names = _names_in_expr("nlay * nrow + sum(packagedata.nlakeconn)") + assert names == {"nlay", "nrow"} + + +def test_names_in_expr_excludes_sum_func_name_itself(): + names = _names_in_expr("sum(list.col)") + assert "sum" not in names + + +def test_names_in_expr_invalid_syntax(): + with pytest.raises(ValueError, match="Invalid expression"): + _names_in_expr("nlay * (") + + +def _make_sum_call(expr: str): + tree = ast.parse(expr, mode="eval") + for node in ast.walk(tree): + if isinstance(node, ast.Call): + return node + raise AssertionError("No Call node found") + + +def _pkg_with_list(list_field_name: str, col_name: str, col_type=None) -> Package: + col = (col_type or Integer)(name=col_name) + item = Record(name="item", fields={col_name: col}) + lst = List(name=list_field_name, item=item) + block = Block(name=list_field_name, fields={list_field_name: lst}) + return _pkg("test", blocks={list_field_name: block}) + + +def test_validate_sum_expr(): + pkg = _pkg_with_list("packagedata", "nlakeconn") + call = _make_sum_call("sum(packagedata.nlakeconn)") + _validate_sum_call(call, pkg, "sum(packagedata.nlakeconn)") + + # fully qualified + pkg = _pkg_with_list("packagedata", "nlakeconn") + call = _make_sum_call("sum(packagedata.packagedata.nlakeconn)") + _validate_sum_call(call, pkg, "sum(packagedata.packagedata.nlakeconn)") + + # unrecognized + pkg = _pkg("test", blocks=None) + call = _make_sum_call("sum(nolist.col)") + with pytest.raises(ValueError, match="unknown list field"): + _validate_sum_call(call, pkg, "sum(nolist.col)") + + pkg = _pkg_with_list("packagedata", "nlakeconn") + call = _make_sum_call("sum(wrongblock.packagedata.nlakeconn)") + with pytest.raises(ValueError, match="block qualifier"): + _validate_sum_call(call, pkg, 
"sum(wrongblock.packagedata.nlakeconn)") + + pkg = _pkg_with_list("packagedata", "name", col_type=String) + call = _make_sum_call("sum(packagedata.name)") + with pytest.raises(ValueError, match="must be Integer"): + _validate_sum_call(call, pkg, "sum(packagedata.name)") + + pkg = _pkg_with_list("packagedata", "nlakeconn") + call = _make_sum_call("sum(packagedata.nosuchcol)") + with pytest.raises(ValueError, match="not found"): + _validate_sum_call(call, pkg, "sum(packagedata.nosuchcol)") + + +def test_resolve_derived_dims(): + block = _dim_block("nlay", "nrow", "ncol") + pkg = Package( + name="test", + blocks={"dimensions": block}, + dims={ + "nlay": Dim(field="nlay", scope="component"), + "nrow": Dim(field="nrow", scope="component"), + "ncol": Dim(field="ncol", scope="component"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="component"), + }, + ) + order = _resolve_derived_dims(pkg, {"nlay", "nrow", "ncol"}) + assert order == ["nodes"] + + pkg = Package( + name="test", + blocks={"dimensions": block}, + dims={ + "nlay": Dim(field="nlay", scope="component"), + "nrow": Dim(field="nrow", scope="component"), + "ncol": Dim(field="ncol", scope="component"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="component"), + "nodouble": Dim(expr="nodes * 2", scope="component"), + }, + ) + order = _resolve_derived_dims(pkg, {"nlay", "nrow", "ncol"}) + assert order.index("nodes") < order.index("nodouble") + + pkg = Package( + name="test", + blocks=None, + dims={"derived": Dim(expr="nodes + 1", scope="component")}, + ) + order = _resolve_derived_dims(pkg, {"nodes"}) + assert order == ["derived"] + + +def test_resolve_derived_dims_sum_operand_allowed(): + pkg = _pkg_with_list("packagedata", "nlakeconn") + pkg = Package( + name="test", + blocks=pkg.blocks, + dims={"total_conn": Dim(expr="sum(packagedata.nlakeconn)", scope="component")}, + ) + order = _resolve_derived_dims(pkg, set()) + assert order == ["total_conn"] + + +def 
test_resolve_derived_dims_no_derived_returns_empty(): + pkg = Package(name="test", blocks=None, dims=None) + assert _resolve_derived_dims(pkg, set()) == [] + + +def test_resolve_derived_dims_cycle_error(): + pkg = Package( + name="test", + blocks=None, + dims={ + "a": Dim(expr="b + 1", scope="component"), + "b": Dim(expr="a + 1", scope="component"), + }, + ) + with pytest.raises(ValueError, match="Cycle in"): + _resolve_derived_dims(pkg, set()) + + +def test_resolve_derived_dims_unknown_operand_error(): + pkg = Package( + name="test", + blocks=None, + dims={"nodes": Dim(expr="mystery_dim * 2", scope="component")}, + ) + with pytest.raises(ValueError, match="not a known dimension"): + _resolve_derived_dims(pkg, set()) + + +def test_resolve_derived_dims_invalid_expression_error(): + pkg = Package( + name="test", + blocks=None, + dims={"nodes": Dim(expr="nlay * (", scope="component")}, + ) + with pytest.raises(ValueError, match="Invalid"): + _resolve_derived_dims(pkg, set()) + + +def test_dfnspec_construction_validates_dims(): + block = _dim_block("nlay", "nrow", "ncol") + pkg = Package( + name="gwf-dis", + blocks={"dimensions": block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="model"), + }, + ) + spec = Dfns(components={"gwf-dis": pkg}) + assert "gwf-dis" in spec.components + + +def test_dfnspec_construction_cycle_raises(): + pkg = Package( + name="bad", + blocks=None, + dims={ + "a": Dim(expr="b + 1", scope="component"), + "b": Dim(expr="a + 1", scope="component"), + }, + ) + with pytest.raises(ValueError, match="Cycle in"): + Dfns(components={"bad": pkg}) + + +def test_dfnspec_construction_unknown_operand_raises(): + pkg = Package( + name="bad", + blocks=None, + dims={"nodes": Dim(expr="ghost_dim * 2", scope="component")}, + ) + with pytest.raises(ValueError, match="not a known dimension"): + Dfns(components={"bad": 
pkg}) + + +def test_dfnspec_no_dims_constructs_fine(): + pkg = Package(name="gwf-chd", blocks=None, dims=None) + spec = Dfns(components={"gwf-chd": pkg}) + assert "gwf-chd" in spec.components + + +# ============================================================================= +# Dfns.local_dims / Dfns.inherited_dims +# ============================================================================= + + +def test_dfnspec_local_dims(): + block = _dim_block("nlay", "nrow", "ncol") + pkg = Package( + name="gwf-dis", + blocks={"dimensions": block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + }, + ) + spec = Dfns(components={"gwf-dis": pkg}) + assert spec.local_dims("gwf-dis") == {"nlay", "nrow", "ncol"} + + +def test_dfnspec_local_dims_empty(): + pkg = Package(name="gwf-chd", blocks=None, dims=None) + spec = Dfns(components={"gwf-chd": pkg}) + assert spec.local_dims("gwf-chd") == set() + + +def test_dfnspec_inherited_dims_includes_dis_dims(): + dis_block = _dim_block("nlay", "nrow", "ncol") + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="model"), + }, + ) + chd = _pkg("gwf-chd", parent="gwf-nam", blocks=None) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-dis": dis, "gwf-chd": chd}) + + inherited = spec.inherited_dims("gwf-chd") + assert "nlay" in inherited + assert "nrow" in inherited + assert "ncol" in inherited + assert "nodes" in inherited # derived dim from gwf-dis, model-scoped + + +def test_dfnspec_inherited_dims_disv(): + disv_block = _dim_block("nlay", "ncpl") + disv = Package( + name="gwf-disv", + parent="gwf-nam", + blocks={"dimensions": disv_block}, + dims={ + "nlay": Dim(field="nlay", 
scope="model"), + "ncpl": Dim(field="ncpl", scope="model"), + }, + ) + chd = _pkg("gwf-chd", parent="gwf-nam", blocks=None) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-disv": disv, "gwf-chd": chd}) + + inherited = spec.inherited_dims("gwf-chd") + assert "nlay" in inherited + assert "ncpl" in inherited + + +def test_dfnspec_inherited_dims_disu(): + disu_block = _dim_block("nodes", "nja") + disu = Package( + name="gwf-disu", + parent="gwf-nam", + blocks={"dimensions": disu_block}, + dims={ + "nodes": Dim(field="nodes", scope="model"), + "nja": Dim(field="nja", scope="model"), + }, + ) + chd = _pkg("gwf-chd", parent="gwf-nam", blocks=None) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-disu": disu, "gwf-chd": chd}) + + inherited = spec.inherited_dims("gwf-chd") + assert "nodes" in inherited + assert "nja" in inherited + + +def test_dfnspec_inherited_dims_excludes_own(): + """Own dims appear in local_dims but not in inherited_dims.""" + dis_block = _dim_block("nlay", "nrow", "ncol") + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + }, + ) + chd = Package( + name="gwf-chd", + parent="gwf-nam", + blocks={"dimensions": _dim_block("secret_dim")}, + dims={"secret_dim": Dim(field="secret_dim", scope="model")}, + ) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-dis": dis, "gwf-chd": chd}) + + inherited = spec.inherited_dims("gwf-chd") + assert "nlay" in inherited + assert "secret_dim" not in inherited # own dim: not in inherited_dims + + +# ============================================================================= +# dfns.schema.v2 — DfnSpec Mapping protocol +# ============================================================================= + + +def 
test_dfnspec_components_getitem(): + pkg = _pkg("gwf-chd", parent="gwf-nam") + spec = Dfns(components={"gwf-chd": pkg}) + assert spec.components["gwf-chd"] is pkg + + +def test_dfnspec_components_iter(): + pkg = _pkg("gwf-chd", parent="gwf-nam") + spec = Dfns(components={"gwf-chd": pkg}) + assert list(spec.components) == ["gwf-chd"] + + +def test_dfnspec_components_len(): + pkgs = {f"gwf-p{i}": _pkg(f"gwf-p{i}") for i in range(3)} + spec = Dfns(components=pkgs) + assert len(spec.components) == 3 + + +def test_dfnspec_components_contains(): + pkg = _pkg("gwf-chd") + spec = Dfns(components={"gwf-chd": pkg}) + assert "gwf-chd" in spec.components + assert "gwf-rch" not in spec.components + + +# ============================================================================= +# dfns.schema.v2 — DfnSpec.schema_version +# ============================================================================= + + +def test_dfnspec_schema_version_from_component(): + pkg = Package(name="gwf-chd", schema_version="2") + spec = Dfns(components={"gwf-chd": pkg}) + assert spec.schema_version == "2" + + +def test_dfnspec_schema_version_default(): + pkg = _pkg("gwf-chd") + spec = Dfns(components={"gwf-chd": pkg}) + assert spec.schema_version == "2" + + +# ============================================================================= +# dfns.schema.v2 — DfnSpec.children_of +# ============================================================================= + + +def test_dfnspec_children_of(): + gwf = Model(name="gwf-nam", blocks=None) + chd = _pkg("gwf-chd", parent="gwf-nam") + rch = _pkg("gwf-rch", parent="gwf-nam") + sim = Simulation(name="sim-nam", blocks=None) + spec = Dfns(components={"sim-nam": sim, "gwf-nam": gwf, "gwf-chd": chd, "gwf-rch": rch}) + children = spec.children("gwf-nam") + assert set(children) == {"gwf-chd", "gwf-rch"} + + +def test_dfnspec_children_of_empty(): + pkg = _pkg("gwf-chd", parent="gwf-nam") + spec = Dfns(components={"gwf-chd": pkg}) + assert spec.children("gwf-chd") == 
{} + + +# ============================================================================= +# dfns.schema.v2 — Dfns.dims +# ============================================================================= + + +def _dis_spec() -> Dfns: + """A minimal gwf-dis + gwf-nam DfnSpec used as shared fixture scaffolding.""" + dis_block = _dim_block("nlay", "nrow", "ncol") + gwf = Model(name="gwf-nam", blocks=None) + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="model"), + }, + ) + return Dfns(components={"gwf-nam": gwf, "gwf-dis": dis}) + + +def _lake_spec(period_item: Record) -> Dfns: + """ + DfnSpec with a gwf-lak that has a packagedata list block and a + period list block whose item is `period_item`. + """ + nlakeconn = Integer(name="nlakeconn") + lakeno_pk = Integer(name="lakeno", pk=True) + pkg_item = Record(name="item", fields={"lakeno": lakeno_pk, "nlakeconn": nlakeconn}) + pkg_list = List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", fields={"packagedata": pkg_list}) + period_list = List(name="period", item=period_item) + period_block = Block(name="period", fields={"period": period_list}) + gwf = Model(name="gwf-nam", blocks=None) + lak = Package( + name="gwf-lak", + parent="gwf-nam", + blocks={"packagedata": pkg_block, "period": period_block}, + ) + return Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + + +def test_dims_includes_own(): + spec = _dis_spec() + known = spec.dims("gwf-dis") + assert {"nlay", "nrow", "ncol", "nodes"} <= known + + +def test_dims_includes_derived(): + dis_block = _dim_block("nlay", "nrow", "ncol") + gwf = Model(name="gwf-nam", blocks=None) + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + 
"nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="model"), + }, + ) + spec = Dfns(components={"gwf-nam": gwf, "gwf-dis": dis}) + known = spec.dims("gwf-dis") + assert "nodes" in known + + +def test_dims_includes_model_scoped(): + """A gwf-chd component inherits model-scoped dims from gwf-dis.""" + spec = _dis_spec() + chd = _pkg("gwf-chd", parent="gwf-nam") + spec2 = Dfns(components=dict(spec.components) | {"gwf-chd": chd}) + known = spec2.dims("gwf-chd") + assert "nodes" in known # derived dim from gwf-dis, scope="model" + assert "nlay" in known # field-backed dim from gwf-dis, scope="model" + + +# ============================================================================= +# dfns.schema.v2 — _validate_shape_element: dim reference +# ============================================================================= + + +def _make_ctx(dim_names: set[str], derived: dict | None = None): + """Return (array, component, known_dims) for shape element tests.""" + dims: dict[str, Dim] = {n: Dim(field=n, scope="component") for n in dim_names} + if derived: + dims.update({n: Dim(expr=e, scope="component") for n, e in derived.items()}) + blocks = {"dimensions": _dim_block(*dim_names)} if dim_names else None + pkg = Package(name="test", blocks=blocks, dims=dims or None) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "test": pkg}) + known = spec.dims("test") + arr = Array(name="arr", dtype="double", shape=[]) + return arr, pkg, known + + +def test_shape_element_valid_explicit_dim(): + arr, pkg, known = _make_ctx({"nlay", "nrow", "ncol"}) + _validate_shape_element("nlay", arr, pkg, None, known) # no error + + +def test_shape_element_valid_inherited_dim(): + """A dim declared in a sibling component (model-scoped) is valid.""" + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks=None, + dims={"nodes": Dim(expr="42", scope="model")}, + ) + test_pkg = 
Package(name="gwf-test", parent="gwf-nam", blocks=None) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-dis": dis, "gwf-test": test_pkg}) + known = spec.dims("gwf-test") + arr = Array(name="arr", dtype="double", shape=[]) + _validate_shape_element("nodes", arr, test_pkg, None, known) + + +def test_shape_element_valid_derived_dim(): + arr, pkg, known = _make_ctx({"nlay", "nrow", "ncol"}, derived={"nodes": "nlay * nrow * ncol"}) + _validate_shape_element("nodes", arr, pkg, None, known) + + +def test_shape_element_unknown_dim_raises(): + arr, pkg, known = _make_ctx({"nlay"}) + with pytest.raises(ValueError, match="does not resolve"): + _validate_shape_element("mystery", arr, pkg, None, known) + + +def test_shape_element_invalid_syntax_raises(): + arr, pkg, known = _make_ctx({"nlay"}) + with pytest.raises(ValueError, match="invalid shape element"): + _validate_shape_element("123bad", arr, pkg, None, known) + + +def test_shape_element_empty_string_raises(): + arr, pkg, known = _make_ctx({"nlay"}) + with pytest.raises(ValueError, match="invalid shape element"): + _validate_shape_element("", arr, pkg, None, known) + + +# ============================================================================= +# dfns.schema.v2 — _validate_shape_element: row-level lookup +# ============================================================================= + + +def _lookup_ctx(): + """ + Returns (array, enclosing_record, component, known_dims) for a valid + row-level lookup scenario mirroring the gwf-lak period table. + + packagedata block has a List with item Record(lakeno pk, nlakeconn int). + The array lives inside a Record with sibling lakeno(fk='packagedata'). 
+ """ + nlakeconn = Integer(name="nlakeconn") + lakeno_pk = Integer(name="lakeno", pk=True) + pkg_item = Record(name="item", fields={"lakeno": lakeno_pk, "nlakeconn": nlakeconn}) + pkg_list = List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", fields={"packagedata": pkg_list}) + + fk_lakeno = Integer(name="lakeno", fk="packagedata") + arr = Array(name="outflow", dtype="double", shape=[]) + enc_record = Record(name="item", fields={"lakeno": fk_lakeno, "outflow": arr}) + + lak = Package( + name="gwf-lak", + parent="gwf-nam", + blocks={"packagedata": pkg_block}, + ) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + known = spec.dims("gwf-lak") + return arr, enc_record, lak, known + + +def test_shape_element_valid_row_level_lookup(): + arr, enc, pkg, known = _lookup_ctx() + _validate_shape_element("packagedata.nlakeconn(lakeno)", arr, pkg, enc, known) + + +def test_shape_element_lookup_on_top_level_array_raises(): + arr, _enc, pkg, known = _lookup_ctx() + with pytest.raises(ValueError, match="not inside a record"): + _validate_shape_element("packagedata.nlakeconn(lakeno)", arr, pkg, None, known) + + +def test_shape_element_lookup_unknown_list_block_raises(): + arr, enc, pkg, known = _lookup_ctx() + with pytest.raises(ValueError, match="not a list block"): + _validate_shape_element("noblock.nlakeconn(lakeno)", arr, pkg, enc, known) + + +def test_shape_element_lookup_unknown_column_raises(): + arr, enc, pkg, known = _lookup_ctx() + with pytest.raises(ValueError, match="is not a field"): + _validate_shape_element("packagedata.nocol(lakeno)", arr, pkg, enc, known) + + +def test_shape_element_lookup_non_integer_column_raises(): + nlakeconn = String(name="nlakeconn") + lakeno_pk = Integer(name="lakeno", pk=True) + pkg_item = Record(name="item", fields={"lakeno": lakeno_pk, "nlakeconn": nlakeconn}) + pkg_list = List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", 
fields={"packagedata": pkg_list}) + fk_lakeno = Integer(name="lakeno", fk="packagedata") + arr = Array(name="outflow", dtype="double", shape=[]) + enc = Record(name="item", fields={"lakeno": fk_lakeno, "outflow": arr}) + lak = Package(name="gwf-lak", parent="gwf-nam", blocks={"packagedata": pkg_block}) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + known = spec.dims("gwf-lak") + with pytest.raises(ValueError, match="must be Integer"): + _validate_shape_element("packagedata.nlakeconn(lakeno)", arr, lak, enc, known) + + +def test_shape_element_lookup_missing_fk_sibling_raises(): + arr, enc, pkg, known = _lookup_ctx() + with pytest.raises(ValueError, match="not a sibling field"): + _validate_shape_element("packagedata.nlakeconn(nosuchfield)", arr, pkg, enc, known) + + +def test_shape_element_lookup_fk_not_set_raises(): + nlakeconn = Integer(name="nlakeconn") + lakeno_pk = Integer(name="lakeno", pk=True) + pkg_item = Record(name="item", fields={"lakeno": lakeno_pk, "nlakeconn": nlakeconn}) + pkg_list = List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", fields={"packagedata": pkg_list}) + no_fk_lakeno = Integer(name="lakeno") # fk=None + arr = Array(name="outflow", dtype="double", shape=[]) + enc = Record(name="item", fields={"lakeno": no_fk_lakeno, "outflow": arr}) + lak = Package(name="gwf-lak", parent="gwf-nam", blocks={"packagedata": pkg_block}) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + known = spec.dims("gwf-lak") + with pytest.raises(ValueError, match=r"\.fk is not set"): + _validate_shape_element("packagedata.nlakeconn(lakeno)", arr, lak, enc, known) + + +def test_shape_element_lookup_fk_block_mismatch_raises(): + nlakeconn = Integer(name="nlakeconn") + lakeno_pk = Integer(name="lakeno", pk=True) + pkg_item = Record(name="item", fields={"lakeno": lakeno_pk, "nlakeconn": nlakeconn}) + pkg_list = 
List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", fields={"packagedata": pkg_list}) + fk_lakeno = Integer(name="lakeno", fk="otherblock") # fk → wrong block + arr = Array(name="outflow", dtype="double", shape=[]) + enc = Record(name="item", fields={"lakeno": fk_lakeno, "outflow": arr}) + lak = Package(name="gwf-lak", parent="gwf-nam", blocks={"packagedata": pkg_block}) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + known = spec.dims("gwf-lak") + with pytest.raises(ValueError, match="does not reference block"): + _validate_shape_element("packagedata.nlakeconn(lakeno)", arr, lak, enc, known) + + +# ============================================================================= +# dfns.schema.v2 — DfnSpec shape validation end-to-end +# ============================================================================= + + +def test_dfnspec_valid_top_level_array_shape(): + dis_block = _dim_block("nlay", "nrow", "ncol") + arr = Array(name="botm", dtype="double", shape=["nlay", "nrow", "ncol"]) + grid_block = Block(name="griddata", fields={"botm": arr}) + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block, "griddata": grid_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + }, + ) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-dis": dis}) + assert "gwf-dis" in spec.components + + +def test_dfnspec_valid_array_in_record(): + dis_block = _dim_block("nlay", "nrow", "ncol") + arr = Array(name="vals", dtype="double", shape=["ncol"]) + rec = Record(name="myrec", fields={"vals": arr}) + opt_block = Block(name="options", fields={"myrec": rec}) + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block, "options": opt_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": 
Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + }, + ) + gwf = Model(name="gwf-nam", blocks=None) + Dfns(components={"gwf-nam": gwf, "gwf-dis": dis}) + + +def test_dfnspec_valid_row_level_lookup_in_list_item(): + nlakeconn = Integer(name="nlakeconn") + lakeno_pk = Integer(name="lakeno", pk=True) + pkg_item = Record(name="item", fields={"lakeno": lakeno_pk, "nlakeconn": nlakeconn}) + pkg_list = List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", fields={"packagedata": pkg_list}) + + fk_lakeno = Integer(name="lakeno", fk="packagedata") + outflow = Array(name="outflow", dtype="double", shape=["packagedata.nlakeconn(lakeno)"]) + period_item = Record(name="item", fields={"lakeno": fk_lakeno, "outflow": outflow}) + period_list = List(name="period", item=period_item) + period_block = Block(name="period", fields={"period": period_list}) + + gwf = Model(name="gwf-nam", blocks=None) + lak = Package( + name="gwf-lak", + parent="gwf-nam", + blocks={"packagedata": pkg_block, "period": period_block}, + ) + Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + + +def test_dfnspec_invalid_array_shape_raises(): + dis_block = _dim_block("nlay", "nrow", "ncol") + arr = Array(name="botm", dtype="double", shape=["nlay", "no_such_dim"]) + grid_block = Block(name="griddata", fields={"botm": arr}) + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block, "griddata": grid_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + }, + ) + gwf = Model(name="gwf-nam", blocks=None) + with pytest.raises(ValueError, match="does not resolve"): + Dfns(components={"gwf-nam": gwf, "gwf-dis": dis}) + + +def test_dfnspec_array_shape_resolves_via_derived_dim(): + dis_block = _dim_block("nlay", "nrow", "ncol") + arr = Array(name="botm", dtype="double", shape=["nodes"]) + grid_block = Block(name="griddata", fields={"botm": 
arr}) + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block, "griddata": grid_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="model"), + }, + ) + gwf = Model(name="gwf-nam", blocks=None) + Dfns(components={"gwf-nam": gwf, "gwf-dis": dis}) + + +def test_dfnspec_array_shape_resolves_via_sibling_dis(): + """An array in gwf-chd can reference nlay and nodes from sibling gwf-dis.""" + dis_block = _dim_block("nlay", "nrow", "ncol") + dis = Package( + name="gwf-dis", + parent="gwf-nam", + blocks={"dimensions": dis_block}, + dims={ + "nlay": Dim(field="nlay", scope="model"), + "nrow": Dim(field="nrow", scope="model"), + "ncol": Dim(field="ncol", scope="model"), + "nodes": Dim(expr="nlay * nrow * ncol", scope="model"), + }, + ) + chd_arr = Array(name="head", dtype="double", shape=["nlay", "nodes"]) + chd_block = Block(name="period", fields={"head": chd_arr}) + chd = Package(name="gwf-chd", parent="gwf-nam", blocks={"period": chd_block}) + gwf = Model(name="gwf-nam", blocks=None) + Dfns(components={"gwf-nam": gwf, "gwf-dis": dis, "gwf-chd": chd}) + + +# ============================================================================= +# dfns.schema.v2 — _validate_fk_fields +# ============================================================================= + + +def _fk_pkg_and_spec(fk_val, pk_on_item=True, fk_ref=None): + """ + Build a Package with a packagedata list block and a period block whose + item record has a lakeno field with fk=fk_val (and optionally fk_ref). 
+ """ + nlakeconn = Integer(name="nlakeconn") + lakeno_item = Integer(name="lakeno", pk=pk_on_item) + pkg_item = Record(name="item", fields={"lakeno": lakeno_item, "nlakeconn": nlakeconn}) + pkg_list = List(name="packagedata", item=pkg_item) + pkg_block = Block(name="packagedata", fields={"packagedata": pkg_list}) + + fk_field = Integer(name="lakeno", fk=fk_val, fk_ref=fk_ref) + period_item = Record(name="item", fields={"lakeno": fk_field}) + period_list = List(name="period", item=period_item) + period_block = Block(name="period", fields={"period": period_list}) + + gwf = Model(name="gwf-nam", blocks=None) + lak = Package( + name="gwf-lak", + parent="gwf-nam", + blocks={"packagedata": pkg_block, "period": period_block}, + ) + return lak, gwf + + +def test_validate_fk_fields_valid(): + lak, gwf = _fk_pkg_and_spec("packagedata", pk_on_item=True) + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + assert "gwf-lak" in spec.components + + +def test_validate_fk_fields_unknown_block_raises(): + lak, gwf = _fk_pkg_and_spec("nosuchblock", pk_on_item=True) + with pytest.raises(ValueError, match="is not a list block"): + Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + + +def test_validate_fk_fields_no_pk_on_item_raises(): + lak, gwf = _fk_pkg_and_spec("packagedata", pk_on_item=False) + with pytest.raises(ValueError, match="has no pk=True field"): + Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + + +def test_validate_fk_fields_fk_ref_valid(): + lak, gwf = _fk_pkg_and_spec("packagedata", pk_on_item=True, fk_ref="gwf-nam") + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + assert "gwf-lak" in spec.components + + +def test_validate_fk_fields_fk_ref_unknown_raises(): + lak, gwf = _fk_pkg_and_spec("packagedata", pk_on_item=True, fk_ref="no-such-comp") + with pytest.raises(ValueError, match="not found in spec"): + Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + + +def test_validate_fk_fields_no_fk_set_passes(): + item = Record(name="item", 
fields={"val": Double(name="val")}) + lst = List(name="data", item=item) + block = Block(name="data", fields={"data": lst}) + pkg = Package(name="gwf-test", blocks={"data": block}) + gwf = Model(name="gwf-nam", blocks=None) + spec = Dfns(components={"gwf-nam": gwf, "gwf-test": pkg}) + assert "gwf-test" in spec.components + + +def test_validate_fk_fields_called_directly(): + lak, gwf = _fk_pkg_and_spec("packagedata", pk_on_item=True) + spec = Dfns(components={"gwf-nam": gwf, "gwf-lak": lak}) + _validate_fk_fields(lak, spec) # should not raise + + +# ============================================================================= +# dfns.schema.v2 — Block.optional +# ============================================================================= + + +def test_block_optional_all_optional_fields(): + from modflow_devtools.dfns.schema import Keyword + + block = Block( + name="options", + fields={ + "verbose": Keyword(name="verbose", optional=True), + "maxiter": Integer(name="maxiter", optional=True), + }, + ) + assert block.optional is True + + +def test_block_optional_has_required_field(): + block = Block( + name="dimensions", + fields={ + "nlay": Integer(name="nlay"), + "nrow": Integer(name="nrow", optional=True), + }, + ) + assert block.optional is False + + +def test_block_optional_empty_fields(): + block = Block(name="empty", fields={}) + assert block.optional is True + + +# ============================================================================= +# dfns.schema.v2 — Array shape position-based rules +# ============================================================================= + + +def test_top_level_array_empty_shape_valid(): + arr = Array(name="auxiliary", dtype="string", shape=[]) + block = Block(name="options", fields={"auxiliary": arr}) + pkg = Package(name="gwf-test", blocks={"options": block}) + gwf = Model(name="gwf-nam", blocks=None) + Dfns(components={"gwf-nam": gwf, "gwf-test": pkg}) + + +def test_non_rightmost_inline_array_empty_shape_raises(): + arr = 
Array(name="vals", dtype="double", shape=[]) + extra = Integer(name="extra") + rec = Record(name="myrec", fields={"vals": arr, "extra": extra}) + block = Block(name="data", fields={"myrec": rec}) + pkg = Package(name="gwf-test", blocks={"data": block}) + gwf = Model(name="gwf-nam", blocks=None) + with pytest.raises(ValueError, match="rightmost"): + Dfns(components={"gwf-nam": gwf, "gwf-test": pkg}) + + +def test_rightmost_inline_array_empty_shape_valid(): + arr = Array(name="auxvals", dtype="double", shape=[]) + rec = Record(name="myrec", fields={"auxvals": arr}) + block = Block(name="data", fields={"myrec": rec}) + pkg = Package(name="gwf-test", blocks={"data": block}) + gwf = Model(name="gwf-nam", blocks=None) + Dfns(components={"gwf-nam": gwf, "gwf-test": pkg}) + + +def test_rightmost_inline_string_array_empty_shape_valid(): + arr = Array(name="auxname", dtype="string", shape=[]) + rec = Record(name="aux_rec", fields={"auxname": arr}) + block = Block(name="options", fields={"aux_rec": rec}) + pkg = Package(name="gwf-test", blocks={"options": block}) + gwf = Model(name="gwf-nam", blocks=None) + Dfns(components={"gwf-nam": gwf, "gwf-test": pkg}) + + +# ============================================================================= +# dfns.schema.v2 — DfnSpec schema version consistency +# ============================================================================= + + +def test_dfnspec_schema_version_consistency_raises(): + pkg1 = Package(name="gwf-chd", schema_version="2") + pkg2 = Package(name="gwf-wel", schema_version="3") + with pytest.raises(ValueError, match="schema_version"): + Dfns(components={"gwf-chd": pkg1, "gwf-wel": pkg2}) + + +def test_dfnspec_schema_version_consistency_null_ignored(): + pkg1 = Package(name="gwf-chd", schema_version="2") + pkg2 = Package(name="gwf-wel", schema_version=None) + spec = Dfns(components={"gwf-chd": pkg1, "gwf-wel": pkg2}) + assert spec.schema_version == "2" + + +# 
============================================================================= +# dfns.schema.v2 — Bound-annotated shape elements +# ============================================================================= + + +def test_shape_element_bound_lt(): + arr, pkg, known = _make_ctx({"nrow"}) + _validate_shape_element("nrow", arr, pkg, None, known) + + +def test_shape_element_bound_lte(): + arr, pkg, known = _make_ctx({"ncol"}) + _validate_shape_element("<=ncol", arr, pkg, None, known) + + +def test_shape_element_bound_gte(): + arr, pkg, known = _make_ctx({"ncol"}) + _validate_shape_element(">=ncol", arr, pkg, None, known) + + +def test_shape_element_bound_unknown_dim_raises(): + arr, pkg, known = _make_ctx({"nlay"}) + with pytest.raises(ValueError, match="does not resolve"): + _validate_shape_element(" v1.Field: + base: dict = { + "name": "test_field", + "type": "keyword", + "block": "options", + "in_record": False, + "default": None, + "longname": None, + "description": None, + "optional": False, + "developmode": False, + "shape": None, + "valid": None, + "netcdf": False, + "tagged": False, + } + base.update(kwargs) + return v1.Field(**base) + + +def _v1_dfn(**kwargs) -> v1.Dfn: + base: dict = { + "schema_version": "1", + "name": "test-dfn", + "parent": None, + "blocks": None, + "advanced": False, + "multi": False, + "subcomponents": None, + } + base.update(kwargs) + return v1.Dfn(**base) + + +def test_map_sim_nam_returns_simulation(): + dfn = _v1_dfn(name="sim-nam") + result = map_v2(dfn) + assert isinstance(result, Simulation) + + +def test_map_model_returns_model(): + dfn = _v1_dfn(name="gwf-nam") + result = map_v2(dfn) + assert isinstance(result, Model) + + dfn = _v1_dfn(name="gwt-nam") + result = map_v2(dfn) + assert isinstance(result, Model) + + +def test_map_solution_package(): + dfn = _v1_dfn(name="sln-ims") + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "solution" + + +def test_map_exchange_package(): + dfn = 
_v1_dfn(name="exg-gwfgwf") + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "exchange" + + +def test_map_utility_package(): + dfn = _v1_dfn(name="utl-obs") + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "utility" + + +def test_map_advanced_package(): + dfn = _v1_dfn(name="gwf-sfr", advanced=True) + result = map_v2(dfn) + assert isinstance(result, Package) + assert result.subtype == "advanced" + + +def test_map_wrong_schema_version_raises(): + dfn = _v1_dfn(schema_version="2") + with pytest.raises(ValueError, match="schema version"): + map_v2(dfn) + + +def test_map_keyword_field_conversion(): + dfn = _v1_dfn( + name="gwf-chd", + blocks={ + "options": { + "save_flows": _v1_field( + name="save_flows", + type="keyword", + description="save calculated flows", + tagged=True, + in_record=False, + ), + } + }, + ) + component = map_v2(dfn) + assert component.blocks is not None + options = component.blocks["options"].fields + assert "save_flows" in options + field = options["save_flows"] + assert isinstance(field, Keyword) + assert field.name == "save_flows" + assert field.description == "save calculated flows" + + +def test_map_double_precision_field_conversion(): + dfn = _v1_dfn( + name="gwf-chd", + blocks={ + "options": { + "some_float": _v1_field( + name="some_float", + type="double precision", + description="a floating point value", + ), + } + }, + ) + component = map_v2(dfn) + options = component.blocks["options"].fields + field = options["some_float"] + assert isinstance(field, Double) + assert field.type == "double" + + +def test_map_record_conversion(): + dfn = _v1_dfn( + name="test-dfn", + blocks={ + "options": { + "auxrecord": _v1_field( + name="auxrecord", + type="record auxiliary auxname", + in_record=False, + ), + "auxiliary": _v1_field( + name="auxiliary", + type="keyword", + in_record=True, + ), + "auxname": _v1_field( + name="auxname", + type="string", + in_record=True, + ), + } + 
}, + ) + component = map_v2(dfn) + auxrecord = component.blocks["options"].fields["auxrecord"] + assert isinstance(auxrecord, Record) + assert "auxiliary" in auxrecord.children + assert "auxname" in auxrecord.children + assert isinstance(auxrecord.children["auxiliary"], Keyword) + assert isinstance(auxrecord.children["auxname"], String) + + +def test_map_recarray_conversion(): + dfn = _v1_dfn( + name="test-pkg", + blocks={ + "dimensions": { + "maxbound": _v1_field( + name="maxbound", + type="integer", + block="dimensions", + in_record=False, + ), + }, + "period": { + "stress_period_data": _v1_field( + name="stress_period_data", + type="recarray cellid q", + block="period", + shape="(maxbound)", + ), + "cellid": _v1_field( + name="cellid", + type="integer", + block="period", + shape="(ncelldim)", + in_record=True, + ), + "q": _v1_field( + name="q", + type="double precision", + block="period", + in_record=True, + ), + }, + }, + ) + component = map_v2(dfn) + period_fields = component.blocks["period"].fields + spd = period_fields["stress_period_data"] + assert isinstance(spd, List) + assert spd.shape == ["maxbound"] + assert isinstance(spd.item, Record) + assert "cellid" in spd.item.fields + assert "q" in spd.item.fields + + +def test_map_recarray_missing_shape_inferred_from_maxbound(): + """Period list with empty shape gets shape=["maxbound"] when maxbound dim exists.""" + dfn = _v1_dfn( + name="utl-spc", + blocks={ + "dimensions": { + "maxbound": _v1_field( + name="maxbound", + type="integer", + block="dimensions", + in_record=False, + ), + }, + "period": { + "spd": _v1_field( + name="spd", + type="recarray bndno spcsetting", + block="period", + shape="", # empty in v1 + ), + "bndno": _v1_field( + name="bndno", + type="integer", + block="period", + in_record=True, + ), + "spcsetting": _v1_field( + name="spcsetting", + type="keystring concentration", + block="period", + in_record=True, + ), + "concentration": _v1_field( + name="concentration", + type="double 
precision", + block="period", + tagged=True, + in_record=True, + ), + }, + }, + ) + component = map_v2(dfn) + period_fields = component.blocks["period"].fields + spd = period_fields["spd"] + assert isinstance(spd, List) + assert spd.shape == ["maxbound"] + + +def test_map_recarray_no_shape_no_maxbound(): + """Period list with no shape and no maxbound dim keeps shape=[].""" + dfn = _v1_dfn( + name="gwf-sfr", + advanced=True, + blocks={ + "period": { + "perioddata": _v1_field( + name="perioddata", + type="recarray ifno sfrsetting", + block="period", + shape="", + ), + "ifno": _v1_field( + name="ifno", + type="integer", + block="period", + in_record=True, + ), + "sfrsetting": _v1_field( + name="sfrsetting", + type="keystring status", + block="period", + in_record=True, + ), + "status": _v1_field( + name="status", + type="string", + block="period", + tagged=True, + in_record=True, + ), + }, + }, + ) + component = map_v2(dfn) + period_fields = component.blocks["period"].fields + lst = period_fields["perioddata"] + assert isinstance(lst, List) + assert lst.shape == [] diff --git a/autotest/test_dfn.py b/autotest/test_dfn.py deleted file mode 100644 index 72c58d66..00000000 --- a/autotest/test_dfn.py +++ /dev/null @@ -1,62 +0,0 @@ -from pathlib import Path - -import pytest - -from modflow_devtools.dfn import Dfn, get_dfns -from modflow_devtools.dfn2toml import convert -from modflow_devtools.markers import requires_pkg - -PROJ_ROOT = Path(__file__).parents[1] -DFN_DIR = PROJ_ROOT / "autotest" / "temp" / "dfn" -TOML_DIR = DFN_DIR / "toml" -VERSIONS = {1: DFN_DIR, 2: TOML_DIR} -MF6_OWNER = "MODFLOW-ORG" -MF6_REPO = "modflow6" -MF6_REF = "develop" - - -def pytest_generate_tests(metafunc): - if "dfn_name" in metafunc.fixturenames: - if not any(DFN_DIR.glob("*.dfn")): - get_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) - dfn_names = [ - dfn.stem for dfn in DFN_DIR.glob("*.dfn") if dfn.stem not in ["common", "flopy"] - ] - metafunc.parametrize("dfn_name", dfn_names, 
ids=dfn_names) - - if "toml_name" in metafunc.fixturenames: - # Only convert if TOML files don't exist yet (avoid repeated conversions) - dfn_paths = [p for p in DFN_DIR.glob("*.dfn") if p.stem not in ["common", "flopy"]] - if not TOML_DIR.exists() or not all( - (TOML_DIR / f"{dfn.stem}.toml").is_file() for dfn in dfn_paths - ): - convert(DFN_DIR, TOML_DIR) - # Verify all expected TOML files were created - assert all((TOML_DIR / f"{dfn.stem}.toml").is_file() for dfn in dfn_paths) - toml_names = [toml.stem for toml in TOML_DIR.glob("*.toml")] - metafunc.parametrize("toml_name", toml_names, ids=toml_names) - - -@requires_pkg("boltons") -def test_load_v1(dfn_name): - with ( - (DFN_DIR / "common.dfn").open() as common_file, - (DFN_DIR / f"{dfn_name}.dfn").open() as dfn_file, - ): - common, _ = Dfn._load_v1_flat(common_file) - dfn = Dfn.load(dfn_file, name=dfn_name, common=common) - assert any(dfn) - - -@requires_pkg("boltons") -def test_load_v2(toml_name): - with (TOML_DIR / f"{toml_name}.toml").open(mode="rb") as toml_file: - toml = Dfn.load(toml_file, name=toml_name, version=2) - assert any(toml) - - -@requires_pkg("boltons") -@pytest.mark.parametrize("version", list(VERSIONS.keys())) -def test_load_all(version): - dfns = Dfn.load_all(VERSIONS[version], version=version) - assert any(dfns) diff --git a/autotest/test_dfnmap.py b/autotest/test_dfnmap.py new file mode 100644 index 00000000..840a87c4 --- /dev/null +++ b/autotest/test_dfnmap.py @@ -0,0 +1,85 @@ +import json +import tomllib +from pathlib import Path + +import pytest +import yaml + +from modflow_devtools.dfn import Dfn, fetch_dfns +from modflow_devtools.dfnmap import migrate +from modflow_devtools.markers import requires_pkg + +FORMATS = ["yaml", "toml", "json"] +MF6_OWNER = "MODFLOW-ORG" +MF6_REPO = "modflow6" +MF6_REF = "develop" + + +def _load(path: Path, fmt: str) -> dict: + if fmt == "toml": + with path.open("rb") as f: + return tomllib.load(f) + elif fmt == "json": + with path.open() as f: + return 
json.load(f) + else: + with path.open() as f: + return yaml.safe_load(f) + + +@pytest.fixture(scope="module") +def dfn_dir(module_tmpdir): + pytest.importorskip("boltons") + path = module_tmpdir / "dfn" + path.mkdir() + fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, path, verbose=True) + return path + + +@pytest.fixture(scope="module", params=FORMATS) +def converted_v1_1(request, dfn_dir, module_tmpdir): + fmt = request.param + out = module_tmpdir / f"v1.1-{fmt}" + migrate(dfn_dir, out, schema_version="1.1", fmt=fmt) + return out, fmt + + +@pytest.fixture(scope="module", params=FORMATS) +def converted_v2(request, dfn_dir, module_tmpdir): + fmt = request.param + out = module_tmpdir / f"v2-{fmt}" + migrate(dfn_dir, out, schema_version="2", fmt=fmt) + return out, fmt + + +@requires_pkg("boltons") +def test_convert_v1_1(converted_v1_1): + out, fmt = converted_v1_1 + files = list(out.glob(f"*.{fmt}")) + assert files + for p in files: + data = _load(p, fmt) + assert data["name"] == p.stem + assert data["schema_version"] == "1.1" + + +@requires_pkg("boltons") +def test_convert_v2(converted_v2): + out, fmt = converted_v2 + files = list(out.glob(f"*.{fmt}")) + assert files + for p in files: + data = _load(p, fmt) + assert data["name"] == p.stem + assert data["schema_version"] == "2" + + +@requires_pkg("boltons") +def test_roundtrip(converted_v2): + """Verify Dfn.load can read v2-schema files in any format.""" + out, fmt = converted_v2 + mode = "rb" if fmt == "toml" else "r" + for p in out.glob(f"*.{fmt}"): + with p.open(mode) as f: + dfn = Dfn.load(f, name=p.stem, version=fmt) + assert any(dfn) diff --git a/autotest/test_dfns.py b/autotest/test_dfns.py deleted file mode 100644 index af4b8310..00000000 --- a/autotest/test_dfns.py +++ /dev/null @@ -1,616 +0,0 @@ -from dataclasses import asdict -from pathlib import Path - -import pytest -from packaging.version import Version - -from modflow_devtools.dfns import Dfn, _load_common, load, load_flat -from modflow_devtools.dfns.dfn2toml 
import convert, is_valid -from modflow_devtools.dfns.fetch import fetch_dfns -from modflow_devtools.dfns.schema.v1 import FieldV1 -from modflow_devtools.dfns.schema.v2 import FieldV2 -from modflow_devtools.markers import requires_pkg - -PROJ_ROOT = Path(__file__).parents[1] -DFN_DIR = PROJ_ROOT / "autotest" / "temp" / "dfns" -TOML_DIR = DFN_DIR / "toml" -SPEC_DIRS = {1: DFN_DIR, 2: TOML_DIR} -MF6_OWNER = "MODFLOW-ORG" -MF6_REPO = "modflow6" -MF6_REF = "develop" -EMPTY_DFNS = {"exg-gwfgwe", "exg-gwfgwt", "exg-gwfprt", "sln-ems"} - - -def pytest_generate_tests(metafunc): - if "dfn_name" in metafunc.fixturenames: - if not any(DFN_DIR.glob("*.dfn")): - fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) - dfn_names = [ - dfn.stem for dfn in DFN_DIR.glob("*.dfn") if dfn.stem not in ["common", "flopy"] - ] - metafunc.parametrize("dfn_name", dfn_names, ids=dfn_names) - - if "toml_name" in metafunc.fixturenames: - # Only convert if TOML files don't exist yet (avoid repeated conversions) - dfn_paths = [p for p in DFN_DIR.glob("*.dfn") if p.stem not in ["common", "flopy"]] - if not TOML_DIR.exists() or not all( - (TOML_DIR / f"{dfn.stem}.toml").is_file() for dfn in dfn_paths - ): - convert(DFN_DIR, TOML_DIR) - # Verify all expected TOML files were created - assert all((TOML_DIR / f"{dfn.stem}.toml").is_file() for dfn in dfn_paths) - toml_names = [toml.stem for toml in TOML_DIR.glob("*.toml")] - metafunc.parametrize("toml_name", toml_names, ids=toml_names) - - -@requires_pkg("boltons") -def test_load_v1(dfn_name): - with ( - (DFN_DIR / "common.dfn").open() as common_file, - (DFN_DIR / f"{dfn_name}.dfn").open() as dfn_file, - ): - common = _load_common(common_file) - dfn = load(dfn_file, name=dfn_name, format="dfn", common=common) - assert any(dfn.fields) == (dfn.name not in EMPTY_DFNS) - - -@requires_pkg("boltons") -def test_load_v2(toml_name): - with (TOML_DIR / f"{toml_name}.toml").open(mode="rb") as toml_file: - dfn = load(toml_file, name=toml_name, 
format="toml") - assert any(dfn.fields) == (dfn.name not in EMPTY_DFNS) - - -@requires_pkg("boltons") -@pytest.mark.parametrize("schema_version", list(SPEC_DIRS.keys())) -def test_load_all(schema_version): - dfns = load_flat(path=SPEC_DIRS[schema_version]) - for dfn in dfns.values(): - assert any(dfn.fields) == (dfn.name not in EMPTY_DFNS) - - -@requires_pkg("boltons", "tomli") -def test_convert(function_tmpdir): - import tomli - - convert(DFN_DIR, function_tmpdir) - - assert (function_tmpdir / "sim-nam.toml").exists() - assert (function_tmpdir / "gwf-nam.toml").exists() - - with (function_tmpdir / "sim-nam.toml").open("rb") as f: - sim_data = tomli.load(f) - assert sim_data["name"] == "sim-nam" - assert sim_data["schema_version"] == "2" - assert "parent" not in sim_data - - with (function_tmpdir / "gwf-nam.toml").open("rb") as f: - gwf_data = tomli.load(f) - assert gwf_data["name"] == "gwf-nam" - assert gwf_data["parent"] == "sim-nam" - assert gwf_data["schema_version"] == "2" - - dfns = load_flat(function_tmpdir) - roots = [] - for dfn in dfns.values(): - if dfn.parent: - assert dfn.parent in dfns - else: - roots.append(dfn.name) - assert len(roots) == 1 - root = dfns[roots[0]] - assert root.name == "sim-nam" - - models = root.children or {} - for mdl in models: - assert models[mdl].name == mdl - assert models[mdl].parent == "sim-nam" - - if gwf := models.get("gwf-nam", None): - pkgs = gwf.children or {} - pkgs = {k: v for k, v in pkgs.items() if k.startswith("gwf-") and isinstance(v, dict)} - assert len(pkgs) > 0 - if dis := pkgs.get("gwf-dis", None): - assert dis.name == "gwf-dis" - assert dis.parent == "gwf" - assert "options" in (dis.blocks or {}) - assert "dimensions" in (dis.blocks or {}) - - -def test_dfn_from_dict_ignores_extra_keys(): - d = { - "schema_version": Version("2"), - "name": "test-dfn", - "extra_key": "should be allowed", - "another_extra": 123, - } - dfn = Dfn.from_dict(d) - assert dfn.name == "test-dfn" - assert dfn.schema_version == 
Version("2") - - -def test_dfn_from_dict_strict_mode(): - d = { - "schema_version": Version("2"), - "name": "test-dfn", - "extra_key": "should cause error", - } - with pytest.raises(ValueError, match="Unrecognized keys in DFN data"): - Dfn.from_dict(d, strict=True) - - -def test_dfn_from_dict_strict_mode_nested(): - d = { - "schema_version": Version("2"), - "name": "test-dfn", - "blocks": { - "options": { - "test_field": { - "name": "test_field", - "type": "keyword", - "extra_key": "should cause error", - }, - }, - }, - } - with pytest.raises(ValueError, match="Unrecognized keys in field data"): - Dfn.from_dict(d, strict=True) - - -def test_dfn_from_dict_roundtrip(): - original = Dfn( - schema_version=Version("2"), - name="gwf-nam", - parent="sim-nam", - advanced=False, - multi=True, - blocks={"options": {}}, - ) - d = asdict(original) - reconstructed = Dfn.from_dict(d) - assert reconstructed.name == original.name - assert reconstructed.schema_version == original.schema_version - assert reconstructed.parent == original.parent - assert reconstructed.advanced == original.advanced - assert reconstructed.multi == original.multi - assert reconstructed.blocks == original.blocks - - -def test_fieldv1_from_dict_ignores_extra_keys(): - d = { - "name": "test_field", - "type": "keyword", - "extra_key": "should be allowed", - "another_extra": 123, - } - field = FieldV1.from_dict(d) - assert field.name == "test_field" - assert field.type == "keyword" - - -def test_fieldv1_from_dict_strict_mode(): - d = { - "name": "test_field", - "type": "keyword", - "extra_key": "should cause error", - } - with pytest.raises(ValueError, match="Unrecognized keys in field data"): - FieldV1.from_dict(d, strict=True) - - -def test_fieldv1_from_dict_roundtrip(): - original = FieldV1( - name="maxbound", - type="integer", - block="dimensions", - description="maximum number of cells", - tagged=True, - ) - d = asdict(original) - reconstructed = FieldV1.from_dict(d) - assert reconstructed.name == 
original.name - assert reconstructed.type == original.type - assert reconstructed.block == original.block - assert reconstructed.description == original.description - assert reconstructed.tagged == original.tagged - - -def test_fieldv2_from_dict_ignores_extra_keys(): - d = { - "name": "test_field", - "type": "keyword", - "extra_key": "should be allowed", - "another_extra": 123, - } - field = FieldV2.from_dict(d) - assert field.name == "test_field" - assert field.type == "keyword" - - -def test_fieldv2_from_dict_strict_mode(): - d = { - "name": "test_field", - "type": "keyword", - "extra_key": "should cause error", - } - with pytest.raises(ValueError, match="Unrecognized keys in field data"): - FieldV2.from_dict(d, strict=True) - - -def test_fieldv2_from_dict_roundtrip(): - original = FieldV2( - name="nper", - type="integer", - block="dimensions", - description="number of stress periods", - optional=False, - ) - d = asdict(original) - reconstructed = FieldV2.from_dict(d) - assert reconstructed.name == original.name - assert reconstructed.type == original.type - assert reconstructed.block == original.block - assert reconstructed.description == original.description - assert reconstructed.optional == original.optional - - -def test_dfn_from_dict_with_v1_field_dicts(): - d = { - "schema_version": Version("1"), - "name": "test-dfn", - "blocks": { - "options": { - "save_flows": { - "name": "save_flows", - "type": "keyword", - "tagged": True, - "in_record": False, - }, - }, - }, - } - dfn = Dfn.from_dict(d) - assert dfn.schema_version == Version("1") - assert dfn.name == "test-dfn" - assert dfn.blocks is not None - assert "options" in dfn.blocks - assert "save_flows" in dfn.blocks["options"] - - field = dfn.blocks["options"]["save_flows"] - assert isinstance(field, FieldV1) - assert field.name == "save_flows" - assert field.type == "keyword" - assert field.tagged is True - assert field.in_record is False - - -def test_dfn_from_dict_with_v2_field_dicts(): - d = { - 
"schema_version": Version("2"), - "name": "test-dfn", - "blocks": { - "dimensions": { - "nper": { - "name": "nper", - "type": "integer", - "optional": False, - }, - }, - }, - } - dfn = Dfn.from_dict(d) - assert dfn.schema_version == Version("2") - assert dfn.name == "test-dfn" - assert dfn.blocks is not None - assert "dimensions" in dfn.blocks - assert "nper" in dfn.blocks["dimensions"] - - field = dfn.blocks["dimensions"]["nper"] - assert isinstance(field, FieldV2) - assert field.name == "nper" - assert field.type == "integer" - assert field.optional is False - - -def test_dfn_from_dict_defaults_to_v2_fields(): - d = { - "name": "test-dfn", - "blocks": { - "options": { - "some_field": { - "name": "some_field", - "type": "keyword", - }, - }, - }, - } - dfn = Dfn.from_dict(d) - assert dfn.blocks is not None - field = dfn.blocks["options"]["some_field"] - assert isinstance(field, FieldV2) - assert dfn.schema_version == Version("2") - - -def test_dfn_from_dict_with_already_deserialized_fields(): - field = FieldV2(name="test", type="keyword") - d = { - "schema_version": Version("2"), - "name": "test-dfn", - "blocks": { - "options": { - "test": field, - }, - }, - } - dfn = Dfn.from_dict(d) - assert dfn.blocks is not None - assert dfn.blocks["options"]["test"] is field - - -@requires_pkg("boltons") -def test_validate_directory(): - """Test validation on a directory of DFN files.""" - assert is_valid(DFN_DIR) - - -@requires_pkg("boltons") -def test_validate_single_file(dfn_name): - """Test validation on a single DFN file.""" - if dfn_name == "common": - pytest.skip("common.dfn is handled separately") - assert is_valid(DFN_DIR / f"{dfn_name}.dfn") - - -@requires_pkg("boltons") -def test_validate_common_file(): - """Test validation on common.dfn.""" - assert is_valid(DFN_DIR / "common.dfn") - - -@requires_pkg("boltons") -def test_validate_invalid_file(function_tmpdir): - """Test validation on an invalid DFN file.""" - invalid_dfn = function_tmpdir / "invalid.dfn" - 
invalid_dfn.write_text("invalid content") - assert not is_valid(invalid_dfn) - - -@requires_pkg("boltons") -def test_validate_nonexistent_file(function_tmpdir): - """Test validation on a nonexistent file.""" - nonexistent = function_tmpdir / "nonexistent.dfn" - assert not is_valid(nonexistent) - - -def test_fieldv1_to_fieldv2_conversion(): - """Test that FieldV1 instances are properly converted to FieldV2.""" - from modflow_devtools.dfns import map - - dfn_v1 = Dfn( - schema_version=Version("1"), - name="test-dfn", - blocks={ - "options": { - "save_flows": FieldV1( - name="save_flows", - type="keyword", - block="options", - description="save calculated flows", - tagged=True, - in_record=False, - reader="urword", - ), - "some_float": FieldV1( - name="some_float", - type="double precision", - block="options", - description="a floating point value", - ), - } - }, - ) - - dfn_v2 = map(dfn_v1, schema_version="2") - assert dfn_v2.schema_version == Version("2") - assert dfn_v2.blocks is not None - assert "options" in dfn_v2.blocks - assert "save_flows" in dfn_v2.blocks["options"] - - save_flows = dfn_v2.blocks["options"]["save_flows"] - assert isinstance(save_flows, FieldV2) - assert save_flows.name == "save_flows" - assert save_flows.type == "keyword" - assert save_flows.block == "options" - assert save_flows.description == "save calculated flows" - assert hasattr(save_flows, "tagged") - assert not hasattr(save_flows, "in_record") - assert not hasattr(save_flows, "reader") - - some_float = dfn_v2.blocks["options"]["some_float"] - assert isinstance(some_float, FieldV2) - assert some_float.name == "some_float" - assert some_float.type == "double" - assert some_float.block == "options" - assert some_float.description == "a floating point value" - - -def test_fieldv1_to_fieldv2_conversion_with_children(): - """Test that FieldV1 with nested children are properly converted to FieldV2.""" - from modflow_devtools.dfns import map - - # Create nested fields for a record - 
child_field_v1 = FieldV1( - name="cellid", - type="integer", - block="period", - description="cell identifier", - in_record=True, - tagged=False, - ) - - parent_field_v1 = FieldV1( - name="stress_period_data", - type="recarray cellid", - block="period", - description="stress period data", - in_record=False, - ) - - dfn_v1 = Dfn( - schema_version=Version("1"), - name="test-dfn", - blocks={ - "period": { - "stress_period_data": parent_field_v1, - "cellid": child_field_v1, - } - }, - ) - - # Convert to v2 - dfn_v2 = map(dfn_v1, schema_version="2") - - # Check that all fields are FieldV2 instances - assert dfn_v2.blocks is not None - for block_name, block_fields in dfn_v2.blocks.items(): - for field_name, field in block_fields.items(): - assert isinstance(field, FieldV2) - # Check nested children too - if field.children: - for child_name, child_field in field.children.items(): - assert isinstance(child_field, FieldV2) - - -def test_period_block_conversion(): - """Test period block recarray conversion to individual arrays.""" - from modflow_devtools.dfns import map - - dfn_v1 = Dfn( - schema_version=Version("1"), - name="test-pkg", - blocks={ - "period": { - "stress_period_data": FieldV1( - name="stress_period_data", - type="recarray cellid q", - block="period", - description="stress period data", - ), - "cellid": FieldV1( - name="cellid", - type="integer", - block="period", - shape="(ncelldim)", - in_record=True, - ), - "q": FieldV1( - name="q", - type="double precision", - block="period", - shape="(maxbound)", - in_record=True, - ), - } - }, - ) - - dfn_v2 = map(dfn_v1, schema_version="2") - - period_block = dfn_v2.blocks["period"] - assert "cellid" not in period_block # cellid removed - assert "q" in period_block - assert isinstance(period_block["q"], FieldV2) - # Shape should be transformed: maxbound removed, nper and nnodes added - assert "nper" in period_block["q"].shape - assert "nnodes" in period_block["q"].shape - assert "maxbound" not in 
period_block["q"].shape - - -def test_record_type_conversion(): - """Test record type with multiple scalar fields.""" - from modflow_devtools.dfns import map - - dfn_v1 = Dfn( - schema_version=Version("1"), - name="test-dfn", - blocks={ - "options": { - "auxrecord": FieldV1( - name="auxrecord", - type="record auxiliary auxname", - block="options", - in_record=False, - ), - "auxiliary": FieldV1( - name="auxiliary", - type="keyword", - block="options", - in_record=True, - ), - "auxname": FieldV1( - name="auxname", - type="string", - block="options", - in_record=True, - ), - } - }, - ) - - dfn_v2 = map(dfn_v1, schema_version="2") - - auxrecord = dfn_v2.blocks["options"]["auxrecord"] - assert isinstance(auxrecord, FieldV2) - assert auxrecord.type == "record" - assert auxrecord.children is not None - assert "auxiliary" in auxrecord.children - assert "auxname" in auxrecord.children - assert isinstance(auxrecord.children["auxiliary"], FieldV2) - assert isinstance(auxrecord.children["auxname"], FieldV2) - - -def test_keystring_type_conversion(): - """Test keystring type conversion.""" - from modflow_devtools.dfns import map - - dfn_v1 = Dfn( - schema_version=Version("1"), - name="test-dfn", - blocks={ - "options": { - "obs_filerecord": FieldV1( - name="obs_filerecord", - type="record obs6 filein obs6_filename", - block="options", - tagged=True, - ), - "obs6": FieldV1( - name="obs6", - type="keyword", - block="options", - in_record=True, - ), - "filein": FieldV1( - name="filein", - type="keyword", - block="options", - in_record=True, - ), - "obs6_filename": FieldV1( - name="obs6_filename", - type="string", - block="options", - in_record=True, - preserve_case=True, - ), - } - }, - ) - - dfn_v2 = map(dfn_v1, schema_version="2") - - obs_rec = dfn_v2.blocks["options"]["obs_filerecord"] - assert isinstance(obs_rec, FieldV2) - assert obs_rec.type == "record" - assert obs_rec.children is not None - assert all(isinstance(child, FieldV2) for child in obs_rec.children.values()) diff 
--git a/autotest/test_dfns_registry.py b/autotest/test_dfns_registry.py deleted file mode 100644 index f9c3a7e2..00000000 --- a/autotest/test_dfns_registry.py +++ /dev/null @@ -1,853 +0,0 @@ -""" -Tests for the DFNs API registry infrastructure. - -Tests can be configured via environment variables (loaded from .env file). -""" - -from __future__ import annotations - -import os -from pathlib import Path -from unittest.mock import patch - -import pytest -from flaky import flaky -from packaging.version import Version - -from modflow_devtools.dfns.fetch import fetch_dfns -from modflow_devtools.markers import requires_pkg - -PROJ_ROOT = Path(__file__).parents[1] -DFN_DIR = PROJ_ROOT / "autotest" / "temp" / "dfn" - -# Test configuration (loaded from .env file via pytest-dotenv plugin) -TEST_DFN_REPO = os.getenv("TEST_DFNS_REPO", "MODFLOW-ORG/modflow6") -TEST_DFN_REF = os.getenv("TEST_DFNS_REF", "develop") -TEST_DFN_SOURCE = os.getenv("TEST_DFNS_SOURCE", "modflow6") - -# For fetching DFN files directly (legacy tests) -MF6_OWNER = TEST_DFN_REPO.split("/")[0] -MF6_REPO = TEST_DFN_REPO.split("/")[1] -MF6_REF = TEST_DFN_REF - -# Path to cloned MF6 repository for autodiscovery (set by CI or local testing) -# If set, use this instead of fetching individual DFN files -TEST_DFN_PATH = os.getenv("TEST_DFN_PATH") - - -@pytest.fixture(scope="module") -def dfn_dir(): - """ - Provide path to DFN files for testing. - - Priority: - 1. If TEST_DFN_PATH is set, use the DFN directory from a cloned MF6 repo (autodiscovery) - 2. Otherwise, fetch individual DFN files to temp directory (legacy behavior) - - The autodiscovery approach is preferred in CI to avoid needing registry files. 
- """ - # If TEST_DFN_PATH is set, use it (points to cloned MF6 DFN directory) - if TEST_DFN_PATH: - dfn_path = Path(TEST_DFN_PATH).expanduser().resolve() - if not dfn_path.exists(): - raise ValueError(f"TEST_DFN_PATH={TEST_DFN_PATH} does not exist") - if not any(dfn_path.glob("*.dfn")): - raise ValueError(f"No DFN files found in TEST_DFN_PATH={TEST_DFN_PATH}") - return dfn_path - - # Fall back to fetching individual DFN files (legacy behavior for local development) - if not any(DFN_DIR.glob("*.dfn")): - fetch_dfns(MF6_OWNER, MF6_REPO, MF6_REF, DFN_DIR, verbose=True) - return DFN_DIR - - -@requires_pkg("boltons") -class TestDfnSpec: - """Tests for the DfnSpec class.""" - - def test_load_from_directory(self, dfn_dir): - """Test loading a DfnSpec from a directory of DFN files.""" - from modflow_devtools.dfns import DfnSpec - - spec = DfnSpec.load(dfn_dir) - - # Should have loaded and mapped to v2 - assert spec.schema_version == Version("2") - assert spec.root is not None - assert spec.root.name == "sim-nam" - - def test_load_with_explicit_schema_version(self, dfn_dir): - """Test loading with explicit schema version.""" - from modflow_devtools.dfns import DfnSpec - - spec = DfnSpec.load(dfn_dir, schema_version="2") - - assert spec.schema_version == Version("2") - - def test_mapping_protocol(self, dfn_dir): - """Test that DfnSpec implements the Mapping protocol.""" - from modflow_devtools.dfns import DfnSpec - - spec = DfnSpec.load(dfn_dir) - - # Test __len__ - assert len(spec) > 100 # Should have many components - - # Test __iter__ - names = list(spec) - assert "sim-nam" in names - assert "gwf-nam" in names - assert "gwf-chd" in names - - # Test __getitem__ - gwf_chd = spec["gwf-chd"] - assert gwf_chd.name == "gwf-chd" - assert gwf_chd.parent == "gwf-nam" - - # Test __contains__ - assert "gwf-chd" in spec - assert "nonexistent" not in spec - - # Test keys(), values(), items() - assert "gwf-wel" in spec.keys() - assert any(d.name == "gwf-wel" for d in spec.values()) - 
assert any(n == "gwf-wel" for n, d in spec.items()) - - def test_getitem_raises_keyerror(self, dfn_dir): - """Test that __getitem__ raises KeyError for missing components.""" - from modflow_devtools.dfns import DfnSpec - - spec = DfnSpec.load(dfn_dir) - - with pytest.raises(KeyError, match="nonexistent"): - _ = spec["nonexistent"] - - def test_hierarchical_access(self, dfn_dir): - """Test accessing components through the hierarchical tree.""" - from modflow_devtools.dfns import DfnSpec - - spec = DfnSpec.load(dfn_dir) - - # Root should be sim-nam - assert spec.root.name == "sim-nam" - - # Root should have children - assert spec.root.children is not None - assert "gwf-nam" in spec.root.children - - # gwf-nam should have its own children - gwf_nam = spec.root.children["gwf-nam"] - assert gwf_nam.children is not None - assert "gwf-chd" in gwf_nam.children - - def test_load_empty_directory_raises(self, tmp_path): - """Test that loading from empty directory raises ValueError.""" - from modflow_devtools.dfns import DfnSpec - - with pytest.raises(ValueError, match="No DFN files found"): - DfnSpec.load(tmp_path) - - -@requires_pkg("pydantic") -class TestBootstrapConfig: - """Tests for bootstrap configuration schemas.""" - - def test_source_config_defaults(self): - """Test SourceConfig default values.""" - from modflow_devtools.dfns.registry import SourceConfig - - config = SourceConfig(repo="owner/repo") - - assert config.repo == "owner/repo" - assert config.dfn_path == "doc/mf6io/mf6ivar/dfn" - assert config.registry_path == ".registry/dfns.toml" - assert config.refs == [] - - def test_source_config_custom_values(self): - """Test SourceConfig with custom values.""" - from modflow_devtools.dfns.registry import SourceConfig - - config = SourceConfig( - repo="custom/repo", - dfn_path="custom/path", - registry_path="custom/registry.toml", - refs=["main", "v1.0"], - ) - - assert config.repo == "custom/repo" - assert config.dfn_path == "custom/path" - assert 
config.registry_path == "custom/registry.toml" - assert config.refs == ["main", "v1.0"] - - def test_bootstrap_config_load(self, tmp_path): - """Test loading BootstrapConfig from TOML file.""" - from modflow_devtools.dfns.registry import BootstrapConfig - - config_file = tmp_path / "dfns.toml" - config_file.write_text(""" -[sources.test] -repo = "test/repo" -refs = ["main"] -""") - - config = BootstrapConfig.load(config_file) - - assert "test" in config.sources - assert config.sources["test"].repo == "test/repo" - assert config.sources["test"].refs == ["main"] - - def test_bootstrap_config_load_nonexistent(self, tmp_path): - """Test loading from nonexistent file returns empty config.""" - from modflow_devtools.dfns.registry import BootstrapConfig - - config = BootstrapConfig.load(tmp_path / "nonexistent.toml") - - assert config.sources == {} - - def test_bootstrap_config_merge(self): - """Test merging two bootstrap configs.""" - from modflow_devtools.dfns.registry import BootstrapConfig, SourceConfig - - base = BootstrapConfig( - sources={ - "source1": SourceConfig(repo="base/source1", refs=["v1"]), - "source2": SourceConfig(repo="base/source2"), - } - ) - overlay = BootstrapConfig( - sources={ - "source1": SourceConfig(repo="overlay/source1", refs=["v2"]), - "source3": SourceConfig(repo="overlay/source3"), - } - ) - - merged = BootstrapConfig.merge(base, overlay) - - # overlay overrides base for source1 - assert merged.sources["source1"].repo == "overlay/source1" - assert merged.sources["source1"].refs == ["v2"] - # source2 from base preserved - assert merged.sources["source2"].repo == "base/source2" - # source3 from overlay added - assert merged.sources["source3"].repo == "overlay/source3" - - def test_get_bootstrap_config(self): - """Test loading bundled bootstrap config.""" - from modflow_devtools.dfns.registry import get_bootstrap_config - - config = get_bootstrap_config() - - assert "modflow6" in config.sources - assert config.sources["modflow6"].repo == 
"MODFLOW-ORG/modflow6" - - -@requires_pkg("pydantic") -class TestRegistryMeta: - """Tests for registry metadata schemas.""" - - def test_dfn_registry_meta_load(self, tmp_path): - """Test loading DfnRegistryMeta from TOML file.""" - from modflow_devtools.dfns.registry import DfnRegistryMeta - - registry_file = tmp_path / "dfns.toml" - registry_file.write_text(""" -schema_version = "1.0" - -[metadata] -ref = "6.6.0" - -[files."gwf-chd.dfn"] -hash = "sha256:abc123" - -[files."gwf-wel.dfn"] -hash = "sha256:def456" -""") - - meta = DfnRegistryMeta.load(registry_file) - - assert meta.schema_version == "1.0" - assert meta.ref == "6.6.0" - assert len(meta.files) == 2 - assert meta.files["gwf-chd.dfn"].hash == "sha256:abc123" - assert meta.files["gwf-wel.dfn"].hash == "sha256:def456" - - def test_dfn_registry_meta_save(self, tmp_path): - """Test saving DfnRegistryMeta to TOML file.""" - import tomli - - from modflow_devtools.dfns.registry import DfnRegistryFile, DfnRegistryMeta - - meta = DfnRegistryMeta( - schema_version="1.0", - ref="test-ref", - files={ - "test.dfn": DfnRegistryFile(hash="sha256:abc123"), - }, - ) - - output_path = tmp_path / "output.toml" - meta.save(output_path) - - assert output_path.exists() - - with output_path.open("rb") as f: - data = tomli.load(f) - - assert data["schema_version"] == "1.0" - assert data["metadata"]["ref"] == "test-ref" - assert data["files"]["test.dfn"]["hash"] == "sha256:abc123" - - -@requires_pkg("boltons", "pydantic") -class TestLocalDfnRegistry: - """Tests for LocalDfnRegistry class.""" - - def test_init(self, dfn_dir): - """Test LocalDfnRegistry initialization.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir, ref="local") - - assert registry.source == "modflow6" - assert registry.ref == "local" - assert registry.path == dfn_dir.resolve() - - def test_spec_property(self, dfn_dir): - """Test accessing spec through registry.""" - from modflow_devtools.dfns import 
LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - - spec = registry.spec - - assert spec.schema_version == Version("2") - assert len(spec) > 100 - - def test_get_dfn(self, dfn_dir): - """Test getting a DFN by name.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - - dfn = registry.get_dfn("gwf-chd") - - assert dfn.name == "gwf-chd" - assert dfn.parent == "gwf-nam" - - def test_get_dfn_path(self, dfn_dir): - """Test getting file path for a component.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - - path = registry.get_dfn_path("gwf-chd") - - assert path.exists() - assert path.name == "gwf-chd.dfn" - - def test_get_dfn_path_not_found(self, dfn_dir): - """Test getting path for nonexistent component raises FileNotFoundError.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - - with pytest.raises(FileNotFoundError, match="nonexistent"): - registry.get_dfn_path("nonexistent") - - def test_schema_version_property(self, dfn_dir): - """Test schema_version property.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - - assert registry.schema_version == Version("2") - - def test_components_property(self, dfn_dir): - """Test components property returns flat dict.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - - components = registry.components - - assert isinstance(components, dict) - assert "gwf-chd" in components - assert components["gwf-chd"].name == "gwf-chd" - - -@requires_pkg("pydantic") -class TestCacheUtilities: - """Tests for cache and config utilities.""" - - def test_get_cache_dir(self): - """Test getting cache directory path.""" - from modflow_devtools.dfns.registry import get_cache_dir - - cache_dir = get_cache_dir("dfn") - - assert cache_dir.name == "dfn" - assert 
"modflow-devtools" in str(cache_dir) - - def test_get_user_config_path(self): - """Test getting user config path.""" - from modflow_devtools.dfns.registry import get_user_config_path - - config_path = get_user_config_path("dfn") - - assert config_path.name == "dfns.toml" - assert "modflow-devtools" in str(config_path) - - def test_get_cache_dir_custom_subdir(self): - """Test cache dir with custom subdirectory.""" - from modflow_devtools.dfns.registry import get_cache_dir - - cache_dir = get_cache_dir("custom") - - assert cache_dir.name == "custom" - - -@requires_pkg("tomli", "tomli_w") -class TestMakeRegistry: - """Tests for the registry generation tool.""" - - def test_compute_file_hash(self, tmp_path): - """Test computing file hash.""" - from modflow_devtools.dfns.make_registry import compute_file_hash - - test_file = tmp_path / "test.txt" - test_file.write_text("hello world") - - hash_value = compute_file_hash(test_file) - - assert hash_value.startswith("sha256:") - # Known hash for "hello world" - assert "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9" in hash_value - - def test_scan_dfn_directory(self, dfn_dir): - """Test scanning a DFN directory.""" - from modflow_devtools.dfns.make_registry import scan_dfn_directory - - files = scan_dfn_directory(dfn_dir) - - assert len(files) > 100 - assert "gwf-chd.dfn" in files - assert "common.dfn" in files - assert all(h.startswith("sha256:") for h in files.values()) - - def test_generate_registry(self, dfn_dir, tmp_path): - """Test generating a registry file.""" - import tomli - - from modflow_devtools.dfns.make_registry import generate_registry - - output_path = tmp_path / "dfns.toml" - - generate_registry( - dfn_path=dfn_dir, - output_path=output_path, - ref="test-ref", - ) - - assert output_path.exists() - - with output_path.open("rb") as f: - data = tomli.load(f) - - assert data["schema_version"] == "1.0" - assert "generated_at" in data - assert data["metadata"]["ref"] == "test-ref" - assert 
"gwf-chd.dfn" in data["files"] - - def test_generate_registry_empty_dir(self, tmp_path): - """Test generating registry from empty directory raises ValueError.""" - from modflow_devtools.dfns.make_registry import generate_registry - - with pytest.raises(ValueError, match="No DFN files found"): - generate_registry( - dfn_path=tmp_path, - output_path=tmp_path / "dfns.toml", - ) - - def test_cli_help(self): - """Test CLI help output.""" - from modflow_devtools.dfns.make_registry import main - - # --help should exit with 0 - with pytest.raises(SystemExit) as exc_info: - main(["--help"]) - assert exc_info.value.code == 0 - - def test_cli_generate(self, dfn_dir, tmp_path): - """Test CLI generate command.""" - from modflow_devtools.dfns.make_registry import main - - output_path = tmp_path / "dfns.toml" - - result = main( - [ - "--dfn-path", - str(dfn_dir), - "--output", - str(output_path), - "--ref", - "test-ref", - ] - ) - - assert result == 0 - assert output_path.exists() - - -@requires_pkg("pydantic") -class TestCLI: - """Tests for the DFNs CLI.""" - - def test_main_help(self): - """Test CLI help output.""" - from modflow_devtools.dfns.__main__ import main - - result = main([]) - assert result == 0 - - def test_info_command(self): - """Test info command.""" - from modflow_devtools.dfns.__main__ import main - - result = main(["info"]) - assert result == 0 - - def test_clean_command_no_cache(self, tmp_path): - """Test clean command when cache doesn't exist.""" - from modflow_devtools.dfns.__main__ import main - - # Patch get_cache_dir to return nonexistent directory - with patch("modflow_devtools.dfns.__main__.get_cache_dir") as mock_cache_dir: - mock_cache_dir.return_value = tmp_path / "nonexistent" - result = main(["clean"]) - - assert result == 0 - - def test_sync_command_no_registry(self): - """Test sync command when registry doesn't exist (expected to fail).""" - from modflow_devtools.dfns.__main__ import main - - # This should fail because MODFLOW 6 repo doesn't 
have the registry yet - result = main(["sync", "--ref", "nonexistent-ref"]) - assert result == 1 - - -@requires_pkg("pydantic", "pooch", "boltons") -class TestRemoteDfnRegistry: - """Tests for RemoteDfnRegistry with mocked network calls.""" - - def test_init(self): - """Test RemoteDfnRegistry initialization.""" - from modflow_devtools.dfns import RemoteDfnRegistry - - registry = RemoteDfnRegistry(source="modflow6", ref="develop") - - assert registry.source == "modflow6" - assert registry.ref == "develop" - - def test_unknown_source_raises(self): - """Test that unknown source raises ValueError.""" - from modflow_devtools.dfns import RemoteDfnRegistry - - with pytest.raises(ValueError, match="Unknown source"): - RemoteDfnRegistry(source="nonexistent", ref="develop") - - def test_construct_raw_url(self): - """Test URL construction.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") - - url = registry._construct_raw_url("doc/mf6io/mf6ivar/dfn") - - assert "raw.githubusercontent.com" in url - assert "MODFLOW-ORG/modflow6" in url - assert "6.6.0" in url - - def test_get_registry_cache_path(self): - """Test getting registry cache path.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") - - path = registry._get_registry_cache_path() - - assert "registries" in str(path) - assert "modflow6" in str(path) - assert "6.6.0" in str(path) - assert path.name == "dfns.toml" - - def test_get_files_cache_dir(self): - """Test getting files cache directory.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") - - path = registry._get_files_cache_dir() - - assert "files" in str(path) - assert "modflow6" in str(path) - assert "6.6.0" in str(path) - - def test_fetch_registry_not_found(self): - """Test that fetching nonexistent registry raises appropriate 
error.""" - from modflow_devtools.dfns.registry import ( - DfnRegistryNotFoundError, - RemoteDfnRegistry, - ) - - registry = RemoteDfnRegistry(source="modflow6", ref="nonexistent-ref-12345") - - with pytest.raises(DfnRegistryNotFoundError): - registry._fetch_registry(force=True) - - def test_init_with_repo_override(self): - """Test RemoteDfnRegistry with repo override.""" - from modflow_devtools.dfns import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - assert registry.source == TEST_DFN_SOURCE - assert registry.ref == TEST_DFN_REF - assert registry.repo == TEST_DFN_REPO - - def test_construct_raw_url_with_repo_override(self): - """Test URL construction with repo override.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - url = registry._construct_raw_url("doc/mf6io/mf6ivar/dfn") - - assert "raw.githubusercontent.com" in url - assert TEST_DFN_REPO in url - assert TEST_DFN_REF in url - - @flaky(max_runs=3, min_passes=1) - def test_fetch_registry(self): - """Test fetching registry from the test repository.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - meta = registry._fetch_registry(force=True) - - assert meta is not None - assert len(meta.files) > 0 - # Registry file may have a different ref than what we requested - # (e.g., generated from develop branch but accessed on registry branch) - assert meta.ref is not None - - @flaky(max_runs=3, min_passes=1) - def test_sync_files(self): - """Test syncing DFN files from the test repository.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - # Sync should succeed 
(fetches registry and sets up pooch) - registry.sync(force=True) - - # Should be able to fetch a DFN file - path = registry.get_dfn_path("gwf-chd") - assert path.exists() - - @flaky(max_runs=3, min_passes=1) - def test_get_dfn(self): - """Test getting a DFN from the test repository.""" - from modflow_devtools.dfns import Dfn - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - # Ensure synced - registry.sync() - - dfn = registry.get_dfn("gwf-chd") - - assert isinstance(dfn, Dfn) - assert dfn.name == "gwf-chd" - - @flaky(max_runs=3, min_passes=1) - def test_get_spec(self): - """Test getting the full spec from the test repository.""" - from modflow_devtools.dfns import DfnSpec - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - # Ensure synced - registry.sync() - - spec = registry.spec - - assert isinstance(spec, DfnSpec) - assert "gwf-chd" in spec - assert "sim-nam" in spec - - @flaky(max_runs=3, min_passes=1) - def test_list_components(self): - """Test listing available components from the test repository.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry - - registry = RemoteDfnRegistry( - source=TEST_DFN_SOURCE, - ref=TEST_DFN_REF, - repo=TEST_DFN_REPO, - ) - - # Ensure synced - registry.sync() - - # Use spec.keys() to list components - components = list(registry.spec.keys()) - - assert len(components) > 100 - assert "gwf-chd" in components - assert "sim-nam" in components - - -@requires_pkg("boltons", "pydantic") -class TestModuleFunctions: - """Tests for module-level convenience functions.""" - - def test_list_components_local(self, dfn_dir): - """Test list_components with local registry.""" - from modflow_devtools.dfns import LocalDfnRegistry - - registry = LocalDfnRegistry(path=dfn_dir) - components = 
list(registry.spec.keys()) - - assert len(components) > 100 - assert "gwf-chd" in components - assert "sim-nam" in components - - def test_get_sync_status(self): - """Test get_sync_status function.""" - from modflow_devtools.dfns.registry import get_sync_status - - status = get_sync_status() - - assert isinstance(status, dict) - # All refs should be either True or False - assert all(isinstance(v, bool) for v in status.values()) - - -@requires_pkg("boltons", "pydantic") -class TestGetRegistryWithPath: - """Tests for get_registry() with path parameter.""" - - def test_get_registry_with_path_returns_local_registry(self, dfn_dir): - """Test that get_registry with path returns LocalDfnRegistry.""" - from modflow_devtools.dfns.registry import LocalDfnRegistry, get_registry - - registry = get_registry(path=dfn_dir) - - assert isinstance(registry, LocalDfnRegistry) - assert registry.path == dfn_dir.resolve() - - def test_get_registry_with_path_and_metadata(self, dfn_dir): - """Test that source/ref metadata is preserved with path.""" - from modflow_devtools.dfns.registry import get_registry - - registry = get_registry(path=dfn_dir, source="test", ref="local") - - assert registry.source == "test" - assert registry.ref == "local" - - def test_get_registry_without_path_returns_remote_registry(self): - """Test that get_registry without path still returns RemoteDfnRegistry.""" - from modflow_devtools.dfns.registry import RemoteDfnRegistry, get_registry - - registry = get_registry(source="modflow6", ref="develop", auto_sync=False) - - assert isinstance(registry, RemoteDfnRegistry) - - -@requires_pkg("boltons", "pydantic") -class TestConvenienceFunctionsWithPath: - """Tests for convenience functions with path parameter.""" - - def test_get_dfn_with_path(self, dfn_dir): - """Test get_dfn() with path parameter.""" - from modflow_devtools.dfns import get_dfn - - dfn = get_dfn("gwf-chd", path=dfn_dir) - - assert dfn.name == "gwf-chd" - assert dfn.parent == "gwf-nam" - - def 
test_get_dfn_path_with_path(self, dfn_dir): - """Test get_dfn_path() with path parameter.""" - from modflow_devtools.dfns import get_dfn_path - - file_path = get_dfn_path("gwf-chd", path=dfn_dir) - - assert file_path.exists() - assert file_path.name == "gwf-chd.dfn" - - def test_list_components_with_path(self, dfn_dir): - """Test list_components() with path parameter.""" - from modflow_devtools.dfns import list_components - - components = list_components(path=dfn_dir) - - assert len(components) > 100 - assert "gwf-chd" in components - - -@requires_pkg("boltons", "pydantic") -def test_autodiscovery_workflow(dfn_dir): - """Test complete autodiscovery workflow.""" - from modflow_devtools.dfns import get_dfn, get_registry, list_components - - # Get registry pointing at local directory - registry = get_registry(path=dfn_dir, ref="local") - - # List components - components = list(registry.spec.keys()) - assert len(components) > 100 - - # Get specific DFN - gwf_chd = registry.get_dfn("gwf-chd") - assert gwf_chd.name == "gwf-chd" - assert gwf_chd.blocks is not None - - # Get file path - chd_path = registry.get_dfn_path("gwf-chd") - assert chd_path.exists() - - # Use convenience functions - components_list = list_components(path=dfn_dir) - assert "gwf-chd" in components_list - - dfn = get_dfn("gwf-wel", path=dfn_dir) - assert dfn.name == "gwf-wel" diff --git a/autotest/test_models.py b/autotest/test_models.py index f54b1c72..6b8e12ca 100644 --- a/autotest/test_models.py +++ b/autotest/test_models.py @@ -276,7 +276,6 @@ def test_sync_single_source_single_ref(self): repo=TEST_MODELS_REPO, name=TEST_MODELS_SOURCE_NAME, refs=[TEST_MODELS_REF], - verbose=True, ) result = source.sync(ref=TEST_MODELS_REF, verbose=True) diff --git a/autotest/test_programs.py b/autotest/test_programs.py index bc3e7dfc..9a6a2cd9 100644 --- a/autotest/test_programs.py +++ b/autotest/test_programs.py @@ -1,4 +1,5 @@ import warnings +from datetime import UTC from pathlib import Path import pytest @@ 
-304,7 +305,7 @@ def test_program_manager_error_handling(self): def test_installation_metadata_integration(self): """Test InstallationMetadata integration with ProgramManager.""" - from datetime import datetime, timezone + from datetime import datetime from pathlib import Path from modflow_devtools.programs import ( @@ -322,7 +323,7 @@ def test_installation_metadata_integration(self): version="1.0.0", platform="linux", bindir=Path("/tmp/test"), - installed_at=datetime.now(timezone.utc), + installed_at=datetime.now(UTC), source={ "repo": "test/repo", "tag": "1.0.0", diff --git a/docs/index.rst b/docs/index.rst index c5e21c12..5f3b08f2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,40 +10,46 @@ The `modflow-devtools` package provides a set of tools for developing and testin .. toctree:: :maxdepth: 2 - :caption: Introduction + :caption: Installation md/install.md - .. toctree:: :maxdepth: 2 - :caption: Test fixtures + :caption: Testing md/fixtures.md md/markers.md md/snapshots.md - .. toctree:: :maxdepth: 2 - :caption: Miscellaneous + :caption: Specification + md/dfn-schema.md md/dfns.md - md/download.md - md/latex.md - md/models.md - md/ostags.md - md/programs.md - md/timed.md - md/zip.md +.. toctree:: + :maxdepth: 2 + :caption: Models + + md/models.md .. toctree:: :maxdepth: 2 - :caption: External tools + :caption: Programs + + md/programs.md + md/ostags.md - md/act.md - md/doctoc.md +.. toctree:: + :maxdepth: 2 + :caption: Miscellaneous + + md/zip.md + md/download.md + md/timed.md + md/latex.md .. toctree:: :maxdepth: 2 diff --git a/docs/md/act.md b/docs/md/act.md deleted file mode 100644 index bee9d9cc..00000000 --- a/docs/md/act.md +++ /dev/null @@ -1,21 +0,0 @@ -# Testing CI workflows locally - -The [`act`](https://github.com/nektos/act) tool uses Docker to run CI workflows in a simulated GitHub Actions environment. 
[Docker Desktop](https://www.docker.com/products/docker-desktop/) is required for Mac or Windows and [Docker Engine](https://docs.docker.com/engine/) on Linux. - -**Note:** `act` can only run Linux-based container definitions. Mac or Windows workflows or matrix OS entries will be skipped. - -With Docker installed and running, run `act -l` from the project root to see available CI workflows. To run all workflows and jobs, just run `act`. To run a particular workflow use `-W`: - -```shell -act -W .github/workflows/commit.yml -``` - -To run a particular job within a workflow, add the `-j` option: - -```shell -act -W .github/workflows/commit.yml -j build -``` - -**Note:** GitHub API rate limits are easy to exceed, especially with job matrices. Authenticated GitHub users have a much higher rate limit: use `-s GITHUB_TOKEN=` when invoking `act` to provide a personal access token. Note that this will log your token in shell history — leave the value blank for a prompt to enter it more securely. - -The `-n` flag can be used to execute a dry run, which doesn't run anything, just evaluates workflow, job and step definitions. See the [docs](https://github.com/nektos/act#example-commands) for more. diff --git a/docs/md/dev/dfns.md b/docs/md/dev/dfns.md index f300e0d2..e6fd6ba3 100644 --- a/docs/md/dev/dfns.md +++ b/docs/md/dev/dfns.md @@ -1,1049 +1,175 @@ -# DFNs API Design - -This document describes the design of the DFNs (Definition Files) API ([GitHub issue #262](https://github.com/MODFLOW-ORG/modflow-devtools/issues/262)). It is intended to be developer-facing, not user-facing, though users may also find it informative. - -This is a living document which will be updated as development proceeds. 
- - - - - -- [Background](#background) -- [Objective](#objective) -- [Overview](#overview) -- [Architecture](#architecture) - - [Bootstrap file](#bootstrap-file) - - [Bootstrap file contents](#bootstrap-file-contents) - - [Sample bootstrap file](#sample-bootstrap-file) - - [DFN spec and registry files](#dfn-spec-and-registry-files) - - [Registry file format](#registry-file-format) - - [Sample files](#sample-files) - - [Registry discovery](#registry-discovery) - - [Discovery modes](#discovery-modes) - - [Registry discovery procedure](#registry-discovery-procedure) - - [Registry/DFN caching](#registrydfn-caching) - - [Registry synchronization](#registry-synchronization) - - [Manual sync](#manual-sync) - - [Automatic sync](#automatic-sync) - - [Source repository integration](#source-repository-integration) - - [DFN addressing](#dfn-addressing) - - [Registry classes](#registry-classes) - - [DfnRegistry (base class)](#dfnregistry-base-class) - - [RemoteDfnRegistry](#remotedfnregistry) - - [LocalDfnRegistry](#localdfnregistry) - - [Module-level API](#module-level-api) -- [Schema Versioning](#schema-versioning) - - [Separating format from schema](#separating-format-from-schema) - - [Schema evolution](#schema-evolution) - - [Tentative v2 schema design](#tentative-v2-schema-design) -- [Component Hierarchy](#component-hierarchy) -- [Schema version support](#schema-version-support) -- [Implementation Dependencies](#implementation-dependencies) - - [Completed work](#completed-work) - - [Core components](#core-components) - - [MODFLOW 6 repository integration](#modflow-6-repository-integration) - - [Testing and documentation](#testing-and-documentation) -- [Relationship to Models and Programs APIs](#relationship-to-models-and-programs-apis) -- [Design Decisions](#design-decisions) - - [Use Pooch for fetching](#use-pooch-for-fetching) - - [Use Pydantic for schema validation](#use-pydantic-for-schema-validation) - - [Schema versioning strategy](#schema-versioning-strategy) - - 
[Future enhancements](#future-enhancements) - - +# DFNs API -## Background - -The `modflow_devtools.dfns` module currently provides utilities for parsing and working with MODFLOW 6 definition files. Significant work already completed includes: - -- Object models for DFN components (`Dfn`, `Block`, `Field` classes) -- Schema definitions for both v1 (legacy) and v2 (in development) -- Parsers for the old DFN format -- Schema mapping capabilities including utilities for converting between flat and hierarchical component representations -- A `fetch_dfns()` function for manually downloading DFN files from the MODFLOW 6 repository -- Validation tools - -However, there is currently no registry-based API for: -- Automatically discovering and synchronizing DFN files from remote sources -- Managing multiple versions of definition files simultaneously -- Caching definition files locally for offline use - -Users must manually download definition files or rely on whatever happens to be bundled with their installation. This creates similar problems to what the Models API addressed: -1. **Version coupling**: Users are locked to whatever DFN version is bundled -2. **Manual management**: Users must manually track and download DFN updates -3. **No multi-version support**: Difficult to work with multiple MODFLOW 6 versions simultaneously -4. **Maintenance burden**: Developers must manually update bundled DFNs - -## Objective - -Create a DFNs API that: -1. **Mirrors Models/Programs API patterns** for consistency and familiarity -2. **Leverages existing dfn module work** (parsers, schemas, object models) -3. **Provides automated discovery** of definition files from MODFLOW 6 repository -4. **Supports multiple versions** simultaneously with explicit version addressing -5. **Uses Pooch** for fetching and caching (avoiding custom HTTP client code) -6. **Handles schema evolution** with proper separation of file format vs schema version -7. 
**Maintains loose coupling** between devtools and remote DFN sources - -## Overview - -Make the MODFLOW 6 repository responsible for publishing a definition file registry. - -Make `modflow-devtools` responsible for: -- Defining the DFN registry publication contract -- Providing registry-creation machinery -- Storing bootstrap information locating the MODFLOW 6 repository -- Discovering remote registries at install time or on demand -- Caching registry metadata and definition files -- Exposing a synchronized view of available definition files -- Parsing and validating definition files -- Mapping between schema versions - -MODFLOW 6 is currently the only repository using the DFN specification system, but this leaves the door open for other repositories to begin using it. - -## Architecture - -The DFNs API will mirror the Models and Programs API architecture, adapted for definition file-specific concerns. - -**Implementation approach**: Core classes are split across `modflow_devtools/dfns/__init__.py` (spec/parsing) and `modflow_devtools/dfns/registry.py` (registry infrastructure): -- `get_cache_dir()`: Cache directory path utility -- `BootstrapConfig` / `SourceConfig`: Pydantic models for bootstrap configuration -- `DfnRegistry`: Pydantic base class for registry access -- `RemoteDfnRegistry`: Remote fetching with Pooch integration -- `LocalDfnRegistry`: Local filesystem registry for development use -- `DfnRegistryMeta`: Pydantic model for `dfns.toml` registry file contents -- `DfnSpec`: Full specification with hierarchical and flat access -- `Dfn`, `Block`, `Field`: Core component dataclasses - -### Bootstrap file - -The **bootstrap** file tells `modflow-devtools` where to look for DFN registries. This file will be checked into the repository at `modflow_devtools/dfns/dfns.toml` and distributed with the package. 
- -#### Bootstrap file contents - -At the top level, the bootstrap file consists of a table of `sources`, each describing a repository that publishes definition files. - -Each source has: -- `repo`: Repository identifier (owner/name) -- `dfn_path`: Path within the repository to the directory containing DFN files (defaults to `doc/mf6io/mf6ivar/dfn`) -- `registry_path`: Path within the repository to the registry metadata file (defaults to `.registry/dfns.toml`) -- `refs`: List of git refs (branches, tags, or commit hashes) to sync by default - -#### User config overlay - -Users can customize or extend the bundled bootstrap configuration by creating a user config file at: -- Linux/macOS: `~/.config/modflow-devtools/dfns.toml` (respects `$XDG_CONFIG_HOME`) -- Windows: `%APPDATA%/modflow-devtools/dfns.toml` - -The user config follows the same format as the bundled bootstrap file. Sources defined in the user config will override or extend those in the bundled config, allowing users to: -- Add custom DFN repositories -- Point to forks of existing repositories (useful for testing experimental schema versions) -- Override default refs for existing sources - -**Implementation note**: The user config path logic (`get_user_config_path("dfn")`) is shared across all three APIs (Models, Programs, DFNs) via `modflow_devtools.config`, but each API implements its own `merge_bootstrap()` function using API-specific bootstrap schemas. - -#### Sample bootstrap file - -```toml -[sources.modflow6] -repo = "MODFLOW-ORG/modflow6" -dfn_path = "doc/mf6io/mf6ivar/dfn" -registry_path = ".registry/dfns.toml" -refs = [ - "6.6.0", - "6.5.0", - "6.4.4", - "develop", -] -``` - -### DFN spec and registry files - -The registry file (`dfns.toml`) is the metadata file that supports the DFNs API for discovery and distribution. 
- -#### Registry file format - -A **`dfns.toml`** registry file for **discovery and distribution** (the specific naming distinguishes it from `models.toml` and `programs.toml`): - -```toml -# Registry metadata (top-level, optional) -schema_version = "1.0" -generated_at = "2025-01-02T10:30:00Z" -devtools_version = "1.9.0" - -[metadata] -ref = "6.6.0" # Optional, known from discovery context - -# File listings (filenames and hashes, URLs constructed as needed) -[files] -"sim-nam.dfn" = {hash = "sha256:..."} -"sim-tdis.dfn" = {hash = "sha256:..."} -"gwf-nam.dfn" = {hash = "sha256:..."} -"gwf-chd.dfn" = {hash = "sha256:..."} -# ... all DFN files -``` - -**Notes**: -- Registry is purely **infrastructure** for discovery and distribution -- The `files` section maps filenames to hashes for verification -- URLs are constructed dynamically from bootstrap metadata (repo, ref, dfn_path) + filename -- This allows using personal forks by changing the bootstrap file -- **All registry metadata is optional** - registries can be handwritten minimally - -**Minimal handwritten registry**: -```toml -[files] -"sim-nam.dfn" = {hash = "sha256:def456..."} -"gwf-nam.dfn" = {hash = "sha256:789abc..."} -``` - -#### Sample files - -**Per-component TOML files** (current format in the MODFLOW 6 repository): - -Each component has its own `.toml` file named by component, e.g. `gwf-chd.toml`: - -```toml -name = "gwf-chd" -advanced = false -multi = true - -[options.auxiliary] -block = "options" -name = "auxiliary" -type = "string" -shape = "(naux)" -optional = true -description = "..." -# ... -``` - -The registry lists all component files: -```toml -[files] -"sim-nam.toml" = {hash = "sha256:..."} -"gwf-nam.toml" = {hash = "sha256:..."} -"gwf-chd.toml" = {hash = "sha256:..."} -# ... 
all component files -``` - -**Single-blob TOML** (output of `DfnSpec.dump()`, used for `mf6 --spec`): - -`DfnSpec.dump()` serializes the entire spec as a single TOML document with each component as a top-level key: - -```toml -schema_version = "2" - -["gwf-chd"] -name = "gwf-chd" -advanced = false -multi = true - -["gwf-chd".options.auxiliary] -block = "options" -name = "auxiliary" -# ... +This document describes the design and architecture of the `modflow_devtools.dfns` module. It is intended for developers working on or extending `modflow-devtools`. -["gwf-dis"] -name = "gwf-dis" -# ... -``` - -This format requires no preprocessing — consumers can pipe `mf6 --spec` output directly into `tomllib`. Hierarchy is preserved via `parent` attributes embedded in each component's data, and can be reconstructed by `DfnSpec.load()` using naming convention inference (`to_tree()`). - -### Registry discovery - -DFN registries can be discovered in two modes, similar to the Models API. - -#### Discovery modes - -**1. Registry as version-controlled file**: - -Registry files can be versioned in the repository at a conventional path, in which case discovery uses GitHub raw content URLs: - -``` -https://raw.githubusercontent.com/{org}/{repo}/{ref}/.registry/dfns.toml -``` - -This mode supports any git ref (branches, tags, commit hashes). - -**2. Registry as release asset**: - -Registry files can also be published as release assets: - -``` -https://github.com/{org}/{repo}/releases/download/{tag}/dfns.toml -``` - -This mode: -- Requires release tags only -- Allows registry generation in CI without committing to repo -- Provides faster discovery (no need to check multiple ref types) - -**Discovery precedence**: Release asset mode takes precedence if both exist (same as Models API). - -#### Registry discovery procedure - -At sync time, `modflow-devtools` discovers remote registries for each configured ref: - -1. 
**Check for release tag** (if release asset mode enabled): - - Look for a GitHub release with the specified tag - - Try to fetch `dfns.toml` from release assets - - If found, use it and skip step 2 - - If release exists but lacks registry asset, fall through to step 2 - -2. **Check for version-controlled registry**: - - Look for a commit hash, tag, or branch matching the ref - - Try to fetch registry from `{registry_path}` via raw content URL - - If found, use it - - If ref exists but lacks registry file, raise error: - ```python - DfnRegistryDiscoveryError( - f"Registry file not found in {registry_path} for 'modflow6@{ref}'" - ) - ``` - -3. **Failure case**: - - If no matching ref found at all, raise error: - ```python - DfnRegistryDiscoveryError( - f"Registry discovery failed, ref 'modflow6@{ref}' does not exist" - ) - ``` - -**Note**: For initial implementation, focus on version-controlled mode. Release asset mode requires MODFLOW 6 to start distributing DFN files with releases (currently they don't), but would be a natural addition once that happens. - -### Registry/DFN caching - -Cache structure mirrors the Models API pattern: - -``` -~/.cache/modflow-devtools/ -├── dfn/ -│ ├── registries/ -│ │ └── modflow6/ # by source repo -│ │ ├── 6.6.0/ -│ │ │ └── dfns.toml -│ │ ├── 6.5.0/ -│ │ │ └── dfns.toml -│ │ └── develop/ -│ │ └── dfns.toml -│ └── files/ # Actual DFN files, managed by Pooch -│ └── modflow6/ -│ ├── 6.6.0/ -│ │ ├── sim-nam.dfn -│ │ ├── gwf-nam.dfn -│ │ └── ... -│ ├── 6.5.0/ -│ │ └── ... -│ └── develop/ -│ └── ... -``` - -**Cache management**: -- Registry files cached per source repository and ref -- DFN files fetched and cached individually by Pooch, verified against registry hashes -- Cache persists across Python sessions for offline use -- Cache can be cleared with `dfn clean` command -- Users can check cache status with `dfn info` - -### Registry synchronization - -Synchronization updates the local registry cache with remote metadata. 
- -#### Manual sync - -Exposed as a CLI command and Python API: - -```bash -# Sync all configured refs -python -m modflow_devtools.dfns sync +## Background -# Sync specific ref -python -m modflow_devtools.dfns sync --ref 6.6.0 +MODFLOW 6 describes its input format in *definition files* (DFNs). The `modflow_devtools.dfns` module provides a structured, typed Python API for loading and navigating those definitions. -# Sync to any git ref (branch, tag, commit hash) -python -m modflow_devtools.dfns sync --ref develop -python -m modflow_devtools.dfns sync --ref f3df630a +The module complements the older `modflow_devtools.dfn` module, which provides simpler utilities for parsing the legacy flat text format. `modflow_devtools.dfn` remains stable; `modflow_devtools.dfns` is experimental and may change. -# Force re-download -python -m modflow_devtools.dfns sync --force +## Architecture overview -# Show sync status -python -m modflow_devtools.dfns info +The module is split across the following files: -# List available DFNs for a ref -python -m modflow_devtools.dfns list --ref 6.6.0 +| File | Responsibility | +|---|---| +| `schema.py` | Pydantic models for all field types, blocks, and components; the `Dfns` top-level class | +| `registry.py` | Registry classes for local and remote DFN sources | +| `mapper.py` | Maps v1 `.dfn`-format data to the v2 schema used by `schema.py` | +| `__main__.py` | CLI entry point (`sync`, `info`, `clean`) | +| `dfns.toml` | Bundled list of default remote release IDs | -# List all synced refs -python -m modflow_devtools.dfns list -``` +## Schema (`schema.py`) -Or via Python API: +### Field types -```python -from modflow_devtools.dfns import sync_dfns, get_sync_status +Fields are Pydantic models, all inheriting from `FieldBase`. Scalar types are `Keyword`, `String`, `Integer`, `Double`, and `File`. Composite types are `Array`, `Record`, `Union`, and `List`. 
Each has a frozen `type` literal discriminator field, which drives Pydantic's discriminated-union validation. -# Sync all configured refs -sync_dfns() +`FieldBase.from_dict(d, strict=False)` is the low-level factory: it reads the `type` key and dispatches to the appropriate subclass. -# Sync specific ref -sync_dfns(ref="6.6.0") +### Blocks -# Check sync status -status = get_sync_status() -``` +`Block` is a Pydantic model with `name`, `fields` (ordered dict), and `repeats`. Its `optional` property is derived: a block is optional iff all its fields are optional. -#### Automatic sync +`Blocks` is a type alias for `Mapping[str, Block]`. -- **At install time**: Best-effort sync to default refs during package installation (fail silently on network errors) -- **On first use**: If registry cache is empty for requested ref, attempt to sync before raising errors -- **Lazy loading**: Don't sync until DFN access is actually requested -- **Configurable (Experimental)**: Auto-sync is opt-in via environment variable: `MODFLOW_DEVTOOLS_AUTO_SYNC=1` (set to "1", "true", or "yes") +### Components -### Source repository integration +Three component types are distinguished by a `type` discriminator: -For the MODFLOW 6 repository to integrate: +- `Simulation` — always the root; `parent` is `null` +- `Model` — adds a `solution` field (compatible solution type) +- `Package` — adds `multi` (bool) and `subtype` -1. **Generate registry** in CI: - ```bash - # In MODFLOW 6 repository CI - python -m modflow_devtools.dfns.make_registry \ - --dfn-path doc/mf6io/mf6ivar/dfn \ - --output .registry/dfns.toml \ - --ref ${{ github.ref_name }} - ``` +`Component` is an annotated discriminated union of the three. -2. **Commit registry** to `.registry/dfns.toml` +`ComponentBase` is the shared Pydantic base class. It carries `name`, `blocks`, `parent`, `schema_version`, and `derived_dims`. -3. 
**Example CI integration** (GitHub Actions): - ```yaml - - name: Generate DFN registry - run: | - pip install modflow-devtools - python -m modflow_devtools.dfns.make_registry \ - --dfn-path doc/mf6io/mf6ivar/dfn \ - --output .registry/dfns.toml \ - --ref ${{ github.ref_name }} +### `Dfns` - - name: Commit registry - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add .registry/dfns.toml - git diff-index --quiet HEAD || git commit -m "chore: update DFN registry" - git push - ``` +`Dfns` is a Pydantic model that holds a `components` dict. It is the top-level object produced by loading a directory of DFN files. -**Note**: Initially generate registries for version-controlled mode. Release asset mode would require MODFLOW 6 to start distributing DFNs with releases. +Key members: -### DFN addressing +| Member | Type | Description | +|---|---|---| +| `components` | `dict[str, Component]` | All components, keyed by name | +| `schema_version` | `str` (computed) | Version string from components, or `"2"` | +| `root` | `Simulation \| None` | The simulation component, or `None` | +| `children_of(name)` | `dict[str, Component]` | All components whose `parent == name` | +| `explicit_dims_for(name)` | `set[str]` | Explicit dimension names for a component | +| `grid_dims_for(name)` | `set[str]` | Dims inherited from other components | +| `load(path)` | classmethod | Load a directory of `.dfn` or `.toml` files | -**Format**: `mf6@{ref}/{component}` +`Dfns.load()` supports both formats in the same directory. `.dfn` files are parsed by the `modflow_devtools.dfn.schema` module and then mapped to v2 schema objects via `mapper.map`. TOML files carry v2 content directly and are loaded with `tomli` and passed straight to the Pydantic validator. 
-Components include: -- `ref`: Git ref (branch, tag, or commit hash) corresponding to a MODFLOW 6 version -- `component`: DFN component name (without file extension) +Two model validators run at construction time: +- `_validate_schema_version_consistency` — all non-null `schema_version` values must agree. +- `_validate_dims_and_shapes` — validates `derived_dims` expressions and all `Array.shape` elements. -Examples: -- `mf6@6.6.0/sim-nam` - Simulation name file definition for MODFLOW 6 v6.6.0 -- `mf6@6.6.0/gwf-chd` - GWF CHD package definition for v6.6.0 -- `mf6@develop/gwf-wel` - GWF WEL package definition from develop branch -- `mf6@f3df630a/gwt-adv` - GWT ADV package definition from specific commit +### Array dimension validation -**Benefits**: -- Explicit versioning prevents confusion -- Supports multiple MODFLOW 6 versions simultaneously -- Enables comparison between versions -- Works with any git ref (not just releases) +The validation logic in `schema.py` is non-trivial. The following resolution scopes are checked for each shape element: -**Note**: The source is always "mf6" (MODFLOW 6), but the addressing scheme allows for future sources if needed. +1. **Local explicit dims** — `Integer` or `Array` fields with a `dimension` attribute. +2. **Local derived dims** — entries in `component.derived_dims`. +3. **Grid dims** — explicit dims from other components in the spec, filtered by scope. +4. **Intra-record sibling** — fallback for array subfields of records; resolves to a sibling `Integer` with `dimension="record"`. -### Registry classes +Row-level column lookup expressions (`block.column(fk_field)`) are also validated structurally. -The registry class hierarchy is based on a Pydantic `DfnRegistry` base class (in `modflow_devtools/dfns/registry.py`): +`derived_dims` expressions are validated for well-formedness (Python arithmetic syntax), operand scope, and absence of cycles (topological sort).
-**`DfnRegistry` (base class)**: -- Pydantic model with `source` and `ref` fields -- Abstract `spec` property and `get_dfn_path()` method for subclasses to implement -- Concrete helpers: - - `get_dfn(component)` - convenience for `spec[component]` - - `schema_version` - convenience for `spec.schema_version` - - `components` - convenience for `dict(spec.items())` +## Mapper (`mapper.py`) -**`RemoteDfnRegistry(DfnRegistry)`**: +The mapper converts a v1 `Dfn` object (from `modflow_devtools.dfn.schema`) to a v2 `Component`. The entry point is `map(dfn: v1.Dfn) -> Component`. It raises `ValueError` if the input schema version is not `"1"` or `"1.1"`. -Handles remote registry discovery, caching, and DFN fetching. Constructs DFN file URLs dynamically from `BootstrapConfig`/`SourceConfig` — URLs are never stored in the registry file itself. +Component type is inferred from the component name: +- `sim-nam` → `Simulation` +- `*-nam` → `Model` +- `sln-*` → `Package(subtype="solution")` +- `exg-*` → `Package(subtype="exchange")` +- `utl-*` → `Package(subtype="utility")` +- advanced flag set → `Package(subtype="advanced")` +- all others → `Package` -Optional field overrides (`repo`, `dfn_path`, `registry_path`) allow bypassing the bootstrap config, e.g. for testing against a personal fork: +v1 field types are mapped to v2 equivalents. `recarray` fields become `List` with a nested `Record` item. `record` fields become `Record`. `in_record` fields are promoted into their enclosing record's `fields` dict. -```python -# Use bootstrap config (normal usage) -registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") +## Registry (`registry.py`) -# Override repo directly (e.g., testing a fork) -registry = RemoteDfnRegistry( - source="modflow6", - ref="registry", - repo="wpbonelli/modflow6", -) -``` +### `DfnRegistry` -Key methods: `sync(force=False)`, `get_dfn_path(component)`, `registry_meta` property. +Pydantic base class. 
Declares a `_spec` private attribute and stubs `spec` and `get_path()` for subclasses. -**`LocalDfnRegistry(DfnRegistry)`**: +### `LocalDfnRegistry` -For developers working with a local DFN directory: +Takes a `path` field. On first access to `.spec`, calls `Dfns.load(self.path)` and caches the result. `get_path(component)` searches for `.dfn` then `.toml` in the directory. -```python -registry = LocalDfnRegistry(path="/path/to/mf6/doc/mf6io/mf6ivar/dfn") -dfn = registry.get_dfn("gwf-chd") -``` +### `RemoteDfnRegistry` -Loads `DfnSpec` lazily via `DfnSpec.load(path)` on first access. - -**Supporting Pydantic models** (in `registry.py`): -- `BootstrapConfig` / `SourceConfig`: bootstrap file schema (sources, refs, paths) -- `DfnRegistryMeta`: `dfns.toml` registry file schema (schema_version, generated_at, files) -- `DfnRegistryFile`: per-file entry with SHA256 hash - -**Design decisions**: -- **Pydantic-based** (not ABC) — allows Pydantic validation and field introspection -- **Dynamic URL construction** — DFN file URLs constructed at runtime from bootstrap metadata, not stored in registry files -- **No `MergedRegistry`** — users work with one MODFLOW 6 version at a time - -### Module-level API - -Convenient module-level functions: - -```python -from modflow_devtools.dfns import ( - DfnSpec, - get_dfn, - get_dfn_path, - list_components, - sync_dfns, - get_registry, - map, -) - -# Get individual DFNs (defaults to ref="develop") -dfn = get_dfn("gwf-chd") -dfn = get_dfn("gwf-chd", ref="6.5.0") # Specific version - -# Get file path -path = get_dfn_path("gwf-wel", ref="6.6.0") - -# List available components -components = list_components(ref="6.6.0") - -# Work with specific registry -registry = get_registry(ref="6.6.0") -gwf_nam = registry.get_dfn("gwf-nam") - -# Load full specification - single canonical hierarchical representation -spec = DfnSpec.load("/path/to/dfns") # Load from directory - -# Hierarchical access -spec.schema_version # Version('2') when loaded from 
legacy .dfn files (auto-mapped) -spec.root # Root Dfn (simulation component) -spec.root.children["gwf-nam"] # Navigate hierarchy -spec.root.children["gwf-nam"].children["gwf-chd"] - -# Flat dict-like access via Mapping protocol -gwf_chd = spec["gwf-chd"] # Get component by name -for name, dfn in spec.items(): # Iterate all components - print(name) -len(spec) # Total number of components - -# Access spec through registry (registry provides the spec) -registry = get_registry(ref="6.6.0") -spec = registry.spec # Registry wraps a DfnSpec -gwf_chd = registry.spec["gwf-chd"] - -# Map between schema versions -dfn_v1 = get_dfn("gwf-chd", ref="6.4.4") # Older version in v1 schema -dfn_v2 = map(dfn_v1, schema_version="2") # Convert to v2 schema -``` +Takes a `release_id` string of the form `"owner/repo@tag"`, where `tag` may be a specific version string or `"latest"`. -**`DfnSpec` class**: +**Tag resolution**: `latest_tag()` returns the tag part directly if it's not `"latest"`. For `"latest"`, it queries the GitHub API (`/releases/latest`) once and caches the result in `_latest`. -The `DfnSpec` dataclass represents the full specification with a single canonical hierarchical representation: +**Cache path**: `~/.cache/modflow-devtools/dfns/{owner}/{repo}/{resolved_tag}/` on Unix; `%LOCALAPPDATA%/...` on Windows. Respects `XDG_CACHE_HOME`. -```python -from collections.abc import Mapping -from dataclasses import dataclass +**Sync**: `sync(force=False)` downloads `dfns.zip` from `https://github.com/{repo}/releases/download/{tag}/dfns.zip` and extracts it into `cache_path` using `pooch`. The sync is skipped if the cache dir already contains files and `force=False`. -@dataclass -class DfnSpec(Mapping): - """Full DFN specification with hierarchical structure and flat dict access.""" +**Spec access**: `.spec` calls `sync()` if the cache is empty, then calls `Dfns.load(cache_path)`.
- schema_version: str - root: Dfn # Hierarchical canonical representation (simulation component) +**`load(path)`** classmethod: reads a TOML file with a `releases` list of release ID strings and returns a dict of `RemoteDfnRegistry` objects. - # Mapping protocol - provides flat dict-like access - def __getitem__(self, name: str) -> Dfn: - """Get component by name (flattened lookup).""" - ... +**`load_default()`** classmethod: loads the bundled `dfns.toml` config, then merges any user config overlay (see below). Returns a merged dict, with user config entries taking precedence. - def __iter__(self): - """Iterate over all component names.""" - ... +**`from_ids(*ids)`** classmethod: creates registries from ID strings; auto-syncs if `MODFLOW_DEVTOOLS_AUTO_SYNC` is set and the cache is empty. - def __len__(self): - """Total number of components in the spec.""" - ... +**`cached_tag()`**: returns the cached tag without network access. For exact tags, checks whether `cache_path` exists and is non-empty. For `@latest`, scans the repo's cache directory and returns the most-recently-modified tag directory's name. - @classmethod - def load(cls, path: Path | str) -> "DfnSpec": - """ - Load specification from a directory of DFN files. +### Bootstrap and user config - The specification is always loaded as a hierarchical tree, - with flat access available via the Mapping protocol. - """ - ... 
-``` +The bundled config is at `modflow_devtools/dfns/dfns.toml`: -**Design benefits**: -- **Single canonical representation**: Hierarchical tree is the source of truth -- **Flat access when needed**: Mapping protocol provides dict-like interface -- **Simple, focused responsibility**: `DfnSpec` only knows how to load from a directory -- **Clean layering**: Registries built on top of `DfnSpec`, not intertwined -- **Clean semantics**: `DfnSpec` = full specification, `Dfn` = individual component -- **Pythonic**: Implements standard `Mapping` protocol - -**Separation of concerns**: -- **`DfnSpec`**: Canonical representation of the full specification (foundation) - - Loads from a directory of DFN files via `load()` classmethod - - Hierarchical tree via `.root` property - - Flat dict access via `Mapping` protocol - - No knowledge of registries, caching, or remote sources -- **Registries**: Handle discovery, distribution, and caching (built on DfnSpec) - - Fetch and cache DFN files from remote sources - - Internally use `DfnSpec` to represent the loaded specification - - Provide access via `.spec` property - - `get_dfn(component)` → convenience for `spec[component]` - - `get_dfn_path(component)` → returns cached file path - -Backwards compatibility with existing `fetch_dfns()`: - -```python -# Old API — still works for manual downloads (stable modflow_devtools.dfn module) -from modflow_devtools.dfn import get_dfns -get_dfns("MODFLOW-ORG", "modflow6", "6.6.0", "/tmp/dfns") - -# New API (preferred - uses registry and caching) -from modflow_devtools.dfns import sync_dfns, get_registry, DfnSpec -sync_dfns(ref="6.6.0") -registry = get_registry(ref="6.6.0") -spec = registry.spec # Registry wraps a DfnSpec -``` - -## Schema Versioning - -A key design consideration is properly handling schema evolution while separating file format from schema version. 
- -### Separating format from schema - -As discussed in [issue #259](https://github.com/MODFLOW-ORG/modflow-devtools/issues/259), **file format and schema version are orthogonal concerns**: - -**File format** (serialization): -- `dfn` - Legacy DFN text format -- `toml` - Modern TOML format (or potentially YAML, see below) - -The format is simply how the data is serialized to disk. Any schema version can be serialized in any supported format. - -**Schema version** (structural specification): -- Defines what components exist and how they relate to each other -- Defines which variables each component contains -- Defines variable types, shapes, and constraints -- Separates structural specification from input format representation concerns - -The schema describes the semantic structure and meaning of the specification, independent of how it's serialized. - -**Key distinction**: The schema migration is about separating structural specification (components, relationships, variables, types) from input format representation. This is discussed in detail in [pyphoenix-project issue #246](https://github.com/modflowpy/pyphoenix-project/issues/246). - -For example: -- **Input format issue** (v1): Period data defined as recarrays with artificial dimensions like `maxbound` -- **Structural reality** (v2): Each column is actually a variable living on (a subset of) the grid, using semantically meaningful dimensions - -The v1 schema conflates: -- **Structural information**: Components, their relationships, and variables within each component -- **Format information**: How MF6 allows arrays to be provided, when keywords like `FILEIN`/`FILEOUT` are necessary - -The v2 schema should treat these as **separate layers**, where consumers can selectively apply formatting details atop a canonical data model. 
- -**Current state**: -- The code supports loading both `dfn` and `toml` formats -- The `Dfn.load()` function accepts a `format` parameter -- Schema version is determined independently of file format -- V1→V1.1 and V1→V2 schema mapping is implemented - -**Implications for DFNs API**: -- Registry metadata includes both `format` and `schema_version` fields -- Registries can have different formats at different refs (some refs: dfn, others: toml) -- The same schema version can be serialized in different formats -- Schema mapping happens after loading, independent of file format -- Users can request specific schema versions via `map()` function - -### Schema evolution - -**v1 schema** (original): -- Current MODFLOW 6 releases through 6.6.x -- Flat structure with `in_record`, `tagged`, `preserve_case`, etc. attributes -- Mixes structural specification with input format representation (recarray/maxbound issue) -- Can be serialized as `.dfn` (original) or `.toml` - -**v1.1 schema** (intermediate): -- Cleaned-up v1 with data normalization -- Removed unnecessary attributes (`in_record`, `tagged`, etc.) -- Structural improvements (period block arrays separated into individual variables) -- Better parent-child relationships inferred from naming conventions -- Can be serialized as `.dfn` or `.toml` -- **Recommendation from issue #259**: Use this as the mainline, not jump to v2 - -**v2 schema** (future - comprehensive redesign): -- For devtools 2.x / FloPy 4.x / eventually MF6 -- **Explicit parent-child relationships** via `parent` attributes in per-component TOML files (no inference needed) -- **Complete separation of structural specification from input format concerns** (see [pyphoenix-project #246](https://github.com/modflowpy/pyphoenix-project/issues/246)) - - Structural layer: components, relationships, variables, data models - - Format layer: how MF6 allows arrays to be provided, FILEIN/FILEOUT keywords, etc. 
- - Consumers can selectively apply formatting details atop canonical data model -- **Explicit parent-child relationships in DFN files** (see Component Hierarchy section) -- Modern type system with proper array types and semantically meaningful dimensions -- Consolidated attribute representation (see Tentative v2 schema design) -- Likely serialized as TOML or YAML (with JSON-Schema validation via Pydantic) - -**DFNs API strategy**: -- Support all schema versions via registry metadata -- Provide transparent schema mapping where needed -- Default to native schema version from registry -- Allow explicit schema version selection via API -- Maintain backwards compatibility during transitions - -### Tentative v2 schema design - -Based on feedback from mwtoews in [PR #229](https://github.com/MODFLOW-ORG/modflow-devtools/pull/229) and the structural/format separation discussed in [pyphoenix-project #246](https://github.com/modflowpy/pyphoenix-project/issues/246): - -**Structural vs format separation**: -The v2 schema should cleanly separate: -- **Structural specification**: Component definitions, relationships, variable data models - - Generated classes encode only structure and data models - - Use semantically meaningful dimensions (grid dimensions, time periods) -- **Format specification**: How MF6 reads/writes the data (separate layer) - - I/O layers exclusively handle input format concerns - - FILEIN/FILEOUT keywords, array input methods, etc. 
- -**Consolidated attributes**: Replace individual boolean fields with an `attrs` list: ```toml -# Instead of this (v1/v1.1): -optional = true -time_series = true -layered = false - -# Use this (v2): -attrs = ["optional", "time_series"] -``` - -**Array syntax for shapes**: Use actual arrays instead of string representations: -```toml -# Instead of this (v1/v1.1): -shape = "(nper, nnodes)" - -# Use this (v2): -shape = ["nper", "nnodes"] -``` - -**Format considerations**: -- **TOML vs YAML**: YAML's more forgiving whitespace better accommodates long descriptions (common for scientific parameters) -- **Validation approach**: Use Pydantic for both schema definition and validation - - Pydantic provides rigorous validation (addresses pyphoenix-project #246 requirement for formal specification) - - Built-in validation after parsing TOML/YAML to dict (no custom parsing logic) - - Automatic JSON-Schema generation for documentation and external tooling - - More Pythonic than using `python-jsonschema` directly - -**Pydantic integration**: -```python -from pydantic import BaseModel, Field -from typing import Any - -class FieldV2(BaseModel): - name: str - type: str - block: str | None = None - shape: list[str] | None = None - attrs: list[str] = Field(default_factory=list) - description: str = "" - default: Any = None - children: dict[str, "FieldV2"] | None = None - -# Usage: -# 1. Parse TOML/YAML to dict (using tomli/pyyaml/etc) -# 2. Validate with Pydantic (built-in) -parsed = tomli.load(f) -field = FieldV2(**parsed) # Validates automatically - -# 3. 
Export JSON-Schema if needed (for docs, external tools) -schema = FieldV2.model_json_schema() +releases = [ + "MODFLOW-ORG/modflow6@latest", + "MODFLOW-ORG/modflow6-nightly-build@latest", +] ``` -Benefits: -- **Validation and schema in one**: Pydantic handles both, no separate validation library needed -- **Type safety**: Full Python type hints and IDE support -- **JSON-Schema export**: Available for documentation and external tooling -- **Widely adopted**: Well-maintained, used throughout Python ecosystem -- **Better UX**: Clear error messages, better handling of multi-line descriptions (if using YAML) +The user config path (`RemoteDfnRegistry.user_config_path()`) is: +- Linux/macOS: `$XDG_CONFIG_HOME/modflow-devtools/dfns.toml` (default `~/.config/`) +- Windows: `%APPDATA%/modflow-devtools/dfns.toml` -## Component Hierarchy +Both files use the same format. `load_default()` merges them with `base | user` (user entries override base entries of the same key). -Component parent-child relationships are inferred from naming conventions by `to_tree()`. No separate specification file is required. +### Auto-sync -**Current inference rules** (in `to_tree()`): -- `sim-nam` has no parent (root) -- `*-nam` components (e.g. `gwf-nam`, `gwt-nam`) are children of `sim-nam` -- `exg-*`, `sln-*`, `utl-*` components are children of `sim-nam` -- All other `-` components (e.g. `gwf-chd`) are children of `-nam` +`_auto_sync()` checks whether `MODFLOW_DEVTOOLS_AUTO_SYNC` is set to a truthy value (`"1"`, `"true"`, or `"yes"`). When true, `from_ids()` calls `sync()` for any registry whose cache is empty. -This inference is applied during `DfnSpec.load()` regardless of whether the underlying DFN files are legacy `.dfn` format or TOML. For v2 TOML files, `parent` attributes in individual component files are respected when present and take precedence over inference. 
+## CLI (`__main__.py`) -**Planned for v2**: Explicit parent-child relationships via `parent` attributes in per-component TOML files, eliminating reliance on naming conventions. The `to_tree()` inference will remain as a fallback for v1/v1.1 compatibility. +Three subcommands: -## Schema version support +| Command | Action | +|---|---| +| `sync [--force/-f]` | Call `sync()` on each registry from `load_default()` | +| `info` | Call `cached_tag()` on each registry and print cache status | +| `clean` | Delete the entire base cache directory | -The DFNs API will support **multiple schema versions simultaneously**: +The CLI entry point is `main(argv=None)`. -```python -# Schema version is tracked per registry/ref -registry_v1 = get_registry(ref="6.4.4") # MODFLOW 6.4.4 uses v1 schema -registry_v11 = get_registry(ref="6.6.0") # MODFLOW 6.6.0 uses v1.1 schema -registry_v2 = get_registry(ref="develop") # Future: develop uses v2 schema +## Relationship to `modflow_devtools.dfn` -# Get DFN in native schema version -dfn_v1 = registry_v1.get_dfn("gwf-chd") # Returns v1 schema -dfn_v11 = registry_v11.get_dfn("gwf-chd") # Returns v1.1 schema +The v1 `modflow_devtools.dfn` module remains the stable baseline. `modflow_devtools.dfns` builds on top of it: `Dfns.load()` imports `modflow_devtools.dfn.schema` to parse `.dfn` files, and `mapper.py` converts the resulting v1 objects to v2 schema. The v1 module's `fetch_dfns()` function is re-exported from `modflow_devtools.dfns.__init__` for convenience. 
-# Transparently map to desired schema version -from modflow_devtools.dfns import map -dfn_v2 = map(dfn_v1, schema_version="2") # v1 → v2 -dfn_v2 = map(dfn_v11, schema_version="2") # v1.1 → v2 -``` +## Testing -**Registry support**: -- Each registry metadata includes `schema_version` (from component files or inferred) -- Different refs can have different schema versions -- `RemoteDfnRegistry` loads appropriate schema version for each ref -- `load()` function detects schema version and uses appropriate parser/validator - -**Schema detection**: -```python -# In RemoteDfnRegistry or DfnSpec.load() -def _detect_schema_version(self) -> Version: - # 1. Infer from component file content (schema_version field) - sample_dfn = self._load_sample_dfn() - return infer_schema_version(sample_dfn) - - # 2. Default to latest stable - return Version("1.1") -``` +Tests for the dfns module live under `autotest/dfns/`: +- `test_dfns.py` — tests for `Dfns.load()`, `children_of()`, and the CLI +- `test_dfns_registry.py` — tests for `LocalDfnRegistry`, `RemoteDfnRegistry`, and caching behavior +- `test_dfns_schema.py` — tests for schema validation (dims, shapes, fk/pk) +- `test_mapper.py` — unit tests for the v1→v2 mapper -## Implementation Dependencies - -### Completed work - -The `modflow_devtools.dfns` package is implemented in full. 
The following is a summary of what exists: - -- ✅ `Dfn`, `Block`, `Field` dataclasses (in `__init__.py`) -- ✅ Schema definitions (`FieldV1`, `FieldV2`) (in `schema/`) -- ✅ Parsers for both DFN and TOML formats (`parse.py`, `load()`, `load_flat()`, `load_tree()`) -- ✅ Schema mapping (V1 → V2) with `MapV1To2` -- ✅ Hierarchy inference via `to_tree()` / `to_flat()` -- ✅ `DfnSpec` dataclass with `Mapping` protocol and `load()` classmethod -- ✅ `DfnSpec.dump()` / `DfnSpec.dumps()` — serialize full spec as single TOML blob -- ✅ Validation utilities (`is_valid()`) -- ✅ `dfn2toml` conversion tool (`dfn2toml.py`) -- ✅ Bootstrap file and registry schema (`BootstrapConfig`, `SourceConfig`, `DfnRegistryMeta`) -- ✅ Registry classes (`DfnRegistry`, `RemoteDfnRegistry`, `LocalDfnRegistry`) (in `registry.py`) -- ✅ Registry discovery and synchronization (`sync_dfns()`, `get_sync_status()`) -- ✅ Pooch integration for file caching -- ✅ Module-level convenience API (`get_dfn`, `get_dfn_path`, `list_components`, `get_registry`) -- ✅ CLI (`__main__.py`): `sync`, `info`, `list`, `clean` -- ✅ Registry generation tool (`make_registry.py`) -- ⚠️ Integration with MODFLOW 6 CI (requires registry branch merge in MF6 repo) - -The legacy `modflow_devtools.dfn` module (`dfn.py`) remains alongside the new package for backwards compatibility. 
- -**Implementation status** (DFNs API): -- ✅ Bootstrap file and registry schema (`BootstrapConfig`, `SourceConfig`, `DfnRegistryMeta`) -- ✅ Registry discovery and synchronization -- ✅ Pooch integration for file caching -- ✅ Registry classes (`DfnRegistry`, `RemoteDfnRegistry`, `LocalDfnRegistry`) -- ✅ CLI commands (sync, info, list, clean) -- ✅ Module-level convenience API (`get_dfn`, `get_dfn_path`, `list_components`, `sync_dfns`, `get_registry`) -- ✅ Registry generation tool (`make_registry.py`) -- ✅ `DfnSpec.dump()` / `DfnSpec.dumps()` — serialize full spec as single TOML blob -- ⚠️ Integration with MODFLOW 6 CI (requires registry branch merge in MF6 repo) - -### Core components - -**Foundation** (no dependencies): -1. ✅ Core dfns package (schema, parser, utility code) — already merged -2. Add bootstrap file (`modflow_devtools/dfns/dfns.toml`) -3. Define registry schema with Pydantic (handles validation and provides JSON-Schema export) -4. Implement registry discovery logic -5. Create cache directory structure utilities - -**Registry infrastructure** (depends on Foundation): -1. Add Pooch as dependency -2. Implement `DfnRegistry` abstract base class -3. Implement `RemoteDfnRegistry` with Pooch for file fetching -4. Refactor existing code into `LocalDfnRegistry` -5. Implement `sync_dfns()` function -6. Add registry metadata caching with hash verification -7. Implement version-controlled registry discovery -8. Add auto-sync on first use (opt-in via `MODFLOW_DEVTOOLS_AUTO_SYNC` while experimental) -9. **Implement `DfnSpec` dataclass** with `Mapping` protocol for single canonical hierarchical representation with flat dict access - -**CLI and module API** (depends on Registry infrastructure): -1. Create `modflow_devtools/dfns/__main__.py` -2. Add commands: `sync`, `info`, `list`, `clean` -3. Add `--ref` flag for version selection -4. Add `--force` flag for re-download -5. Add convenience functions (`get_dfn`, `get_dfn_path`, `list_components`, etc.) -6. 
Default `ref="develop"` in `get_registry()` / `get_dfn()` etc. for "latest" access -7. Maintain backwards compatibility with `fetch_dfns()` - -**Registry generation tool** (depends on Foundation): -1. Implement `modflow_devtools/dfns/make_registry.py` -2. Scan DFN directory and generate **registry file** (`dfns.toml`): file listings with hashes -3. Compute file hashes (SHA256) for all DFN/TOML files -4. Registry output: just filename -> hash mapping (no URLs - constructed dynamically) -5. Support both full output (for CI) and minimal output (for handwriting) -6. For v1/v1.1: infer hierarchy from naming conventions for validation -7. For v2: read explicit `parent` attributes from component files for validation - -### MODFLOW 6 repository integration - -**CI workflow** (depends on Registry generation tool): -1. Install modflow-devtools in MODFLOW 6 CI -2. Generate registry on push to develop and release tags -3. Commit registry to `.registry/dfns.toml` -4. Test registry discovery and sync -5. **Note**: No separate `spec.toml` is needed — hierarchy is inferred from naming conventions for v1/v1.1, or read from `parent` attributes in component files for v2 - -**Bootstrap configuration** (depends on MODFLOW 6 CI): -1. Add stable MODFLOW 6 releases to bootstrap refs (6.6.0, 6.5.0, etc.) -2. Include `develop` branch for latest definitions -3. Test multi-ref discovery and sync - -### Testing and documentation - -**Testing** (depends on all core components): -1. Unit tests for registry classes -2. Integration tests for sync mechanism -3. Network failure scenarios -4. Multi-version scenarios -5. Schema mapping tests (v1 → v1.1 → v2) -6. Both file format tests (dfn and toml) -7. Backwards compatibility tests with existing FloPy usage - -**Documentation** (can be done concurrently with implementation): -1. Update `docs/md/dfn.md` with API examples -2. Document format vs schema separation clearly -3. Document schema evolution roadmap (v1 → v1.1 → v2) -4. 
Document component hierarchy approach (explicit in DFN files for v2) -5. Add migration guide for existing code -6. CLI usage examples -7. MODFLOW 6 CI integration guide - -## Relationship to Models and Programs APIs - -The DFNs API deliberately mirrors the Models and Programs API architecture for consistency: - -| Aspect | Models API | Programs API | **DFNs API** | -|--------|-----------|--------------|--------------| -| **Bootstrap file** | `models/models.toml` | `programs/programs.toml` | `dfns/dfns.toml` | -| **Registry format** | TOML with files/models/examples | TOML with programs/binaries | TOML with files/components/hierarchy | -| **Discovery** | Release assets or version control | Release assets only | Version control (+ release assets future) | -| **Caching** | `~/.cache/.../models` | `~/.cache/.../programs` | `~/.cache/.../dfn` | -| **Addressing** | `source@ref/path/to/model` | `program@version` | `mf6@ref/component` | -| **CLI** | `models sync/info/list` | `programs sync/info/install` | `dfns sync/info/list/clean` | -| **Primary use** | Access model input files | Install program binaries | Parse definition files | - -**Key differences**: -- DFNs API focuses on metadata/parsing, not installation -- DFNs API leverages existing parser infrastructure (Dfn, Block, Field classes) -- DFNs API handles schema versioning/mapping (format vs schema separation) -- DFNs API supports both flat and hierarchical representations - -**Shared patterns**: -- Bootstrap-driven discovery -- Remote sync with Pooch caching -- Ref-based versioning (branches, tags, commits) -- CLI command structure -- Lazy loading / auto-sync on first use -- Environment variable opt-out for auto-sync - -This consistency benefits both developers and users with a familiar experience across all three APIs. - -## Cross-API Consistency - -The DFNs API follows the same design patterns as the Models and Programs APIs for consistency. 
See the **Cross-API Consistency** section in `models.md` for full details. - -**Key shared patterns**: -- Pydantic-based registry classes (not ABCs) -- Dynamic URL construction (URLs built at runtime, not stored in registries) -- Bootstrap and user config files with identical naming (`dfns.toml`), distinguished by location -- Top-level `schema_version` metadata field -- Distinctly named registry file (`dfns.toml`) -- Shared config utility: `get_user_config_path("dfn")` - -**Unique to DFNs API**: -- Discovery via version control (release assets mode planned for future) -- Extra `dfn_path` bootstrap field (location of DFN files within repo) -- Schema versioning and mapping capabilities -- No `MergedRegistry` (users work with one MF6 version at a time) - -## Design Decisions - -### Use Pooch for fetching - -Following the recommendation in [issue #262](https://github.com/MODFLOW-ORG/modflow-devtools/issues/262), the DFNs API will use Pooch for fetching to avoid maintaining custom HTTP client code. 
This provides: - -- **Automatic caching**: Pooch handles local caching with verification -- **Hash verification**: Ensures file integrity -- **Progress bars**: Better user experience for downloads -- **Well-tested**: Pooch is mature and widely used -- **Consistency**: Same approach as Models API - -### Use Pydantic for schema validation - -Pydantic will be used for defining and validating DFN schemas (both registry schemas and DFN content schemas): - -- **Built-in validation**: No need for separate validation libraries like `python-jsonschema` -- **Type safety**: Full Python type hints and IDE support -- **JSON-Schema export**: Can generate JSON-Schema for documentation and external tooling -- **Developer experience**: Clear error messages, good Python integration -- **Justification**: Widely adopted, well-maintained, addresses the formal specification requirement from [pyphoenix-project #246](https://github.com/modflowpy/pyphoenix-project/issues/246) - -### Schema versioning strategy - -Based on [issue #259](https://github.com/MODFLOW-ORG/modflow-devtools/issues/259): - -- **Separate format from schema**: Registry metadata includes both -- **Support v1.1 as mainline**: Don't jump straight to v2 -- **Backwards compatible**: Continue supporting v1 for existing MODFLOW 6 releases -- **Schema mapping**: Provide transparent conversion via `map()` function -- **Future-proof**: Design allows for v2 when ready (devtools 2.x / FloPy 4.x) - -### Future enhancements - -1. **Release asset mode**: Add support for registries as release assets (in addition to version control) -2. **Registry compression**: Compress registry files for faster downloads -3. **Partial updates**: Diff-based registry synchronization -4. **Offline mode**: Explicit offline mode that never attempts sync -5. **Conda integration**: Coordinate with conda-forge for bundled DFN packages -6. **Multi-source support**: Support definition files from sources other than MODFLOW 6 -7. 
**Validation API**: Expose validation functionality for user-provided input files -8. **Diff/compare API**: Compare DFNs across versions to identify changes +Network-dependent tests (`test_latest_tag_live`, `test_remote_dfn_registry_sync`) are skipped by default with `@pytest.mark.skip`. The registry tests use `unittest.mock.patch` to avoid network calls. diff --git a/docs/md/dfns.md b/docs/md/dfns.md index a7434e7c..f80d944c 100644 --- a/docs/md/dfns.md +++ b/docs/md/dfns.md @@ -4,10 +4,8 @@ MODFLOW 6 specifies input components and their variables in configuration files `modflow_devtools` provides two modules for working with MODFLOW 6 input specification files: -- **`modflow_devtools.dfn`** — stable module, available in all current releases -- **`modflow_devtools.dfns`** — experimental new API, subject to change without notice - ---- +- **`modflow_devtools.dfn`:** stable utilities for parsing legacy `.dfn` files +- **`modflow_devtools.dfns`:** experimental structured API, subject to change without notice ## `modflow_devtools.dfn` (stable) @@ -16,241 +14,272 @@ The stable `modflow_devtools.dfn` module provides basic utilities for parsing le ### Downloading definition files ```python -from modflow_devtools.dfn import get_dfns +from modflow_devtools.dfn import fetch_dfns -get_dfns("MODFLOW-ORG", "modflow6", "6.6.0", "/tmp/dfns") +fetch_dfns("MODFLOW-ORG", "modflow6", "6.6.0", "/tmp/dfns") ``` -Downloads all `.dfn` files for the specified MODFLOW 6 release into the given output directory (returns `None`). - -### Types - -The core types are `TypedDict`s: - -```python -from modflow_devtools.dfn import Dfn, Field - -# Dfn: top-level component (e.g. "gwf-chd") -# name: str -# advanced: bool -# multi: bool -# : dict[str, Field] (one key per block, e.g. "options", "period") - -# Field: individual input variable within a block -# name: str -# type: str (e.g. "keyword", "integer", "double precision", "string", ...) -# block: str -# shape: str | None (e.g. 
"(naux)") -# default: Any -# children: dict[str, Field] | None -# description: str | None -# reader: str (e.g. "urword") -``` +Downloads all `.dfn` files for the specified MODFLOW 6 release into the given output directory. ### Converting to TOML -The `dfn` dependency group is required for the TOML conversion tool: +The `dfn` dependency group is required for the conversion tool: ```shell pip install modflow-devtools[dfn] ``` -To convert legacy `.dfn` files to TOML: +To convert legacy `.dfn` files (default output format is YAML): ```shell -python -m modflow_devtools.dfn2toml -i -o +python -m modflow_devtools.dfnmap -i -o ``` -The tool may also be used on individual files. To validate legacy format files, use the `--validate` flag. +Use `--format` / `-f` to select `yaml` (default), `toml`, or `json`. The tool may also be used on individual files. --- ## `modflow_devtools.dfns` (experimental) -> **Note**: This module is experimental. The API may change without following normal deprecation procedures. +> **Note**: This module is experimental. The API may change without following normal deprecation procedures. To suppress the warning emitted on import, use: +> ```python +> import warnings +> warnings.filterwarnings('ignore', message='.*modflow_devtools.dfns.*experimental.*') +> ``` + +The `modflow_devtools.dfns` module provides a structured API for working with MODFLOW 6 input specifications, including typed Python objects representing each component and field type, a registry system for remote caching, and tools for loading and navigating the full specification. -The `modflow_devtools.dfns` module provides a richer API for working with MODFLOW 6 input specifications, including structured Python objects, a registry system for remote discovery and caching, and serialization to a single TOML document. +### File format and schema version -### Formats +These are two separate concerns. 
-MODFLOW 6 input specifications exist in two formats: +**File format** is the serialization: -**Legacy DFN format** (`.dfn` files): The original text-based format, used in current MODFLOW 6 releases. Flat lists of variables with comments demarcating blocks. +- **Legacy DFN format** (`.dfn`): flat text with comments demarcating blocks, used by MODFLOW 6 releases. +- **TOML format** (`.toml`): per-component TOML documents. +- **YAML format** (`.yaml`): per-component YAML documents. +- **JSON format** (`.json`): per-component JSON documents. -**TOML format** (`.toml` files): A structured, hierarchical representation. Each component is a TOML document with blocks as top-level sections and variables as entries within each section. Variables may be scalar or composite — composites contain fields (if records), choices (if unions), or items (if lists). The MODFLOW 6 repository stores per-component TOML files alongside the legacy `.dfn` files. +TOML, YAML, and JSON files are produced by the `dfnmap` conversion tool. -Both formats are supported by `modflow_devtools.dfns`. The v2 schema (TOML) is the canonical target format; legacy `.dfn` files can be mapped to v2 schema with `map()`. +**Schema version** describes the structure and semantics of the content: + +- **v1 schema**: the original structure embedded in legacy `.dfn` files. Mixes structural definitions with input format details (e.g., `in_record`, `tagged`). +- **v2 schema**: a cleaner, hierarchical representation. Each component has explicitly typed, nested fields; blocks and records are first-class objects; structural specification is separated from input format concerns. + +`modflow_devtools.dfns` always works with v2 schema objects internally. When loading a directory of `.dfn` files, they are parsed as v1 and automatically mapped to v2. TOML, YAML, and JSON files carry v2 content directly and are loaded without mapping. All file formats are supported by `Dfns.load()`. 
### Core classes -#### `Dfn` +#### `Dfns` -Represents a single MODFLOW 6 input component (e.g. `gwf-chd`, `sim-nam`). A dataclass with attributes including `name`, `schema_version`, `blocks`, `parent`, `advanced`, `multi`, `subcomponents`, and optionally `children` (when part of a tree). +`Dfns` is a Pydantic model representing the full set of component definitions for a release. It is the primary object returned by `Dfns.load()` (see also [Registry](#registry) below, which loads and caches DFN files from remote releases). ```python -from modflow_devtools.dfns import load +from modflow_devtools.dfns import Dfns + +# Load all component definitions from a directory +spec = Dfns.load("/path/to/mf6/doc/mf6io/mf6ivar/dfn") + +spec.schema_version # e.g. "2" +spec.root # the Simulation component, or None +len(spec.components) # total number of components -# Load a single component from a TOML file -with open("gwf-chd.toml", "rb") as f: - dfn = load(f, format="toml") +# Dict-like access to components +gwf_chd = spec.components["gwf-chd"] +gwf_chd.name # "gwf-chd" +gwf_chd.parent # "gwf-nam" -print(dfn.name) # "gwf-chd" -print(dfn.schema_version) # Version('2') -print(list(dfn.blocks)) # ['options', 'dimensions', 'period'] +# Navigate the component hierarchy +sim_children = spec.children_of("sim-nam") # {"gwf-nam": ..., ...} +gwf_children = spec.children_of("gwf-nam") # {"gwf-chd": ..., "gwf-wel": ..., ...} ``` -#### `DfnSpec` +#### Component types -Represents the full MODFLOW 6 input specification. Implements the `Mapping` protocol for flat dict-like access to components by name, and exposes the root component (simulation) with the full component hierarchy via `.root`. +Each entry in `spec.components` is one of three component types, discriminated by a `type` field: + +- `Simulation` — the root component (`sim-nam`) +- `Model` — a hydrologic process model (e.g. `gwf-nam`, `gwt-nam`) +- `Package` — any other input component (e.g. 
`gwf-chd`, `gwf-wel`) ```python -from modflow_devtools.dfns import DfnSpec +from modflow_devtools.dfns import Simulation, Model, Package -# Load from a directory of DFN files (legacy or TOML) -spec = DfnSpec.load("/path/to/mf6/doc/mf6io/mf6ivar/dfn") +gwf_nam = spec.components["gwf-nam"] +assert isinstance(gwf_nam, Model) -# Hierarchical access -spec.root.name # "sim-nam" -spec.root.children["gwf-nam"] # GWF model name file Dfn -spec.root.children["gwf-nam"].children["gwf-chd"] # GWF CHD package Dfn +gwf_chd = spec.components["gwf-chd"] +assert isinstance(gwf_chd, Package) +assert gwf_chd.multi is False +assert gwf_chd.subtype == "stress" +``` -# Flat dict-like access -gwf_chd = spec["gwf-chd"] -for name, dfn in spec.items(): - print(name) -len(spec) # total number of components +#### Blocks and fields -# Serialize the full spec as a single TOML document -with open("mf6spec.toml", "wb") as f: - spec.dump(f) +Each component has `blocks`, a dict mapping block names to `Block` objects. Each `Block` has a `fields` dict of typed field objects. 
-toml_str = spec.dumps() +```python +from modflow_devtools.dfns import Block, Keyword, Double, List, Record, Array + +period = gwf_chd.blocks["period"] +assert period.repeats is True + +spd = period.fields["stress_period_data"] +assert isinstance(spd, List) +assert isinstance(spd.item, Record) + +cellid = spd.item.fields["cellid"] +assert isinstance(cellid, Array) ``` +Available field types: + +| Class | `type` value | Description | +|---|---|---| +| `Keyword` | `"keyword"` | Boolean presence/absence | +| `String` | `"string"` | String value | +| `Integer` | `"integer"` | Integer value | +| `Double` | `"double"` | Floating-point value | +| `File` | `"file"` (legacy `"path"`) | File path | +| `Array` | `"array"` | Fixed or dynamic array | +| `Record` | `"record"` | Single-line product type | +| `Union` | `"union"` | Tagged sum type | +| `List` | `"list"` | Tabular collection | + +See [DFN specification](dfnspec.md) for full attribute documentation. + ### Registry -The registry system handles discovering, caching, and accessing DFN files from remote sources (primarily the MODFLOW 6 GitHub repository). +The registry system handles caching and accessing DFN files from MODFLOW 6 releases. #### `LocalDfnRegistry` -For working with DFN files on the local filesystem: +For working with DFN files on the local filesystem. ```python from modflow_devtools.dfns import LocalDfnRegistry registry = LocalDfnRegistry(path="/path/to/mf6/doc/mf6io/mf6ivar/dfn") -dfn = registry.get_dfn("gwf-chd") -spec = registry.spec +spec = registry.spec # Dfns instance +path = registry.get_path("gwf-chd") # Path to the component file ``` #### `RemoteDfnRegistry` -For fetching and caching DFN files from a remote source. Uses [Pooch](https://www.fatiando.org/pooch/) for caching and hash verification. +For fetching and caching DFN files from a MODFLOW 6 release. The `release_id` takes the form `"owner/repo@tag"`, where `tag` may be a specific version or `"latest"`. 
```python from modflow_devtools.dfns import RemoteDfnRegistry -registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") -registry.sync() # downloads and caches the registry + DFN files - -dfn = registry.get_dfn("gwf-chd") -spec = registry.spec -``` - -#### Convenience functions +registry = RemoteDfnRegistry(release_id="MODFLOW-ORG/modflow6@6.6.0") +registry.sync() # download and cache DFN files +registry.sync(force=True) # force re-download -```python -from modflow_devtools.dfns import get_dfn, get_dfn_path, get_registry, list_components, sync_dfns +spec = registry.spec # Dfns (auto-syncs if needed) +path = registry.get_path("gwf-chd") # Path to cached component file -# Sync all configured refs -sync_dfns() +tag = registry.latest_tag() # resolve "latest" to actual tag (network) +tag = registry.cached_tag() # return cached tag (no network) +``` -# Sync a specific ref -sync_dfns(ref="6.6.0") +For `@latest`, `latest_tag()` queries the GitHub API once and caches the result. -# Get a component (auto-syncs if MODFLOW_DEVTOOLS_AUTO_SYNC=1) -dfn = get_dfn("gwf-chd", ref="6.6.0") +#### Default registries -# Get the local cached path to a component file -path = get_dfn_path("gwf-wel", ref="6.6.0") +The package ships with a built-in configuration (`modflow_devtools/dfns/dfns.toml`) that lists the default release IDs to track: -# List all components for a ref -components = list_components(ref="6.6.0") +```toml +releases = [ + "MODFLOW-ORG/modflow6@latest", + "MODFLOW-ORG/modflow6-nightly-build@latest", +] +``` -# Get a registry object -registry = get_registry(ref="6.6.0") +To load these defaults: -# Use a local path instead of remote -registry = get_registry(path="/path/to/dfns") -dfn = get_dfn("gwf-chd", path="/path/to/dfns") +```python +registries = RemoteDfnRegistry.load_default() +# {"MODFLOW-ORG/modflow6@latest": RemoteDfnRegistry(...), ...} ``` -#### CLI +To load specific release IDs programmatically: -```shell -# Sync all configured refs -python -m 
modflow_devtools.dfns sync +```python +registries = RemoteDfnRegistry.from_ids( + "MODFLOW-ORG/modflow6@6.6.0", + "MODFLOW-ORG/modflow6@6.5.0", +) +``` -# Sync a specific ref -python -m modflow_devtools.dfns sync --ref 6.6.0 +#### User config overlay -# Force re-download -python -m modflow_devtools.dfns sync --force +You can extend or override the default registry configuration by creating: -# Show sync status and cache info -python -m modflow_devtools.dfns info +- Linux/macOS: `~/.config/modflow-devtools/dfns.toml` (respects `$XDG_CONFIG_HOME`) +- Windows: `%APPDATA%/modflow-devtools/dfns.toml` -# List available components for a ref -python -m modflow_devtools.dfns list --ref 6.6.0 +The file uses the same format as the bundled config: -# Clear cache -python -m modflow_devtools.dfns clean -python -m modflow_devtools.dfns clean --all +```toml +releases = [ + "my-org/my-mf6-fork@main", +] ``` -#### Auto-sync - -Auto-sync is opt-in (off by default). Enable it by setting the environment variable: +Entries in the user config are merged with (and take precedence over) the defaults. -```shell -MODFLOW_DEVTOOLS_AUTO_SYNC=1 -``` +#### Cache location -When enabled, `get_registry()` will automatically sync if no cached registry exists for the requested ref. +Downloaded DFN files are cached under: -#### Cache location +- Linux/macOS: `$XDG_CACHE_HOME/modflow-devtools/dfns/` (default `~/.cache/`) +- Windows: `%LOCALAPPDATA%/modflow-devtools/dfns/` -Cached registries and DFN files are stored under: +The cache is organized by repository and release tag: ``` -~/.cache/modflow-devtools/dfn/ -├── registries/ -│ └── modflow6/ -│ └── 6.6.0/ -│ └── dfns.toml -└── files/ +~/.cache/modflow-devtools/dfns/ +└── MODFLOW-ORG/ └── modflow6/ - └── 6.6.0/ - ├── sim-nam.toml - ├── gwf-chd.toml + ├── 6.6.0/ + │ ├── sim-nam.dfn + │ ├── gwf-chd.dfn + │ └── ... + └── 6.5.0/ └── ... 
``` -### Schema versioning and mapping - -`modflow_devtools.dfns` supports multiple schema versions simultaneously: +To get the base cache path programmatically: -- **v1**: Original MODFLOW 6 releases. Mixes structural specification with input format details. Serialized as `.dfn` files. -- **v1.1**: Cleaned-up v1 with normalized attributes, structural improvements, and better parent-child inference. Can be serialized as `.dfn` or `.toml`. -- **v2**: Current TOML schema. Separates structural specification from input format concerns. Per-component `.toml` files in the MODFLOW 6 repository use this schema. +```python +RemoteDfnRegistry.base_cache_path() +``` -Use `map()` to convert between schema versions: +#### Checking cache status ```python -from modflow_devtools.dfns import get_dfn, map +from modflow_devtools.dfns.registry import is_cached -dfn_v1 = get_dfn("gwf-chd", ref="6.4.4") # v1 schema -dfn_v2 = map(dfn_v1, schema_version="2") # convert to v2 +is_cached("MODFLOW-ORG/modflow6@6.6.0") # True/False (no network) ``` -`DfnSpec.load()` automatically maps v1 DFNs to v2 when loading from a directory of legacy `.dfn` files. +#### Auto-sync + +When `MODFLOW_DEVTOOLS_AUTO_SYNC=1` is set, `RemoteDfnRegistry.from_ids()` will automatically call `sync()` for any release ID that has no cached files yet. + +### CLI + +```shell +# Show sync status for all configured releases +python -m modflow_devtools.dfns info + +# Sync all configured releases (downloads dfns.zip from GitHub releases) +python -m modflow_devtools.dfns sync + +# Force re-download even if already cached +python -m modflow_devtools.dfns sync --force + +# Clean the entire DFN cache +python -m modflow_devtools.dfns clean +``` diff --git a/docs/md/dfnspec.md b/docs/md/dfnspec.md new file mode 100644 index 00000000..44e2a346 --- /dev/null +++ b/docs/md/dfnspec.md @@ -0,0 +1,625 @@ +# DFN specification + +This document describes the MODFLOW 6 component definition (DFN) system. 
This system is used to specify MODFLOW 6 components and their inputs, and reflects the MODFLOW 6 input data model as described in the MF6 IO guide. + +- [Overview](#overview) +- [Components](#components) + - [Shared attributes](#shared-attributes) + - [`type`](#type) + - [`name`](#name) + - [`blocks`](#blocks) + - [`parent`](#parent) + - [`schema_version`](#schema_version) + - [`dims`](#dims) + - [Component types](#component-types) + - [Simulation](#simulation) + - [Model](#model) + - [Type-specific attributes](#type-specific-attributes) + - [`solution`](#solution) + - [Package](#package) + - [Type-specific attributes](#type-specific-attributes-1) + - [`multi`](#multi) + - [`subtype`](#subtype) +- [Blocks](#blocks-1) + - [Attributes](#attributes) + - [`name`](#name-1) + - [`fields`](#fields) + - [`repeats`](#repeats) +- [Fields](#fields-1) + - [Shared attributes](#shared-attributes-1) + - [`name`](#name-2) + - [`type`](#type-1) + - [`longname`](#longname) + - [`description`](#description) + - [`optional`](#optional) + - [`default`](#default) + - [`developmode`](#developmode) + - [`netcdf`](#netcdf) + - [`tagged`](#tagged) + - [Scalars](#scalars) + - [Keyword](#keyword) + - [String](#string) + - [Type-specific attributes](#type-specific-attributes-2) + - [`valid`](#valid) + - [`case_sensitive`](#case_sensitive) + - [`time_series`](#time_series) + - [`pk`](#pk) + - [`fk`](#fk) + - [`fk_ref`](#fk_ref) + - [Integer](#integer) + - [Type-specific attributes](#type-specific-attributes-3) + - [`valid`](#valid-1) + - [`time_series`](#time_series-1) + - [`pk`](#pk-1) + - [`fk`](#fk-1) + - [`fk_ref`](#fk_ref-1) + - [Double](#double) + - [Type-specific attributes](#type-specific-attributes-4) + - [`time_series`](#time_series-2) + - [File](#file) + - [Type-specific attributes](#type-specific-attributes-5) + - [`mode`](#mode) + - [Composites](#composites) + - [Array](#array) + - [Type-specific attributes](#type-specific-attributes-6) + - [`dtype`](#dtype) + - [`shape`](#shape) + 
- [`time_series`](#time_series-3) + - [`repeat`](#repeat) + - [Record](#record) + - [Type-specific attributes](#type-specific-attributes-7) + - [`fields`](#fields-2) + - [Union](#union) + - [Type-specific attributes](#type-specific-attributes-8) + - [`arms`](#arms) + - [List](#list) + - [Type-specific attributes](#type-specific-attributes-9) + - [`item`](#item) + - [Dimensions](#dimensions) + - [Dimension sources](#dimension-sources) + - [Dimension scope](#dimension-scope) + - [Primary/foreign keys](#primaryforeign-keys) + - [Examples](#examples) + +## Overview + +A MODFLOW 6 simulation consists of a hierarchy of modules, each one representing some functional element, such as a grid discretization, a hydrologic process (i.e. model), or a boundary condition. + +Modules are specified by **component definitions** (DFNs), each of which describes the module's general properties, its input fields, and its relationships to other modules. A module may be represented by more than one component definition. A definition describes one way of representing a module; it may not be the only way. Any number of representational variants may exist, each of which reflects a certain tradeoff between properties like program runtime, memory or disk usage, and convenience. + +This document refers to **components** instead of modules to emphasize this distinction. + +## Components + +Component definitions consist of a number of attributes: + +- `type`: the component type (`"simulation"`, `"model"`, or `"package"`) +- `name`: the component's name +- `blocks`: block definitions +- `parent`: parent component(s) +- `schema_version`: DFN schema version +- `dims`: named dimensions (field-backed or derived) available for use in array shapes + +Components may refer to, i.e. be constrained by, other components. Cross-component constraints include parent-child relations, solution compatibility, and format variants. + +### Shared attributes + +#### `type` + +The component's type. Required. 
One of: + +- `"simulation"`: the root of the runtime hierarchy. +- `"model"`: a hydrologic process model. +- `"package"`: a model input package. + +#### `name` + +`string`. Required. The component's name. By convention the hyphenated `abc-xyz` stem of the DFN file name. + +#### `blocks` + +`{string: Block} (default: {})`. The component's input blocks. A component may be empty, i.e. have no blocks. See section below. + +#### `parent` + +Except for the simulation, which is the root of the runtime hierarchy, all MF6 components have a parent. Parent-child relations may range from fully constrained (e.g., a `gwf-chd` package must be a child of a GWF model) to completely unconstrained. Components which may be attached to multiple possible parents are historically called **subpackages**. + +Parent relationships are defined bottom-up with attribute `parent`: + +- `null` — no parent; only valid for the root, i.e. simulation. +- `"*"` — unconstrained; any parent component type is allowed. +- A string or list of strings — declares the set of valid parent component types. Entries are either: + - **Component type names:** `"simulation"`, `"model"`, or `"package"`. + - **Concrete component names:** e.g. `"gwf-sfr"`, `"gwf-nam"` + +**Note:** Type names and concrete component names may be mixed. A type name subsumes any named component of the same type: e.g., `["gwf-sfr", "package"]` reduces to `["package"]` since `gwf-sfr` is a package. + +#### `schema_version` + +`string | null (default: null)`. The version of the DFN schema. Optional but recommended. When multiple components are loaded together into a `DfnSpec`, all non-null `schema_version` values must agree; mixed versions are a validation error. + +#### `dims` + +`{string: Dim} (default: {})`. Named dimensions available for use in array and list shape expressions. 
Each entry is either field-backed (`field`: the name of an integer or self-sizing array field in this component) or derived (`expr`: an arithmetic expression over other dims), plus a `scope` that controls visibility to other components. See [Dimensions](#dimensions). + +### Component types + +Three component types can be distinguished: simulation, model, and package. + +#### Simulation + +The simulation is the root of the MODFLOW 6 runtime module hierarchy. A simulation may contain one or more models, organized into one or more solutions. It has no parent (`parent: null`) and no type-specific attributes beyond those shared by all components. + +#### Model + +A model represents a hydrologic process. Models are managed and solved by the simulation. + +##### Type-specific attributes + +###### `solution` + +`"ims" | "ems" | "sln-ims" | "sln-ems" | null (default: null)`. MF6 supports different solution schemes: implicit solutions (solve systems of coupled equations iteratively) and explicit solutions (used when closed-form solutions are available). A model declares which solution type it requires with the optional `solution` attribute. Solution packages do not redundantly declare which model types they support; compatibility is determined entirely from the model side. + +#### Package + +A package is any component that is not a simulation or a model. + +##### Type-specific attributes + +###### `multi` + +`boolean (default: false)`. Indicates that multiple instances of this component may be attached to the same parent component. Components of which multiple instances are allowed are called "multi-packages". + +###### `subtype` + +Optional discriminator indicating the package's functional role. Several package subtypes may be distinguished: solutions, exchanges, stress packages, advanced packages, and utility packages. + +- `"solution"`: provides solving capability for models. Compatibility with a model is determined by the model's `solution` attribute. 
+- `"exchange"`: connects two models, enabling them to share boundary conditions or state at their interface. Parent is the simulation. +- `"stress"`: imposes boundary conditions on a model. Period data is provided per stress period; each period block replaces the full set of stresses for that period. Stress packages always declare a `maxbound` integer dimension (the maximum number of boundary entries per stress period) and their period block list carries `shape: ["maxbound"]`. +- `"advanced"`: an advanced stress package. Differs from `"stress"` in three key ways: + 1. Solves a continuity equation. Each feature (well, reach, lake cell, UZF cell) internally balances inflows, outflows, and change in storage. Traditional stress packages impose static conditions and do not have an internal water budget. **Note:** advanced packages can act as receivers in the Water Mover (MVR/MVE) package because they have an internal continuity equation to receive diverted water into. Traditional stress packages cannot. + 2. Has dynamic state variables. Advanced packages compute a dependent variable (e.g., lake stage, well head, reach stage) that is part of the solution. Traditional stress packages use fixed/user-specified values. + 3. Stress periods have feature replacement rather than block replacement semantics: when a new period block configuration is provided, traditional stress packages replace the entire previous configuration; advanced packages perform partial updates, modifying only features explicitly appearing in the new period block. **Note:** both simple and advanced packages fill-forward across omitted stress periods; the distinction is only in what happens when a new period block configuration is specified. + + Advanced packages do **not** declare a `maxbound` dimension. Their list lengths (packagedata rows, period entries per feature, etc.) 
are determined at runtime — typically derived from a linked flow package's budget object or internal state — and are not declared in the DFN. Accordingly, their list fields carry `shape: []`. +- `"utility"`: an auxiliary package that may be attached to models or packages, such as time series, time-array series, or observations. Utility packages (`utl-*`) are distinguished from primary model input packages by providing configurational or cross-cutting concerns rather than representing a first-class hydrologic process. They may support `multi`. + +`subtype: null` (the default) covers packages that don't fall into any named category, such as output control packages. + +## Blocks + +A block is a group of related fields, essentially a product type. Record fields are also product types; the distinction is that records occupy a single line in MF6 input files, while blocks are multiline constructs delimited by headers, e.g. + +``` +begin <name> + field1 value + field2 value +end <name> +``` + +Blocks are treated differently depending on the structural composition of their top-level fields. The sample above is typical of a block containing configuration options, which is essentially a dictionary mapping field names to values. + +A field's value need not be preceded by its name; see the `tagged` section below. Tagged fields must precede any and all untagged fields in the block definition and consequently in input files. + +### Attributes + +#### `name` + +`string`. Required. The identifier used in `begin <name>` / `end <name>` delimiters in MF6 input files. + +#### `fields` + +`{string: Field} (default: {})`. The block's fields, in definition order. + +#### `repeats` + +`boolean (default: false)`. Whether the block may appear multiple times in an input file. When true, each occurrence is read independently and associated with a unique label. The canonical repeating block is the period block, whose label is the stress period number. + +A block has no explicit `optional` attribute. 
Its optionality is derived from its fields: a block is optional if and only if all of its fields are optional (vacuously true for an empty block). This combines with `repeats` to give four configurations: + +| `repeats` | derived optional | Meaning | +|---|---|---| +| `false` | `false` | must appear exactly once | +| `false` | `true` | may appear at most once | +| `true` | `false` | must appear at least once | +| `true` | `true` | may appear zero or more times | + +## Fields + +A field is a [union](https://en.wikipedia.org/wiki/Tagged_union) of concrete data types, discriminated by a `type` attribute. A field consists of a set of attributes, some shared, some type-specific. + +Field definitions are not entirely self-contained. Some fields may refer to other fields, in the same component or in another. There are two cases of this: + +- array dimensions +- list primary/foreign keys + +These cases are associated with type-specific attributes described below. + +### Shared attributes + +There is a core set of attributes shared by all field types: + +- `name` +- `type` +- `longname` +- `description` +- `optional` +- `default` +- `developmode` +- `netcdf` +- `tagged` + +#### `name` + +`string`. Required. + +#### `type` + +Required. One of: + +- `keyword` +- `integer` +- `double` +- `array` +- `string` +- `path` +- `record` +- `union` +- `list` + +#### `longname` + +`string | null (default: null)`. A longer, more descriptive name. From the [NetCDF conventions](https://docs.unidata.ucar.edu/nug/current/attribute_conventions.html#long_name). May contain spaces. + +#### `description` + +`string | null (default: null)`. A detailed description of the field. + +#### `optional` + +`boolean (default: false)`. Indicates that the field is not mandatory and may be omitted. May be applied to both composite and scalar fields. + +#### `default` + +The field's default value. Only relevant for optional fields. TODO: determine whether to keep. 
MF6 doesn't read DFN defaults, only flopy does. MF6 implements defaults internally, so care must be taken to keep DFNs in sync, or maybe IDM could read the default from the DFNs. + +#### `developmode` + +`boolean (default: false)`. Feature flag indicating that the field is not released yet, only allowed in develop mode builds. + +#### `netcdf` + +`boolean (default: false)`. Marks a field that can appear in NetCDF input files. + +#### `tagged` + +`boolean (default: true)`. Indicates that the field value should be preceded by the field name. + +### Scalars + +Scalar fields define a single value. + +#### Keyword + +Type `keyword`. Represents a boolean choice. In input files, the presence of a keyword indicates true, its absence false. + +#### String + +Type `string`. + +##### Type-specific attributes + +###### `valid` + +`[string] | null`. Permitted values (enumeration constraint). Empty list is treated as absent. + +###### `case_sensitive` + +`boolean (default: false)`. Indicates that the string's case must be preserved. The MF6 parser uppercases strings by default. + +###### `time_series` + +`boolean (default: false)`. Marks fields where the parser accepts either a string literal or a time-series name (referencing a `utl-ts` object). + +###### `pk` + +`boolean (default: false)`. Marks this scalar as the primary key of its containing list's item record. Valid only on integer or string scalars that are columns in a list item record. Exactly one column per list item may be marked pk. + +###### `fk` + +`string | null (default: null)`. Marks this scalar as a foreign key. Valid only on integer or string scalars that are columns in a list item record. 
Three forms: (1) hierarchical path `"block.field"` or `"component.block.field"` — fully static, used without `fk_ref`; (2) sentinel `"node"` — grid cell reference, used without `fk_ref`; (3) bare block name (e.g., `"packagedata"`) — used together with `fk_ref` to name the block within the runtime-resolved target component, leaving only the pk field to be discovered. See "Primary/foreign keys". + +###### `fk_ref` + +`string | null (default: null)`. For FKs whose target component is only known at runtime. Names a sibling string field whose value identifies the target component. May be set alone (block within target also unknown) or together with `fk` as a bare block name (block known, component not). See "Primary/foreign keys". + +#### Integer + +Type `integer`. + +##### Type-specific attributes + +###### `valid` + +`[integer] | null`. Permitted values (enumeration constraint). Empty list is treated as absent. + +###### `time_series` + +`boolean (default: false)`. Marks fields where the parser accepts either a numeric literal or a time-series name (referencing a `utl-ts` object). Not inferable from structural type. Also appears on array fields (where it references a `utl-tas` object instead). Note that `utl-tas` currently only works with layered arrays, not full-grid arrays, though generalizing has been considered. + +###### `pk` + +`boolean (default: false)`. Marks this scalar as the primary key of its containing list's item record. Valid only on integer or string scalars that are columns in a list item record. Exactly one column per list item may be marked pk. + +###### `fk` + +`string | null (default: null)`. Marks this scalar as a foreign key. Valid only on integer or string scalars that are columns in a list item record. 
Three forms: (1) hierarchical path `"block.field"` or `"component.block.field"` — fully static, used without `fk_ref`; (2) sentinel `"node"` — grid cell reference, used without `fk_ref`; (3) bare block name (e.g., `"packagedata"`) — used together with `fk_ref` to name the block within the runtime-resolved target component, leaving only the pk field to be discovered. See "Primary/foreign keys". + +###### `fk_ref` + +`string | null (default: null)`. For FKs whose target component is only known at runtime. Names a sibling string field whose value identifies the target component. May be set alone (block within target also unknown) or together with `fk` as a bare block name (block known, component not). See "Primary/foreign keys". + +#### Double + +Type `double`. + +##### Type-specific attributes + +###### `time_series` + +`boolean (default: false)`. Marks fields where the parser accepts either a numeric literal or a time-series name (referencing a `utl-ts` object). Not inferable from structural type. Also appears on array fields (where it references a `utl-tas` object instead). Note that `utl-tas` currently only works with layered arrays, not full-grid arrays, though generalizing has been considered. + +#### File + +Type `file`. + +##### Type-specific attributes + +###### `mode` + +`"filein" | "fileout"`. Whether the path is to an input or output file. Required. + +### Composites + +Three kinds of composite type are relevant to MF6: [product](https://en.wikipedia.org/wiki/Product_type) (record), [sum](https://en.wikipedia.org/wiki/Tagged_union) (union), and collection (array, list). + +Composite fields are explicitly nested so that the composite structure is reflected in the schema. Product and sum types have multiple nested subfields. Lists have a single nested subfield. Arrays have no nested subfields; see below. + +#### Array + +Type `array`. + +Arrays are not proper composites. An array does not have an item subfield as does a list. 
Instead, it has a `dtype` attribute identifying its scalar element type. An array may not contain composite elements; `dtype` must be a scalar type. + +A 1D array may have absent or empty `shape`, indicating no constraint on its size, in which case it is called **self-sizing**. Self-sizing arrays are parsed by MF6 dynamically at runtime. The size of a self-sizing array may serve as a dimension for other arrays (see below). + +A 1D array appearing as a subfield of a record is called an **inline array**. Inline arrays with a declared shape are self-explanatory. An inline array may only be self-sizing if it is the right-most subfield of the record; in this case the record is essentially a variadic tuple. + +##### Type-specific attributes + +###### `dtype` + +`string`. The array's data type. Must be one of the scalar types. + +###### `shape` + +`[string] (default: [])`. The array's shape, as a list of shape expressions, one per dimension. An empty list means the array is 1-dimensional and **self-sizing** (see above). + +###### `time_series` + +`boolean (default: false)`. Marks fields where the READARRAY invocation may be replaced by a TAS name referencing a `utl-tas` time-array series object. At any model time, the TAS provides an interpolated grid-shaped array. Distinct from the scalar case: references `utl-tas`, not `utl-ts`. Note that `utl-tas` currently only works with layered arrays, not full-grid arrays, though generalizing has been considered. + +###### `repeat` + +`string | null (default: null)`. Names the field (within the same component) whose runtime length determines how many times this field is read sequentially within an array block, with each reading appended to an accumulated sequence. See `repeat` section below. + +#### Record + +Type `record`. Product type. In MF6 input files, records appear on a single line. Record subfields may or may not be `tagged`. 
While blocks can be considered product types also, in the DFN specification only records are considered fields; blocks are considered named collections of related fields. + +##### Type-specific attributes + +###### `fields` + +`{string: Scalar | Array | Record | Union}`. Subfields, required. + +**Note:** An array appearing as a subfield of a record is read inline on the same line, not in the READARRAY format. If the array's `shape` uses a row-level column lookup, the record is effectively a variadic tuple: its width varies per row as determined by a column in a FK-linked list. See [Row-level column lookups](#row-level-column-lookups). + +**Note:** if a nested record appears inside another record, the inner record's contents should appear inline inside the outer record's contents, on the same line. + +#### Union + +Type `union`. Sum type. + +##### Type-specific attributes + +###### `arms` + +`{string: Scalar | Record}`. Subfields, required. + +#### List + +Type `list`. Collection type. Unlimited but for one rule: a list may not contain another list. Lists are distinct from arrays in two ways: a list element may be a composite type and a list admits sparse representations. + +##### Type-specific attributes + +###### `item` + +`record | union`. Item type, required. + +###### `shape` + +`[string] (default: [])`. The list's shape, as a list of at most one shape expression (lists are necessarily 1-dimensional). + +An **empty `shape`** means the list length is unconstrained at schema-definition time. This is the correct representation for any list whose length is determined at runtime rather than from a declared dimension — including all list fields in advanced packages (LAK, SFR, MAW, UZF, and their transport-side counterparts), whose lengths are derived from a linked flow package's budget or internal state and are never declared in the DFN. + +A **non-empty `shape`** (exactly one element) names a declared dimension that bounds the list. 
The canonical case is a stress package's period block list, which carries `shape: ["maxbound"]`. The `maxbound` dimension is explicitly declared in the stress package's `dimensions` block as an integer field the user must supply. + +**Note:** Some non-period lists in stress-type packages (e.g. `utl-spc`) also reference `maxbound`; these follow the same rule — `maxbound` must be an explicitly declared dimension for the shape to be meaningful. + +### Dimensions + +Dimensions may be declared by a component with a `dims` map. Each entry in `dims` consists of a **source** (a field or an expression combining other dimension fields) and a **scope**: + +```yaml +dims: + nlay: + field: nlay # backed by a sibling integer field 'nlay' + scope: model + nodes: + expr: "nlay * nrow * ncol" # derived from other dims + scope: model + nper: + field: nper + scope: simulation +``` + +Dimensions may be used in the `shape` expression of `list` and `array` fields. + +#### Dimension sources + +The `field` and `expr` attributes define dimension sources. These attributes are mutually exclusive, distinguishing **explicit dimensions** from **derived dimensions**. + +Explicit dimensions are most straightforwardly defined with an `integer` field, directly indicating the size of the dimension. Self-sizing `array` fields may also serve as explicit dimension sources; a self-sizing array dimension is effectively a dynamic dimension size, indicating "the same size as this array". + +A dimension with an `expr` attribute rather than `field` is a derived dimension. Shape expressions use Python-like syntax and may contain several kinds of reference, resolved in the order presented below: + +- **Local dimension**: an explicit or derived dimension in this component, resolved in dependency order. +- **Inherited dimension**: a dimension inherited from another component, per scoping rules (see below). +- **Record subfield**: a sibling `integer` subfield in the same record. 
Makes the record a variadic tuple whose width varies per row. Valid only when the array is a subfield of a record. +- **List column**: a subfield of a record which is the item type of a regular (i.e. tabular) list, in this component or another. If in another component, the name must be fully qualified (see below). Valid only when the array is a subfield of a record. + +Shape expressions may also include simple integer arithmetic, e.g. `nlay + 1`, as well as constraints, e.g. `<`, `>`, `<=`, or `>=` and simple math functions like `sum()`. + +Canonical examples of dimensions: + +```yaml +dims: + # sim-tdis + nper: {field: nper, scope: simulation} + + # gwf-dis + nlay: {field: nlay, scope: model} + nrow: {field: nrow, scope: model} + ncol: {field: ncol, scope: model} + ncpl: {expr: "nrow * ncol", scope: model} + nodes: {expr: "nlay * nrow * ncol", scope: model} + ncelldim: {expr: "3", scope: model} + + # gwf-disv + ncpl: {field: ncpl, scope: model} + nodes: {expr: "nlay * ncpl", scope: model} + ncelldim: {expr: "2", scope: model} + + # gwf-lak + total_lake_connections: {expr: "sum(packagedata.nlakeconn)", scope: component} +``` + +An inline array may have its size determined by an integer subfield in the same record, or if the record it is within is the item type of a list, by a column in another list. + +If a record contains an `integer` field followed by an `array` whose shape expression names that field, the record is self-describing: it carries its own sizing information. Inline array dimensions of this kind need not be declared at the component level. For instance: + +```yaml +connectiondata: + type: list + item: + type: record + fields: + icno: + type: integer + fk: "packagedata.icno" + ncvert: + type: integer + icvert: + type: array + dtype: integer + shape: ["ncvert"] +``` + +If an inline array appears in a record which is the item type of a list, the array's size may be specified by a column in another regular list. 
This is a form of primary-/foreign-key relation; see [Primary/foreign keys](#primaryforeign-keys). The syntax for this form is `[component.]block.column(fk_field)`. If the list is in the same component, the dimension need not be declared in `dims`. + +- `component`: the component name. Only required if in a different component. +- `block`: the name of the block containing the list. In all current cases the list field has the same name as its containing block, so this also serves as the list name. +- `column`: an integer subfield in the list's record item type. +- `fk_field`: an integer subfield in the referring array's record whose `fk` attribute resolves to a `pk` subfield in the dimension-providing record. + +The parenthetical `(fk_field)` serves as the row selector, distinguishing this form from a derived dimension expression over the same path, e.g. `[component.]block.column`. + +The canonical example of this is `gwf-sfr.connectiondata.ic`, an array field whose length varies per reach according to the `ncon` column in `packagedata`: + +```yaml +packagedata: + type: list + item: + type: record + fields: + ifno: + type: integer + pk: true + ncon: + type: integer + # ... + +connectiondata: + type: list + item: + type: record + fields: + ifno: + type: integer + fk: "packagedata.ifno" + ic: + type: array + dtype: integer + shape: ["packagedata.ncon(ifno)"] +``` + +`packagedata.ncon(ifno)` means: follow `ifno`'s FK to identify the `packagedata` row, then read `ncon` from it. Each `connectiondata` row has a different number of `ic` values; each record is a variadic tuple, making the list jagged. + +#### Dimension scope + +Dimensions may specify a `scope` attribute controlling which other components can inherit the dimension: + +- **`"component"`**: only visible within this component (or to subpackages that list this component as their explicit parent). The default scope. 
+- **`"model"`**: visible to any component that can share the same model instance, as determined by component `parent` attributes. A dimension defined in component A with `scope: "model"` is visible to component B if: + - A's `parent` contains a concrete model-name entry (e.g. `"gwf-nam"`, `"chf-nam"`), and + - B's `parent` contains either that same model-name entry (meaning B can belong to the same model type), a generic type (`"model"` or `"package"`, meaning B can be attached to any such type), or a wildcard pattern (`"*"`, meaning B can be attached to any component). +- **`"simulation"`**: always visible to all components. + +Examples: +- `gwf-dis.dims.nrow`, `ncol`, `nlay` have `scope: "model"`: accessible to `gwf-chd`, `gwf-wel`, and any other component whose parent is `gwf-nam`, but also to e.g. `utl-spca` (which has `parent: "package"`). +- `sim-tdis.dims.nper` has `scope: "simulation"`: accessible from all components. +- `gwf-lak.dims.total_lake_connections` has `scope: "component"`: private to that component. + +### Primary/foreign keys + +Sometimes a column in one list identifies a row in another list, or a grid cell. This can be conceptualized as a primary key (PK) / foreign key (FK) relation. Integers and strings may encode PK/FK semantics with attributes `pk`, `fk`, and `fk_ref`. + +**Note**: PK/FK attributes are only valid on integer and string fields, and only fields appearing as columns in a list item record type. + +The `fk` attribute can take one of three forms: + +- **Hierarchical path** — `"block.field"` for within-component references, `"component.block.field"` for cross-component references where the target is statically known. Used without `fk_ref`. +- **`"node"` sentinel** — indicates a grid cell reference. The target is the parent model's spatial discretization, resolved at runtime. Used without `fk_ref`, wherever a field carries a cellid (e.g., `cellid` columns in sparse stress period blocks, the integer arm of `utl-obs.continuous.id`). 
+- **Bare block name** (e.g., `"packagedata"`) — used together with `fk_ref`. `fk_ref` resolves the target component at runtime; `fk` names the block within it. The codec then finds the unique `pk: true` field in that block. A bare block name contains no dot and is not `"node"`. + +The `fk_ref` attribute names a sibling string field whose runtime value identifies the target component. Two sub-cases exist: + +- **With `fk`** (bare block name): the codec resolves the component from `fk_ref`, then finds the unique `pk: true` field in the block named by `fk`. This is fully explicit and preferred when the target block is known. For all current corpus cases where `fk_ref` is used with a polymorphic integer pk target (SFR, MAW, UZF, LAK via `gwf-mvr`), the block is `packagedata`; `fk: "packagedata"` should therefore always be set alongside `fk_ref` for these cases. +- **Without `fk`**: the target block is also unknown at schema time. The codec must resolve case-by-case. This mode is unavoidable when the target block itself varies by component (e.g., the `utl-obs.continuous.id` string arm, where the target is a boundary name field whose block varies by package type). Document these cases explicitly rather than relying on a generic convention. 
+ +#### Examples + +| Field | `pk` | `fk` | `fk_ref` | Notes | +|---|---|---|---|---| +| `gwf-sfr.packagedata.rno` | `true` | — | — | PK of the reach list | +| `gwf-sfr.connectiondata.rno` | — | `"packagedata.rno"` | — | FK; also row selector for ic's shape lookup | +| `gwf-sfr.connectiondata.ic` | — | — | — | inline array; `shape: ["packagedata.ncon(rno)"]`; elements are signed reach refs (sign encodes upstream/downstream direction) | +| `gwf-sfr.period.rno` | — | `"packagedata.rno"` | — | within-component | +| `gwf-mvr.packages.pname` | `true` | — | — | string PK of the package list | +| `gwf-mvr.period.pname1` | — | `"packages.pname"` | — | string FK, within-component | +| `gwf-mvr.period.id1` | — | `"packagedata"` | `"pname1"` | component resolved from pname1; codec finds unique pk in packagedata | +| `utl-obs.continuous.id` (string arm) | — | — | `"obstype"` | **Open:** target block varies by package type; codec must handle case-by-case | +| `utl-obs.continuous.id` (integer arm) | — | `"node"` | — | grid cell reference | +| `gwf-wel.period.cellid` | — | `"node"` | — | grid cell reference | diff --git a/docs/md/doctoc.md b/docs/md/doctoc.md deleted file mode 100644 index ec752632..00000000 --- a/docs/md/doctoc.md +++ /dev/null @@ -1,46 +0,0 @@ -# Generating TOCs - -The [`doctoc`](https://www.npmjs.com/package/doctoc) tool generates table of contents sections for markdown files. - -## Installing Node.js, `npm` and `doctoc`` - -`doctoc` is distributed with the [Node Package Manager](https://docs.npmjs.com/cli/v7/configuring-npm/install). [Node](https://nodejs.org/en) is a JavaScript runtime environment. - -On Ubuntu, Node can be installed with: - -```shell -sudo apt update -sudo apt install nodejs -``` - -On Windows, with [Chocolatey](https://community.chocolatey.org/packages/nodejs): - -```shell -choco install nodejs -``` - -Installers and binaries for Windows and macOS are [available for download](https://nodejs.org/en/download). 
- -Once Node is installed, install `doctoc` with: - -```shell -npm install -g doctoc -``` - -## Using `doctoc` - -Then TOCs can be generated with `doctoc `, e.g.: - -```shell -doctoc DEVELOPER.md -``` - -This will insert HTML comments surrounding an automatically edited region, in which `doctoc` will create an appropriately indented TOC tree. Subsequent runs are idempotent, scanning for headers and only updating the TOC if the file header structure has changed. - -To run `doctoc` for all markdown files in a particular directory (recursive), use `doctoc some/path`. - -By default `doctoc` inserts a self-descriptive comment - -> **Table of Contents** *generated with DocToc* - -This can be removed (and other content within the TOC region edited) — `doctoc` will not overwrite it, only the table. diff --git a/docs/md/programs.md b/docs/md/programs.md index d3942dbc..8d834927 100644 --- a/docs/md/programs.md +++ b/docs/md/programs.md @@ -1,16 +1,12 @@ # Programs API -> **Experimental API Warning** -> -> This API is experimental and may change or be removed in future versions without following normal deprecation procedures. Use at your own risk. +> **Warning**: This API is experimental and may change or be removed in future versions without following normal deprecation procedures. Use at your own risk. > > When importing this module programmatically, you will see a `FutureWarning`. To suppress this warning: > ```python > import warnings > warnings.filterwarnings('ignore', message='.*modflow_devtools.programs.*experimental.*') > ``` -> -> The `mf programs` CLI command is stable and does not trigger warnings. The `modflow_devtools.programs` module provides programmatic access to MODFLOW and related programs in the MODFLOW ecosystem. It can be used with MODFLOW organization releases or custom program repositories. 
diff --git a/modflow_devtools/dfns/fetch.py b/modflow_devtools/dfn/__init__.py similarity index 55% rename from modflow_devtools/dfns/fetch.py rename to modflow_devtools/dfn/__init__.py index ecbb7b28..d2c86cb1 100644 --- a/modflow_devtools/dfns/fetch.py +++ b/modflow_devtools/dfn/__init__.py @@ -1,17 +1,43 @@ +import shutil +import tempfile from os import PathLike from pathlib import Path -from shutil import copytree -from tempfile import TemporaryDirectory +from modflow_devtools.dfn.schema import ( + Dfn, + Dfns, + Field, + Fields, + FieldType, + FormatVersion, + Reader, + Ref, + Sln, + get_fields, +) from modflow_devtools.download import download_and_unzip +__all__ = [ + "Dfn", + "Dfns", + "Field", + "FieldType", + "Fields", + "FormatVersion", + "Reader", + "Ref", + "Sln", + "fetch_dfns", + "get_fields", +] + def fetch_dfns(owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: bool = False): """Fetch definition files from the MODFLOW 6 repository.""" url = f"https://github.com/{owner}/{repo}/archive/{ref}.zip" if verbose: - print(f"Downloading MODFLOW 6 repository archive from {url}") - with TemporaryDirectory() as tmp: + print(f"Downloading MODFLOW 6 repository from {url}") + with tempfile.TemporaryDirectory() as tmp: dl_path = download_and_unzip(url, Path(tmp), verbose=verbose) contents = list(dl_path.glob("modflow6-*")) proj_path = next(iter(contents), None) @@ -19,7 +45,4 @@ def fetch_dfns(owner: str, repo: str, ref: str, outdir: str | PathLike, verbose: raise ValueError(f"Missing proj dir in {dl_path}, found {contents}") if verbose: print("Copying dfns from download dir to output dir") - copytree(proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True) - - -get_dfns = fetch_dfns # alias for backward compatibility + shutil.copytree(proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True) diff --git a/modflow_devtools/dfn/mapper.py b/modflow_devtools/dfn/mapper.py new file mode 100644 index 
00000000..31f29e06 --- /dev/null +++ b/modflow_devtools/dfn/mapper.py @@ -0,0 +1,44 @@ +from modflow_devtools.dfn import schema as v1 + + +def map_field(field: v1.Field) -> v1.Field: + """ + Map a component field definition from the v1 schema to v1.1. + This simply copies the supported field attributes, applying defaults where an attribute is absent. + """ + + return v1.Field( + name=field["name"], + type=field["type"], + block=field.get("block"), + default=field.get("default"), + longname=field.get("longname"), + description=field.get("description"), + optional=field.get("optional", False), + developmode=field.get("developmode", False), + shape=field.get("shape"), + valid=field.get("valid"), + netcdf=field.get("netcdf", False), + tagged=field.get("tagged", False), + ) + + +def map(dfn: v1.Dfn) -> v1.Dfn: + """Map a component definition from the v1 schema to v1.1.""" + + blocks: dict[str, dict] = {} + for block_name, block_fields in (dfn["blocks"] or {}).items(): + blocks[block_name] = { + field_name: map_field(field) + for field_name, field in block_fields.items() + if isinstance(field, dict) + } + + for block_name, block_fields in blocks.items(): + dfn.setdefault(block_name, {}) # type: ignore[misc] + for field_name, field_data in block_fields.items(): + dfn[block_name][field_name] = field_data # type: ignore[literal-required] + + dfn["blocks"] = blocks if blocks else None + dfn["schema_version"] = "1.1" + return dfn diff --git a/modflow_devtools/dfns/parse.py b/modflow_devtools/dfn/parser.py similarity index 85% rename from modflow_devtools/dfns/parse.py rename to modflow_devtools/dfn/parser.py index de68a906..0eb397dc 100644 --- a/modflow_devtools/dfns/parse.py +++ b/modflow_devtools/dfn/parser.py @@ -56,10 +56,22 @@ def try_parse_bool(value: Any) -> Any: return value -def try_parse_parent(meta: list[str]) -> str | None: +_FLOPY_CLASS_TO_V2_TYPE: dict[str, str] = { + "MFSimulation": "simulation", + "MFModel": "model", + "MFPackage": "package", +} + + +def try_get_parent(meta: list[str]) -> "str | list[str] | None": + """ + Try to parse a 
component's parent component name from its metadata. - Return `None` if it has no parent specified. + Try to parse a component's parent from its metadata. + + Returns a v2 component type label (e.g. "model", "package", + ["model", "package"]) when the metadata uses the flopy + ``parent_name_type `` format, + a specific component name when a legacy ``parent `` line + is present, or ``None`` if no parent is declared. """ line = next( iter(m for m in meta if isinstance(m, str) and m.startswith("parent")), @@ -68,7 +80,18 @@ def try_parse_parent(meta: list[str]) -> str | None: if not line: return None split = line.split() - return split[1] + if not split: + return None + # "parent_name_type " — flopy class names + # map to v2 component type labels. + if split[0] == "parent_name_type" and len(split) >= 3: + classes = split[2].split("/") + types = [_FLOPY_CLASS_TO_V2_TYPE[c] for c in classes if c in _FLOPY_CLASS_TO_V2_TYPE] + if types: + return types[0] if len(types) == 1 else types + return None + # Legacy: "parent " + return split[1] if len(split) >= 2 else None def is_advanced_package(meta: list[str]) -> bool: @@ -81,7 +104,7 @@ def is_multi_package(meta: list[str]) -> bool: return any("multi-package" in m for m in meta) -def parse_mf6_subpackages(meta: list[str]) -> list[str]: +def get_subpackages(meta: list[str]) -> list[str]: """ Return MF6 subpackage abbreviations declared via '# mf6 subpackage '. diff --git a/modflow_devtools/dfn.py b/modflow_devtools/dfn/schema.py similarity index 66% rename from modflow_devtools/dfn.py rename to modflow_devtools/dfn/schema.py index e967c8ed..3fc365a1 100644 --- a/modflow_devtools/dfn.py +++ b/modflow_devtools/dfn/schema.py @@ -5,8 +5,6 @@ a function to fetch DFNs from the MF6 repository. 
""" -import shutil -import tempfile from ast import literal_eval from collections.abc import Mapping from itertools import groupby @@ -15,7 +13,7 @@ from typing import ( Any, Literal, - Optional, + NotRequired, TypedDict, ) from warnings import warn @@ -24,11 +22,7 @@ from boltons.dictutils import OMD from boltons.iterutils import remap -from modflow_devtools.download import download_and_unzip - -# TODO: use dataclasses instead of typed dicts? static -# methods on typed dicts are evidently not allowed -# mypy: ignore-errors +from modflow_devtools.dfn import parser def _try_literal_eval(value: str) -> Any: @@ -90,10 +84,24 @@ def _field_attr_sort_key(item) -> int: return 8 +def block_sort_key(item: tuple[str, Any]) -> int: + """Sort blocks in canonical MF6 order.""" + order = ["options", "dimensions", "griddata", "packagedata", "connectiondata", "period"] + name = item[0] + try: + return order.index(name) + except ValueError: + return len(order) + + FormatVersion = Literal[1, 2] """DFN format version number.""" +DfnFormat = Literal["dfn", "toml", "yaml", "json"] +"""DFN serialization format.""" + + FieldType = Literal[ "keyword", "integer", @@ -113,11 +121,8 @@ def _field_attr_sort_key(item) -> int: ] -_SCALAR_TYPES = FieldType.__args__[:4] - - -Dfns = dict[str, "Dfn"] -Fields = dict[str, "Field"] +_SCALAR_TYPES = ("keyword", "integer", "double precision", "string") +SCALAR_TYPES = _SCALAR_TYPES # public alias class Field(TypedDict): @@ -125,12 +130,32 @@ class Field(TypedDict): name: str type: FieldType - shape: Any | None = None - block: str | None = None - default: Any | None = None - children: Optional["Fields"] = None - description: str | None = None - reader: Reader = "urword" + block: NotRequired[str | None] + default: NotRequired[Any | None] + longname: NotRequired[str | None] + description: NotRequired[str | None] + optional: NotRequired[bool] + developmode: NotRequired[bool] + shape: NotRequired[str | None] + valid: NotRequired[tuple[str, ...] 
| None] + netcdf: NotRequired[bool] + tagged: NotRequired[bool] + reader: NotRequired[Reader] + in_record: NotRequired[bool] + layered: NotRequired[bool | None] + preserve_case: NotRequired[bool] + numeric_index: NotRequired[bool] + deprecated: NotRequired[bool] + removed: NotRequired[bool] + mf6internal: NotRequired[str | None] + block_variable: NotRequired[bool] + just_data: NotRequired[bool] + time_series: NotRequired[bool] + children: NotRequired[Mapping[str, "Field"] | None] + + +Fields = Mapping[str, "Field"] +Blocks = Mapping[str, Fields] class Ref(TypedDict): @@ -169,6 +194,9 @@ class Sln(TypedDict): pattern: str +Dfns = dict[str, "Dfn"] + + class Dfn(TypedDict): """ MODFLOW 6 input definition. An input definition @@ -210,15 +238,20 @@ class Dfn(TypedDict): Distinct from fkeys, which are field-level references. """ + schema_version: str name: str - advanced: bool = False - multi: bool = False - ref: Ref | None = None - sln: Sln | None = None - fkeys: Dfns | None = None - subcomponents: list[str] | None = None - - @staticmethod + ftype: NotRequired[str | None] + parent: NotRequired[str | list[str] | None] + blocks: NotRequired[Blocks | None] + children: NotRequired[Dfns | None] + advanced: NotRequired[bool] + multi: NotRequired[bool] + ref: NotRequired[Ref | None] + sln: NotRequired[Sln | None] + fkeys: NotRequired[Dfns | None] # deprecated + subcomponents: NotRequired[list[str] | None] + + @staticmethod # type: ignore[misc] def _load_v1_flat(f, common: dict | None = None) -> tuple[Mapping, list[str]]: field = {} flat = [] @@ -292,7 +325,7 @@ def _load_v1_flat(f, common: dict | None = None) -> tuple[Mapping, list[str]]: # the point of the OMD is to losslessly handle duplicate variable names return OMD(flat), meta - @classmethod + @classmethod # type: ignore[misc] def _load_v1(cls, f, name, **kwargs) -> "Dfn": """ Temporary load routine for the v1 DFN format. 
@@ -567,7 +600,7 @@ def _subcomponents() -> list[str] | None: result.append(abbr) return result if result else None - return cls( + return cls( # type: ignore[misc] name=name, fkeys=fkeys, advanced=_advanced(), @@ -578,95 +611,254 @@ def _subcomponents() -> list[str] | None: **blocks, ) - @classmethod - def _load_v2(cls, f, name) -> "Dfn": - data = tomli.load(f) + @classmethod # type: ignore[misc] + def _load_v2(cls, f, name, fmt: str = "toml") -> "Dfn": + if fmt == "toml": + data = tomli.load(f) + elif fmt == "json": + import json + + data = json.load(f) + elif fmt == "yaml": + import yaml + + data = yaml.safe_load(f) + else: + raise ValueError(f"Unsupported format: {fmt!r}") if name and name != data.get("name", None): raise ValueError(f"Name mismatch, expected {name}") + for block in (data.get("blocks") or {}).values(): + for field_name, field in block.items(): + field.setdefault("name", field_name) return cls(**data) - @classmethod + @classmethod # type: ignore[misc] def load( cls, f, name: str | None = None, - version: FormatVersion = 1, + version: FormatVersion | DfnFormat = "dfn", **kwargs, ) -> "Dfn": """ Load a component definition from a definition file. 
""" - if version == 1: + if version in ["dfn", 1]: return cls._load_v1(f, name, **kwargs) - elif version == 2: - return cls._load_v2(f, name) + elif version in ["toml", 2]: + return cls._load_v2(f, name, fmt="toml") + elif version == "yaml": + return cls._load_v2(f, name, fmt="yaml") + elif version == "json": + return cls._load_v2(f, name, fmt="json") else: - raise ValueError(f"Unsupported version, expected one of {version.__args__}") + raise ValueError( + f"Unsupported version {version!r}, expected one of: 'dfn', 'toml', 'yaml', 'json'" + ) - @staticmethod - def _load_all_v1(dfndir: PathLike) -> Dfns: - paths: list[Path] = [p for p in dfndir.glob("*.dfn") if p.stem not in ["common", "flopy"]] + @staticmethod # type: ignore[misc] + def load_all(dfndir: PathLike, version: FormatVersion | None = None) -> Dfns: + """Load all component definitions from the given directory.""" - # load common variables - common_path: Path | None = dfndir / "common.dfn" - if not common_path.is_file(): - common = None - else: - with common_path.open() as f: - common, _ = Dfn._load_v1_flat(f) - - # load references (subpackages) - refs = {} - for path in paths: - with path.open() as f: - dfn = Dfn.load(f, name=path.stem, common=common) - ref = dfn.get("ref", None) - if ref: - refs[ref["key"]] = ref + if version: + warn("load_all() argument 'version' is deprecated and ignored") - # load definitions dfns: Dfns = {} - for path in paths: - with path.open() as f: - dfn = Dfn.load(f, name=path.stem, common=common, refs=refs) - dfns[path.stem] = dfn + dfndir = Path(dfndir) + _EXCLUDE = {"common", "flopy"} + + dfn_paths: list[Path] = [p for p in dfndir.glob("*.dfn") if p.stem not in _EXCLUDE] + toml_paths: list[Path] = [p for p in dfndir.glob("*.toml") if p.stem not in _EXCLUDE] + yaml_paths: list[Path] = [ + p for ext in ("*.yaml", "*.yml") for p in dfndir.glob(ext) if p.stem not in _EXCLUDE + ] + json_paths: list[Path] = [p for p in dfndir.glob("*.json") if p.stem not in _EXCLUDE] + + groups = [g 
for g in [dfn_paths, toml_paths, yaml_paths, json_paths] if g] + if len(groups) > 1: + raise ValueError("Directory contains definition files in multiple formats") + if not groups: + raise ValueError("Directory contains no definition files") + + if dfn_paths: + # load common fields + common_path: Path | None = dfndir / "common.dfn" + if not common_path.is_file(): + common = None + else: + with common_path.open() as f: + common, _ = Dfn._load_v1_flat(f) + + # load subpackages + refs = {} + for path in dfn_paths: + with path.open() as f: + dfn = Dfn.load(f, name=path.stem, common=common) + ref = dfn.get("ref", None) + if ref: + refs[ref["key"]] = ref + + # load definitions + for path in dfn_paths: + with path.open() as f: + dfn = Dfn.load(f, name=path.stem, common=common, refs=refs) + dfns[path.stem] = dfn + elif toml_paths: + for path in toml_paths: + with path.open(mode="rb") as f: + dfn = Dfn.load(f, name=path.stem, version="toml") + dfns[path.stem] = dfn + elif yaml_paths: + for path in yaml_paths: + with path.open() as f: + dfn = Dfn.load(f, name=path.stem, version="yaml") + dfns[path.stem] = dfn + elif json_paths: + for path in json_paths: + with path.open() as f: + dfn = Dfn.load(f, name=path.stem, version="json") + dfns[path.stem] = dfn return dfns - @staticmethod - def _load_all_v2(dfndir: PathLike) -> Dfns: - paths: list[Path] = [p for p in dfndir.glob("*.toml") if p.stem not in ["common", "flopy"]] - dfns: Dfns = {} - for path in paths: - with path.open(mode="rb") as f: - dfn = Dfn.load(f, name=path.stem, version=2) - dfns[path.stem] = dfn - - return dfns - @staticmethod - def load_all(dfndir: PathLike, version: FormatVersion = 1) -> Dfns: - """Load all component definitions from the given directory.""" - if version == 1: - return Dfn._load_all_v1(dfndir) - elif version == 2: - return Dfn._load_all_v2(dfndir) - else: - raise ValueError(f"Unsupported version, expected one of {version.__args__}") - - -def get_dfns(owner: str, repo: str, ref: str, outdir: str 
| PathLike, verbose: bool = False): - """Fetch definition files from the MODFLOW 6 repository.""" - url = f"https://github.com/{owner}/{repo}/archive/{ref}.zip" - if verbose: - print(f"Downloading MODFLOW 6 repository from {url}") - with tempfile.TemporaryDirectory() as tmp: - dl_path = download_and_unzip(url, tmp, verbose=verbose) - contents = list(dl_path.glob("modflow6-*")) - proj_path = next(iter(contents), None) - if not proj_path: - raise ValueError(f"Missing proj dir in {dl_path}, found {contents}") - if verbose: - print("Copying dfns from download dir to output dir") - shutil.copytree(proj_path / "doc" / "mf6io" / "mf6ivar" / "dfn", outdir, dirs_exist_ok=True) +def _load_common(f: Any) -> tuple[OMD, list[str]]: + common, _ = parser.parse_dfn(f) + return common + + +load_common = _load_common # public alias + + +def load(f: Any, format: str = "dfn", **kwargs: Any) -> Dfn: + """Load a v1 definition file.""" + + if format != "dfn": + raise ValueError(f"Unsupported format: {format!r}. 
Expected 'dfn'.") + + name = kwargs.pop("name") + fields, meta = parser.parse_dfn(f, **kwargs) + parent = parser.try_get_parent(meta) + blocks = { + block_name: {field["name"]: Field(field) for field in block} # type: ignore[misc] + for block_name, block in groupby(fields.values(multi=True), lambda fd: fd["block"]) + } + multi = parser.is_multi_package(meta) + advanced = parser.is_advanced_package(meta) + subcomponents = parser.get_subpackages(meta) or None + + return Dfn( + schema_version="1", + name=name, + parent=parent, + blocks=blocks, + multi=multi, + advanced=advanced, + subcomponents=subcomponents, + ) + + +EXCLUDE_DFNS = ["common.dfn", "flopy.dfn"] + + +def load_all(path: str | PathLike) -> Dfns: + """Load definition files in a directory.""" + path = Path(path).expanduser().resolve() + dfn_paths = {p.stem: p for p in path.glob("*.dfn") if p.name not in EXCLUDE_DFNS} + dfns: Dfns = {} + if dfn_paths: + with (path / "common.dfn").open() as f: + common = _load_common(f) + for dfn_name, dfn_path in dfn_paths.items(): + with dfn_path.open() as f: + dfns[dfn_name] = load(f, name=dfn_name, common=common, format="dfn") + return dfns + + +def get_fields(dfn: Dfn) -> OMD: + """Combined map of fields from all blocks (flat, top-level only).""" + items = [] + for block in (dfn["blocks"] or {}).values(): + for f in block.values(): + items.append((f["name"], f)) + return OMD(items) + + +def _has_grid_dependent_shapes(dfn: Dfn) -> bool: + """Return True if any field uses a semicolon grid-type-dependent shape.""" + blocks = dfn.get("blocks", {}) + if not blocks: + return False + for block in blocks.values(): + for field in block.values(): + if ";" in str(field.get("shape") or ""): + return True + return False + + +def infer_parent(dfn: Dfn) -> str | None: + """Infer a component's parent using naming conventions.""" + if dfn["name"] == "sim-nam": + return None + if dfn["name"].endswith("-nam"): + return "sim-nam" + if dfn["name"].startswith(("exg-", "sln-")): + return 
"sim-nam" + if dfn["name"].startswith("utl-"): + # Grid-dependent shapes (semicolon notation) mean the utility must be + # model-attached, not simulation-level. + if _has_grid_dependent_shapes(dfn): + return "package" + return "sim-nam" + if "-" in dfn["name"]: + mdl = dfn["name"].split("-")[0] + return f"{mdl}-nam" + return None + + +def resolve_parent(dfn: Dfn) -> Dfn: + """Infer and set a component's parent using naming conventions.""" + if dfn["parent"] is None: + dfn["parent"] = infer_parent(dfn) + return dfn + + +def resolve_parents(dfns: Dfns) -> Dfns: + """Infer and set component parents using naming conventions.""" + return {name: resolve_parent(dfn) for name, dfn in dfns.items()} + + +def to_tree(dfns: Dfns) -> Dfn: + """Condense flat definitions to a hierarchical definition.""" + + first_dfn = next(iter(dfns.values()), None) + if first_dfn is None: + raise ValueError("No definitions found") + if first_dfn["schema_version"] != "1": + raise ValueError(f"Expected schema version 1, got {first_dfn['schema_version']!r}") + + dfns = resolve_parents(dfns) + roots = {name: dfn for name, dfn in dfns.items() if dfn["parent"] is None} + if (nroots := len(roots)) != 1: + raise ValueError(f"Expected one root component, found {nroots}") + + def _to_tree(dfn: Dfn) -> Dfn: + children = {name: _dfn for name, _dfn in dfns.items() if _dfn["parent"] == dfn["name"]} + dfn["children"] = {name: _to_tree(_dfn) for name, _dfn in children.items()} or None + return dfn + + return _to_tree(next(iter(roots.values()))) + + +def to_flat(dfn: Dfn) -> Dfns: + """Flatten a hierarchical definition into its constituent definitions.""" + + def _to_flat(_dfn: Dfn) -> Dfns: + result: Dfns = {_dfn["name"]: _dfn} + result[_dfn["name"]]["children"] = None + for child in (_dfn["children"] or {}).values(): + result.update(_to_flat(child)) + return result + + return _to_flat(dfn) diff --git a/modflow_devtools/dfn2toml.py b/modflow_devtools/dfn2toml.py deleted file mode 100644 index 
96a68661..00000000 --- a/modflow_devtools/dfn2toml.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Convert DFNs to TOML.""" - -import argparse -from os import PathLike -from pathlib import Path - -import tomli_w as tomli -from boltons.iterutils import remap - -from modflow_devtools.dfn import Dfn - -# mypy: ignore-errors - - -def convert(indir: PathLike, outdir: PathLike): - indir = Path(indir).expanduser().absolute() - outdir = Path(outdir).expanduser().absolute() - outdir.mkdir(exist_ok=True, parents=True) - for dfn in Dfn.load_all(indir).values(): - with Path.open(outdir / f"{dfn['name']}.toml", "wb") as f: - - def drop_none_or_empty(path, key, value): - if value is None or value == "" or value == [] or value == {}: - return False - return True - - tomli.dump(remap(dfn, visit=drop_none_or_empty), f) - - -if __name__ == "__main__": - """Convert DFN files to TOML.""" - - parser = argparse.ArgumentParser(description="Convert DFN files to TOML.") - parser.add_argument( - "--indir", - "-i", - type=str, - help="Directory containing DFN files.", - ) - parser.add_argument( - "--outdir", - "-o", - help="Output directory.", - ) - args = parser.parse_args() - convert(args.indir, args.outdir) diff --git a/modflow_devtools/dfnmap.py b/modflow_devtools/dfnmap.py new file mode 100644 index 00000000..804feb53 --- /dev/null +++ b/modflow_devtools/dfnmap.py @@ -0,0 +1,173 @@ +"""Map MODFLOW 6 DFN files to a new schema version and serialize to YAML, TOML, or JSON.""" + +import argparse +import json +from os import PathLike +from pathlib import Path +from typing import Any, Literal + +import pyaml +import tomli_w +from pydantic import BaseModel + +from modflow_devtools.dfn import schema as v1 +from modflow_devtools.dfn.mapper import map as map_v1_1 +from modflow_devtools.dfns.mapper import map as map_v2 + +Format = Literal["yaml", "toml", "json"] + +# YAML 1.1 (PyYAML default) serializes booleans as yes/no; override to true/false (YAML 1.2). 
+pyaml.add_representer( + bool, + lambda dumper, v: dumper.represent_scalar("tag:yaml.org,2002:bool", "true" if v else "false"), +) + + +def _serialize_safe(obj: Any) -> Any: + """Recursively coerce non-native types to primitives suitable for serialization.""" + + if isinstance(obj, BaseModel): + # strip_names context propagates through v2 FieldBase/_Block serializers; + # ignored harmlessly by v1/v1.1 models that don't inspect it. + return obj.model_dump( + context={"strip_names": True}, + exclude_none=True, + exclude_unset=True, + exclude_defaults=True, + ) + if isinstance(obj, dict): + result = {k: _serialize_safe(v) for k, v in obj.items() if v is not None} + # Strip redundant name from v1/v1.1 field dicts — name is the dict key in the parent block. + if "name" in result and "type" in result: + del result["name"] + return result + if isinstance(obj, list): + return [_serialize_safe(v) for v in obj] + if isinstance(obj, (str, int, float, bool)) or obj is None: + return obj + return str(obj) # Version → str, etc. 
+ + +def _scalars_first(obj: Any) -> Any: + """Recursively reorder dict keys so scalar values precede dicts and lists.""" + if isinstance(obj, dict): + scalars = {k: _scalars_first(v) for k, v in obj.items() if not isinstance(v, (dict, list))} + complex_ = {k: _scalars_first(v) for k, v in obj.items() if isinstance(v, (dict, list))} + return {**scalars, **complex_} + if isinstance(obj, list): + return [_scalars_first(v) for v in obj] + return obj + + +def _write(data: dict, path: Path, fmt: Format) -> None: + data = _scalars_first(data) + if fmt == "toml": + with path.open("wb") as f: + tomli_w.dump(data, f) + elif fmt == "json": + with path.open("w") as f: + json.dump(data, f, indent=2) + elif fmt == "yaml": + with path.open("w") as f: + pyaml.dump(data, f, vspacing=False, sort_keys=False) + + +# mypy: ignore-errors + + +def migrate( + inpath: str | PathLike, + outdir: str | PathLike, + schema_version: str = "2", + fmt: Format = "yaml", +) -> None: + """Migrate DFN files' schema version and serialize to the given format. + + Parameters + ---------- + inpath : str or PathLike + Input file or directory. + outdir : str or PathLike + Output directory. + schema_version : str, optional + Target schema version: "1.1" or "2". Default "2". + fmt : str, optional + Output format: "yaml", "toml", or "json". Default "yaml". 
+ """ + inpath = Path(inpath).expanduser().absolute() + outdir = Path(outdir).expanduser().absolute() + outdir.mkdir(exist_ok=True, parents=True) + ext = f".{fmt}" + + if inpath.is_file(): + if inpath.name == "common.dfn": + raise ValueError("Cannot convert common.dfn as a standalone file") + + common = {} + if (common_path := inpath.parent / "common.dfn").is_file(): + with common_path.open() as f: + common = v1.load_common(f) + + with inpath.open() as f: + dfn = v1.Dfn.load(f, name=inpath.stem, common=common) + + if schema_version == "1.1": + dfn = map_v1_1(dfn) + elif schema_version == "2": + dfn = map_v2(dfn) + else: + raise ValueError(f"Got schema version {schema_version}, supported versions are: 1.1, 2") + + _write(_serialize_safe(dfn), outdir / f"{inpath.stem}{ext}", fmt) + else: + dfns = v1.load_all(inpath) + + if schema_version == "1.1": + dfns = v1.to_flat(v1.to_tree(dfns)) + dfns = {name: map_v1_1(dfn) for name, dfn in dfns.items()} + elif schema_version == "2": + dfns = {name: map_v2(dfn) for name, dfn in dfns.items()} + else: + raise ValueError(f"Got schema version {schema_version}, supported versions are: 1.1, 2") + + for dfn_name, dfn in dfns.items(): + _write(_serialize_safe(dfn), outdir / f"{dfn_name}{ext}", fmt) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Migrate DFN files' schema version and serialize to YAML, TOML, or JSON.", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--input", + "-i", + type=str, + help="Input file or directory containing DFN files.", + ) + parser.add_argument( + "--output", + "-o", + help="Output directory.", + ) + parser.add_argument( + "--schema-version", + "-s", + default="2", + choices=["1.1", "2"], + help="Target schema version (default: 2).", + ) + parser.add_argument( + "--format", + "-f", + default="yaml", + choices=["yaml", "toml", "json"], + help="Output format (default: yaml).", + ) + args = parser.parse_args() + migrate( + 
inpath=args.input, + outdir=args.output, + schema_version=args.schema_version, + fmt=args.format, + ) diff --git a/modflow_devtools/dfns/__init__.py b/modflow_devtools/dfns/__init__.py index 156de0c6..c87cc0c2 100644 --- a/modflow_devtools/dfns/__init__.py +++ b/modflow_devtools/dfns/__init__.py @@ -1,38 +1,25 @@ -""" -MODFLOW 6 definition file tools. -""" +"""Definition file tools""" import warnings -from abc import ABC, abstractmethod -from collections.abc import Iterator, Mapping -from dataclasses import asdict, dataclass, field, replace -from itertools import groupby -from os import PathLike -from pathlib import Path -from typing import ( - Literal, - cast, -) - -import tomli -from boltons.dictutils import OMD -from boltons.iterutils import remap -from packaging.version import Version -from modflow_devtools.dfns.parse import ( - is_advanced_package, - is_multi_package, - parse_dfn, - parse_mf6_subpackages, - try_parse_bool, - try_parse_parent, +from modflow_devtools.dfn import fetch_dfns +from modflow_devtools.dfns.registry import DfnRegistry, LocalDfnRegistry, RemoteDfnRegistry +from modflow_devtools.dfns.schema import ( + Array, + Block, + Blocks, + Component, + Dfns, + Double, + FieldBase, + File, + Integer, + Keyword, + List, + Record, + String, + Union, ) -from modflow_devtools.dfns.schema.block import Block, Blocks, block_sort_key -from modflow_devtools.dfns.schema.field import Field, Fields -from modflow_devtools.dfns.schema.v1 import SCALAR_TYPES as V1_SCALAR_TYPES -from modflow_devtools.dfns.schema.v1 import FieldV1 -from modflow_devtools.dfns.schema.v2 import FieldV2 -from modflow_devtools.misc import drop_none_or_empty, try_literal_eval # Experimental API warning warnings.warn( @@ -46,909 +33,22 @@ ) __all__ = [ + "Array", "Block", "Blocks", - "Dfn", + "Component", "DfnRegistry", - "DfnRegistryDiscoveryError", - "DfnRegistryError", - "DfnRegistryNotFoundError", - "DfnSpec", "Dfns", - "Field", - "FieldV1", - "FieldV2", - "Fields", + "Double", + 
"FieldBase", + "File", + "Integer", + "Keyword", + "List", "LocalDfnRegistry", + "Record", "RemoteDfnRegistry", - "block_sort_key", - "get_dfn", - "get_dfn_path", - "get_registry", - "get_sync_status", - "is_valid", - "list_components", - "load", - "load_flat", - "load_tree", - "map", - "sync_dfns", - "to_flat", - "to_tree", + "String", + "Union", + "fetch_dfns", ] - - -Format = Literal["dfn", "toml"] -"""DFN serialization format.""" - - -Dfns = dict[str, "Dfn"] - - -@dataclass -class Dfn: - """ - MODFLOW 6 input component definition. - - Attributes - ---------- - schema_version : Version - Schema version of this definition. - name : str - Component name (e.g., "gwf-chd", "sim-nam"). - parent : str | None - Parent component name (instance-level hierarchy). - advanced : bool - Whether this is an advanced package. - multi : bool - Whether this is a multi-package. - ftype : str | None - File type identifier. - blocks : Blocks | None - Block definitions containing field specifications. - children : Dfns | None - Actual child component instances (instance-level). - subcomponents : list[str] | None - Allowed child component types (schema-level constraint). - Populated from DFN comments like: # mf6 subpackage - Example: ['UTL-NCF'] means this component can have utl-ncf children. - """ - - schema_version: Version - name: str - parent: str | None = None - advanced: bool = False - multi: bool = False - ftype: str | None = None - blocks: Blocks | None = None - children: Dfns | None = None - subcomponents: list[str] | None = None - - @property - def fields(self) -> Fields: - """ - A combined map of fields from all blocks. - - Only top-level fields are included, no subfields of composites - such as records or recarrays. - """ - fields = [] - for block in (self.blocks or {}).values(): - for f in block.values(): - fields.append((f.name, f)) - - # for now return a multidict to support duplicate field names. 
- # TODO: change to normal dict after deprecating v1 schema - return OMD(fields) - - def __post_init__(self): - if not isinstance(self.schema_version, Version): - self.schema_version = Version(str(self.schema_version)) - if self.blocks: - self.blocks = dict(sorted(self.blocks.items(), key=block_sort_key)) - - @classmethod - def from_dict(cls, d: dict, strict: bool = False) -> "Dfn": - """ - Create a Dfn instance from a dictionary. - - Parameters - ---------- - d : dict - Dictionary containing DFN data - strict : bool, optional - If True, raise ValueError if dict contains unrecognized keys at the - top level or in nested field dicts. If False (default), ignore - unrecognized keys. - """ - keys = list(cls.__annotations__.keys()) - if strict: - extra_keys = set(d.keys()) - set(keys) - if extra_keys: - raise ValueError(f"Unrecognized keys in DFN data: {extra_keys}") - data = {k: v for k, v in d.items() if k in keys} - schema_version = data.get("schema_version", Version("2")) - field_cls = FieldV1 if schema_version == Version("1") else FieldV2 - - def _fields(block_name, block_data): - fields = {} - for field_name, field_data in block_data.items(): - if isinstance(field_data, dict): - fields[field_name] = field_cls.from_dict(field_data, strict=strict) - elif isinstance(field_data, field_cls): - fields[field_name] = field_data - else: - raise TypeError( - f"Invalid field data for {field_name} in block {block_name}: " - f"expected dict or Field, got {type(field_data)}" - ) - return fields - - if blocks := data.get("blocks"): - data["schema_version"] = schema_version - data["blocks"] = { - block_name: _fields(block_name, block_data) - for block_name, block_data in blocks.items() - } - - return cls(**data) - - -@dataclass -class DfnSpec(Mapping): - """ - Full MODFLOW 6 input specification with hierarchical structure and flat dict access. 
- - The specification maintains a single canonical hierarchical representation via - the `root` property (simulation component with nested children), while also - providing flat dict-like access to any component by name via the Mapping protocol. - - Parameters - ---------- - schema_version : Version - The schema version of the specification (e.g., "1", "1.1", "2"). - root : Dfn - The root component (simulation) with hierarchical children populated. - - Examples - -------- - >>> spec = DfnSpec.load("/path/to/dfns") - >>> spec.schema_version - Version('2') - >>> spec.root.name - 'sim-nam' - >>> spec["gwf-chd"] # Flat access by component name - Dfn(name='gwf-chd', ...) - >>> list(spec.keys())[:3] - ['sim-nam', 'sim-tdis', 'gwf-nam'] - """ - - schema_version: Version - root: "Dfn" - _flat: Dfns = field(default_factory=dict, repr=False, compare=False) - - def __post_init__(self): - if not isinstance(self.schema_version, Version): - self.schema_version = Version(str(self.schema_version)) - # Build flat index if not already populated - if not self._flat: - self._flat = to_flat(self.root) - - def __getitem__(self, name: str) -> "Dfn": - """Get a component by name (flattened lookup).""" - if name not in self._flat: - raise KeyError(f"Component '{name}' not found in specification") - return self._flat[name] - - def __iter__(self) -> Iterator[str]: - """Iterate over all component names.""" - return iter(self._flat) - - def __len__(self) -> int: - """Total number of components in the specification.""" - return len(self._flat) - - def __contains__(self, name: object) -> bool: - """Check if a component exists by name.""" - return name in self._flat - - def dump(self, f) -> None: - """Serialize the full spec to a TOML byte stream.""" - import tomli_w - - doc = {"schema_version": str(self.schema_version)} - for name, dfn in self._flat.items(): - doc[name] = _toml_safe(remap(asdict(dfn), visit=drop_none_or_empty)) - f.write(tomli_w.dumps(doc).encode()) - - def dumps(self) -> str: - 
"""Serialize the full spec to a TOML string.""" - import io - - buf = io.BytesIO() - self.dump(buf) - return buf.getvalue().decode() - - @classmethod - def load( - cls, - path: str | PathLike, - schema_version: str | Version | None = None, - ) -> "DfnSpec": - """ - Load a specification from a directory of DFN files. - - The specification is always loaded as a hierarchical tree, - with flat access available via the Mapping protocol. - - Parameters - ---------- - path : str or PathLike - Path to directory containing DFN files. - schema_version : str or Version, optional - Target schema version. If provided and different from the native - schema version, DFNs will be mapped to the target version. - If not provided, uses the native schema version from the files. - - Returns - ------- - DfnSpec - The loaded specification with hierarchical structure. - - Examples - -------- - >>> spec = DfnSpec.load("/path/to/dfns") - >>> spec.root.name - 'sim-nam' - >>> spec["gwf-dis"] - Dfn(name='gwf-dis', ...) 
- """ - path = Path(path).expanduser().resolve() - - # Load flat DFNs from directory - dfns = load_flat(path) - - if not dfns: - raise ValueError(f"No DFN files found in {path}") - - # Determine native schema version from first DFN - first_dfn = next(iter(dfns.values())) - native_version = first_dfn.schema_version - - # Determine target version: - # - If explicitly specified, use that - # - If native is v1, default to v2 (since to_tree only works with v2) - # - Otherwise use native version - if schema_version: - target_version = Version(str(schema_version)) - elif native_version == Version("1"): - target_version = Version("2") - else: - target_version = native_version - - if target_version != native_version: - # Map DFNs to target schema version - dfns = {name: map(dfn, target_version) for name, dfn in dfns.items()} - - # Build hierarchical tree - root = to_tree(dfns) - - return cls( - schema_version=target_version, - root=root, - ) - - -class SchemaMap(ABC): - @abstractmethod - def map(self, dfn: Dfn) -> Dfn: ... - - -class MapV1To2(SchemaMap): - @staticmethod - def map_period_block(dfn: Dfn, block: Block) -> Block: - """ - Convert a period block recarray to individual arrays, one per column. - - Extracts recarray fields and creates separate array variables. Gives - each an appropriate grid- or tdis-aligned shape as opposed to sparse - list shape in terms of maxbound as previously. 
- """ - - block = dict(block) - fields = list(block.values()) - if fields[0].type == "list": - assert len(fields) == 1 - recarray_name = fields[0].name - block.pop(recarray_name, None) - item = next(iter((fields[0].children or {}).values())) - columns = dict(item.children or {}) - else: - recarray_name = None - columns = block - - cellid = columns.pop("cellid", None) - for col_name, column in columns.items(): - old_dims = column.shape - if old_dims: - old_dims = old_dims[1:-1].split(",") # type: ignore - new_dims = ["nper"] - if cellid: - new_dims.append("nnodes") - if old_dims: - new_dims.extend([dim for dim in old_dims if dim != "maxbound"]) - block[col_name] = replace(column, shape=f"({', '.join(new_dims)})") - - return block - - @staticmethod - def map_field(dfn: Dfn, field: Field) -> Field: - """ - Convert an input field specification from its representation - in a v1 format definition file to the v2 (structured) format. - - Notes - ----- - If the field does not have a `default` attribute, it will - default to `False` if it is a keyword, otherwise to `None`. - - A filepath field whose name functions as a foreign key - for a separate context will be given a reference to it. - """ - - fields = cast(OMD, dfn.fields) - - def _map_field(_field) -> Field: - field_dict = asdict(_field) - # parse booleans from strings. 
everything else can - # stay a string except default values, which we'll - # try to parse as arbitrary literals below, and at - # some point types, once we introduce type hinting - field_dict = {k: try_parse_bool(v) for k, v in field_dict.items()} - _name = field_dict.pop("name") - _type = field_dict.pop("type", None) - shape = field_dict.pop("shape", None) - shape = None if shape == "" else shape - block = field_dict.pop("block", None) - default = field_dict.pop("default_value", None) - default = try_literal_eval(default) if _type != "string" else default - description = field_dict.pop("description", "") - - def _row_field() -> Field: - """Parse a table's record (row) field""" - item_names = _type.split()[1:] - item_types = [ - f.type - for f in fields.values(multi=True) - if f.name in item_names and f.in_record - ] - n_item_names = len(item_names) - if n_item_names < 1: - raise ValueError(f"Missing list definition: {_type}") - - # explicit record or keystring - if n_item_names == 1 and ( - item_types[0].startswith("record") or item_types[0].startswith("keystring") - ): - return MapV1To2.map_field(dfn, next(iter(fields.getlist(item_names[0])))) - - # implicit record with all scalar fields - if all(t in V1_SCALAR_TYPES for t in item_types): - children = _record_fields() - return FieldV2.from_dict( - { - **field_dict, - "name": _name, - "type": "record", - "block": block, - "children": children, - "description": description.replace( - "is the list of", "is the record of" - ), - } - ) - - # implicit record with composite fields - children = { - f.name: MapV1To2.map_field(dfn, f) - for f in fields.values(multi=True) - if f.name in item_names and f.in_record - } - first = next(iter(children.values())) - if not first.type: - raise ValueError(f"Missing type for field: {first.name}") - single = len(children) == 1 - item_type = "keystring" if single and "keystring" in first.type else "record" - return FieldV2.from_dict( - { - "name": first.name if single else _name, - 
"type": item_type, - "block": block, - "children": first.children if single else children, - "description": description.replace( - "is the list of", f"is the {item_type} of" - ), - **field_dict, - } - ) - - def _union_fields() -> Fields: - """Parse a union's fields""" - names = _type.split()[1:] - return { - f.name: MapV1To2.map_field(dfn, f) - for f in fields.values(multi=True) - if f.name in names and f.in_record - } - - def _record_fields() -> Fields: - """Parse a record's fields""" - names = _type.split()[1:] - result = {} - for name in names: - matching = [ - f - for f in fields.values(multi=True) - if f.name == name and f.in_record and not f.type.startswith("record") - ] - if matching: - result[name] = _map_field(matching[0]) - return result - - _field = FieldV2.from_dict( - { - "name": _name, - "shape": shape, - "block": block, - "description": description, - "default": default, - **field_dict, - } - ) - - if _type.startswith("recarray"): - child = _row_field() - _field.children = {child.name: child} - _field.type = "list" - - elif _type.startswith("keystring"): - _field.children = _union_fields() - _field.type = "union" - - elif _type.startswith("record"): - _field.children = _record_fields() - _field.type = "record" - - # for now, we can tell a var is an array if its type - # is scalar and it has a shape. once we have proper - # typing, this can be read off the type itself. 
- elif shape is not None and _type not in V1_SCALAR_TYPES: - raise TypeError(f"Unsupported array type: {_type}") - - else: - # Map v1 type names to v2 type names - type_map = { - "double precision": "double", - } - _field.type = type_map.get(_type, _type) - - return _field - - return _map_field(field) - - @staticmethod - def map_blocks(dfn: Dfn) -> Blocks: - fields = { - field.name: MapV1To2.map_field(dfn, field) - for field in cast(OMD, dfn.fields).values(multi=True) - if not field.in_record # type: ignore - } - block_dicts = { - block_name: {f.name: f for f in block} - for block_name, block in groupby(fields.values(), lambda f: f.block) - } - blocks = {} - - # Handle period blocks specially - if (period_block := block_dicts.get("period", None)) is not None: - blocks["period"] = MapV1To2.map_period_block(dfn, period_block) - - for block_name, block_data in block_dicts.items(): - if block_name != "period": - blocks[block_name] = block_data - - def remove_attrs(path, key, value): - # remove unneeded variable attributes - if key in ["in_record", "tagged", "preserve_case"]: - return False - return True - - return remap(blocks, visit=remove_attrs) - - def map(self, dfn: Dfn) -> Dfn: - if dfn.schema_version == (v2 := Version("2")): - return dfn - - return Dfn( - name=dfn.name, - advanced=dfn.advanced, - multi=dfn.multi, - ftype=dfn.ftype or (dfn.name.split("-", 1)[1].upper() if "-" in dfn.name else None), - blocks=MapV1To2.map_blocks(dfn), - schema_version=v2, - parent=dfn.parent, - ) - - -def _toml_safe(obj): - """Recursively coerce non-TOML-native types to str.""" - if isinstance(obj, dict): - return {k: _toml_safe(v) for k, v in obj.items()} - if isinstance(obj, list): - return [_toml_safe(v) for v in obj] - if isinstance(obj, (str, int, float, bool)) or obj is None: - return obj - return str(obj) - - -def map( - dfn: Dfn, - schema_version: str | Version = "2", -) -> Dfn: - """Map a MODFLOW 6 specification to another schema version.""" - version = 
Version(str(schema_version)) - if version == dfn.schema_version: - return dfn - elif version == Version("1"): - raise NotImplementedError("Mapping to schema version 1 is not implemented yet.") - elif version == Version("2"): - return MapV1To2().map(dfn) - raise ValueError(f"Unsupported schema version: {schema_version}. Expected 1 or 2.") - - -def load(f, format: str = "dfn", **kwargs) -> Dfn: - """Load a MODFLOW 6 definition file.""" - if format == "dfn": - name = kwargs.pop("name") - fields, meta = parse_dfn(f, **kwargs) - blocks = { - block_name: {field["name"]: FieldV1.from_dict(field) for field in block} - for block_name, block in groupby( - fields.values(multi=True), lambda field: field["block"] - ) - } - subcomponents = parse_mf6_subpackages(meta) - return Dfn( - name=name, - schema_version=Version("1"), - parent=try_parse_parent(meta), - advanced=is_advanced_package(meta), - multi=is_multi_package(meta), - ftype=name.split("-", 1)[1].upper() if "-" in name else None, - blocks=blocks, - subcomponents=subcomponents if subcomponents else None, - ) - - elif format == "toml": - data = tomli.load(f) - dfn_name = data.pop("name", kwargs.pop("name", None)) - - dfn_fields = { - "name": dfn_name, - "schema_version": Version(str(data.pop("schema_version", "2"))), - "parent": data.pop("parent", None), - "advanced": data.pop("advanced", False), - "multi": data.pop("multi", False), - "ftype": data.pop("ftype", None) - or (dfn_name.split("-", 1)[1].upper() if dfn_name and "-" in dfn_name else None), - } - - if (expected_name := kwargs.pop("name", None)) is not None: - if dfn_fields["name"] != expected_name: - raise ValueError(f"DFN name mismatch: {expected_name} != {dfn_fields['name']}") - - blocks = {} - for section_name, section_data in data.items(): - if isinstance(section_data, dict): - block_fields = {} - for field_name, field_data in section_data.items(): - if isinstance(field_data, dict): - block_fields[field_name] = FieldV2.from_dict(field_data) - else: - 
block_fields[field_name] = field_data - blocks[section_name] = block_fields # type: ignore - - dfn_fields["blocks"] = blocks if blocks else None - - return Dfn(**dfn_fields) - - raise ValueError(f"Unsupported format: {format}. Expected 'dfn' or 'toml'.") - - -def _load_common(f) -> Fields: - common, _ = parse_dfn(f) - return common - - -def load_flat(path: str | PathLike) -> Dfns: - """ - Load a flat MODFLOW 6 specification from definition files in a directory. - - Returns a dictionary of unlinked DFNs, i.e. without `children` populated. - Components will have `parent` populated if the schema is v2 but not if v1. - """ - exclude = ["common", "flopy"] - path = Path(path).expanduser().resolve() - dfn_paths = {p.stem: p for p in path.glob("*.dfn") if p.stem not in exclude} - toml_paths = {p.stem: p for p in path.glob("*.toml") if p.stem not in exclude} - dfns = {} - if dfn_paths: - with (path / "common.dfn").open() as f: - common = _load_common(f) - for dfn_name, dfn_path in dfn_paths.items(): - with dfn_path.open() as f: - dfns[dfn_name] = load(f, name=dfn_name, common=common, format="dfn") - if toml_paths: - for toml_name, toml_path in toml_paths.items(): - with toml_path.open("rb") as f: - dfns[toml_name] = load(f, name=toml_name, format="toml") - return dfns - - -def load_tree(path: str | PathLike) -> Dfn: - """ - Load a structured MODFLOW 6 specification from definition files in a directory. - - A single root component definition (the simulation) is returned. This contains - child (and grandchild) components for the relevant models and packages. - """ - return to_tree(load_flat(path)) - - -def to_tree(dfns: Dfns) -> Dfn: - """ - Infer the MODFLOW 6 input component hierarchy from a flat spec: - unlinked DFNs, i.e. without `children` populated, only `parent`. - - Returns the root component. There must be exactly one root, i.e. - component with no `parent`. Composite components have `children` - populated. 
- - Assumes DFNs are already in v2 schema, just lacking parent-child - links; before calling this function, map them first with `map()`. - """ - - def set_parent(dfn): - dfn = asdict(dfn) - if (dfn_name := dfn["name"]) == "sim-nam": - pass - elif dfn_name.endswith("-nam"): - dfn["parent"] = "sim-nam" - elif ( - dfn_name.startswith("exg-") - or dfn_name.startswith("sln-") - or dfn_name.startswith("utl-") - ): - dfn["parent"] = "sim-nam" - elif "-" in dfn_name: - mdl = dfn_name.split("-")[0] - dfn["parent"] = f"{mdl}-nam" - - return Dfn(**remap(dfn, visit=drop_none_or_empty)) - - dfns = {name: set_parent(dfn) for name, dfn in dfns.items()} - first_dfn = next(iter(dfns.values()), None) - match schema_version := str(first_dfn.schema_version if first_dfn else Version("1")): - case "1": - raise NotImplementedError("Tree inference from v1 schema not implemented") - case "2": - if ( - nroots := len( - roots := {name: dfn for name, dfn in dfns.items() if dfn.parent is None} - ) - ) != 1: - raise ValueError(f"Expected one root component, found {nroots}") - - def _build_tree(node_name: str) -> Dfn: - node = dfns[node_name] - children = {name: dfn for name, dfn in dfns.items() if dfn.parent == node_name} - if any(children): - node.children = {name: _build_tree(name) for name in children.keys()} - return node - - return _build_tree(next(iter(roots.keys()))) - case _: - raise ValueError(f"Unsupported schema version: {schema_version}. Expected 1 or 2.") - - -def to_flat(dfn: Dfn) -> Dfns: - """ - Flatten a MODFLOW 6 input component hierarchy to a flat spec: - unlinked DFNs, i.e. without `children` populated, only `parent`. - - Returns a dictionary of all components in the specification. 
- """ - - def _flatten(dfn: Dfn) -> Dfns: - dfns = {dfn.name: replace(dfn, children=None)} - for child in (dfn.children or {}).values(): - dfns.update(_flatten(child)) - return dfns - - return _flatten(dfn) - - -def is_valid(path: str | PathLike, format: str = "dfn", verbose: bool = False) -> bool: - """Validate DFN file(s).""" - path = Path(path).expanduser().absolute() - try: - if not path.exists(): - raise FileNotFoundError(f"Path does not exist: {path}") - - if path.is_file(): - common = {} # type: ignore - if (common_path := path.parent / "common.dfn").exists(): - with common_path.open() as f: - common, _ = parse_dfn(f) - if path.name == "common.dfn": - return True - with path.open() as f: - load(f, name=path.stem, common=common, format=format) - else: - load_flat(path) - return True - except Exception as e: - if verbose: - print(f"Validation failed: {e}") - return False - - -# ============================================================================= -# Registry imports and convenience functions -# ============================================================================= - -# Import registry classes and functions (lazy to avoid circular imports) -# These are re-exported for convenience - - -def _get_registry_module(): - """Lazy import of registry module to avoid circular imports.""" - from modflow_devtools.dfns import registry - - return registry - - -# Re-export registry classes -def __getattr__(name: str): - """Lazy attribute access for registry classes.""" - registry_exports = { - "DfnRegistry", - "DfnRegistryDiscoveryError", - "DfnRegistryError", - "DfnRegistryNotFoundError", - "LocalDfnRegistry", - "RemoteDfnRegistry", - "get_registry", - "get_sync_status", - "sync_dfns", - } - if name in registry_exports: - registry = _get_registry_module() - return getattr(registry, name) - raise AttributeError(f"module {__name__!r} has no attribute {name!r}") - - -# ============================================================================= -# Module-level 
convenience functions -# ============================================================================= - - -def get_dfn( - component: str, - ref: str = "develop", - source: str = "modflow6", - path: str | PathLike | None = None, -) -> "Dfn": - """ - Get a DFN by component name from the registry. - - This is a convenience function that gets the registry and retrieves - the specified component. - - Parameters - ---------- - component : str - Component name (e.g., "gwf-chd", "sim-nam"). - ref : str, optional - Git ref (branch, tag, or commit hash). Default is "develop". - source : str, optional - Source repository name. Default is "modflow6". - path : str or PathLike, optional - Path to a local directory containing DFN files. If provided, - uses autodiscovery from local filesystem instead of remote. - - Returns - ------- - Dfn - The requested component definition. - - Examples - -------- - >>> dfn = get_dfn("gwf-chd") - >>> dfn = get_dfn("gwf-chd", ref="6.6.0") - >>> dfn = get_dfn("gwf-chd", path="/path/to/dfns") - """ - registry = _get_registry_module() - reg = registry.get_registry(source=source, ref=ref, path=path) - return reg.get_dfn(component) - - -def get_dfn_path( - component: str, - ref: str = "develop", - source: str = "modflow6", - path: str | PathLike | None = None, -) -> Path: - """ - Get the local cached file path for a DFN component. - - Parameters - ---------- - component : str - Component name (e.g., "gwf-chd", "sim-nam"). - ref : str, optional - Git ref (branch, tag, or commit hash). Default is "develop". - source : str, optional - Source repository name. Default is "modflow6". - path : str or PathLike, optional - Path to a local directory containing DFN files. If provided, - returns path from local filesystem instead of cache. - - Returns - ------- - Path - Path to the local DFN file (cached or local directory). 
- - Examples - -------- - >>> path = get_dfn_path("gwf-chd", ref="6.6.0") - >>> path = get_dfn_path("gwf-chd", path="/path/to/dfns") - """ - registry = _get_registry_module() - reg = registry.get_registry(source=source, ref=ref, path=path) - return reg.get_dfn_path(component) - - -def list_components( - ref: str = "develop", - source: str = "modflow6", - path: str | PathLike | None = None, -) -> list[str]: - """ - List available components for a registry. - - Parameters - ---------- - ref : str, optional - Git ref (branch, tag, or commit hash). Default is "develop". - source : str, optional - Source repository name. Default is "modflow6". - path : str or PathLike, optional - Path to a local directory containing DFN files. If provided, - lists components from local filesystem. - - Returns - ------- - list[str] - List of component names available in the registry. - - Examples - -------- - >>> components = list_components(ref="6.6.0") - >>> "gwf-chd" in components - True - >>> components = list_components(path="/path/to/dfns") - """ - registry = _get_registry_module() - reg = registry.get_registry(source=source, ref=ref, path=path) - return list(reg.spec.keys()) diff --git a/modflow_devtools/dfns/__main__.py b/modflow_devtools/dfns/__main__.py index bdfe4d78..383e5d7f 100644 --- a/modflow_devtools/dfns/__main__.py +++ b/modflow_devtools/dfns/__main__.py @@ -2,248 +2,90 @@ Command-line interface for the DFNs API. 
Usage: - mf dfns sync [--ref REF] [--force] - mf dfns info - mf dfns list [--ref REF] - mf dfns clean [--all] + python -m modflow_devtools.dfns sync + python -m modflow_devtools.dfns info + python -m modflow_devtools.dfns clean """ -from __future__ import annotations - import argparse import shutil import sys -from modflow_devtools.dfns.registry import ( - DfnRegistryDiscoveryError, - DfnRegistryNotFoundError, - get_bootstrap_config, - get_cache_dir, - get_registry, - get_sync_status, - sync_dfns, -) +from modflow_devtools.dfns.registry import RemoteDfnRegistry def cmd_sync(args: argparse.Namespace) -> int: - """Sync DFN registries from remote sources.""" - source = args.source - ref = args.ref - force = args.force - - try: - if ref: - print(f"Syncing {source}@{ref}...") - registries = sync_dfns(source=source, ref=ref, force=force) - else: - print(f"Syncing all configured refs for {source}...") - registries = sync_dfns(source=source, force=force) - - for registry in registries: - meta = registry.registry_meta - print(f" {registry.ref}: {len(meta.files)} files") - - print(f"Synced {len(registries)} registry(ies)") - return 0 - - except DfnRegistryNotFoundError as e: - print(f"Error: {e}", file=sys.stderr) - return 1 - except DfnRegistryDiscoveryError as e: - print(f"Error: {e}", file=sys.stderr) - return 1 - except Exception as e: - print(f"Unexpected error: {e}", file=sys.stderr) - return 1 - + """Sync DFN releases from GitHub release assets.""" -def cmd_info(args: argparse.Namespace) -> int: - """Show sync status and cache information.""" - source = args.source + registries = RemoteDfnRegistry.load_default() try: - config = get_bootstrap_config() - - if source not in config.sources: - print(f"Unknown source: {source}", file=sys.stderr) - print(f"Available sources: {list(config.sources.keys())}", file=sys.stderr) - return 1 - - source_config = config.sources[source] - print(f"Source: {source}") - print(f" Repository: {source_config.repo}") - print(f" DFN path: 
{source_config.dfn_path}") - print(f" Registry path: {source_config.registry_path}") - print() - - # Show sync status - status = get_sync_status(source=source) - print("Configured refs:") - for ref, synced in status.items(): - status_str = "synced" if synced else "not synced" - print(f" {ref}: {status_str}") - print() - - # Show cache info - cache_dir = get_cache_dir("dfn") - if cache_dir.exists(): - # Count cached files - registries_dir = cache_dir / "registries" / source - files_dir = cache_dir / "files" / source - - registry_count = 0 - file_count = 0 - total_size = 0 - - if registries_dir.exists(): - for p in registries_dir.rglob("*"): - if p.is_file(): - registry_count += 1 - total_size += p.stat().st_size - - if files_dir.exists(): - for p in files_dir.rglob("*"): - if p.is_file(): - file_count += 1 - total_size += p.stat().st_size - - print(f"Cache directory: {cache_dir}") - print(f" Registries: {registry_count}") - print(f" DFN files: {file_count}") - print(f" Total size: {_format_size(total_size)}") - else: - print("Cache directory: (not created)") - + for registry in registries.values(): + print(f"Syncing {registry.release_id}...") + registry.sync(force=args.force) + n_files = ( + len(list(registry.cache_path.glob("*.*"))) if registry.cache_path.exists() else 0 + ) + print(f" {registry.release_id}: {n_files} files") + print(f"Synced {registry.release_id}") return 0 - except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 -def cmd_list(args: argparse.Namespace) -> int: - """List available components.""" - source = args.source - ref = args.ref +def cmd_info(args: argparse.Namespace) -> int: + """Show DFN release synchronization status.""" + registries = RemoteDfnRegistry.load_default() try: - registry = get_registry(source=source, ref=ref, auto_sync=True) - components = list(registry.spec.keys()) - - print(f"Components in {source}@{ref} ({len(components)} total):") - for component in sorted(components): - print(f" {component}") - + for 
registry in registries.values(): + cached = registry.cached_tag() + if cached: + _, tag = registry.release_id.split("@") + suffix = f" ({cached})" if tag == "latest" else "" + print(f"Cached: {registry.release_id}{suffix}") + else: + print(f"Not cached: {registry.release_id}") return 0 - - except DfnRegistryNotFoundError as e: - print(f"Error: {e}", file=sys.stderr) - print("Try running 'mf dfns sync' first.", file=sys.stderr) - return 1 except Exception as e: print(f"Error: {e}", file=sys.stderr) return 1 def cmd_clean(args: argparse.Namespace) -> int: - """Clean the cache directory.""" - source = args.source - clean_all = args.all - - cache_dir = get_cache_dir("dfn") - - if not cache_dir.exists(): - print("Cache directory does not exist.") - return 0 - - if clean_all: - # Clean entire cache - print(f"Removing entire cache directory: {cache_dir}") - shutil.rmtree(cache_dir) - print("Cache cleaned.") - else: - # Clean only the specified source - registries_dir = cache_dir / "registries" / source - files_dir = cache_dir / "files" / source + """Clean the DFN release cache directory.""" - removed = False - if registries_dir.exists(): - print(f"Removing registries for {source}: {registries_dir}") - shutil.rmtree(registries_dir) - removed = True - - if files_dir.exists(): - print(f"Removing files for {source}: {files_dir}") - shutil.rmtree(files_dir) - removed = True - - if removed: - print(f"Cache cleaned for {source}.") - else: - print(f"No cache found for {source}.") + cache_dir = RemoteDfnRegistry.base_cache_path() + print(f"Cleaning cache directory: {cache_dir}") + shutil.rmtree(cache_dir, ignore_errors=True) + print("Cache cleaned.") return 0 -def _format_size(size_bytes: int) -> str: - """Format size in bytes to human-readable string.""" - size = float(size_bytes) - for unit in ["B", "KB", "MB", "GB"]: - if size < 1024: - return f"{size:.1f} {unit}" - size /= 1024 - return f"{size:.1f} TB" - - def main(argv: list[str] | None = None) -> int: """Main entry point 
for the CLI.""" parser = argparse.ArgumentParser( - prog="mf dfns", + prog="python -m modflow_devtools.dfns", description="MODFLOW 6 definition file tools", ) - parser.add_argument( - "--source", - "-s", - default="modflow6", - help="Source repository name (default: modflow6)", - ) - subparsers = parser.add_subparsers(dest="command", help="Available commands") - # sync command - sync_parser = subparsers.add_parser("sync", help="Sync DFN registries from remote") - sync_parser.add_argument( - "--ref", - "-r", - help="Specific ref to sync (default: all configured refs)", - ) + # sync + sync_parser = subparsers.add_parser("sync", help="Sync DFN files from release assets") sync_parser.add_argument( "--force", "-f", action="store_true", - help="Force re-sync even if already cached", + help="Force re-download even if already cached", ) - # info command - subparsers.add_parser("info", help="Show sync status and cache info") - - # list command - list_parser = subparsers.add_parser("list", help="List available components") - list_parser.add_argument( - "--ref", - "-r", - default="develop", - help="Git ref to list components from (default: develop)", - ) + # info + subparsers.add_parser("info", help="Show cache info and sync status") - # clean command - clean_parser = subparsers.add_parser("clean", help="Clean the cache") - clean_parser.add_argument( - "--all", - "-a", - action="store_true", - help="Clean entire cache, not just the specified source", - ) + # clean + subparsers.add_parser("clean", help="Clean the cache") args = parser.parse_args(argv) @@ -251,12 +93,10 @@ def main(argv: list[str] | None = None) -> int: parser.print_help() return 0 - if args.command == "sync": + elif args.command == "sync": return cmd_sync(args) elif args.command == "info": return cmd_info(args) - elif args.command == "list": - return cmd_list(args) elif args.command == "clean": return cmd_clean(args) else: diff --git a/modflow_devtools/dfns/dfn2toml.py b/modflow_devtools/dfns/dfn2toml.py 
deleted file mode 100644 index 33760280..00000000 --- a/modflow_devtools/dfns/dfn2toml.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Convert DFNs to TOML.""" - -import argparse -import sys -import textwrap -from dataclasses import asdict -from os import PathLike -from pathlib import Path - -import tomli_w as tomli -from boltons.iterutils import remap - -from modflow_devtools.dfns import Dfn, is_valid, load, load_flat, map, to_flat, to_tree -from modflow_devtools.dfns.parse import parse_dfn -from modflow_devtools.dfns.schema.block import block_sort_key -from modflow_devtools.misc import drop_none_or_empty - -# mypy: ignore-errors - - -def convert(inpath: PathLike, outdir: PathLike, schema_version: str = "2") -> None: - """ - Convert DFN files in `inpath` to TOML files in `outdir`. - By default, convert the definitions to schema version 2. - """ - inpath = Path(inpath).expanduser().absolute() - outdir = Path(outdir).expanduser().absolute() - outdir.mkdir(exist_ok=True, parents=True) - - if inpath.is_file(): - if inpath.name == "common.dfn": - raise ValueError("Cannot convert common.dfn as a standalone file") - - common_path = inpath.parent / "common.dfn" - if common_path.exists(): - with common_path.open() as f: - common, _ = parse_dfn(f) - else: - common = {} - - with inpath.open() as f: - dfn = load(f, name=inpath.stem, common=common, format="dfn") - - dfn = map(dfn, schema_version=schema_version) - _convert(dfn, outdir / f"{inpath.stem}.toml") - else: - dfns = { - name: map(dfn, schema_version=schema_version) for name, dfn in load_flat(inpath).items() - } - tree = to_tree(dfns) - flat = to_flat(tree) - for dfn_name, dfn in flat.items(): - _convert(dfn, outdir / f"{dfn_name}.toml") - - -def _convert(dfn: Dfn, outpath: Path) -> None: - with Path.open(outpath, "wb") as f: - # TODO if we start using c/attrs, swap out - # all this for a custom unstructuring hook - dfn_dict = asdict(dfn) - dfn_dict["schema_version"] = str(dfn_dict["schema_version"]) - if blocks := 
dfn_dict.pop("blocks", None): - for block_name, block_fields in blocks.items(): - if block_name not in dfn_dict: - dfn_dict[block_name] = {} - for field_name, field_data in block_fields.items(): - dfn_dict[block_name][field_name] = field_data - - tomli.dump( - dict( - sorted( - remap(dfn_dict, visit=drop_none_or_empty).items(), - key=block_sort_key, - ) - ), - f, - ) - - -if __name__ == "__main__": - """ - Convert DFN files in the original format and schema version 1 - to TOML files, by default also converting to schema version 2. - """ - - parser = argparse.ArgumentParser( - description="Convert DFN files to TOML.", - epilog=textwrap.dedent( - """\ -Convert DFN files in the original format and schema version 1 -to TOML files, by default also converting to schema version 2. -""" - ), - ) - parser.add_argument( - "--indir", - "-i", - type=str, - help="Directory containing DFN files, or a single DFN file.", - ) - parser.add_argument( - "--outdir", - "-o", - help="Output directory.", - ) - parser.add_argument( - "--schema-version", - "-s", - type=str, - default="2", - help="Schema version to convert to.", - ) - parser.add_argument( - "--validate", - "-v", - action="store_true", - help="Validate DFN files without converting them.", - ) - args = parser.parse_args() - - if args.validate: - if not is_valid(args.indir): - sys.exit(1) - else: - convert(args.indir, args.outdir, args.schema_version) diff --git a/modflow_devtools/dfns/dfns.toml b/modflow_devtools/dfns/dfns.toml index 4a84ae67..5676b5cc 100644 --- a/modflow_devtools/dfns/dfns.toml +++ b/modflow_devtools/dfns/dfns.toml @@ -1,24 +1,9 @@ -# DFNs API bootstrap configuration -# -# This file tells modflow-devtools where to find DFN registries. -# Users can override or extend this by creating a config file at: +# This file tells modflow-devtools where to find DFN releases. 
+# Users can override or extend this with a config file at: # - Linux/macOS: ~/.config/modflow-devtools/dfns.toml # - Windows: %APPDATA%/modflow-devtools/dfns.toml -[sources.modflow6] -# GitHub repository containing DFN files -repo = "MODFLOW-ORG/modflow6" - -# Path within the repository to the DFN files directory -dfn_path = "doc/mf6io/mf6ivar/dfn" - -# Path within the repository to the registry metadata file -registry_path = ".registry/dfns.toml" - -# Git refs (branches, tags, commit hashes) to sync by default -refs = [ - "develop", - "6.6.0", - "6.5.0", - "6.4.4", +releases = [ + # "MODFLOW-ORG/modflow6@latest", + "MODFLOW-ORG/modflow6-nightly-build@latest" ] diff --git a/modflow_devtools/dfns/make_registry.py b/modflow_devtools/dfns/make_registry.py deleted file mode 100644 index bd510aa0..00000000 --- a/modflow_devtools/dfns/make_registry.py +++ /dev/null @@ -1,184 +0,0 @@ -""" -Registry generation tool for DFN files. - -This tool scans a directory of DFN files and generates a registry file -that can be used by the DFNs API for discovery and verification. - -Usage: - python -m modflow_devtools.dfn.make_registry --dfn-path PATH --output FILE [--ref REF] - -Example (for MODFLOW 6 CI): - python -m modflow_devtools.dfn.make_registry \\ - --dfn-path doc/mf6io/mf6ivar/dfn \\ - --output .registry/dfns.toml \\ - --ref ${{ github.ref_name }} -""" - -from __future__ import annotations - -import argparse -import hashlib -import sys -from datetime import datetime, timezone -from pathlib import Path - -import tomli_w - - -def compute_file_hash(path: Path) -> str: - """Compute SHA256 hash of a file.""" - sha256 = hashlib.sha256() - with path.open("rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - sha256.update(chunk) - return f"sha256:{sha256.hexdigest()}" - - -def scan_dfn_directory(dfn_path: Path) -> dict[str, str]: - """ - Scan a directory for DFN files and compute their hashes. 
- - Parameters - ---------- - dfn_path : Path - Path to directory containing DFN files. - - Returns - ------- - dict[str, str] - Map of filename to SHA256 hash. - """ - files = {} - - # Find all .dfn files - for p in sorted(dfn_path.glob("*.dfn")): - files[p.name] = compute_file_hash(p) - - # Find all .toml files (spec.toml and/or component files) - for p in sorted(dfn_path.glob("*.toml")): - files[p.name] = compute_file_hash(p) - - return files - - -def generate_registry( - dfn_path: Path, - output_path: Path, - ref: str | None = None, - devtools_version: str | None = None, -) -> None: - """ - Generate a DFN registry file. - - Parameters - ---------- - dfn_path : Path - Path to directory containing DFN files. - output_path : Path - Path to write the registry file. - ref : str, optional - Git ref this registry is being generated for. - devtools_version : str, optional - Version of modflow-devtools generating this registry. - """ - # Scan directory for files - files = scan_dfn_directory(dfn_path) - - if not files: - raise ValueError(f"No DFN files found in {dfn_path}") - - # Get devtools version if not provided - if devtools_version is None: - try: - from modflow_devtools import __version__ - - devtools_version = __version__ - except ImportError: - devtools_version = "unknown" - - # Build registry structure - registry: dict = { - "schema_version": "1.0", - "generated_at": datetime.now(timezone.utc).isoformat(), - "devtools_version": devtools_version, - } - - if ref: - registry["metadata"] = {"ref": ref} - - # Add files section - registry["files"] = {filename: {"hash": file_hash} for filename, file_hash in files.items()} - - # Write registry file - output_path.parent.mkdir(parents=True, exist_ok=True) - with output_path.open("wb") as f: - tomli_w.dump(registry, f) - - -def main(argv: list[str] | None = None) -> int: - """Main entry point.""" - parser = argparse.ArgumentParser( - prog="python -m modflow_devtools.dfn.make_registry", - description="Generate a DFN 
registry file", - ) - parser.add_argument( - "--dfn-path", - "-d", - type=Path, - required=True, - help="Path to directory containing DFN files", - ) - parser.add_argument( - "--output", - "-o", - type=Path, - required=True, - help="Output path for registry file", - ) - parser.add_argument( - "--ref", - "-r", - help="Git ref this registry is being generated for", - ) - parser.add_argument( - "--devtools-version", - help="Version of modflow-devtools (default: auto-detect)", - ) - - args = parser.parse_args(argv) - - dfn_path = args.dfn_path.expanduser().resolve() - output_path = args.output.expanduser().resolve() - - if not dfn_path.exists(): - print(f"Error: DFN path does not exist: {dfn_path}", file=sys.stderr) - return 1 - - if not dfn_path.is_dir(): - print(f"Error: DFN path is not a directory: {dfn_path}", file=sys.stderr) - return 1 - - try: - generate_registry( - dfn_path=dfn_path, - output_path=output_path, - ref=args.ref, - devtools_version=args.devtools_version, - ) - - # Report results - files = scan_dfn_directory(dfn_path) - print(f"Generated registry: {output_path}") - print(f" Files: {len(files)}") - if args.ref: - print(f" Ref: {args.ref}") - - return 0 - - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/modflow_devtools/dfns/mapper.py b/modflow_devtools/dfns/mapper.py new file mode 100644 index 00000000..71f2b410 --- /dev/null +++ b/modflow_devtools/dfns/mapper.py @@ -0,0 +1,719 @@ +import re +from collections.abc import Mapping +from typing import Any, Literal + +from modflow_devtools.dfn import schema as v1 +from modflow_devtools.dfn.parser import try_parse_bool +from modflow_devtools.dfns import schema as v2 +from modflow_devtools.misc import try_literal_eval + +_IDENT_RE = re.compile(r"^[A-Za-z_]\w*$") +_LOOKUP_RE = re.compile(r"^(\w+)\.(\w+)\((\w+)\)$") + + +def _scope_for( + parent: "str | list[str] | None", +) -> "Literal['component', 'model', 
'simulation']": + """ + Derive the DimDef scope for dims in a component's dimensions block from its parent. + + - Parent is a model (``-nam``) or a generic type (``"model"``, + ``"package"``, ``"*"``) → ``"model"`` + - Parent is ``"sim-nam"`` (directly under simulation) → ``"simulation"`` + - Otherwise → ``"component"`` + """ + parents = ([parent] if isinstance(parent, str) else parent) if parent is not None else [] + for p in parents: + if p == "sim-nam": + return "simulation" + if p.endswith("-nam") or p in ("model", "package", "*"): + return "model" + return "component" + + +def _raw_dim_names(blocks: dict[str, v2.Block]) -> set[str]: + """Names of all Integer fields in the dimensions block.""" + dim_block = blocks.get("dimensions") + if not dim_block: + return set() + return {fname for fname, f in dim_block.fields.items() if isinstance(f, v2.Integer)} + + +def _parse_list_shape(s: str) -> list[str]: + """ + Parse a v1 recarray shape string into a ``List.shape`` value. + + Only a bare identifier is accepted — complex expressions such as + ``sum(nlakeconn)`` cannot be represented in ``List.shape`` and are dropped. + """ + if not s: + return [] + s_clean = s.strip() + if s_clean.startswith("(") and s_clean.endswith(")"): + s_clean = s_clean[1:-1].strip() + if _IDENT_RE.fullmatch(s_clean): + return [s_clean] + return [] + + +def _normalize_n_prefix_shapes( + blocks: dict[str, v2.Block], + raw_dim_names: set[str], +) -> dict[str, v2.Block]: + """ + Fix List shapes that use ``nFoo`` where the actual dimension is ``maxFoo``. + + Some v1 DFNs (e.g. ``gwf-mvr [packages]`` with ``shape (npackages)``) use + an ``n``-prefixed name while the dimensions block defines the same quantity + under a ``max``-prefixed name. Normalise before building explicit dims. 
+ """ + result = {} + for bname, block in blocks.items(): + new_fields = {} + changed = False + for fname, field in block.fields.items(): + if isinstance(field, v2.List) and field.shape: + elem = field.shape[0] + if elem not in raw_dim_names and elem.startswith("n") and len(elem) > 1: + candidate = "max" + elem[1:] + if candidate in raw_dim_names: + field = field.model_copy(update={"shape": [candidate]}) + changed = True + new_fields[fname] = field + result[bname] = block.model_copy(update={"fields": new_fields}) if changed else block + return result + + +def _build_explicit_dims( + parent: "str | list[str] | None", + blocks: dict[str, v2.Block], +) -> dict[str, v2.Dim]: + """Build the dims section from a component's dimensions block.""" + dims: dict[str, v2.Dim] = {} + dim_block = blocks.get("dimensions") + if not dim_block: + return dims + + scope = _scope_for(parent) + for fname, field in dim_block.fields.items(): + if isinstance(field, v2.Integer): + dims[fname] = v2.Dim(field=fname, scope=scope) + + if scope == "model": + has = set(dims.keys()) + if {"nlay", "nrow", "ncol"} <= has: + dims["ncpl"] = v2.Dim(expr="nrow * ncol", scope="model") + dims["nodes"] = v2.Dim(expr="nlay * nrow * ncol", scope="model") + dims["ncelldim"] = v2.Dim(expr="3", scope="model") + elif {"nlay", "ncpl"} <= has: + dims["nodes"] = v2.Dim(expr="nlay * ncpl", scope="model") + dims["ncelldim"] = v2.Dim(expr="2", scope="model") + elif {"nrow", "ncol"} <= has: + dims["ncpl"] = v2.Dim(expr="nrow * ncol", scope="model") + dims["nodes"] = v2.Dim(expr="nrow * ncol", scope="model") + dims["ncelldim"] = v2.Dim(expr="2", scope="model") + elif "nodes" in has: + dims["ncelldim"] = v2.Dim(expr="1", scope="model") + + return dims + + +def _sanitize_list_shapes( + blocks: dict[str, v2.Block], + known_dims: set[str], +) -> dict[str, v2.Block]: + """ + Clear the shape of any List whose shape element doesn't resolve to a known + dim. + + Advanced packages (LAK, SFR, GNC, transport packages, etc.) 
often carry + ``shape (maxbound)`` in their v1 DFNs as a convention even though + ``maxbound`` is not declared as a dimension. The structurally correct v2 + representation for such lists is ``shape=[]``. + """ + result = {} + for bname, block in blocks.items(): + new_fields = {} + changed = False + for fname, field in block.fields.items(): + if isinstance(field, v2.List) and field.shape: + if any(elem not in known_dims for elem in field.shape): + field = field.model_copy(update={"shape": []}) + changed = True + new_fields[fname] = field + result[bname] = block.model_copy(update={"fields": new_fields}) if changed else block + return result + + +def _resolve_dimensions( + blocks: dict[str, v2.Block], +) -> tuple[dict[str, v2.Block], dict[str, v2.Dim]]: + """ + Detect self-sizing arrays whose name is referenced in another array's shape + expression — those define a component-scoped dimension. + + Any array type qualifies (not just string). Returns the unchanged blocks + alongside a dict of component-scoped DimDef entries. 
+ """ + self_sizing: set[str] = set() + shape_refs: set[str] = set() + + def _scan(fields: Mapping[str, v2.Field]) -> None: + for name, field in fields.items(): + if isinstance(field, v2.Array): + if not field.shape: + self_sizing.add(name) + else: + for elem in field.shape: + if _IDENT_RE.fullmatch(elem): + shape_refs.add(elem) + if isinstance(field, v2.Record): + _scan(field.fields) + elif isinstance(field, v2.Union): + _scan(field.arms) + elif isinstance(field, v2.List): + item = field.item + _scan(item.fields if isinstance(item, v2.Record) else item.arms) + + for block in blocks.values(): + _scan(block.fields) + + array_dim_names = self_sizing & shape_refs + array_dims = {n: v2.Dim(field=n, scope="component") for n in array_dim_names} + return blocks, array_dims + + +def _resolve_relations(blocks: dict[str, v2.Block]) -> dict[str, v2.Block]: + pk_set: set[tuple[str, str]] = set() + fk_map: dict[tuple[str, str], str] = {} + + def _scan_fields(block_name: str, fields: Mapping[str, v2.Field]) -> None: + + def _scan_record(record: v2.Record) -> None: + for field in record.fields.values(): + if isinstance(field, v2.Array): + for dim in field.shape: + if m := _LOOKUP_RE.fullmatch(dim): + pk_block, _, fk_fname = m.groups() + sibling = record.fields.get(fk_fname) + if sibling is not None and getattr(sibling, "fk", None) is None: + fk_map[(block_name, fk_fname)] = f"{pk_block}.{fk_fname}" + pk_set.add((pk_block, fk_fname)) + + for field in fields.values(): + if isinstance(field, v2.Record): + _scan_record(field) + elif isinstance(field, v2.Union): + _scan_fields(block_name, field.arms) + elif isinstance(field, v2.List): + item = field.item + if isinstance(item, v2.Record): + _scan_record(item) + elif isinstance(item, v2.Union): + _scan_fields(block_name, item.arms) + + for block_name, block in blocks.items(): + _scan_fields(block_name, block.fields) + + if not fk_map and not pk_set: + return blocks + + def _resolve_fields(block_name: str, fields: Mapping[str, v2.Field]) 
-> dict[str, v2.Field]: + + def _resolve_record(record: v2.Record) -> v2.Record: + updates: dict = {} + for fname, sf in record.fields.items(): + updated = sf + if (block_name, fname) in fk_map and getattr(sf, "fk", None) is None: + updated = updated.model_copy(update={"fk": fk_map[(block_name, fname)]}) + if (block_name, fname) in pk_set and not getattr(sf, "pk", False): + updated = updated.model_copy(update={"pk": True}) + if updated is not sf: + updates[fname] = updated + if not updates: + return record + return record.model_copy( + update={"fields": {fn: updates.get(fn, sf) for fn, sf in record.fields.items()}} + ) + + result = {} + for name, f in fields.items(): + if isinstance(f, v2.Record): + f = _resolve_record(f) + elif isinstance(f, v2.Union): + f.arms = _resolve_fields(block_name, f.arms) # type: ignore[assignment] + elif isinstance(f, v2.List): + if isinstance(f.item, v2.Record): + f.item = _resolve_record(f.item) + else: + f.item.arms = _resolve_fields(block_name, f.item.arms) # type: ignore[assignment] + result[name] = f + return result + + return { + block_name: block.model_copy(update={"fields": _resolve_fields(block_name, block.fields)}) + for block_name, block in blocks.items() + } + + +def _fill_period_list_shapes( + blocks: dict[str, v2.Block], + explicit_dims: dict[str, v2.Dim], +) -> dict[str, v2.Block]: + """ + For period blocks whose List field has no shape expression, infer the shape + from the component's explicit dims. Currently handles ``maxbound`` only: + if the component defines a ``maxbound`` dimension but the period list omits + it, add ``shape=["maxbound"]``. 
+ """ + if "maxbound" not in explicit_dims: + return blocks + result = {} + for bname, block in blocks.items(): + if "period" not in bname: + result[bname] = block + continue + new_fields = {} + changed = False + for fname, field in block.fields.items(): + if isinstance(field, v2.List) and not field.shape: + field = field.model_copy(update={"shape": ["maxbound"]}) + changed = True + new_fields[fname] = field + result[bname] = block.model_copy(update={"fields": new_fields}) if changed else block + return result + + +def map(dfn: v1.Dfn) -> v2.Component: + """Map a component definition from the v1 schema to v2.""" + + if dfn["schema_version"] != "1": + raise ValueError(f"Expected schema version 1, got {dfn['schema_version']!r}") + + fields = v1.get_fields(dfn) + + def _map_field(field: v1.Field) -> v2.Field: + + def _to_bool(v: Any, default: bool = False) -> bool: + if isinstance(v, bool): + return v + if isinstance(v, str): + s = v.strip().lower() + if s == "true": + return True + if s in ("false", ""): + return False + return default + + def __map_field(f: v1.Field) -> v2.Field: + fd: dict[str, Any] = {k: try_parse_bool(v) for k, v in dict(f).items()} + + _name: str = fd["name"] + _type: str | None = fd.get("type") + shape_str: str | None = fd.get("shape") or None + description: str | None = fd.get("description") or None + longname: str | None = fd.get("longname") or None + optional: bool = _to_bool(fd.get("optional"), False) + developmode: bool = _to_bool(fd.get("developmode"), False) + netcdf: bool = _to_bool(fd.get("netcdf"), False) + tagged: bool = _to_bool(fd.get("tagged"), False) + preserve_case: bool = _to_bool(fd.get("preserve_case"), False) + time_series: bool = _to_bool(fd.get("time_series"), False) + valid = fd.get("valid") + _default_raw = fd.get("default") + default = ( + try_literal_eval(_default_raw) + if _type != "string" and isinstance(_default_raw, str) + else _default_raw + ) + + _COL_FK_RE = re.compile(r"^([A-Za-z_]\w*)\(([A-Za-z_]\w*)\)$") + + 
def _parse_shape(s: str) -> list[str]: + result = [] + s_clean = s.strip() + if s_clean.startswith("(") and s_clean.endswith(")"): + s_clean = s_clean[1:-1] + for elem in (x.strip() for x in s_clean.split(",") if x.strip()): + if ";" in elem: + result.append("ncpl") + elif ( + elem in ("any1d", "unknown") or elem.startswith("<") or elem.startswith(">") + ): + pass + elif m := _COL_FK_RE.fullmatch(elem): + col_name = m.group(1) + block_name = next( + ( + fi["block"] + for fi in fields.values(multi=True) + if fi["name"] == col_name + and fi["type"] == "integer" + and fi.get("in_record", False) + ), + None, + ) + if block_name: + result.append(f"{block_name}.{elem}") + else: + provider = next( + ( + fi["name"] + for fi in fields.values(multi=True) + if fi["type"] == "string" + and (fi.get("shape") or "").strip() in (f"({elem})", elem) + ), + None, + ) + result.append(provider if provider else elem) + return result + + def _to_scalar() -> v2.Scalar: + assert _type is not None + if _type == "keyword": + return v2.Keyword( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + ) + if _type == "string": + return v2.String( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + tagged=tagged, + valid=valid.split() + if isinstance(valid, str) and valid + else (list(valid) if valid else None), + case_sensitive=preserve_case, + time_series=time_series, + ) + if _type == "integer": + v = ( + [int(x) for x in valid.split()] + if isinstance(valid, str) and valid + else ([int(x) for x in valid] if valid else None) + ) + return v2.Integer( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + tagged=tagged, + valid=v, + time_series=time_series, + ) + if _type in ("double", "double precision"): + return 
v2.Double( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + tagged=tagged, + time_series=time_series, + ) + raise TypeError(f"Unsupported scalar type: {_type!r}") + + def _row_field() -> v2.Record | v2.Union: + item_names = (_type or "").split()[1:] + if not item_names: + raise ValueError(f"Missing list item definition: {_type!r}") + + item_types = [ + fi["type"] + for fi in fields.values(multi=True) + if fi["name"] in item_names and fi.get("in_record", False) + ] + + if ( + len(item_names) == 1 + and item_types + and ( + (item_types[0] or "").startswith("record") + or (item_types[0] or "").startswith("keystring") + ) + ): + mapped = __map_field(next(iter(fields.getlist(item_names[0])))) + if isinstance(mapped, (v2.Record, v2.Union)): + return mapped + raise TypeError( + f"Expected Record or Union for list item, got {type(mapped).__name__}" + ) + + if all(t in v1.SCALAR_TYPES for t in item_types): + rec_fields = _record_fields() + return v2.Record( + name=_name, + description=( + (description or "").replace("is the list of", "is the record of") + or None + ), + fields=rec_fields, + ) + + children = { + fi["name"]: __map_field(fi) + for fi in fields.values(multi=True) + if fi["name"] in item_names and fi.get("in_record", False) + } + first = next(iter(children.values())) + if len(children) == 1 and isinstance(first, v2.Union): + return first + return v2.Record( + name=_name, + description=( + (description or "").replace("is the list of", "is the record of") or None + ), + fields=children, # type: ignore[arg-type] + ) + + def _union_fields() -> dict: + names = (_type or "").split()[1:] + return { + fi["name"]: __map_field(fi) + for fi in fields.values(multi=True) + if fi["name"] in names and fi.get("in_record", False) + } + + def _record_fields() -> dict: + names = (_type or "").split()[1:] + result = {} + for rname in names: + matches = [ + fi + for fi in 
fields.values(multi=True) + if fi["name"] == rname + and fi.get("in_record", False) + and not (fi["type"] or "").startswith("record") + ] + if matches: + result[rname] = __map_field(matches[0]) + return result + + if _type is None: + raise ValueError(f"Missing type for v1 field: {_name!r}") + + if _type.startswith("recarray"): + item = _row_field() + list_shape = _parse_list_shape(shape_str) if shape_str else [] + return v2.List( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + item=item, + shape=list_shape, + ) + + if _type.startswith("keystring"): + arms = _union_fields() + return v2.Union( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + arms=arms, # type: ignore[arg-type] + ) + + if _type.startswith("record"): + subnames = (_type or "").split()[1:] + # Detect filerecord: a subfield named 'filein' or 'fileout' with type keyword + file_mode: str | None = None + for sname in subnames: + if sname in ("filein", "fileout"): + m = next( + ( + fi + for fi in fields.values(multi=True) + if fi["name"] == sname + and try_parse_bool(fi.get("in_record", False)) + ), + None, + ) + if m and (m.get("type") or "").strip() == "keyword": + file_mode = sname + break + + if file_mode: + # Filerecord pattern: + # In v2: drop the mode keyword and the untagged path string; promote + # the tag keyword to a File field (tagged=True, name=tag keyword name). + # Find the untagged string (the path value) so we can skip it. 
+ path_field_name: str | None = None + for sname in subnames: + if sname == file_mode: + continue + m_s = next( + ( + fi + for fi in fields.values(multi=True) + if fi["name"] == sname + and try_parse_bool(fi.get("in_record", False)) + ), + None, + ) + if ( + m_s + and (m_s.get("type") or "").strip() == "string" + and not _to_bool(m_s.get("tagged"), True) + ): + path_field_name = sname + break + + rec_fields = {} + for rname in subnames: + if rname in (file_mode, path_field_name): + continue # drop mode keyword and path string + m = next( + ( + fi + for fi in fields.values(multi=True) + if fi["name"] == rname + and try_parse_bool(fi.get("in_record", False)) + and not (fi.get("type") or "").startswith("record") + ), + None, + ) + if m is None: + continue + ftype = (m.get("type") or "").strip() + if ftype == "keyword": + # Tag keyword becomes the File field (tagged=True, name=keyword name) + rec_fields[rname] = v2.File( + name=rname, + longname=m.get("longname") or None, + description=m.get("description") or None, + optional=_to_bool(m.get("optional"), False), + developmode=_to_bool(m.get("developmode"), False), + netcdf=_to_bool(m.get("netcdf"), False), + tagged=True, + mode=file_mode, # type: ignore[arg-type] + ) + else: + rec_fields[rname] = __map_field(m) # type: ignore + else: + rec_fields = _record_fields() + + return v2.Record( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + fields=rec_fields, # type: ignore[arg-type] + ) + + if shape_str is not None: + dtype_map: dict[str, Literal["keyword", "integer", "double", "string"]] = { + "double precision": "double", + "double": "double", + "integer": "integer", + "string": "string", + "keyword": "keyword", + } + dtype = dtype_map.get(_type) + if dtype is not None: + if dtype == "string": + # String arrays in v1 are always self-sizing; whether the + # array defines a component dimension is detected generically + # by _resolve_dimensions 
(any self-sizing array referenced + # by name in a sibling shape expression is a dim source). + return v2.Array( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + time_series=time_series, + dtype="string", + shape=[], + ) + parsed_shape = _parse_shape(shape_str) + return v2.Array( + name=_name, + longname=longname, + description=description, + optional=optional, + default=default, + developmode=developmode, + netcdf=netcdf, + time_series=time_series, + dtype=dtype, + shape=parsed_shape, + ) + + return _to_scalar() + + return __map_field(field) + + name = dfn["name"] + blocks: dict[str, v2.Block] = {} + + for field in fields.values(multi=True): + if field.get("in_record", False): + continue # record subfields are handled recursively + v2_field = _map_field(field) + blocks.setdefault(field["block"], v2.Block(name=field["block"], fields={})).fields[ + field["name"] + ] = v2_field + blocks[field["block"]].repeats = field.get("block_variable", False) + + blocks, array_dims = _resolve_dimensions(blocks) + blocks = _resolve_relations(blocks) + raw_dim_names = _raw_dim_names(blocks) + blocks = _normalize_n_prefix_shapes(blocks, raw_dim_names) + explicit_dims = _build_explicit_dims(dfn["parent"], blocks) + known_dims = set(explicit_dims) | set(array_dims) + blocks = _sanitize_list_shapes(blocks, known_dims) + blocks = _fill_period_list_shapes(blocks, explicit_dims) + dims = {**explicit_dims, **array_dims} or None + + d: dict[str, Any] = { + "schema_version": "2", + "name": name, + "parent": dfn["parent"], + "blocks": blocks or None, + "dims": dims, + } + if name == "sim-nam": + return v2.Simulation(**d) + if name.endswith("-nam"): + return v2.Model(**d) + + subtype: Literal["solution", "exchange", "stress", "advanced", "utility"] | None = None + if name.startswith("sln-"): + subtype = "solution" + elif name.startswith("exg-"): + subtype = "exchange" + elif 
name.startswith("utl-"): + subtype = "utility" + else: + is_stress_pkg = bool(any(blocks) and any("period" in k for k in blocks)) + subtype = "advanced" if dfn["advanced"] else "stress" if is_stress_pkg else None + return v2.Package( + **d, + subtype=subtype, + multi=dfn["multi"], + ) diff --git a/modflow_devtools/dfns/registry.py b/modflow_devtools/dfns/registry.py index 51a6dfc2..f02b3ca7 100644 --- a/modflow_devtools/dfns/registry.py +++ b/modflow_devtools/dfns/registry.py @@ -1,379 +1,55 @@ -""" -DFN registry infrastructure for discovery, caching, and synchronization. - -This module provides: -- Pydantic schemas for registry and bootstrap configuration -- Cache management for registries and DFN files -- Registry classes for local and remote DFN access -""" - -from __future__ import annotations - +import json import os -import sys -from datetime import datetime +import tempfile +import urllib.request from os import PathLike from pathlib import Path -from typing import TYPE_CHECKING +from platform import system -from packaging.version import Version -from pydantic import BaseModel, Field +import pooch +import tomli +from pydantic import BaseModel, Field, PrivateAttr -if TYPE_CHECKING: - import pooch - - from modflow_devtools.dfns import Dfn, DfnSpec +from modflow_devtools.dfns.schema import Dfns __all__ = [ - "BootstrapConfig", "DfnRegistry", - "DfnRegistryDiscoveryError", - "DfnRegistryError", - "DfnRegistryFile", - "DfnRegistryMeta", - "DfnRegistryNotFoundError", "LocalDfnRegistry", "RemoteDfnRegistry", - "SourceConfig", - "get_bootstrap_config", - "get_cache_dir", - "get_registry", - "get_sync_status", - "get_user_config_path", - "sync_dfns", + "is_cached", ] -# ============================================================================= -# Pydantic Schemas for Bootstrap Configuration -# ============================================================================= - - -class SourceConfig(BaseModel): - """Configuration for a DFN source repository.""" - - repo: 
str = Field(description="GitHub repository identifier (owner/name)") - dfn_path: str = Field( - default="doc/mf6io/mf6ivar/dfn", - description="Path within the repository to the DFN files directory", - ) - registry_path: str = Field( - default=".registry/dfns.toml", - description="Path within the repository to the registry metadata file", - ) - refs: list[str] = Field( - default_factory=list, - description="Git refs (branches, tags, commit hashes) to sync by default", - ) - - -class BootstrapConfig(BaseModel): - """Bootstrap configuration for DFN sources.""" - - sources: dict[str, SourceConfig] = Field( - default_factory=dict, - description="Map of source names to their configurations", - ) - - @classmethod - def load(cls, path: str | PathLike) -> BootstrapConfig: - """Load bootstrap configuration from a TOML file.""" - import tomli - - path = Path(path) - if not path.exists(): - return cls() - - with path.open("rb") as f: - data = tomli.load(f) - - # Convert sources dict to SourceConfig instances - sources = {} - for name, config in data.get("sources", {}).items(): - sources[name] = SourceConfig(**config) - - return cls(sources=sources) - - @classmethod - def merge(cls, base: BootstrapConfig, overlay: BootstrapConfig) -> BootstrapConfig: - """Merge two bootstrap configs, with overlay taking precedence.""" - merged_sources = dict(base.sources) - merged_sources.update(overlay.sources) - return cls(sources=merged_sources) - - -# ============================================================================= -# Pydantic Schemas for Registry Files -# ============================================================================= - - -class DfnRegistryFile(BaseModel): - """Entry for a single file in the registry.""" - - hash: str = Field(description="SHA256 hash of the file (sha256:...)") - - -class DfnRegistryMeta(BaseModel): - """ - Registry metadata and file listings. - - This represents the contents of a dfns.toml registry file. 
- """ - - schema_version: str = Field( - default="1.0", - description="Registry schema version", - ) - generated_at: datetime | None = Field( - default=None, - description="When the registry was generated", - ) - devtools_version: str | None = Field( - default=None, - description="Version of modflow-devtools that generated this registry", - ) - ref: str | None = Field( - default=None, - description="Git ref this registry was generated from", - ) - files: dict[str, DfnRegistryFile] = Field( - default_factory=dict, - description="Map of filenames to file metadata", - ) - - @classmethod - def load(cls, path: str | PathLike) -> DfnRegistryMeta: - """Load registry metadata from a TOML file.""" - import tomli - - path = Path(path) - with path.open("rb") as f: - data = tomli.load(f) - - # Handle nested structure: files section contains filename -> {hash: ...} - files_data = data.pop("files", {}) - files = {} - for filename, file_info in files_data.items(): - if isinstance(file_info, dict): - files[filename] = DfnRegistryFile(**file_info) - elif isinstance(file_info, str): - # Support shorthand: filename = "hash" - files[filename] = DfnRegistryFile(hash=file_info) - - # Handle metadata section if present - metadata = data.pop("metadata", {}) - ref = metadata.get("ref") or data.pop("ref", None) - - return cls( - schema_version=data.get("schema_version", "1.0"), - generated_at=data.get("generated_at"), - devtools_version=data.get("devtools_version"), - ref=ref, - files=files, - ) - - def save(self, path: str | PathLike) -> None: - """Save registry metadata to a TOML file.""" - import tomli_w - - path = Path(path) - path.parent.mkdir(parents=True, exist_ok=True) - - data: dict = { - "schema_version": self.schema_version, - } - - if self.generated_at: - data["generated_at"] = self.generated_at.isoformat() - if self.devtools_version: - data["devtools_version"] = self.devtools_version - - if self.ref: - data["metadata"] = {"ref": self.ref} - - # Write files section - 
data["files"] = { - filename: {"hash": file_info.hash} for filename, file_info in self.files.items() - } - - with path.open("wb") as f: - tomli_w.dump(data, f) - - -# ============================================================================= -# Cache and Configuration Utilities -# ============================================================================= - - -def get_user_config_path(subdir: str = "dfn") -> Path: - """ - Get the user configuration directory path. - - Parameters - ---------- - subdir : str - Subdirectory name (e.g., "dfn", "models", "programs"). - - Returns - ------- - Path - Path to user config file (e.g., ~/.config/modflow-devtools/dfns.toml). - """ - if sys.platform == "win32": - base = Path(os.environ.get("APPDATA", Path.home() / "AppData" / "Roaming")) - else: - base = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) - - return base / "modflow-devtools" / f"{subdir}s.toml" - - -def get_cache_dir(subdir: str = "dfn") -> Path: - """ - Get the cache directory path. - - Parameters - ---------- - subdir : str - Subdirectory name (e.g., "dfn", "models", "programs"). - - Returns - ------- - Path - Path to cache directory (e.g., ~/.cache/modflow-devtools/dfn/). - """ - if sys.platform == "win32": - base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local")) - else: - base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) - - return base / "modflow-devtools" / subdir - - -def get_bootstrap_config() -> BootstrapConfig: - """ - Load and merge bootstrap configuration. - - Loads the bundled bootstrap file and merges with user config if present. - - Returns - ------- - BootstrapConfig - Merged bootstrap configuration. 
- """ - # Load bundled bootstrap config - bundled_path = Path(__file__).parent / "dfns.toml" - bundled_config = BootstrapConfig.load(bundled_path) - - # Load user config if present - user_path = get_user_config_path("dfn") - if user_path.exists(): - user_config = BootstrapConfig.load(user_path) - return BootstrapConfig.merge(bundled_config, user_config) - - return bundled_config - - -# ============================================================================= -# Registry Classes -# ============================================================================= - - class DfnRegistry(BaseModel): - """ - Base class for DFN registries. - - A registry provides access to DFN files and the parsed DfnSpec. - This is a Pydantic model that can be used directly for data-only use cases. - """ + """Base class for DFN registries.""" model_config = {"arbitrary_types_allowed": True} - source: str = Field(default="modflow6", description="Source repository name") - ref: str = Field(default="develop", description="Git ref (branch, tag, or commit hash)") - - _spec: DfnSpec | None = None + _spec: Dfns | None = PrivateAttr(default=None, init=False) @property - def spec(self) -> DfnSpec: - """ - Get the full DFN specification. + def spec(self) -> Dfns: + raise NotImplementedError - Returns - ------- - DfnSpec - The parsed specification with hierarchical structure. - """ - raise NotImplementedError("Subclasses must implement spec property") - - @property - def schema_version(self) -> Version: - """Get the schema version of the specification.""" - return self.spec.schema_version - - @property - def components(self) -> dict[str, Dfn]: - """Get all components as a flat dictionary.""" - return dict(self.spec.items()) - - def get_dfn(self, component: str) -> Dfn: - """ - Get a DFN by component name. - - Parameters - ---------- - component : str - Component name (e.g., "gwf-chd", "sim-nam"). - - Returns - ------- - Dfn - The requested component definition. 
- """ - return self.spec[component] - - def get_dfn_path(self, component: str) -> Path: - """ - Get the local file path for a DFN. - - Parameters - ---------- - component : str - Component name (e.g., "gwf-chd", "sim-nam"). - - Returns - ------- - Path - Path to the local DFN file. - """ - raise NotImplementedError("Subclasses must implement get_dfn_path") + def get_path(self, component: str) -> Path: + raise NotImplementedError class LocalDfnRegistry(DfnRegistry): - """ - Registry for local DFN files. - - Use this for working with DFN files on the local filesystem, - e.g., during development or with a local clone of the MODFLOW 6 repository. - """ + """Registry for local DFN files.""" path: Path = Field(description="Path to directory containing DFN files") - def model_post_init(self, __context) -> None: - """Validate and resolve path after initialization.""" + def model_post_init(self, _) -> None: object.__setattr__(self, "path", Path(self.path).expanduser().resolve()) @property - def spec(self) -> DfnSpec: - """Load and return the DFN specification from local files.""" + def spec(self) -> Dfns: if self._spec is None: - from modflow_devtools.dfns import DfnSpec - - self._spec = DfnSpec.load(self.path) + self._spec = Dfns.load(self.path) return self._spec - def get_dfn_path(self, component: str) -> Path: - """Get the local file path for a DFN component.""" - # Look for both .dfn and .toml extensions + def get_path(self, component: str) -> Path: for ext in [".dfn", ".toml"]: p = self.path / f"{component}{ext}" if p.exists(): @@ -381,410 +57,172 @@ def get_dfn_path(self, component: str) -> Path: raise FileNotFoundError(f"Component '{component}' not found in {self.path}") +def _auto_sync() -> bool: + return os.environ.get("MODFLOW_DEVTOOLS_AUTO_SYNC", "").lower() in ("1", "true", "yes") + + class RemoteDfnRegistry(DfnRegistry): - """ - Registry for remote DFN files with Pooch-based caching. - - Handles remote registry discovery, caching, and DFN file fetching. 
- URLs are constructed dynamically from bootstrap metadata, or can be - overridden by providing explicit repo/dfn_path/registry_path values. - - Examples - -------- - >>> # Use bootstrap config - >>> registry = RemoteDfnRegistry(source="modflow6", ref="6.6.0") - >>> dfn = registry.get_dfn("gwf-chd") - - >>> # Override repo directly (useful for testing) - >>> registry = RemoteDfnRegistry( - ... source="modflow6", - ... ref="registry", - ... repo="wpbonelli/modflow6", - ... ) - """ + """Registry for DFN files associated with a GitHub repository release.""" - # Optional overrides (bypass bootstrap config when provided) - repo: str | None = Field( - default=None, - description="GitHub repository (owner/repo). Overrides bootstrap config.", - ) - dfn_path: str | None = Field( - default=None, - description="Path to DFN files in repo. Overrides bootstrap config.", - ) - registry_path: str | None = Field( - default=None, - description="Path to registry file in repo. Overrides bootstrap config.", + release_id: str = Field( + description="DFN source repository release ID (owner/name@tag)", ) - _registry_meta: DfnRegistryMeta | None = None - _source_config: SourceConfig | None = None - _pooch: pooch.Pooch | None = None - _files_dir: Path | None = None - - def model_post_init(self, __context) -> None: - """Initialize registry after model creation.""" - self._ensure_source_config() - - def _ensure_source_config(self) -> SourceConfig: - """Load and cache source configuration from bootstrap or overrides.""" - if self._source_config is None: - # If repo is provided, construct config from overrides - if self.repo is not None: - self._source_config = SourceConfig( - repo=self.repo, - dfn_path=self.dfn_path or "doc/mf6io/mf6ivar/dfn", - registry_path=self.registry_path or ".registry/dfns.toml", - refs=[self.ref], - ) - else: - # Load from bootstrap config - config = get_bootstrap_config() - if self.source not in config.sources: - raise ValueError( - f"Unknown source '{self.source}'. 
" - f"Available sources: {list(config.sources.keys())}" - ) - self._source_config = config.sources[self.source] - return self._source_config - - def _get_registry_cache_path(self) -> Path: - """Get path to cached registry file.""" - cache_dir = get_cache_dir("dfn") - return cache_dir / "registries" / self.source / self.ref / "dfns.toml" - - def _get_files_cache_dir(self) -> Path: - """Get directory for cached DFN files.""" - cache_dir = get_cache_dir("dfn") - return cache_dir / "files" / self.source / self.ref - - def _construct_raw_url(self, path: str) -> str: - """Construct GitHub raw content URL for a file.""" - source_config = self._ensure_source_config() - return f"https://raw.githubusercontent.com/{source_config.repo}/{self.ref}/{path}" - - def _fetch_registry(self, force: bool = False) -> DfnRegistryMeta: - """Fetch registry metadata from remote or cache.""" - cache_path = self._get_registry_cache_path() - - # Use cached registry if available and not forcing refresh - if cache_path.exists() and not force: - return DfnRegistryMeta.load(cache_path) - - # Fetch from remote - source_config = self._ensure_source_config() - registry_url = self._construct_raw_url(source_config.registry_path) - - import urllib.error - import urllib.request - - try: - with urllib.request.urlopen(registry_url, timeout=30) as response: - content = response.read() - except urllib.error.HTTPError as e: - if e.code == 404: - raise DfnRegistryNotFoundError( - f"Registry not found at {registry_url} for '{self.source}@{self.ref}'. " - f"The registry file may not exist for this ref." 
- ) from e - raise DfnRegistryDiscoveryError( - f"Failed to fetch registry from {registry_url}: {e}" - ) from e - except urllib.error.URLError as e: - raise DfnRegistryDiscoveryError( - f"Network error fetching registry from {registry_url}: {e}" - ) from e - - # Parse and cache - import tomli - - data = tomli.loads(content.decode("utf-8")) - - # Build registry meta from parsed data - files_data = data.pop("files", {}) - files = {} - for filename, file_info in files_data.items(): - if isinstance(file_info, dict): - files[filename] = DfnRegistryFile(**file_info) - elif isinstance(file_info, str): - files[filename] = DfnRegistryFile(hash=file_info) - - metadata = data.pop("metadata", {}) - registry_meta = DfnRegistryMeta( - schema_version=data.get("schema_version", "1.0"), - generated_at=data.get("generated_at"), - devtools_version=data.get("devtools_version"), - ref=metadata.get("ref") or data.get("ref") or self.ref, - files=files, - ) - - # Cache the registry - cache_path.parent.mkdir(parents=True, exist_ok=True) - registry_meta.save(cache_path) - - return registry_meta - - def _ensure_registry_meta(self, force: bool = False) -> DfnRegistryMeta: - """Ensure registry metadata is loaded.""" - if self._registry_meta is None or force: - self._registry_meta = self._fetch_registry(force=force) - return self._registry_meta - - def _setup_pooch(self) -> pooch.Pooch: - """Set up Pooch for DFN file fetching.""" - if self._pooch is not None: - return self._pooch - - import pooch - - registry_meta = self._ensure_registry_meta() - source_config = self._ensure_source_config() - - # Construct base URL for DFN files - base_url = self._construct_raw_url(source_config.dfn_path) + "/" - - # Build registry dict for Pooch (filename -> hash) - pooch_registry = {} - for filename, file_info in registry_meta.files.items(): - # Pooch expects hash without "sha256:" prefix for sha256 - hash_value = file_info.hash - if hash_value.startswith("sha256:"): - hash_value = hash_value[7:] - 
pooch_registry[filename] = f"sha256:{hash_value}" - - self._files_dir = self._get_files_cache_dir() - self._pooch = pooch.create( - path=self._files_dir, - base_url=base_url, - registry=pooch_registry, - ) - - return self._pooch - - def sync(self, force: bool = False) -> None: + _latest: str | None = PrivateAttr(default=None, init=False) + + def latest_tag(self) -> str: + repo, tag = self.release_id.split("@") + if tag != "latest": + return tag + if self._latest is None: + owner, name = repo.split("/") + with urllib.request.urlopen( + f"https://api.github.com/repos/{owner}/{name}/releases/latest" + ) as resp: + self._latest = json.loads(resp.read())["tag_name"] + return self._latest + + @staticmethod + def base_cache_path() -> Path: """ - Synchronize registry and optionally pre-fetch all DFN files. - - Parameters - ---------- - force : bool, optional - If True, re-fetch registry even if cached. Default is False. + Get the base DFN cache path. On Unix: $XDG_CACHE_HOME/modflow-devtools/dfns, + falling back to ~/.cache/. On Windows: %LOCALAPPDATA%/modflow-devtools/dfns. 
""" - self._ensure_registry_meta(force=force) - self._setup_pooch() - - @property - def registry_meta(self) -> DfnRegistryMeta: - """Get the registry metadata.""" - return self._ensure_registry_meta() - - @property - def spec(self) -> DfnSpec: - """Load and return the DFN specification from cached files.""" - if self._spec is None: - from modflow_devtools.dfns import DfnSpec - - # Ensure all files are fetched - self._fetch_all_files() - - # Load from cache directory - self._spec = DfnSpec.load(self._get_files_cache_dir()) - return self._spec - - def _fetch_all_files(self) -> None: - """Fetch all DFN files to cache.""" - p = self._setup_pooch() - registry_meta = self._ensure_registry_meta() - - for filename in registry_meta.files: - # Skip non-DFN files (like spec.toml) - if filename.endswith(".dfn") or filename.endswith(".toml"): - p.fetch(filename) - - def get_dfn_path(self, component: str) -> Path: - """Get the local cached file path for a DFN component.""" - p = self._setup_pooch() - registry_meta = self._ensure_registry_meta() - - # Look for both .dfn and .toml extensions - for ext in [".dfn", ".toml"]: - filename = f"{component}{ext}" - if filename in registry_meta.files: - return Path(p.fetch(filename)) - - raise FileNotFoundError( - f"Component '{component}' not found in registry for '{self.source}@{self.ref}'" - ) - - -# ============================================================================= -# Exceptions -# ============================================================================= - - -class DfnRegistryError(Exception): - """Base exception for DFN registry errors.""" - - pass - - -class DfnRegistryNotFoundError(DfnRegistryError): - """Registry file not found for the specified ref.""" + if system() == "Windows": + base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local")) + else: + base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) + return base / "modflow-devtools" / "dfns" + + @staticmethod + def 
user_config_path() -> Path: + """ + Path to the user overlay configuration file, in which users can override + and/or add to the configuration shipped with the package. - pass + On Unix: $XDG_CONFIG_HOME/modflow-devtools/dfns.toml (default ~/.config/). + On Windows: %APPDATA%/modflow-devtools/dfns.toml. + """ + if system() == "Windows": + base = Path(os.environ.get("APPDATA", Path.home() / "AppData" / "Roaming")) + else: + base = Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config")) + return base / "modflow-devtools" / "dfns.toml" + @classmethod + def from_ids(cls, *ids: str) -> "dict[str, RemoteDfnRegistry]": + """Create registries from one or more DFN source repository release IDs.""" + registries = {} -class DfnRegistryDiscoveryError(DfnRegistryError): - """Error during registry discovery.""" + for id in ids: + registry = cls(release_id=id) - pass + if _auto_sync() and ( + not registry.cache_path.exists() or not any(registry.cache_path.iterdir()) + ): + registry.sync() + registries[id] = registry -# ============================================================================= -# Sync Functions -# ============================================================================= + return registries + @classmethod + def load(cls, path: str | PathLike) -> "dict[str, RemoteDfnRegistry]": + """Load registries from a TOML file of DFN source repository release IDs.""" + path = Path(path) + if not path.exists(): + return {} -def sync_dfns( - source: str = "modflow6", - ref: str | None = None, - force: bool = False, -) -> list[RemoteDfnRegistry]: - """ - Synchronize DFN registries from remote sources. - - Parameters - ---------- - source : str, optional - Source repository name. Default is "modflow6". - ref : str, optional - Specific git ref to sync. If not provided, syncs all configured refs. - force : bool, optional - If True, re-fetch registries even if cached. Default is False. - - Returns - ------- - list[RemoteDfnRegistry] - List of synced registries. 
- - Examples - -------- - >>> # Sync all configured refs - >>> registries = sync_dfns() - - >>> # Sync specific ref - >>> registries = sync_dfns(ref="6.6.0") - - >>> # Force re-sync - >>> registries = sync_dfns(force=True) - """ - config = get_bootstrap_config() + with path.open("rb") as f: + data = tomli.load(f) - if source not in config.sources: - raise ValueError( - f"Unknown source '{source}'. Available sources: {list(config.sources.keys())}" - ) + registries = {} + for id in data.get("releases", []): + registry = RemoteDfnRegistry(release_id=id) + registries[id] = registry - source_config = config.sources[source] + return registries - # Determine which refs to sync - refs_to_sync = [ref] if ref else source_config.refs + @classmethod + def load_default(cls) -> "dict[str, RemoteDfnRegistry]": + """ + Load registries from remote DFN source repository configuration bundled + with the package, and from a user overlay configuration file if present. + """ + base = RemoteDfnRegistry.load(Path(__file__).parent / "dfns.toml") + if not RemoteDfnRegistry.user_config_path().exists(): + return base - registries = [] - for r in refs_to_sync: - registry = RemoteDfnRegistry(source=source, ref=r) - registry.sync(force=force) - registries.append(registry) + user = RemoteDfnRegistry.load(RemoteDfnRegistry.user_config_path()) + return base | user - return registries + @property + def cache_path(self) -> Path: + repo, _ = self.release_id.split("@") + return RemoteDfnRegistry.base_cache_path() / repo / self.latest_tag() + @property + def spec(self) -> Dfns: + if self._spec is None: + if not self.cache_path.exists() or not any(self.cache_path.iterdir()): + self.sync() + self._spec = Dfns.load(self.cache_path) + return self._spec -def get_sync_status(source: str = "modflow6") -> dict[str, bool]: - """ - Check which refs have cached registries. 
+ def sync(self, force: bool = False) -> None: + """Download and extract DFN files for this release to the local cache.""" - Parameters - ---------- - source : str, optional - Source repository name. Default is "modflow6". + if not force and self.cache_path.exists() and any(self.cache_path.iterdir()): + return - Returns - ------- - dict[str, bool] - Map of ref names to whether they have a cached registry. - """ - config = get_bootstrap_config() + asset_name = "dfns.zip" + repo, _ = self.release_id.split("@") + url = f"https://github.com/{repo}/releases/download/{self.latest_tag()}/{asset_name}" - if source not in config.sources: - raise ValueError( - f"Unknown source '{source}'. Available sources: {list(config.sources.keys())}" - ) + self.cache_path.mkdir(parents=True, exist_ok=True) - source_config = config.sources[source] - cache_dir = get_cache_dir("dfn") + with tempfile.TemporaryDirectory() as tmpdir: + pooch.retrieve( + url=url, + known_hash=None, + path=tmpdir, + fname=asset_name, + processor=pooch.Unzip(extract_dir=str(self.cache_path)), + ) - status = {} - for ref in source_config.refs: - registry_path = cache_dir / "registries" / source / ref / "dfns.toml" - status[ref] = registry_path.exists() + def cached_tag(self) -> str | None: + """ + Return the cached tag for this release without making a network request. - return status + For exact tags, checks the specific cache directory. For ``@latest``, + scans the repo's cache directory and returns the most recently modified + cached tag, or None if nothing is cached. 
+ """ + repo, tag = self.release_id.split("@") + if tag != "latest": + return tag if self.cache_path.exists() and any(self.cache_path.iterdir()) else None + repo_cache = RemoteDfnRegistry.base_cache_path() / repo + if not repo_cache.is_dir(): + return None + tags = [p for p in repo_cache.iterdir() if p.is_dir() and any(p.iterdir())] + if not tags: + return None + return max(tags, key=lambda p: p.stat().st_mtime).name + + def get_path(self, component: str) -> Path: + if not self.cache_path.exists() or not any(self.cache_path.iterdir()): + self.sync() + for ext in [".dfn", ".toml"]: + p = self.cache_path / f"{component}{ext}" + if p.exists(): + return p + raise FileNotFoundError(f"Component '{component}' not found for '{self.release_id}'") -def get_registry( - source: str = "modflow6", - ref: str = "develop", - auto_sync: bool = False, - path: str | PathLike | None = None, -) -> DfnRegistry: +def is_cached(release_id: str) -> bool: """ - Get a registry for the specified source and ref. - - Parameters - ---------- - source : str, optional - Source repository name. Default is "modflow6". - ref : str, optional - Git ref (branch, tag, or commit hash). Default is "develop". - auto_sync : bool, optional - If True and registry is not cached, automatically sync. Default is False - (opt-in while experimental). Can be enabled via MODFLOW_DEVTOOLS_AUTO_SYNC - environment variable (set to "1", "true", or "yes"). - Ignored when path is provided. - path : str or PathLike, optional - Path to a local directory containing DFN files. If provided, returns - a LocalDfnRegistry for autodiscovery instead of RemoteDfnRegistry. - When using a local path, source and ref are used for metadata only. - - Returns - ------- - DfnRegistry - Registry for the specified source and ref. Returns LocalDfnRegistry - if path is provided, otherwise RemoteDfnRegistry. 
- - Examples - -------- - >>> # Remote registry (existing behavior) - >>> registry = get_registry(ref="6.6.0") - >>> dfn = registry.get_dfn("gwf-chd") - - >>> # Local registry with autodiscovery (NEW) - >>> registry = get_registry(path="/path/to/mf6/doc/mf6io/mf6ivar/dfn") - >>> dfn = registry.get_dfn("gwf-chd") + Check whether a remote DFN source repository's release is in the cache. """ - # If path is provided, return LocalDfnRegistry for autodiscovery - if path is not None: - return LocalDfnRegistry(path=Path(path), source=source, ref=ref) - - # Check for auto-sync opt-in (experimental - off by default) - if os.environ.get("MODFLOW_DEVTOOLS_AUTO_SYNC", "").lower() in ("1", "true", "yes"): - auto_sync = True - - registry = RemoteDfnRegistry(source=source, ref=ref) - - # Check if registry is cached - cache_path = registry._get_registry_cache_path() - if not cache_path.exists() and auto_sync: - registry.sync() - - return registry + registry = RemoteDfnRegistry(release_id=release_id) + cache_dir = registry.cache_path + return any(cache_dir.iterdir()) if cache_dir.is_dir() else False diff --git a/modflow_devtools/dfns/schema.py b/modflow_devtools/dfns/schema.py new file mode 100644 index 00000000..56627227 --- /dev/null +++ b/modflow_devtools/dfns/schema.py @@ -0,0 +1,913 @@ +import ast +import re +from collections.abc import Mapping +from os import PathLike +from pathlib import Path +from typing import Annotated, Any, Literal + +import tomli +from pydantic import ( + BaseModel, + SerializationInfo, + computed_field, + model_serializer, + model_validator, +) +from pydantic import ( + Field as PydanticField, +) + + +class FieldBase(BaseModel): + name: str + longname: str | None = None + description: str | None = None + optional: bool = False + default: Any | None = None + developmode: bool = False + netcdf: bool = False + tagged: bool = True + + @model_serializer(mode="wrap") + def _serialize(self, handler: Any, info: SerializationInfo) -> dict[str, Any]: + data = 
handler(self) + if info.context and info.context.get("strip_names"): + data.pop("name", None) + # `type` has a frozen default so exclude_defaults=True drops it; restore it. + if "type" not in data and "type" in type(self).model_fields: + data = {"type": getattr(self, "type"), **data} + return data + + def dump(self, *, strip_names: bool = True, **kwargs) -> dict[str, Any]: + if strip_names: + kwargs["context"] = {**(kwargs.get("context") or {}), "strip_names": True} + return self.model_dump(**kwargs) + + def dump_json(self, *, strip_names: bool = True, **kwargs) -> str: + if strip_names: + kwargs["context"] = {**(kwargs.get("context") or {}), "strip_names": True} + return self.model_dump_json(**kwargs) + + @classmethod + def from_dict(cls, d: dict, name: str | None = None, strict: bool = False) -> "FieldBase": + if name is not None: + d = {"name": name, **d} + type_name = d.get("type") + type_map: dict[str | None, type[FieldBase]] = { + "keyword": Keyword, + "string": String, + "integer": Integer, + "double": Double, + "file": File, + "array": Array, + "record": Record, + "union": Union, + "list": List, + } + type_ = type_map.get(type_name) + if type_ is None: + raise ValueError(f"Unknown or missing field type: {type_name!r}") + if strict: + extra = set(d.keys()) - set(type_.model_fields.keys()) + if extra: + raise ValueError(f"Unrecognized keys in field data: {extra}") + return type_.model_validate(d) + + +class Keyword(FieldBase): + type: Literal["keyword"] = PydanticField(default="keyword", frozen=True) + + +class String(FieldBase): + type: Literal["string"] = PydanticField(default="string", frozen=True) + valid: list[str] | None = None + case_sensitive: bool = False + time_series: bool = False + pk: bool = False + fk: str | None = None + fk_ref: str | None = None + + +class Integer(FieldBase): + type: Literal["integer"] = PydanticField(default="integer", frozen=True) + valid: list[int] | None = None + time_series: bool = False + pk: bool = False + fk: str | 
None = None + fk_ref: str | None = None + + +class Double(FieldBase): + type: Literal["double"] = PydanticField(default="double", frozen=True) + time_series: bool = False + + +class File(FieldBase): + type: Literal["file"] = PydanticField(default="file", frozen=True) + mode: Literal["filein", "fileout"] + + +Scalar = Annotated[ + Keyword | String | Integer | Double | File, + PydanticField(discriminator="type"), +] + + +class Array(FieldBase): + type: Literal["array"] = PydanticField(default="array", frozen=True) + dtype: Literal["keyword", "integer", "double", "string"] + shape: list[str] = [] + time_series: bool = False + repeat: str | None = None + + +class Record(FieldBase): + type: Literal["record"] = PydanticField(default="record", frozen=True) + fields: "dict[str, Scalar | Array | Record | Union]" = PydanticField(default_factory=dict) + + @property + def children(self) -> "dict[str, Field]": + return self.fields # type: ignore[return-value] + + +class Union(FieldBase): + type: Literal["union"] = PydanticField(default="union", frozen=True) + arms: "dict[str, Scalar | Array | Record]" = PydanticField(default_factory=dict) + + @property + def children(self) -> "dict[str, Field]": + return self.arms # type: ignore[return-value] + + +class List(FieldBase): + type: Literal["list"] = PydanticField(default="list", frozen=True) + item: "Record | Union" + shape: list[str] = [] + + @model_serializer(mode="wrap") + def _serialize(self, handler: Any, info: SerializationInfo) -> dict[str, Any]: + data = handler(self) + if info.context and info.context.get("strip_names"): + data.pop("name", None) + if "type" not in data: + data = {"type": "list", **data} + # item is stored as an attribute, not a dict key, so its name is never + # implicit — always re-inject it regardless of strip_names. 
+ if "item" in data and isinstance(data["item"], dict): + data["item"] = {"name": self.item.name, **data["item"]} + return data + + @model_validator(mode="after") + def _check_shape_length(self) -> "List": + if len(self.shape) > 1: + raise ValueError( + f"List {self.name!r}: shape must have at most one element " + f"(lists are 1-dimensional), got {self.shape!r}" + ) + return self + + @property + def children(self) -> "dict[str, Field]": + return {"item": self.item} # type: ignore[return-value] + + +Field = Annotated[ + Keyword | String | Integer | Double | File | Array | Record | Union | List, + PydanticField(discriminator="type"), +] + + +Record.model_rebuild() +Union.model_rebuild() +List.model_rebuild() + + +def _names_in_expr(expr: str) -> set[str]: + """Return Name identifiers from expr, excluding those inside sum() calls.""" + try: + tree = ast.parse(expr, mode="eval") + except SyntaxError as e: + raise ValueError(f"Invalid expression {expr!r}: {e}") from e + + sum_interior_ids: set[int] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "sum": + for child in ast.walk(node): + if child is not node: + sum_interior_ids.add(id(child)) + + return { + node.id + for node in ast.walk(tree) + if isinstance(node, ast.Name) and id(node) not in sum_interior_ids + } + + +def _validate_sum_call(call: ast.Call, component: "ComponentBase", expr: str) -> None: + """Validate a sum(list.col) or sum(block.list.col) call in a derived_dims expression.""" + if len(call.args) != 1: + raise ValueError(f"sum() in derived_dims must have exactly one argument in {expr!r}") + arg = call.args[0] + if not isinstance(arg, ast.Attribute): + raise ValueError(f"sum() argument must be an attribute expression in {expr!r}") + + col_name = arg.attr + if isinstance(arg.value, ast.Name): + list_name = arg.value.id + block_qualifier: str | None = None + elif isinstance(arg.value, ast.Attribute) and isinstance(arg.value.value, 
ast.Name): + block_qualifier = arg.value.value.id + list_name = arg.value.attr + else: + raise ValueError(f"Unrecognised sum() form in {expr!r}") + + found_block: str | None = None + found_list: List | None = None + for block_name, block in (component.blocks or {}).items(): + f = block.fields.get(list_name) + if isinstance(f, List): + found_block = block_name + found_list = f + break + + if found_list is None: + raise ValueError(f"sum() references unknown list field {list_name!r} in {expr!r}") + if block_qualifier is not None and block_qualifier != found_block: + raise ValueError( + f"sum() block qualifier {block_qualifier!r} does not match " + f"actual block {found_block!r} in {expr!r}" + ) + + item = found_list.item + item_fields: dict = item.fields if isinstance(item, Record) else item.arms + col_field = item_fields.get(col_name) + if col_field is None: + raise ValueError( + f"sum() column {col_name!r} not found in {list_name!r} item fields in {expr!r}" + ) + if not isinstance(col_field, Integer): + raise ValueError( + f"sum() column {col_name!r} is {type(col_field).__name__}, must be Integer in {expr!r}" + ) + + +class Dim(BaseModel): + """A named dimension, either backed by a field or derived from an expression.""" + + field: str | None = None # name of the field that provides this dimension + expr: str | None = None # derivation expression, e.g. 
"nlay * nrow * ncol" + scope: Literal["component", "model", "simulation"] = "component" + + @model_validator(mode="after") + def _check_exclusive(self) -> "Dim": + if (self.field is None) == (self.expr is None): + raise ValueError("Dim must have exactly one of 'field' or 'expr'") + return self + + @property + def is_derived(self) -> bool: + return self.expr is not None + + +def _parents_as_set(parent: "str | list[str] | None") -> set[str]: + if parent is None: + return set() + return {parent} if isinstance(parent, str) else set(parent) + + +def _can_share_model( + req_parent: "str | list[str] | None", + dim_parent: "str | list[str] | None", +) -> bool: + """ + Return True if the requesting component (req_parent) can be in the same + model as the dim-defining component (dim_parent). + + The dim-provider's parent identifies which model it belongs to (a concrete + ``-nam`` name, e.g. ``"gwf-nam"``). The requesting component's parent + determines whether it can be in that model: an explicit match, or a generic + type like ``"model"`` or ``"package"`` meaning any model. + """ + dim_parents = _parents_as_set(dim_parent) + model_contexts = {p for p in dim_parents if p.endswith("-nam") and p != "sim-nam"} + if not model_contexts: + return False + + req_parents = _parents_as_set(req_parent) + for rp in req_parents: + if rp in ("model", "package", "*"): + return True + if rp in model_contexts: + return True + return False + + +def _resolve_derived_dims(component: "ComponentBase", known_dims: set[str]) -> list[str]: + """ + Validate derived dims expressions and return their names in topological order. + Raises ValueError on cycles or unresolvable operands. + + ``known_dims`` is the full set of dim names visible to this component; + pass ``spec.dims(name)`` or an explicit set in tests. 
+ """ + derived = {n: d for n, d in (component.dims or {}).items() if d.is_derived} + if not derived: + return [] + + derived_names = set(derived.keys()) + deps: dict[str, set[str]] = {} + + for name, dim_def in derived.items(): + expr = dim_def.expr + assert expr is not None + try: + tree = ast.parse(expr, mode="eval") + except SyntaxError as e: + raise ValueError(f"Invalid dims {name!r}: {expr!r}: {e}") from e + + for node in ast.walk(tree): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == "sum" + ): + _validate_sum_call(node, component, expr) + + operands = _names_in_expr(expr) + for op in operands: + if op not in known_dims and op not in derived_names: + raise ValueError(f"dims {name!r} operand {op!r} is not a known dimension") + deps[name] = operands & derived_names + + in_degree = dict.fromkeys(derived_names, 0) + dependents: dict[str, set[str]] = {n: set() for n in derived_names} + for name, dep_set in deps.items(): + for dep in dep_set: + in_degree[name] += 1 + dependents[dep].add(name) + + queue = [n for n, d in in_degree.items() if d == 0] + order: list[str] = [] + while queue: + n = queue.pop(0) + order.append(n) + for dependent in dependents[n]: + in_degree[dependent] -= 1 + if in_degree[dependent] == 0: + queue.append(dependent) + + if len(order) != len(derived_names): + cyclic = {n for n, d in in_degree.items() if d > 0} + raise ValueError(f"Cycle in dims: {cyclic}") + + return order + + +class Block(BaseModel): + name: str + fields: dict[str, Field] + repeats: bool = False + + @model_serializer(mode="wrap") + def _serialize(self, handler: Any) -> dict[str, Any]: + data = handler(self) + data.pop("name", None) # name is the dict key in ComponentBase.blocks + return data + + def dump(self, *, strip_names: bool = True, **kwargs) -> dict[str, Any]: + if strip_names: + kwargs["context"] = {**(kwargs.get("context") or {}), "strip_names": True} + return self.model_dump(**kwargs) + + def dump_json(self, *, 
strip_names: bool = True, **kwargs) -> str: + if strip_names: + kwargs["context"] = {**(kwargs.get("context") or {}), "strip_names": True} + return self.model_dump_json(**kwargs) + + @property + def optional(self) -> bool: + return all(f.optional for f in self.fields.values()) + + +Blocks = Mapping[str, Block] + + +class ComponentBase(BaseModel): + schema_version: str | None = None + name: str + parent: str | list[str] | None = None + dims: dict[str, Dim] | None = None + blocks: dict[str, Block] | None = None + + @model_serializer(mode="wrap") + def _serialize(self, handler: Any) -> dict[str, Any]: + data = handler(self) + if "type" not in data: + data = {"type": getattr(self, "type"), **data} + return data + + +class Simulation(ComponentBase): + type: Literal["simulation"] = "simulation" + + +class Model(ComponentBase): + type: Literal["model"] = "model" + solution: Literal["ims", "ems", "sln-ims", "sln-ems"] | None = None + + +class Package(ComponentBase): + type: Literal["package"] = "package" + multi: bool = False # whether multiple instances per parent are allowed + subtype: Literal["solution", "exchange", "stress", "advanced", "utility"] | None = None + + +Component = Annotated[ + Simulation | Model | Package, + PydanticField(discriminator="type"), +] + +_DIM_RE = re.compile(r"^[A-Za-z_]\w*$") +_LOOKUP_RE = re.compile(r"^(?:([\w-]+)\.)?(\w+)\.(\w+)\((\w+)\)$") +_BOUND_RE = re.compile(r"^[<>]=?") +_ARITH_RE = re.compile(r"^([A-Za-z_]\w*)\s*[+-]\s*\d+$") + + +def _find_list_in_block(component: "ComponentBase", block_name: str) -> "List | None": + """Return the first List field in the named block, or None.""" + block = (component.blocks or {}).get(block_name) + if block is None: + return None + for f in block.fields.values(): + if isinstance(f, List): + return f + return None + + +def _validate_shape_element( + element: str, + array_field: "Array", + component: "ComponentBase", + enclosing_record: "Record | None", + known_dims: set[str], + spec: "Dfns | None" = 
None, +) -> None: + """ + Validate one element of an Array.shape list. + + Valid forms: + - Dim reference ``^[A-Za-z_]\\w*$`` + Must resolve in the 3-level scope: explicit → derived → grid dims. + - Row-level column lookup ``^(\\w+)\\.(\\w+)\\((\\w+)\\)$`` + Structural checks (see plan §Shape element parsing). + + Raises ValueError on any violation. + """ + # Advisory bound annotation prefix (<, >, <=, >=): strip it and validate the core identifier. + if bound_m := _BOUND_RE.match(element): + core = element[bound_m.end() :] + if not _DIM_RE.fullmatch(core): + raise ValueError( + f"Array {array_field.name!r} has invalid shape element {element!r}: " + f"must be a plain identifier after the bound operator" + ) + if core in known_dims: + return + if enclosing_record is not None: + sibling = enclosing_record.fields.get(core) + if isinstance(sibling, Integer): + return + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{core!r} does not resolve to a known dim (explicit, derived, or grid)" + ) + + if _DIM_RE.fullmatch(element): + if element in known_dims: + return + # Per-row varying shape: a sibling Integer with dimension="record" supplies + # an inline count on the same line. 
+ if enclosing_record is not None: + sibling = enclosing_record.fields.get(element) + if isinstance(sibling, Integer): + return + raise ValueError( + f"Array {array_field.name!r} shape element {element!r} " + f"does not resolve to a known dim " + f"(explicit, derived, or grid)" + ) + + if m := _LOOKUP_RE.fullmatch(element): + component_ref, block_name, col_name, fk_field_name = m.groups() + + # Check 5: array must be a subfield of a record, not a top-level block field + if enclosing_record is None: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r} is a " + f"row-level lookup but the array is not inside a record" + ) + + # Resolve target component (cross-component reference or local) + if component_ref is not None: + if spec is None: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"cross-component reference requires a Dfns spec" + ) + target = spec.components.get(component_ref) + if target is None: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"component {component_ref!r} not found in spec" + ) + else: + target = component # type: ignore + + # Check 1: block_name must identify a list block in the target component + list_field = _find_list_in_block(target, block_name) # type: ignore + if list_field is None: + where = f"component {component_ref!r}" if component_ref else "this component" + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{block_name!r} is not a list block in {where}" + ) + + # Check 2: col_name must be an Integer field in the list's item record + item = list_field.item + item_fields: dict = item.fields if isinstance(item, Record) else item.arms + col_field = item_fields.get(col_name) + if col_field is None: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{col_name!r} is not a field in {list_field.name!r} item" + ) + if not isinstance(col_field, Integer): + raise ValueError( + f"Array 
{array_field.name!r} shape element {element!r}: " + f"{col_name!r} is {type(col_field).__name__}, must be Integer" + ) + + # Check 3: fk_field_name must be a sibling field in the enclosing record + fk_field = enclosing_record.fields.get(fk_field_name) + if fk_field is None: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{fk_field_name!r} is not a sibling field in the enclosing record" + ) + + # Check 4: fk_field.fk must be set and its block portion must match block_name + fk = getattr(fk_field, "fk", None) + if fk is None: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{fk_field_name!r}.fk is not set" + ) + fk_block = fk.split(".")[0] if "." in fk else fk + if fk_block != block_name: + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{fk_field_name!r}.fk = {fk!r} does not reference block {block_name!r}" + ) + return + + if m := _ARITH_RE.fullmatch(element): + # Arithmetic offset: `dim [+-] integer` — validate the dim part only. + dim_name = m.group(1) + if dim_name in known_dims: + return + if enclosing_record is not None: + sibling = enclosing_record.fields.get(dim_name) + if isinstance(sibling, Integer): + return + raise ValueError( + f"Array {array_field.name!r} shape element {element!r}: " + f"{dim_name!r} does not resolve to a known dim " + f"(explicit, derived, or grid)" + ) + + raise ValueError( + f"Array {array_field.name!r} has invalid shape element {element!r}: " + f"must be a dim reference (^[A-Za-z_]\\w*$), an arithmetic offset " + f"(dim [+-] integer), or a row-level lookup (block.column(fk_field))" + ) + + +def _validate_list_shape_element( + element: str, + list_field: "List", + known_dims: set[str], +) -> None: + """ + Validate one element of a List.shape. 
+ + Valid forms are a strict subset of array shape forms — no row-level lookup + and no intra-record sibling reference, since lists are not inside records: + - Plain dim reference + - Bound-annotated dim reference (<, >, <=, >=) + - Arithmetic offset (dim [+-] integer) + """ + if bound_m := _BOUND_RE.match(element): + core = element[bound_m.end() :] + if not _DIM_RE.fullmatch(core): + raise ValueError( + f"List {list_field.name!r} has invalid shape element {element!r}: " + f"must be a plain identifier after the bound operator" + ) + if core not in known_dims: + raise ValueError( + f"List {list_field.name!r} shape element {element!r}: " + f"{core!r} does not resolve to a known dim" + ) + return + + if _DIM_RE.fullmatch(element): + if element not in known_dims: + raise ValueError( + f"List {list_field.name!r} shape element {element!r} " + f"does not resolve to a known dim" + ) + return + + if m := _ARITH_RE.fullmatch(element): + dim_name = m.group(1) + if dim_name not in known_dims: + raise ValueError( + f"List {list_field.name!r} shape element {element!r}: " + f"{dim_name!r} does not resolve to a known dim" + ) + return + + raise ValueError( + f"List {list_field.name!r} has invalid shape element {element!r}: " + f"must be a dim reference (^[A-Za-z_]\\w*$), an arithmetic offset " + f"(dim [+-] integer), or a bound-annotated dim (/>=dim)" + ) + + +def _validate_fk_fields(component: "ComponentBase", spec: "Dfns") -> None: + """ + For every Integer/String field with fk or fk_ref set, validate structural + consistency: + - fk must reference a list block in this component, and that list's item + must have at least one pk=True field. + - fk_ref must name a component that exists in the spec. + """ + if not component.blocks: + return + + def _check_fields(fields: dict) -> None: + for field in fields.values(): + fk: str | None = getattr(field, "fk", None) + fk_ref: str | None = getattr(field, "fk_ref", None) + + if fk is not None: + block_name = fk.split(".")[0] if "." 
in fk else fk + list_field = _find_list_in_block(component, block_name) + if list_field is None: + raise ValueError( + f"Field {field.name!r} fk={fk!r}: " + f"{block_name!r} is not a list block in this component" + ) + item = list_field.item + item_fields: dict = item.fields if isinstance(item, Record) else item.arms + has_pk = any(getattr(f, "pk", False) for f in item_fields.values()) + if not has_pk: + raise ValueError( + f"Field {field.name!r} fk={fk!r}: " + f"list {list_field.name!r} item has no pk=True field" + ) + + if fk_ref is not None and fk_ref not in spec.components: + raise ValueError( + f"Field {field.name!r} fk_ref={fk_ref!r}: " + f"component {fk_ref!r} not found in spec" + ) + + if isinstance(field, Record): + _check_fields(field.fields) + elif isinstance(field, Union): + _check_fields(field.arms) + elif isinstance(field, List): + item = field.item + if isinstance(item, Record): + _check_fields(item.fields) + + for block in component.blocks.values(): + _check_fields(block.fields) + + +def _validate_array_shapes( + component: "ComponentBase", + component_name: str, + spec: "Dfns", +) -> None: + """ + Validate all Array.shape elements in a component. + + Arrays are found at three nesting levels: + - Top-level block fields (no enclosing record) + - Fields within a top-level Record (enclosing_record = the Record) + - Fields within a List item Record (enclosing_record = the item Record) + """ + if not component.blocks: + return + + known_dims = spec.dims(component_name) + + def _check_list(lst: "List") -> None: + for elem in lst.shape: + _validate_list_shape_element(elem, lst, known_dims) + + def _check_array(arr: "Array", enclosing: "Record | None") -> None: + if not arr.shape: + # Self-sizing (shape=[]) is valid at the top level and as the rightmost + # subfield of a record. The only invalid case is non-rightmost in a record: + # subsequent fields on the same line would be unreadable. 
+ if enclosing is not None: + fields_list = list(enclosing.fields.keys()) + if not fields_list or fields_list[-1] != arr.name: + raise ValueError( + f"Array {arr.name!r}: only the rightmost field in a record may " + f"have an undeclared shape (self-sizing)" + ) + return # self-sizing: nothing to validate + for elem in arr.shape: + _validate_shape_element(elem, arr, component, enclosing, known_dims, spec) + + for block in component.blocks.values(): + for field in block.fields.values(): + if isinstance(field, Array): + _check_array(field, None) + + elif isinstance(field, Record): + for subfield in field.fields.values(): + if isinstance(subfield, Array): + _check_array(subfield, field) + + elif isinstance(field, List): + if field.shape: + _check_list(field) + item = field.item + if isinstance(item, Record): + for subfield in item.fields.values(): + if isinstance(subfield, Array): + _check_array(subfield, item) + + +def _inject_field_names(fields: dict) -> None: + """Recursively inject name from dict key into field dicts.""" + for field_name, field in fields.items(): + field.setdefault("name", field_name) + _inject_field_names(field.get("fields") or {}) # Record.fields + _inject_field_names(field.get("arms") or {}) # Union.arms + item = field.get("item") + if isinstance(item, dict): + # List.item.name is re-injected during serialization; recurse into its children. 
+ _inject_field_names(item.get("fields") or {}) + _inject_field_names(item.get("arms") or {}) + + +def _inject_names(comp_data: dict) -> None: + """Inject block and field names from dict keys before Pydantic validation.""" + for block_name, block in (comp_data.get("blocks") or {}).items(): + block.setdefault("name", block_name) + _inject_field_names(block.get("fields") or {}) + + +class Dfns(BaseModel): + """A set of component definitions.""" + + components: dict[str, Component] = PydanticField(default_factory=dict) + + @computed_field # type: ignore[prop-decorator] + @property + def schema_version(self) -> str: + for c in self.components.values(): + if c.schema_version is not None: + return c.schema_version + return "2" + + @property + def root(self) -> "Simulation | None": + """The root (simulation) component, or None if not present.""" + for c in self.components.values(): + if isinstance(c, Simulation): + return c + return None + + def children(self, name: str) -> "dict[str, Component]": + """Components whose parent matches ``name``.""" + return {n: c for n, c in self.components.items() if c.parent == name} + + def local_dims(self, component_name: str) -> set[str]: + """Dim names declared in this component's dims section.""" + return set((self.components[component_name].dims or {}).keys()) + + def inherited_dims(self, component_name: str) -> set[str]: + """ + Dim names visible to ``component_name`` from other components. + + - ``"simulation"`` scope: always visible. + - ``"model"`` scope: visible when the requesting component can share a + model with the dim-defining component, determined purely from parent + attributes (no hardcoded model-type strings). + - ``"component"`` scope: visible when the dim-defining component is + explicitly listed as a parent of the requesting component (subpackage). 
+ """ + inherited: set[str] = set() + component = self.components[component_name] + req_parent = component.parent + for cname, c in self.components.items(): + if cname == component_name: + continue + for dim_name, dim in (c.dims or {}).items(): + match dim.scope: + case "simulation": + inherited.add(dim_name) + case "model": + if _can_share_model(req_parent, c.parent): + inherited.add(dim_name) + case "component": + if cname in _parents_as_set(req_parent): + inherited.add(dim_name) + return inherited + + def dims(self, component_name: str) -> set[str]: + """ + Return all dim names visible to ``component_name`` for shape resolution. + + This is the union of the component's own declared dims (field-backed and + derived) and any dims inherited from other components via scoping rules. + """ + return self.local_dims(component_name) | self.inherited_dims(component_name) + + @model_validator(mode="after") + def _validate_schema_version(self) -> "Dfns": + versions = { + c.schema_version for c in self.components.values() if c.schema_version is not None + } + if len(versions) > 1: + raise ValueError( + f"All components must share the same schema_version; " + f"found: {sorted(str(v) for v in versions)}" + ) + return self + + @model_validator(mode="after") + def _validate_relations(self) -> "Dfns": + for name, component in self.components.items(): + if component.dims and any(d.is_derived for d in component.dims.values()): + _resolve_derived_dims(component, self.dims(name)) + for name, component in self.components.items(): + _validate_fk_fields(component, self) + for name, component in self.components.items(): + _validate_array_shapes(component, name, self) + return self + + @classmethod + def load(cls, path: str | PathLike) -> "Dfns": + """Load a directory of definition files.""" + import json + + import yaml + + from modflow_devtools.dfn import schema as v1 + from modflow_devtools.dfns.mapper import map as map_v2 + + dfns: dict = {} + path = Path(path).expanduser().resolve() 
+ _EXCLUDE = {"common", "flopy"} + + dfn_paths = {p.stem: p for p in path.glob("*.dfn") if p.stem not in _EXCLUDE} + toml_paths = {p.stem: p for p in path.glob("*.toml") if p.stem not in _EXCLUDE} + yaml_paths = { + p.stem: p + for ext in ("*.yaml", "*.yml") + for p in path.glob(ext) + if p.stem not in _EXCLUDE + } + json_paths = {p.stem: p for p in path.glob("*.json") if p.stem not in _EXCLUDE} + + if dfn_paths: + dfns = v1.resolve_parents(v1.load_all(path)) + dfns = {n: map_v2(d) for n, d in dfns.items()} + elif toml_paths: + for toml_path in toml_paths.values(): + with toml_path.open("rb") as f: + dfn = tomli.load(f) + _inject_names(dfn) + dfns[dfn["name"]] = dfn + elif yaml_paths: + for yaml_path in yaml_paths.values(): + with yaml_path.open() as f: + dfn = yaml.safe_load(f) + _inject_names(dfn) + dfns[dfn["name"]] = dfn + elif json_paths: + for json_path in json_paths.values(): + with json_path.open() as f: + dfn = json.load(f) + _inject_names(dfn) + dfns[dfn["name"]] = dfn + + return cls(components=dfns) diff --git a/modflow_devtools/dfns/schema/block.py b/modflow_devtools/dfns/schema/block.py deleted file mode 100644 index b545a311..00000000 --- a/modflow_devtools/dfns/schema/block.py +++ /dev/null @@ -1,20 +0,0 @@ -from collections.abc import Mapping - -from modflow_devtools.dfns.schema.field import Fields - -Block = Fields -Blocks = Mapping[str, Block] - - -def block_sort_key(item) -> int: - k, _ = item - if k == "options": - return 0 - elif k == "dimensions": - return 1 - elif k == "griddata": - return 2 - elif "period" in k: - return 4 - else: - return 3 diff --git a/modflow_devtools/dfns/schema/field.py b/modflow_devtools/dfns/schema/field.py deleted file mode 100644 index 985df211..00000000 --- a/modflow_devtools/dfns/schema/field.py +++ /dev/null @@ -1,22 +0,0 @@ -from collections.abc import Mapping -from dataclasses import dataclass -from typing import Any - -Fields = Mapping[str, "Field"] - - -@dataclass(kw_only=True) -class Field: - name: str - 
type: str | None = None - block: str | None = None - default: Any | None = None - longname: str | None = None - description: str | None = None - children: Fields | None = None - optional: bool = False - developmode: bool = False - shape: str | None = None - valid: tuple[str, ...] | None = None - netcdf: bool = False - tagged: bool = False diff --git a/modflow_devtools/dfns/schema/v1.py b/modflow_devtools/dfns/schema/v1.py deleted file mode 100644 index 5722771c..00000000 --- a/modflow_devtools/dfns/schema/v1.py +++ /dev/null @@ -1,60 +0,0 @@ -from dataclasses import dataclass -from typing import Literal - -from modflow_devtools.dfns.schema.field import Field - -FieldType = Literal[ - "keyword", - "integer", - "double precision", - "string", - "record", - "recarray", - "keystring", -] - -SCALAR_TYPES = ("keyword", "integer", "double precision", "string") - - -Reader = Literal[ - "urword", - "u1ddbl", - "u2ddbl", - "readarray", -] - - -@dataclass(kw_only=True) -class FieldV1(Field): - valid: tuple[str, ...] | None = None - reader: Reader = "urword" - tagged: bool = False - in_record: bool = False - layered: bool | None = None - preserve_case: bool = False - numeric_index: bool = False - deprecated: bool = False - removed: bool = False - mf6internal: str | None = None - block_variable: bool = False - just_data: bool = False - time_series: bool = False - - @classmethod - def from_dict(cls, d: dict, strict: bool = False) -> "FieldV1": - """ - Create a FieldV1 instance from a dictionary. - - Parameters - ---------- - d : dict - Dictionary containing field data - strict : bool, optional - If True, raise ValueError if dict contains unrecognized keys. - If False (default), ignore unrecognized keys. 
- """ - keys = set(list(cls.__annotations__.keys()) + list(Field.__annotations__.keys())) - if strict: - if extra_keys := set(d.keys()) - keys: - raise ValueError(f"Unrecognized keys in field data: {extra_keys}") - return cls(**{k: v for k, v in d.items() if k in keys}) diff --git a/modflow_devtools/dfns/schema/v2.py b/modflow_devtools/dfns/schema/v2.py deleted file mode 100644 index bf676e16..00000000 --- a/modflow_devtools/dfns/schema/v2.py +++ /dev/null @@ -1,32 +0,0 @@ -from dataclasses import dataclass -from typing import Literal - -from modflow_devtools.dfns.schema.field import Field - -FieldType = Literal["keyword", "integer", "double", "string", "record", "array", "list"] - -SCALAR_TYPES = ("keyword", "integer", "double", "string") - - -@dataclass(kw_only=True) -class FieldV2(Field): - pass - - @classmethod - def from_dict(cls, d: dict, strict: bool = False) -> "FieldV2": - """ - Create a FieldV2 instance from a dictionary. - - Parameters - ---------- - d : dict - Dictionary containing field data - strict : bool, optional - If True, raise ValueError if dict contains unrecognized keys. - If False (default), ignore unrecognized keys. - """ - keys = set(list(cls.__annotations__.keys()) + list(Field.__annotations__.keys())) - if strict: - if extra_keys := set(d.keys()) - keys: - raise ValueError(f"Unrecognized keys in field data: {extra_keys}") - return cls(**{k: v for k, v in d.items() if k in keys}) diff --git a/modflow_devtools/imports.py b/modflow_devtools/imports.py index 3ebcb42e..da39e8cc 100644 --- a/modflow_devtools/imports.py +++ b/modflow_devtools/imports.py @@ -33,8 +33,6 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from __future__ import annotations - import importlib import sys import types diff --git a/modflow_devtools/programs/__init__.py b/modflow_devtools/programs/__init__.py index 1de99248..19f04399 100644 --- a/modflow_devtools/programs/__init__.py +++ b/modflow_devtools/programs/__init__.py @@ -3,7 +3,7 @@ import shutil import warnings from dataclasses import dataclass, field -from datetime import datetime +from datetime import UTC, datetime from os import PathLike from pathlib import Path @@ -1416,7 +1416,6 @@ def install( If installation fails """ import shutil - from datetime import timezone # 1. Load config and find program in registries config = self.config @@ -1576,7 +1575,7 @@ def install( version=version, platform=platform, bindir=bindir, - installed_at=datetime.now(timezone.utc), + installed_at=datetime.now(UTC), source=source_info, executables=[exe_name], ) diff --git a/pyproject.toml b/pyproject.toml index 8e621884..afc551d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,13 +29,13 @@ classifiers = [ "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Topic :: Scientific/Engineering :: Hydrology" ] -requires-python = ">=3.10" +requires-python = ">=3.11" dynamic = ["version"] [project.optional-dependencies] @@ -73,6 +73,7 @@ docs = [ dfn = [ "boltons", "pooch", + "pyaml", "pydantic", "tomli", "tomli-w" @@ -82,6 +83,7 @@ models = [ "boltons", "filelock", "pooch", + "pyaml", "pydantic", "tomli", "tomli-w" @@ -125,6 +127,7 @@ docs = [ dfn = [ "boltons", "pooch", + "pyaml", "pydantic", "tomli", "tomli-w" @@ -134,6 +137,7 @@ models = [ "boltons", "filelock", "pooch", + "pyaml", "pydantic", "tomli", "tomli-w"