PLAID-lib
diff --git a/docs/source/tutorials/downloadable_example.md b/docs/source/tutorials/downloadable_example.md
Lines changed: 7 additions & 1 deletion
diff --git a/examples/downloadable_example/__init__.py b/examples/downloadable_example/__init__.py
diff --git a/examples/downloadable_example/sample_example.py b/examples/downloadable_example/sample_example.py
Lines changed: 0 additions & 49 deletions
diff --git a/tests/cli/test_plaidcheck.py b/tests/cli/test_plaidcheck.py
Lines changed: 49 additions & 0 deletions
diff --git a/tests/storage/test_common_writer.py b/tests/storage/test_common_writer.py
Lines changed: 65 additions & 0 deletions
@@ -4,6 +4,8 @@ title: Downloadable samples
 
 # Downloadable samples
 
+## First retrieval
+
 Retrieving sample examples is as easy as:
 
 ```python
@@ -13,4 +15,8 @@ print(AVAILABLE_EXAMPLES)
 print("samples.vki_ls59:", samples.vki_ls59)
 ```
 
-The first call to `samples.vki_ls59` triggers a download and takes a few seconds, whereas subsequent calls are instantaneous because they reuse the cached sample.
+The first call to `samples.vki_ls59` triggers a download and takes a few seconds.
+
+## Cached retrieval
+
+Subsequent calls are instantaneous because they reuse the cached sample.
@@ -70,6 +70,26 @@ def test_check_dataset_missing_infos(tmp_path: Path, dataset_name: str) -> None:
     assert any(msg.code == "MISSING_PATH" for msg in report.messages)
 
 
+@pytest.mark.parametrize("dataset_name", _REFERENCE_DATASETS)
+def test_check_dataset_missing_required_layout_after_valid_infos(
+    tmp_path: Path, dataset_name: str
+) -> None:
+    """Missing layout path (other than infos.yaml) should short-circuit checks.
+
+    A reference dataset is copied via `_copy_reference_dataset`, then exactly
+    one layout entry is removed while `infos.yaml` is left untouched: the
+    `data` directory for `dataset_cgns`, `variable_schema.yaml` for every
+    other reference dataset. The resulting report must flag `MISSING_PATH`
+    and must not contain `DATASET_INIT_ERROR`.
+
+    Args:
+        tmp_path: Pytest temporary directory handed to `_copy_reference_dataset`.
+        dataset_name: Entry of `_REFERENCE_DATASETS` selecting the dataset.
+    """
+    dataset_path = _copy_reference_dataset(tmp_path, dataset_name)
+    if dataset_name == "dataset_cgns":
+        # CGNS backend only requires infos.yaml + data/.
+        shutil.rmtree(dataset_path / "data")
+    else:
+        # Other backends: drop the schema file instead of the data directory.
+        (dataset_path / "variable_schema.yaml").unlink()
+
+    report = check_dataset(dataset_path)
+
+    assert report.has_errors()
+    assert any(msg.code == "MISSING_PATH" for msg in report.messages)
+    # The early return on missing layout means we never reach init-related codes.
+    assert not any(msg.code == "DATASET_INIT_ERROR" for msg in report.messages)
+
+
 @pytest.mark.parametrize("dataset_name", _REFERENCE_DATASETS)
 def test_check_dataset_rejects_extra_infos_key(
     tmp_path: Path, dataset_name: str
@@ -643,6 +663,35 @@ def test_check_dataset_sample_conversion_error(tmp_path: Path, monkeypatch) -> N
     assert any(msg.code == "SAMPLE_CONVERSION_ERROR" for msg in report.messages)
 
 
+def test_check_dataset_init_keyerror_reported_as_missing_split(
+    tmp_path: Path, monkeypatch
+) -> None:
+    """KeyError raised by `init_from_disk` should map to NUM_SAMPLES_MISSING_SPLIT.
+
+    Three attributes of `plaidcheck` (`load_infos_from_disk`,
+    `load_metadata_from_disk`, `init_from_disk`) are replaced with stubs so
+    that the only failure is the `KeyError("ghost_split")` raised by the
+    `init_from_disk` stub. The report must contain a
+    `NUM_SAMPLES_MISSING_SPLIT` message, at least one message mentioning
+    `ghost_split`, and no `DATASET_INIT_ERROR` message.
+    """
+    dataset = _make_minimal_layout(tmp_path)
+
+    # Infos stub: whatever `_infos({"train": 1})` builds, regardless of path.
+    # NOTE(review): presumably a per-split sample count — confirm in `_infos`.
+    monkeypatch.setattr(
+        plaidcheck,
+        "load_infos_from_disk",
+        lambda path: _infos({"train": 1}),  # noqa: ARG005
+    )
+    # Metadata stub: a fixed 4-tuple returned regardless of the path argument.
+    monkeypatch.setattr(
+        plaidcheck,
+        "load_metadata_from_disk",
+        lambda path: ({"train": {}}, {"Var": {}}, {"train": {}}, None),  # noqa: ARG005
+    )
+
+    # Init stub: always fails with a KeyError naming a split the stubs never define.
+    def _raise_key_error(path):  # noqa: ARG001
+        raise KeyError("ghost_split")
+
+    monkeypatch.setattr(plaidcheck, "init_from_disk", _raise_key_error)
+
+    report = check_dataset(dataset)
+
+    assert any(msg.code == "NUM_SAMPLES_MISSING_SPLIT" for msg in report.messages)
+    # The missing key must be surfaced in the message text.
+    assert any("ghost_split" in msg.message for msg in report.messages)
+    # The KeyError must not be reported through the generic init-error code.
+    assert not any(msg.code == "DATASET_INIT_ERROR" for msg in report.messages)
+
+
 def test_check_dataset_missing_num_samples_split_is_clear(
     tmp_path: Path, monkeypatch
 ) -> None:
 
@@ -0,0 +1,65 @@
+"""Tests for `plaid.storage.common.writer` validation paths."""
+
+from pathlib import Path
+
+import pytest
+
+from plaid.problem_definition import ProblemDefinition
+from plaid.storage.common.writer import save_problem_definitions_to_disk
+
+
+def _make_pb_def() -> ProblemDefinition:
+    """Build the small `ProblemDefinition` shared by the tests in this module.
+
+    Returns:
+        A new instance with one input feature (`Global/in`), one output
+        feature (`Global/out`), a `train` split and a `test` split, each
+        set to `[0]`.
+    """
+    return ProblemDefinition(
+        input_features=["Global/in"],
+        output_features=["Global/out"],
+        train_split={"train": [0]},
+        test_split={"test": [0]},
+    )
+
+
+def test_save_problem_definitions_to_disk_rejects_non_dict_non_pbdef(
+    tmp_path: Path,
+) -> None:
+    """Passing a non-dict, non-ProblemDefinition value should raise TypeError.
+
+    A list of `(name, ProblemDefinition)` tuples is used as the invalid
+    argument; the error message must mention `dict[str, ProblemDefinition]`.
+    """
+    with pytest.raises(TypeError, match=r"dict\[str, ProblemDefinition\]"):
+        save_problem_definitions_to_disk(tmp_path, [("name", _make_pb_def())])  # type: ignore[arg-type]
+
+
+def test_save_problem_definitions_to_disk_rejects_non_string_identifier(
+    tmp_path: Path,
+) -> None:
+    """Non-string / empty identifiers should raise TypeError.
+
+    Both invalid keys must produce an error message matching
+    "non-empty strings".
+    """
+    pb_def = _make_pb_def()
+    # Case 1: the dictionary key is an integer rather than a string.
+    with pytest.raises(TypeError, match="non-empty strings"):
+        save_problem_definitions_to_disk(tmp_path, {123: pb_def})  # type: ignore[dict-item]
+    # Case 2: the dictionary key is a string, but an empty one.
+    with pytest.raises(TypeError, match="non-empty strings"):
+        save_problem_definitions_to_disk(tmp_path, {"": pb_def})
+
+
+def test_save_problem_definitions_to_disk_rejects_non_pbdef_value(
+    tmp_path: Path,
+) -> None:
+    """Non-ProblemDefinition values should raise TypeError.
+
+    The key is a valid string, but the mapped value is a plain `str`; the
+    error message must match "ProblemDefinition instances".
+    """
+    with pytest.raises(TypeError, match="ProblemDefinition instances"):
+        save_problem_definitions_to_disk(tmp_path, {"pb": "not a pb_def"})  # type: ignore[dict-item]
+
+
+def test_save_problem_definitions_to_disk_rejects_bare_pbdef(tmp_path: Path) -> None:
+    """Passing a bare ProblemDefinition (not wrapped in a dict) should raise TypeError.
+
+    The error message must match "use the dictionary key as the problem".
+    """
+    with pytest.raises(TypeError, match="use the dictionary key as the problem"):
+        save_problem_definitions_to_disk(tmp_path, _make_pb_def())  # type: ignore[arg-type]
+
+
+def test_save_problem_definitions_to_disk_writes_each_definition(
+    tmp_path: Path,
+) -> None:
+    """Happy path: each ProblemDefinition is delegated to its `save_to_file`.
+
+    Two definitions keyed `pb_a` and `pb_b` are saved; afterwards a
+    `problem_definitions` directory must exist under `tmp_path` and contain
+    `pb_a.yaml` and `pb_b.yaml`.
+    """
+    pb_defs = {"pb_a": _make_pb_def(), "pb_b": _make_pb_def()}
+
+    save_problem_definitions_to_disk(tmp_path, pb_defs)
+
+    # Output is expected in a fixed sub-directory of the target path.
+    target_dir = tmp_path / "problem_definitions"
+    assert target_dir.is_dir()
+    # ProblemDefinition.save_to_file serialises each definition as a YAML file.
+    assert (target_dir / "pb_a.yaml").is_file()
+    assert (target_dir / "pb_b.yaml").is_file()