Skip to content

Commit 40a3605

Browse files
committed
feat: Enhance IOType mapping and documentation for arctrl integration
1 parent cdfeac0 commit 40a3605

4 files changed

Lines changed: 215 additions & 17 deletions

File tree

.agents/skills/arctrl/SKILL.md

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,33 @@ but some internals are Fable runtime types.
1818

1919
## Package & Imports
2020

21+
arctrl ships no type stubs and no `py.typed` marker. Mypy will report
22+
`[import-untyped]` for every `arctrl.*` import unless you suppress it.
23+
24+
**Preferred: project-level override in `pyproject.toml`** (no per-import
25+
comments needed, covers all submodules):
26+
27+
```toml
28+
[[tool.mypy.overrides]]
29+
module = ["arctrl", "arctrl.*"]
30+
ignore_missing_imports = true
31+
```
32+
33+
The `arctrl.*` glob is required because the Fable-transpiled internals are
34+
exposed under `arctrl.py.*` subpackages (e.g.
35+
`arctrl.py.Core.Table.composite_cell`), which are a different dotted path
36+
from the bare `arctrl` package.
37+
38+
**Alternative: per-import suppression** (only needed when the project-level
39+
override is not in place):
40+
41+
```python
42+
from arctrl.py.fable_modules.fable_library.async_ import start_as_task # type: ignore[import-untyped]
43+
from arctrl.py.Core.Table.composite_cell import Data # type: ignore[import-untyped]
44+
```
45+
46+
---
47+
2148
```python
2249
from arctrl import (
2350
ARC,
@@ -33,7 +60,7 @@ from arctrl import (
3360
Publication,
3461
)
3562

36-
# Async write helper (Fable internal — untyped, needs type: ignore)
63+
# Async write helper lives in the Fable internals:
3764
from arctrl.py.fable_modules.fable_library.async_ import start_as_task # type: ignore[import-untyped]
3865
```
3966

@@ -194,9 +221,13 @@ header_date = CompositeHeader.date # property, not callable
194221
# Fallback for unknown/simple header names:
195222
header_any = CompositeHeader.OfHeaderString("SomeColumnName")
196223

197-
# IOType known strings (IOType.of_string):
198-
# "source_name", "sample_name", "raw_data_file", "derived_data_file",
199-
# "image_file", "material"
224+
# Canonical strings recognised by IOType.of_string(), each mapping to a named tag (0-3):
225+
# "Source Name" / "Source" → tag 0 (Source)
226+
# "Sample Name" / "Sample" → tag 1 (Sample)
227+
# "Data" / "RawDataFile" / "Raw Data File" / "DerivedDataFile" /
228+
# "Derived Data File" / "ImageFile" / "Image File" → tag 2 (Data)
229+
# "Material" → tag 3 (Material)
230+
# Any other string → tag 4 (FreeType — avoid for ISA compliance)
200231

201232
# Build cells
202233
cell_text = CompositeCell.free_text("some value")

middleware/sql_to_arc/src/middleware/sql_to_arc/builder.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
IOType,
1717
OntologyAnnotation,
1818
)
19+
from arctrl.py.Core.Table.composite_cell import Data
1920

2021
from middleware.sql_to_arc.context import ArcBuildData
2122
from middleware.sql_to_arc.mapper import (
@@ -156,6 +157,16 @@ def _add_publications_to_arc(
156157
study.Publications.append(map_publication(p_row))
157158

158159

160+
# Maps DB schema column_io_type values (snake_case DB contract) to the canonical
161+
# strings recognised by IOType.of_string() (ARCitect display names).
162+
_IO_TYPE_MAP: dict[str, str] = {
163+
"source_name": "Source Name",
164+
"sample_name": "Sample Name",
165+
"data": "Data",
166+
"material_name": "Material",
167+
}
168+
169+
159170
def _get_column_key(r: AnnotationTableRow) -> tuple[Any, ...]:
160171
"""Extract a unique key for a column definition."""
161172
return (
@@ -176,7 +187,7 @@ def _build_header(key: tuple[Any, ...]) -> CompositeHeader | None:
176187
oa = OntologyAnnotation(c_ann_term or "", c_ann_uri or "", c_ann_ver or "")
177188

178189
if c_type in {"input", "output"} and not c_io:
179-
default_io = "source_name" if c_type == "input" else "sample_name"
190+
default_io = "Source Name" if c_type == "input" else "Sample Name"
180191
logger.warning(
181192
"column_io_type missing for column_type '%s'; defaulting to '%s'",
182193
c_type,
@@ -185,8 +196,12 @@ def _build_header(key: tuple[Any, ...]) -> CompositeHeader | None:
185196

186197
# Dispatch table for different header types
187198
handlers = {
188-
"input": lambda: CompositeHeader.input(IOType.of_string(c_io or "source_name")),
189-
"output": lambda: CompositeHeader.output(IOType.of_string(c_io or "sample_name")),
199+
"input": lambda: CompositeHeader.input(
200+
IOType.of_string(_IO_TYPE_MAP.get(c_io or "", c_io or "Source Name"))
201+
),
202+
"output": lambda: CompositeHeader.output(
203+
IOType.of_string(_IO_TYPE_MAP.get(c_io or "", c_io or "Sample Name"))
204+
),
190205
"characteristic": lambda: CompositeHeader.characteristic(oa),
191206
"factor": lambda: CompositeHeader.factor(oa),
192207
"parameter": lambda: CompositeHeader.parameter(oa),
@@ -222,6 +237,10 @@ def _build_single_cell(cell_row: AnnotationTableRow, header: CompositeHeader) ->
222237
if cat is not None:
223238
return CompositeCell.term(OntologyAnnotation(cat, cau, cav))
224239

240+
# Data cell (file path) — required when header is a Data-type IO column
241+
if header.IsDataColumn:
242+
return CompositeCell.data(Data(name=str(cv)) if cv is not None else Data())
243+
225244
# Text value? (either from new schema 'cell_value' or fallback 'value')
226245
val_to_use = cv
227246
if val_to_use is not None:

middleware/sql_to_arc/tests/unit/test_builder.py

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
from typing import Any
55

66
import pytest
7+
from arctrl import CompositeHeader, IOType
78

8-
from middleware.sql_to_arc.builder import build_single_arc_task
9+
from middleware.sql_to_arc.builder import _IO_TYPE_MAP, _build_header, _build_single_cell, build_single_arc_task
910
from middleware.sql_to_arc.context import ArcBuildData
1011
from middleware.sql_to_arc.models import (
12+
AnnotationTableRow,
1113
AssayRow,
1214
ContactRow,
1315
InvestigationRow,
@@ -197,3 +199,155 @@ def test_build_ignores_irrelevant_data(sample_investigation: dict[str, Any]) ->
197199
# Check that styX is NOT in the graph
198200
sty_x = next((item for item in graph if item.get("@id") == "styX" or item.get("identifier") == "styX"), None)
199201
assert sty_x is None
202+
203+
204+
# ---------------------------------------------------------------------------
205+
# Helpers to build minimal AnnotationTableRow dicts
206+
# ---------------------------------------------------------------------------
207+
208+
209+
def _ann_row(**overrides: Any) -> dict[str, Any]:
210+
"""Return a minimal AnnotationTableRow dict, optionally overriding any field."""
211+
base: dict[str, Any] = {
212+
"investigation_ref": "inv1",
213+
"target_type": "study",
214+
"target_ref": "sty1",
215+
"table_name": "T",
216+
"column_type": "input",
217+
"row_index": 0,
218+
"column_io_type": None,
219+
"cell_value": None,
220+
"cell_annotation_term": None,
221+
"cell_annotation_uri": None,
222+
"cell_annotation_version": None,
223+
"column_annotation_term": None,
224+
"column_annotation_uri": None,
225+
"column_annotation_version": None,
226+
"column_value": None,
227+
}
228+
base.update(overrides)
229+
return base
230+
231+
232+
def _row(data: dict[str, Any]) -> AnnotationTableRow:
233+
"""Validate a dict into an AnnotationTableRow."""
234+
return AnnotationTableRow.model_validate(data)
235+
236+
237+
# ---------------------------------------------------------------------------
238+
# IOType mapping tests
239+
# ---------------------------------------------------------------------------
240+
241+
242+
class TestIOTypeMapping:
243+
"""_IO_TYPE_MAP translates snake_case DB values to canonical ARCitect strings."""
244+
245+
@staticmethod
246+
@pytest.mark.parametrize(
247+
("db_value", "canonical"),
248+
[
249+
("source_name", "Source Name"),
250+
("sample_name", "Sample Name"),
251+
("data", "Data"),
252+
("material_name", "Material"),
253+
],
254+
)
255+
def test_map_covers_all_db_values(db_value: str, canonical: str) -> None:
256+
"""Each DB snake_case value maps to the expected canonical ARCitect string."""
257+
assert _IO_TYPE_MAP[db_value] == canonical
258+
259+
@staticmethod
260+
@pytest.mark.parametrize(
261+
("db_value", "expected_tag"),
262+
[
263+
("source_name", 0), # IOType.Source
264+
("sample_name", 1), # IOType.Sample
265+
("data", 2), # IOType.Data
266+
("material_name", 3), # IOType.Material
267+
],
268+
)
269+
def test_build_header_input_uses_named_iotype(db_value: str, expected_tag: int) -> None:
270+
"""DB values must produce a named IOType (tag 0–3), never FreeType (tag 4)."""
271+
key = ("input", db_value, None, None, None, None, None)
272+
header = _build_header(key)
273+
assert header is not None
274+
assert header.is_input
275+
assert header.fields[0].tag == expected_tag
276+
277+
@staticmethod
278+
@pytest.mark.parametrize(
279+
("db_value", "expected_tag"),
280+
[
281+
("sample_name", 1),
282+
("data", 2),
283+
("material_name", 3),
284+
],
285+
)
286+
def test_build_header_output_uses_named_iotype(db_value: str, expected_tag: int) -> None:
287+
"""DB output values must also produce a named IOType, never FreeType."""
288+
key = ("output", db_value, None, None, None, None, None)
289+
header = _build_header(key)
290+
assert header is not None
291+
assert header.is_output
292+
assert header.fields[0].tag == expected_tag
293+
294+
@staticmethod
295+
def test_missing_io_type_defaults_to_source_name_for_input() -> None:
296+
"""Missing column_io_type falls back to 'Source Name' (tag 0) for input."""
297+
key = ("input", None, None, None, None, None, None)
298+
header = _build_header(key)
299+
assert header is not None
300+
assert header.is_input
301+
assert header.fields[0].tag == 0
302+
303+
@staticmethod
304+
def test_missing_io_type_defaults_to_sample_name_for_output() -> None:
305+
"""Missing column_io_type falls back to 'Sample Name' (tag 1) for output."""
306+
key = ("output", None, None, None, None, None, None)
307+
header = _build_header(key)
308+
assert header is not None
309+
assert header.is_output
310+
assert header.fields[0].tag == 1
311+
312+
313+
# ---------------------------------------------------------------------------
314+
# Data cell tests
315+
# ---------------------------------------------------------------------------
316+
317+
318+
class TestDataCellBuilding:
319+
"""_build_single_cell must emit CompositeCell.data() for data-typed IO columns."""
320+
321+
@staticmethod
322+
def test_data_cell_has_correct_file_path() -> None:
323+
"""A data-typed output column must produce a DataCell with the file path set."""
324+
header = CompositeHeader.output(IOType.of_string("Data"))
325+
row = _row(_ann_row(column_type="output", column_io_type="data", cell_value="raw.fastq.gz"))
326+
cell = _build_single_cell(row, header)
327+
assert cell.is_data
328+
assert cell.AsData.FilePath == "raw.fastq.gz"
329+
330+
@staticmethod
331+
def test_data_cell_empty_when_no_cell_value() -> None:
332+
"""A data-typed column with no cell_value must produce an empty DataCell, not a crash."""
333+
header = CompositeHeader.output(IOType.of_string("Data"))
334+
row = _row(_ann_row(column_type="output", column_io_type="data", cell_value=None))
335+
cell = _build_single_cell(row, header)
336+
assert cell.is_data
337+
assert cell.AsData.FilePath is None
338+
339+
@staticmethod
340+
def test_source_name_column_emits_free_text() -> None:
341+
"""A source_name input column must produce a free-text cell, not a DataCell."""
342+
header = CompositeHeader.input(IOType.of_string("Source Name"))
343+
row = _row(_ann_row(column_type="input", column_io_type="source_name", cell_value="SourceA"))
344+
cell = _build_single_cell(row, header)
345+
assert cell.is_free_text
346+
347+
@staticmethod
348+
def test_sample_name_column_emits_free_text() -> None:
349+
"""A sample_name output column must produce a free-text cell, not a DataCell."""
350+
header = CompositeHeader.output(IOType.of_string("Sample Name"))
351+
row = _row(_ann_row(column_type="output", column_io_type="sample_name", cell_value="SampleB"))
352+
cell = _build_single_cell(row, header)
353+
assert cell.is_free_text

pyproject.toml

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,16 +131,10 @@ exclude = [
131131
".ruff_cache",
132132
]
133133

134-
# [[tool.mypy.overrides]]
135-
# module = [
136-
# "middleware.api_client.*",
137-
# "middleware.shared.*",
138-
# ]
139-
# ignore_missing_imports = true
140-
141-
# arctrl has no type stubs and no py.typed marker — suppress the resulting noise
134+
# arctrl has no type stubs and no py.typed marker — suppress the resulting noise.
135+
# The Fable-transpiled internals live under arctrl.py.*, which the bare "arctrl" pattern does not match; the "arctrl.*" glob covers those submodules.
142136
[[tool.mypy.overrides]]
143-
module = ["arctrl"]
137+
module = ["arctrl", "arctrl.*"]
144138
ignore_missing_imports = true
145139

146140
# Enable docstring rules so the linter complains like pylint C0114/C0115/C0116

0 commit comments

Comments
 (0)