|
4 | 4 | from typing import Any |
5 | 5 |
|
6 | 6 | import pytest |
| 7 | +from arctrl import CompositeHeader, IOType |
7 | 8 |
|
8 | | -from middleware.sql_to_arc.builder import build_single_arc_task |
| 9 | +from middleware.sql_to_arc.builder import _IO_TYPE_MAP, _build_header, _build_single_cell, build_single_arc_task |
9 | 10 | from middleware.sql_to_arc.context import ArcBuildData |
10 | 11 | from middleware.sql_to_arc.models import ( |
| 12 | + AnnotationTableRow, |
11 | 13 | AssayRow, |
12 | 14 | ContactRow, |
13 | 15 | InvestigationRow, |
@@ -197,3 +199,155 @@ def test_build_ignores_irrelevant_data(sample_investigation: dict[str, Any]) -> |
197 | 199 | # Check that styX is NOT in the graph |
198 | 200 | sty_x = next((item for item in graph if item.get("@id") == "styX" or item.get("identifier") == "styX"), None) |
199 | 201 | assert sty_x is None |
| 202 | + |
| 203 | + |
| 204 | +# --------------------------------------------------------------------------- |
| 205 | +# Helpers to build minimal AnnotationTableRow dicts |
| 206 | +# --------------------------------------------------------------------------- |
| 207 | + |
| 208 | + |
| 209 | +def _ann_row(**overrides: Any) -> dict[str, Any]: |
| 210 | + """Return a minimal AnnotationTableRow dict, optionally overriding any field.""" |
| 211 | + base: dict[str, Any] = { |
| 212 | + "investigation_ref": "inv1", |
| 213 | + "target_type": "study", |
| 214 | + "target_ref": "sty1", |
| 215 | + "table_name": "T", |
| 216 | + "column_type": "input", |
| 217 | + "row_index": 0, |
| 218 | + "column_io_type": None, |
| 219 | + "cell_value": None, |
| 220 | + "cell_annotation_term": None, |
| 221 | + "cell_annotation_uri": None, |
| 222 | + "cell_annotation_version": None, |
| 223 | + "column_annotation_term": None, |
| 224 | + "column_annotation_uri": None, |
| 225 | + "column_annotation_version": None, |
| 226 | + "column_value": None, |
| 227 | + } |
| 228 | + base.update(overrides) |
| 229 | + return base |
| 230 | + |
| 231 | + |
def _row(data: dict[str, Any]) -> AnnotationTableRow:
    """Turn a plain dict into an AnnotationTableRow via model validation."""
    validated = AnnotationTableRow.model_validate(data)
    return validated
| 235 | + |
| 236 | + |
| 237 | +# --------------------------------------------------------------------------- |
| 238 | +# IOType mapping tests |
| 239 | +# --------------------------------------------------------------------------- |
| 240 | + |
| 241 | + |
class TestIOTypeMapping:
    """Checks on _IO_TYPE_MAP and on the IOType chosen by _build_header.

    _IO_TYPE_MAP is expected to translate snake_case DB values into the
    canonical ARCitect strings.
    """

    @staticmethod
    @pytest.mark.parametrize(
        ("db_value", "canonical"),
        [
            ("source_name", "Source Name"),
            ("sample_name", "Sample Name"),
            ("data", "Data"),
            ("material_name", "Material"),
        ],
    )
    def test_map_covers_all_db_values(db_value: str, canonical: str) -> None:
        """Looking up a DB snake_case value yields its canonical ARCitect string."""
        mapped = _IO_TYPE_MAP[db_value]
        assert mapped == canonical

    @staticmethod
    @pytest.mark.parametrize(
        ("db_value", "expected_tag"),
        [
            ("source_name", 0),  # IOType.Source
            ("sample_name", 1),  # IOType.Sample
            ("data", 2),  # IOType.Data
            ("material_name", 3),  # IOType.Material
        ],
    )
    def test_build_header_input_uses_named_iotype(db_value: str, expected_tag: int) -> None:
        """An input header built from a DB value carries a named IOType (tag 0–3), not FreeType (tag 4)."""
        # Only the column type and IO type are set; the five remaining slots stay None.
        header_key = ("input", db_value) + (None,) * 5
        built = _build_header(header_key)
        assert built is not None
        assert built.is_input
        io_field = built.fields[0]
        assert io_field.tag == expected_tag

    @staticmethod
    @pytest.mark.parametrize(
        ("db_value", "expected_tag"),
        [
            ("sample_name", 1),
            ("data", 2),
            ("material_name", 3),
        ],
    )
    def test_build_header_output_uses_named_iotype(db_value: str, expected_tag: int) -> None:
        """An output header built from a DB value carries a named IOType as well, not FreeType."""
        header_key = ("output", db_value) + (None,) * 5
        built = _build_header(header_key)
        assert built is not None
        assert built.is_output
        io_field = built.fields[0]
        assert io_field.tag == expected_tag

    @staticmethod
    def test_missing_io_type_defaults_to_source_name_for_input() -> None:
        """With no column_io_type, an input header falls back to 'Source Name' (tag 0)."""
        built = _build_header(("input",) + (None,) * 6)
        assert built is not None
        assert built.is_input
        io_field = built.fields[0]
        assert io_field.tag == 0

    @staticmethod
    def test_missing_io_type_defaults_to_sample_name_for_output() -> None:
        """With no column_io_type, an output header falls back to 'Sample Name' (tag 1)."""
        built = _build_header(("output",) + (None,) * 6)
        assert built is not None
        assert built.is_output
        io_field = built.fields[0]
        assert io_field.tag == 1
| 311 | + |
| 312 | + |
| 313 | +# --------------------------------------------------------------------------- |
| 314 | +# Data cell tests |
| 315 | +# --------------------------------------------------------------------------- |
| 316 | + |
| 317 | + |
class TestDataCellBuilding:
    """Cell-variant checks for _build_single_cell.

    Data-typed IO columns must yield data cells; source/sample name columns
    must yield free-text cells.  Each test pins both the cell variant and the
    value it carries, so a builder that picks the right variant but loses the
    value is still caught.
    """

    @staticmethod
    def test_data_cell_has_correct_file_path() -> None:
        """A data-typed output column must produce a DataCell with the file path set."""
        header = CompositeHeader.output(IOType.of_string("Data"))
        row = _row(_ann_row(column_type="output", column_io_type="data", cell_value="raw.fastq.gz"))
        cell = _build_single_cell(row, header)
        assert cell.is_data
        assert cell.AsData.FilePath == "raw.fastq.gz"

    @staticmethod
    def test_data_cell_empty_when_no_cell_value() -> None:
        """A data-typed column with no cell_value must produce an empty DataCell, not a crash."""
        header = CompositeHeader.output(IOType.of_string("Data"))
        row = _row(_ann_row(column_type="output", column_io_type="data", cell_value=None))
        cell = _build_single_cell(row, header)
        assert cell.is_data
        assert cell.AsData.FilePath is None

    @staticmethod
    def test_source_name_column_emits_free_text() -> None:
        """A source_name input column must produce a free-text cell holding the cell value."""
        header = CompositeHeader.input(IOType.of_string("Source Name"))
        row = _row(_ann_row(column_type="input", column_io_type="source_name", cell_value="SourceA"))
        cell = _build_single_cell(row, header)
        assert cell.is_free_text
        # Checking the variant alone would not notice a dropped or altered value.
        assert cell.AsFreeText == "SourceA"

    @staticmethod
    def test_sample_name_column_emits_free_text() -> None:
        """A sample_name output column must produce a free-text cell holding the cell value."""
        header = CompositeHeader.output(IOType.of_string("Sample Name"))
        row = _row(_ann_row(column_type="output", column_io_type="sample_name", cell_value="SampleB"))
        cell = _build_single_cell(row, header)
        assert cell.is_free_text
        # Checking the variant alone would not notice a dropped or altered value.
        assert cell.AsFreeText == "SampleB"
0 commit comments