|
16 | 16 | _build_code_expr, |
17 | 17 | _build_text_expr, |
18 | 18 | _build_attr_expr, |
| 19 | + _build_ontology_id_expr, |
19 | 20 | _parse_criterion, |
20 | 21 | ) |
21 | 22 |
|
@@ -393,6 +394,68 @@ def test_raises_when_no_column_mapped_to_concept(self, icd_columns): |
393 | 394 | _build_text_expr(criterion, icd_columns, "epidemiological_history") |
394 | 395 |
|
395 | 396 |
|
| 397 | +class TestBuildOntologyIdExpr: |
| 398 | + @pytest.fixture |
| 399 | + def ontology_df(self): |
| 400 | + return pl.DataFrame( |
| 401 | + {"phenotype": ["hpo:0002045", "hpo:0000975", "hpo:0001649", None]} |
| 402 | + ) |
| 403 | + |
| 404 | + @pytest.fixture |
| 405 | + def symptom_columns(self): |
| 406 | + return [ColumnSpec("phenotype", concept="symptom")] |
| 407 | + |
| 408 | + def test_matches_exact_ontology_id(self, ontology_df, symptom_columns): |
| 409 | + criterion = { |
| 410 | + "type": "symptom", |
| 411 | + "name": "Hyperthermia", |
| 412 | + "ontology_id": "hpo:0002045", |
| 413 | + } |
| 414 | + result = ontology_df.filter( |
| 415 | + _build_ontology_id_expr(criterion, symptom_columns, "symptom") |
| 416 | + ) |
| 417 | + assert result.height == 1 |
| 418 | + assert result["phenotype"][0] == "hpo:0002045" |
| 419 | + |
| 420 | + def test_no_match_returns_empty(self, ontology_df, symptom_columns): |
| 421 | + criterion = {"type": "symptom", "name": "Unknown", "ontology_id": "hpo:9999999"} |
| 422 | + result = ontology_df.filter( |
| 423 | + _build_ontology_id_expr(criterion, symptom_columns, "symptom") |
| 424 | + ) |
| 425 | + assert result.is_empty() |
| 426 | + |
| 427 | + def test_matching_is_case_insensitive(self, symptom_columns): |
| 428 | + df = pl.DataFrame({"phenotype": ["HPO:0002045", "hpo:0000975"]}) |
| 429 | + criterion = {"type": "symptom", "ontology_id": "hpo:0002045"} |
| 430 | + result = df.filter( |
| 431 | + _build_ontology_id_expr(criterion, symptom_columns, "symptom") |
| 432 | + ) |
| 433 | + assert result.height == 1 |
| 434 | + assert result["phenotype"][0] == "HPO:0002045" |
| 435 | + |
| 436 | + def test_matches_across_multiple_columns(self): |
| 437 | + df = pl.DataFrame( |
| 438 | + { |
| 439 | + "phenotype_1": ["hpo:0002045", "hpo:0000001"], |
| 440 | + "phenotype_2": ["hpo:0000001", "hpo:0001649"], |
| 441 | + } |
| 442 | + ) |
| 443 | + columns = [ |
| 444 | + ColumnSpec("phenotype_1", concept="symptom"), |
| 445 | + ColumnSpec("phenotype_2", concept="symptom"), |
| 446 | + ] |
| 447 | + criterion = {"type": "symptom", "ontology_id": "hpo:0001649"} |
| 448 | + result = df.filter(_build_ontology_id_expr(criterion, columns, "symptom")) |
| 449 | + assert result.height == 1 |
| 450 | + assert result["phenotype_2"][0] == "hpo:0001649" |
| 451 | + |
| 452 | + def test_raises_when_no_column_mapped_to_concept(self): |
| 453 | + columns = [ColumnSpec("icd_code", concept="diagnosis")] |
| 454 | + criterion = {"type": "symptom", "ontology_id": "hpo:0002045"} |
| 455 | + with pytest.raises(UnresolvableCriterion, match="symptom"): |
| 456 | + _build_ontology_id_expr(criterion, columns, "symptom") |
| 457 | + |
| 458 | + |
396 | 459 | class TestBuildAttrExpr: |
397 | 460 | def test_numeric_greater_than(self, fake_dataset, demographic_columns): |
398 | 461 | assert (fake_dataset["age"] > 60).sum() == 190 # expected value |
@@ -624,6 +687,31 @@ def test_epidemiological_history_with_attribute_uses_attr_expr(self, fake_datase |
624 | 687 | result = fake_dataset.filter(_parse_criterion(criterion, columns)) |
625 | 688 | assert (result["age"].le(14)).all() |
626 | 689 |
|
| 690 | + def test_ontology_id_routes_to_ontology_expr(self): |
| 691 | + df = pl.DataFrame({"phenotype": ["hpo:0002045", "hpo:0000975", "hpo:0001649"]}) |
| 692 | + columns = [ColumnSpec("phenotype", concept="symptom")] |
| 693 | + criterion = { |
| 694 | + "type": "symptom", |
| 695 | + "name": "Tachycardia", |
| 696 | + "ontology_id": "hpo:0001649", |
| 697 | + } |
| 698 | + result = df.filter(_parse_criterion(criterion, columns)) |
| 699 | + assert result.height == 1 |
| 700 | + assert result["phenotype"][0] == "hpo:0001649" |
| 701 | + |
| 702 | + def test_ontology_id_takes_precedence_over_text_matching(self): |
| 703 | + # Column holds one ontology ID and one value equal to the name; only the ontology_id row must be selected |
| 704 | + df = pl.DataFrame({"phenotype": ["hpo:0001649", "Frankfurt"]}) |
| 705 | + columns = [ColumnSpec("phenotype", concept="symptom")] |
| 706 | + criterion = { |
| 707 | + "type": "symptom", |
| 708 | + "name": "Frankfurt", |
| 709 | + "ontology_id": "hpo:0001649", |
| 710 | + } |
| 711 | + result = df.filter(_parse_criterion(criterion, columns)) |
| 712 | + assert result.height == 1 |
| 713 | + assert result["phenotype"][0] == "hpo:0001649" |
| 714 | + |
627 | 715 | def test_syndrome_type_raises(self, all_columns): |
628 | 716 | with pytest.raises(UnresolvableCriterion, match="syndrome"): |
629 | 717 | _parse_criterion({"type": "syndrome", "name": "Dengue"}, all_columns) |
|
0 commit comments