Increasing data curation coverage (#637)

robertandremitchell · web-flow · commit 2f0b9caed543 · 2026-06-18T14:24:30.000-04:00
## Description

A few small coverage gaps on `data-curation/`. I also noticed a small place in the `validation/` tests where we should be using the constant.

I had a local issue with validation; I had to remove and regenerate the .tmp files to get tests to work locally. This doesn't impact GitHub Actions since it regenerates the tests each time. I added a small cleanup ticket (https://app.zenhub.com/workspaces/dibbs-text-to-code-68756bf8ea06bb00319391c0/issues/gh/cdcgov/dibbs-text-to-code/638) to see if there's anything we can add locally, either to regenerate the files with each run or to check whether they should be regenerated, but it could be overkill.

## Related Issues

## Additional Notes

[Add any additional context or notes that reviewers should know about.]

<--------------------- REMOVE THE LINES BELOW BEFORE MERGING --------------------->

## Checklist

Please review and complete the following checklist before submitting your pull request:

- [ ] I have ensured that the pull request is of a manageable size, allowing it to be reviewed within a single session.
- [ ] I have reviewed my changes to ensure they are clear, concise, and well-documented.
- [ ] I have updated the documentation, if applicable.
- [ ] I have added or updated test cases to cover my changes, if applicable.
- [ ] I have minimized the number of reviewers to include only those essential for the review.

## Checklist for Reviewers

Please review and complete the following checklist during the review process:

- [ ] The code follows best practices and conventions.
- [ ] The changes implement the desired functionality or fix the reported issue.
- [ ] The tests cover the new changes and pass successfully.
- [ ] Any potential edge cases or error scenarios have been considered.
diff --git a/packages/data-curation/tests/test_http_client.py b/packages/data-curation/tests/test_http_client.py
@@ -0,0 +1,41 @@
+import pytest
+from requests.models import Response
+
+from data_curation.terminologies import http_client
+from data_curation.terminologies.http_client import get_with_timeout
+
+
+def test_get_with_timeout_calls_requests_get_with_timeout(
+    monkeypatch: pytest.MonkeyPatch,
+):
+    response = Response()
+    response.status_code = http_client.STATUS_CODE_OK
+    calls: list[tuple[str, dict[str, object] | None, int | None, tuple[str, str] | None]] = []
+
+    def fake_get(
+        url: str,
+        params: dict[str, object] | None = None,
+        timeout: int | None = None,
+        auth: tuple[str, str] | None = None,
+    ) -> Response:
+        calls.append((url, params, timeout, auth))
+
+        return response
+
+    monkeypatch.setattr(http_client.requests, "get", fake_get)
+
+    result = get_with_timeout(
+        "https://example.com",
+        params={"code": "1234-5"},
+        auth=("user", "password"),
+    )
+
+    assert result is response
+    assert calls == [
+        (
+            "https://example.com",
+            {"code": "1234-5"},
+            60,
+            ("user", "password"),
+        )
+    ]
diff --git a/packages/data-curation/tests/test_loinc.py b/packages/data-curation/tests/test_loinc.py
@@ -99,6 +99,52 @@ def mock_save_valueset_csv_file(
     assert saved_files[0][2] is False
 
 
+def test_extract_full_loinc_lab_names_handles_empty_api_results(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    payload: dict[str, object] = {
+        "ResponseSummary": {
+            "RecordsFound": 0,
+            "RowsReturned": 1,
+            "Next": None,
+        },
+        "Results": [],
+    }
+    consumer_names_file = tmp_path / "consumer_names.csv"
+    consumer_names_file.write_text(
+        "LoincNumber|ConsumerName\n",
+        encoding="utf-8",
+    )
+    saved_files: list[tuple[str, list[loinc.LoincRow], bool]] = []
+
+    def mock_get_with_timeout(
+        api_url: str,
+        auth: tuple[str, str] | None = None,
+    ) -> MockResponse:
+        return MockResponse(200, payload)
+
+    def mock_save_valueset_csv_file(
+        filename: str, contents: list[loinc.LoincRow], append_to_file: bool = False
+    ) -> None:
+        saved_files.append((filename, contents, append_to_file))
+
+    monkeypatch.setattr(loinc, "LOINC_USERNAME", "username")
+    monkeypatch.setattr(loinc, "LOINC_PWD", "password")
+    monkeypatch.setattr(loinc, "LOINC_CS_NAMES", consumer_names_file)
+    monkeypatch.setattr(loinc, "get_with_timeout", mock_get_with_timeout)
+    monkeypatch.setattr(loinc, "save_valueset_csv_file", mock_save_valueset_csv_file)
+
+    loinc.extract_full_loinc_lab_names()
+
+    assert "NO RESULTS TO PROCESS!" in capsys.readouterr().out
+    assert len(saved_files) == 1
+    assert saved_files[0][0].startswith("loinc_lab_names_")
+    assert saved_files[0][1] == []
+    assert saved_files[0][2] is False
+
+
 def test_extract_full_loinc_lab_orders(monkeypatch: pytest.MonkeyPatch) -> None:
     rows = [_loinc_row()]
     saved_files: list[tuple[str, list[dict[str, str]], bool]] = []
@@ -405,6 +451,44 @@ def test_process_loinc_valueset_requires_credentials(monkeypatch: pytest.MonkeyP
         loinc._process_loinc_valueset("https://example.com", "Lab Names")
 
 
+def test_process_loincs_for_umls_urls_processes_api_response(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    payload: dict[str, object] = {
+        "ResponseSummary": {
+            "RecordsFound": 1,
+            "RowsReturned": 1,
+            "Next": None,
+        },
+        "Results": [
+            {
+                "LOINC_NUM": "12345-F",
+                "LONG_COMMON_NAME": "TEST LONG NAME",
+            }
+        ],
+    }
+
+    def mock_get_with_timeout(
+        api_url: str,
+        auth: tuple[str, str] | None = None,
+    ) -> MockResponse:
+        return MockResponse(200, payload)
+
+    monkeypatch.setattr(loinc, "LOINC_USERNAME", "username")
+    monkeypatch.setattr(loinc, "LOINC_PWD", "password")
+    monkeypatch.setattr(loinc, "get_with_timeout", mock_get_with_timeout)
+
+    result = loinc.process_loincs_for_umls_urls()
+
+    assert result == {
+        "12345-F": {
+            "atom": loinc.UMLS_LOINC_LAB_ATOMS_URL + "12345-F/atoms",
+            "crs": loinc.UMLS_LOINC_LAB_CROSSWALK_URL + "12345-F",
+            "long_name": "TEST LONG NAME",
+        }
+    }
+
+
 def test_process_loincs_for_umls_urls(monkeypatch: pytest.MonkeyPatch) -> None:
     calls: dict[str, str] = {}
     expected = {
diff --git a/packages/validation/tests/test_validation.py b/packages/validation/tests/test_validation.py
@@ -29,7 +29,7 @@ def __init__(self) -> None:
 
     def get_attribute_value(self, attribute: str) -> str:
         values = {
-            "id": "ttc-labOrder-code-missing",
+            "id": LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,
             "location": FAKE_LOCATION,
             "test": FAKE_TEST,
         }
@@ -112,7 +112,7 @@ def test_validation():
 
     assert results == [
         ValidationResult(
-            error_id="ttc-labOrder-code-missing",
+            error_id=LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,
             location="/Q{urn:hl7-org:v3}ClinicalDocument[1]/Q{urn:hl7-org:v3}component[1]/Q{urn:hl7-org:v3}structuredBody[1]/Q{urn:hl7-org:v3}component[1]/Q{urn:hl7-org:v3}section[1]/Q{urn:hl7-org:v3}entry[1]/Q{urn:hl7-org:v3}observation[1]",
         )
     ]
@@ -151,7 +151,7 @@ def test_validation_redoes_all_steps(monkeypatch: pytest.MonkeyPatch, tmp_path:
     assert results == [
         ValidationResult(
             error_id=LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,
-            location="/ClinicalDocument/component/structuredBody/component/section/entry/observation",
+            location=FAKE_LOCATION,
         )
     ]
     assert stage1_output.read_text() == "<generated />"
@@ -179,7 +179,7 @@ def test_validation_uses_existing_generated_files(monkeypatch: pytest.MonkeyPatc
     assert results == [
         ValidationResult(
             error_id=LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,
-            location="/ClinicalDocument/component/structuredBody/component/section/entry/observation",
+            location=FAKE_LOCATION,
         )
     ]
     assert stage1_output.read_text() == "existing stage 1"

Original file line number	Diff line number	Diff line change
`@@ -29,7 +29,7 @@ def __init__(self) -> None:`
`29`	`29`
`30`	`30`	`def get_attribute_value(self, attribute: str) -> str:`
`31`	`31`	`values = {`
`32`		`- "id": "ttc-labOrder-code-missing",`
	`32`	`+ "id": LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,`
`33`	`33`	`"location": FAKE_LOCATION,`
`34`	`34`	`"test": FAKE_TEST,`
`35`	`35`	`}`
`@@ -112,7 +112,7 @@ def test_validation():`
`112`	`112`
`113`	`113`	`assert results == [`
`114`	`114`	`ValidationResult(`
`115`		`- error_id="ttc-labOrder-code-missing",`
	`115`	`+ error_id=LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,`
`116`	`116`	`location="/Q{urn:hl7-org:v3}ClinicalDocument[1]/Q{urn:hl7-org:v3}component[1]/Q{urn:hl7-org:v3}structuredBody[1]/Q{urn:hl7-org:v3}component[1]/Q{urn:hl7-org:v3}section[1]/Q{urn:hl7-org:v3}entry[1]/Q{urn:hl7-org:v3}observation[1]",`
`117`	`117`	`)`
`118`	`118`	`]`
`@@ -151,7 +151,7 @@ def test_validation_redoes_all_steps(monkeypatch: pytest.MonkeyPatch, tmp_path:`
`151`	`151`	`assert results == [`
`152`	`152`	`ValidationResult(`
`153`	`153`	`error_id=LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,`
`154`		`- location="/ClinicalDocument/component/structuredBody/component/section/entry/observation",`
	`154`	`+ location=FAKE_LOCATION,`
`155`	`155`	`)`
`156`	`156`	`]`
`157`	`157`	`assert stage1_output.read_text() == "<generated />"`
`@@ -179,7 +179,7 @@ def test_validation_uses_existing_generated_files(monkeypatch: pytest.MonkeyPatc`
`179`	`179`	`assert results == [`
`180`	`180`	`ValidationResult(`
`181`	`181`	`error_id=LabTestNameOrderedSchematronErrors.MISSING_CODE_ATTRIBUTE.value,`
`182`		`- location="/ClinicalDocument/component/structuredBody/component/section/entry/observation",`
	`182`	`+ location=FAKE_LOCATION,`
`183`	`183`	`)`
`184`	`184`	`]`
`185`	`185`	`assert stage1_output.read_text() == "existing stage 1"`