Skip to content

Commit 5a4a7b2

Browse files
authored
Merge pull request #84 from datakind/Validation-Errors
adjusted validation files
2 parents b66ec69 + 44c0bc8 commit 5a4a7b2

3 files changed

Lines changed: 77 additions & 6 deletions

File tree

src/webapp/routers/data.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -886,8 +886,8 @@ def validation_helper(
886886
if not allowed_schemas:
887887
allowed_schemas = infer_models_from_filename(file_name, "pdp")
888888

889-
inferred_schemas = set()
890-
# TODO:
889+
inferred_schemas: list[str] = []
890+
891891
try:
892892
inferred_schemas = storage_control.validate_file(
893893
get_external_bucket_name(inst_id),
@@ -914,7 +914,7 @@ def validation_helper(
914914
local_session.get().add(new_file_record)
915915
except Exception as e:
916916
logging.error(f"Error message: {str(e)}")
917-
917+
print(f"Inferred Schemas {list(inferred_schemas)}")
918918
return {
919919
"name": file_name,
920920
"inst_id": inst_id,
@@ -923,7 +923,9 @@ def validation_helper(
923923
}
924924

925925

926-
@router.post("/{inst_id}/input/validate-sftp/{file_name:path}")
926+
@router.post(
927+
"/{inst_id}/input/validate-sftp/{file_name:path}", response_model=ValidationResult
928+
)
927929
def validate_file_sftp(
928930
inst_id: str,
929931
file_name: str,

src/webapp/validation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,11 @@ def validate_dataset(
174174

175175
# 2) merge requested models
176176
if models is None:
177-
model_list: List[str] = []
177+
model_list = []
178178
elif isinstance(models, str):
179179
model_list = [models]
180180
else:
181-
model_list = models
181+
model_list = list(models) # <- ensures it's not a set
182182

183183
merged_specs: Dict[str, dict] = {}
184184
for m in model_list:

src/webapp/validation_test.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import pytest
2+
import pandas as pd
3+
from pathlib import Path
4+
from unittest.mock import patch
5+
from src.webapp.validation import validate_file_reader, HardValidationError
6+
7+
# Minimal schemas for testing.
#
# MOCK_BASE_SCHEMA declares a single data model, "test_model", with one
# required non-nullable int column ("foo_col") and one optional nullable
# str column ("bar_col"). Each column also lists one alias.
MOCK_BASE_SCHEMA = {
    "base": {
        "data_models": {
            "test_model": {
                "columns": {
                    "foo_col": {
                        "dtype": "int",
                        "nullable": False,
                        "required": True,
                        "aliases": ["foo"],
                    },
                    "bar_col": {
                        "dtype": "str",
                        "nullable": True,
                        "required": False,
                        "aliases": ["bar"],
                    },
                }
            }
        }
    }
}

# Extension schema whose "pdp" institution entry defines no data models.
MOCK_EXT_SCHEMA = {"institutions": {"pdp": {"data_models": {}}}}
32+
33+
34+
@pytest.fixture
def tmp_csv_file(tmp_path: Path) -> str:
    """Write a small two-column CSV and return its path as a string.

    The file is named ``test.csv``, is created under pytest's ``tmp_path``
    directory, and holds two rows with the columns ``foo_col`` (ints) and
    ``bar_col`` (strings). The DataFrame index is not written.
    """
    df = pd.DataFrame({"foo_col": [1, 2], "bar_col": ["a", "b"]})
    file_path = tmp_path / "test.csv"
    df.to_csv(file_path, index=False)
    return str(file_path)
40+
41+
42+
def test_validate_file_reader_passes(tmp_csv_file):
    """Expect a CSV with both ``test_model`` columns to validate as passed.

    ``load_json`` is patched so that any path containing "base" yields
    ``MOCK_BASE_SCHEMA`` and every other path yields ``MOCK_EXT_SCHEMA``.
    ``os.path.exists`` is patched to always return ``True``.
    """
    with (
        patch("src.webapp.validation.load_json") as mock_load,
        patch("os.path.exists", return_value=True),
    ):
        mock_load.side_effect = (
            lambda path: MOCK_BASE_SCHEMA if "base" in path else MOCK_EXT_SCHEMA
        )
        result = validate_file_reader(tmp_csv_file, ["test_model"])
        assert result["validation_status"] == "passed"
        assert result["schemas"] == ["test_model"]
53+
54+
55+
def test_validate_file_reader_fails_missing_required(tmp_path):
    """Expect ``HardValidationError`` when the required ``foo_col`` is absent.

    A CSV containing only ``bar_col`` is written under ``tmp_path`` and
    validated against ``test_model``. ``load_json`` is patched so that any
    path containing "base" yields ``MOCK_BASE_SCHEMA`` and every other path
    yields ``MOCK_EXT_SCHEMA``. ``os.path.exists`` is patched to always
    return ``True``.
    """
    df = pd.DataFrame({"bar_col": ["x", "y"]})  # Missing "foo_col"
    file_path = tmp_path / "invalid.csv"
    df.to_csv(file_path, index=False)

    with (
        patch("src.webapp.validation.load_json") as mock_load,
        patch("os.path.exists", return_value=True),
    ):
        mock_load.side_effect = (
            lambda path: MOCK_BASE_SCHEMA if "base" in path else MOCK_EXT_SCHEMA
        )
        with pytest.raises(HardValidationError) as exc_info:
            validate_file_reader(str(file_path), ["test_model"])

    # Inspect the message only after the ``raises`` block has exited; a
    # statement placed after the raising call inside that block never runs.
    assert "Missing required columns" in str(exc_info.value)

0 commit comments

Comments
 (0)