Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions src/webapp/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import logging
import tempfile
from contextlib import contextmanager
from functools import lru_cache, partial
from functools import lru_cache
from typing import (
Any,
BinaryIO,
Expand All @@ -46,6 +46,19 @@
# Type for PDP converter functions (DataFrame -> DataFrame); used for cohort/course.
PDPConverterFunc = Optional[Callable[[pd.DataFrame], pd.DataFrame]]


def _default_pdp_course_duplicate_converter(df: pd.DataFrame) -> pd.DataFrame:
    """
    Single-argument adapter that runs ``handling_duplicates`` in PDP mode.

    ``read_raw_pdp_course_data`` invokes its converter as ``converter_func(df)``,
    so bare ``handling_duplicates`` must not be handed over as a converter.
    This wrapper supplies the schema selector ``"pdp"`` as the second
    *positional* argument rather than by keyword: that slot is named
    ``schema_type`` in current edvise and ``school_type`` in older builds, and
    a positional call works with both.

    Args:
        df: Course DataFrame handed in by ``read_raw_pdp_course_data``.

    Returns:
        The result of ``handling_duplicates(df, "pdp")``.
    """
    schema_selector = "pdp"
    deduplicated = handling_duplicates(df, schema_selector)
    return deduplicated


# --------------------------------------------------------------------------- #
# Logging
# --------------------------------------------------------------------------- #
Expand Down Expand Up @@ -867,9 +880,8 @@ def _read_pdp_course_edvise(

Tries each datetime format with each converter. If a custom
course_converter_func is provided (e.g. from a school), it is tried first;
then the default handling_duplicates(..., school_type="pdp"), then
handling_duplicates for older edvise. Raises HardValidationError if all
attempts fail.
then :func:`_default_pdp_course_duplicate_converter` (``handling_duplicates``
with PDP settings). Raises HardValidationError if all attempts fail.

Args:
path: Path to course CSV.
Expand All @@ -882,10 +894,7 @@ def _read_pdp_course_edvise(
Raises:
HardValidationError: If no (converter, format) pair succeeded.
"""
default_converters = (
partial(handling_duplicates, school_type="pdp"),
handling_duplicates,
)
default_converters = (_default_pdp_course_duplicate_converter,)
converters = (
(course_converter_func,) if course_converter_func is not None else ()
) + default_converters
Expand All @@ -903,7 +912,7 @@ def _read_pdp_course_edvise(
except ValueError as e:
last_error = e
except TypeError as e:
if "school_type" in str(e):
if "school_type" in str(e) or "schema_type" in str(e):
last_error = None
break
raise
Expand Down
18 changes: 11 additions & 7 deletions src/webapp/validation_pdp_read_path_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,20 +366,24 @@ def test_read_pdp_course_edvise_all_attempts_fail_raises_hard_validation_error()
)


def test_read_pdp_course_edvise_typeerror_school_type_tries_next_converter() -> None:
"""When first converter raises TypeError with school_type, second converter is tried."""
def test_read_pdp_course_edvise_falls_back_after_custom_converter_fails() -> None:
"""When custom converter fails all datetime formats, default PDP converter is used."""
expected = pd.DataFrame({"course_id": ["c1"]})
with patch(
"src.webapp.validation.read_raw_pdp_course_data",
side_effect=[
TypeError(
"handling_duplicates() got an unexpected keyword argument 'school_type'"
),
ValueError("bad datetime"),
ValueError("bad datetime"),
ValueError("bad datetime"),
expected,
],
):
result = _read_pdp_course_edvise("/path.csv")
) as mock_read:
result = _read_pdp_course_edvise(
"/path.csv",
course_converter_func=lambda df: df, # noqa: ARG005
)
pd.testing.assert_frame_equal(result, expected)
assert mock_read.call_count == 4


def test_read_pdp_course_edvise_custom_converter_tried_first() -> None:
Expand Down
Loading