diff --git a/src/webapp/validation.py b/src/webapp/validation.py index b146cb79..418ba95a 100644 --- a/src/webapp/validation.py +++ b/src/webapp/validation.py @@ -19,7 +19,7 @@ import logging import tempfile from contextlib import contextmanager -from functools import lru_cache, partial +from functools import lru_cache from typing import ( Any, BinaryIO, @@ -46,6 +46,19 @@ # Type for PDP converter functions (DataFrame -> DataFrame); used for cohort/course. PDPConverterFunc = Optional[Callable[[pd.DataFrame], pd.DataFrame]] + +def _default_pdp_course_duplicate_converter(df: pd.DataFrame) -> pd.DataFrame: + """ + PDP course duplicate cleanup for read_raw_pdp_course_data. + + Passes the schema selector as the second *positional* argument so this works + with current edvise (``schema_type``) and older builds that used the same slot + for ``school_type``. Do not pass bare ``handling_duplicates`` as a converter: + read_raw_pdp_course_data calls ``converter_func(df)`` with a single argument. + """ + return handling_duplicates(df, "pdp") + + # --------------------------------------------------------------------------- # # Logging # --------------------------------------------------------------------------- # @@ -867,9 +880,8 @@ def _read_pdp_course_edvise( Tries each datetime format with each converter. If a custom course_converter_func is provided (e.g. from a school), it is tried first; - then the default handling_duplicates(..., school_type="pdp"), then - handling_duplicates for older edvise. Raises HardValidationError if all - attempts fail. + then :func:`_default_pdp_course_duplicate_converter` (``handling_duplicates`` + with PDP settings). Raises HardValidationError if all attempts fail. Args: path: Path to course CSV. @@ -882,10 +894,7 @@ def _read_pdp_course_edvise( Raises: HardValidationError: If no (converter, format) pair succeeded. """ - default_converters = ( - partial(handling_duplicates, school_type="pdp"), - handling_duplicates, - ) + default_converters = (_default_pdp_course_duplicate_converter,) converters = ( (course_converter_func,) if course_converter_func is not None else () ) + default_converters @@ -903,7 +912,7 @@ def _read_pdp_course_edvise( except ValueError as e: last_error = e except TypeError as e: - if "school_type" in str(e): + if "school_type" in str(e) or "schema_type" in str(e): last_error = None break raise diff --git a/src/webapp/validation_pdp_read_path_test.py b/src/webapp/validation_pdp_read_path_test.py index 909b271b..5d532e1a 100644 --- a/src/webapp/validation_pdp_read_path_test.py +++ b/src/webapp/validation_pdp_read_path_test.py @@ -366,20 +366,24 @@ def test_read_pdp_course_edvise_all_attempts_fail_raises_hard_validation_error() ) -def test_read_pdp_course_edvise_typeerror_school_type_tries_next_converter() -> None: - """When first converter raises TypeError with school_type, second converter is tried.""" +def test_read_pdp_course_edvise_falls_back_after_custom_converter_fails() -> None: + """When custom converter fails all datetime formats, default PDP converter is used.""" expected = pd.DataFrame({"course_id": ["c1"]}) with patch( "src.webapp.validation.read_raw_pdp_course_data", side_effect=[ - TypeError( - "handling_duplicates() got an unexpected keyword argument 'school_type'" - ), + ValueError("bad datetime"), + ValueError("bad datetime"), + ValueError("bad datetime"), expected, ], - ): - result = _read_pdp_course_edvise("/path.csv") + ) as mock_read: + result = _read_pdp_course_edvise( + "/path.csv", + course_converter_func=lambda df: df, # noqa: ARG005 + ) pd.testing.assert_frame_equal(result, expected) + assert mock_read.call_count == 4 def test_read_pdp_course_edvise_custom_converter_tried_first() -> None: