datakind · vishpillai123 · Mar 23, 2026 · Mar 19, 2026
diff --git a/src/webapp/validation.py b/src/webapp/validation.py
@@ -19,7 +19,7 @@
 import logging
 import tempfile
 from contextlib import contextmanager
-from functools import lru_cache, partial
+from functools import lru_cache
 from typing import (
     Any,
     BinaryIO,
@@ -46,6 +46,19 @@
 # Type for PDP converter functions (DataFrame -> DataFrame); used for cohort/course.
 PDPConverterFunc = Optional[Callable[[pd.DataFrame], pd.DataFrame]]
 
+
+def _default_pdp_course_duplicate_converter(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    PDP course duplicate cleanup for read_raw_pdp_course_data.
+
+    Passes the schema selector as the second *positional* argument so this works
+    with current edvise (``schema_type``) and older builds that used the same slot
+    for ``school_type``. Do not pass bare ``handling_duplicates`` as a converter:
+    read_raw_pdp_course_data calls ``converter_func(df)`` with a single argument.
+    """
+    return handling_duplicates(df, "pdp")
+
+
 # --------------------------------------------------------------------------- #
 # Logging
 # --------------------------------------------------------------------------- #
@@ -867,9 +880,8 @@ def _read_pdp_course_edvise(
 
     Tries each datetime format with each converter. If a custom
     course_converter_func is provided (e.g. from a school), it is tried first;
-    then the default handling_duplicates(..., school_type="pdp"), then
-    handling_duplicates for older edvise. Raises HardValidationError if all
-    attempts fail.
+    then :func:`_default_pdp_course_duplicate_converter` (``handling_duplicates``
+    with PDP settings). Raises HardValidationError if all attempts fail.
 
     Args:
         path: Path to course CSV.
@@ -882,10 +894,7 @@ def _read_pdp_course_edvise(
     Raises:
         HardValidationError: If no (converter, format) pair succeeded.
     """
-    default_converters = (
-        partial(handling_duplicates, school_type="pdp"),
-        handling_duplicates,
-    )
+    default_converters = (_default_pdp_course_duplicate_converter,)
     converters = (
         (course_converter_func,) if course_converter_func is not None else ()
     ) + default_converters
@@ -903,7 +912,7 @@ def _read_pdp_course_edvise(
             except ValueError as e:
                 last_error = e
             except TypeError as e:
-                if "school_type" in str(e):
+                if "school_type" in str(e) or "schema_type" in str(e):
                     last_error = None
                     break
                 raise

diff --git a/src/webapp/validation_pdp_read_path_test.py b/src/webapp/validation_pdp_read_path_test.py
@@ -366,20 +366,24 @@ def test_read_pdp_course_edvise_all_attempts_fail_raises_hard_validation_error()
     )
 
 
-def test_read_pdp_course_edvise_typeerror_school_type_tries_next_converter() -> None:
-    """When first converter raises TypeError with school_type, second converter is tried."""
+def test_read_pdp_course_edvise_falls_back_after_custom_converter_fails() -> None:
+    """When custom converter fails all datetime formats, default PDP converter is used."""
     expected = pd.DataFrame({"course_id": ["c1"]})
     with patch(
         "src.webapp.validation.read_raw_pdp_course_data",
         side_effect=[
-            TypeError(
-                "handling_duplicates() got an unexpected keyword argument 'school_type'"
-            ),
+            ValueError("bad datetime"),
+            ValueError("bad datetime"),
+            ValueError("bad datetime"),
             expected,
         ],
-    ):
-        result = _read_pdp_course_edvise("/path.csv")
+    ) as mock_read:
+        result = _read_pdp_course_edvise(
+            "/path.csv",
+            course_converter_func=lambda df: df,  # noqa: ARG005
+        )
     pd.testing.assert_frame_equal(result, expected)
+    assert mock_read.call_count == 4
 
 
 def test_read_pdp_course_edvise_custom_converter_tried_first() -> None: