NOTE(review): line breaks restored from a whitespace-collapsed copy of this
patch. Indentation and blank context lines were inferred from the hunk line
counts; if a hunk is rejected, retry with `git apply --ignore-whitespace`.
The `index` blob ids for app/schemas.py, app/state_key.py and test/test_api.py
predate the review edits made to their added lines.

diff --git a/app/main.py b/app/main.py
index c989c2a..84650d9 100644
--- a/app/main.py
+++ b/app/main.py
@@ -126,6 +126,8 @@ def custom_openapi():
                     "The schools are scraped as part of the [jedeschule.de](https://jedeschule.de) project."
                     "You can find the source code for the [API](https://github.com/codeforberlin/jedeschule-api) and "
                     "the [scrapers](https://github.com/datenschule/jedeschule-scraper) on Github. \n\n"
+                    "Each school may include `state_key` when the scraper has stored it (same "
+                    "Bundesland codes as the `state` filter).\n\n"
                     "For more information about the available endpoints, check the documentation below.",
         routes=app.routes,
     )
diff --git a/app/models.py b/app/models.py
index 9c0a519..a08daea 100644
--- a/app/models.py
+++ b/app/models.py
@@ -8,6 +8,8 @@ class School(Base):
     __tablename__ = 'schools'
 
     id = Column(String, primary_key=True)
+    # ISO 3166-2:DE code without DE- prefix; set by jedeschule-scraper (Land spider `state_key`).
+    state_key = Column(String, nullable=True)
     name = Column(String)
     address = Column(String)
     address2 = Column(String)
diff --git a/app/schemas.py b/app/schemas.py
index 0953d94..cad210e 100644
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -5,9 +5,12 @@
 from typing import Optional, List
 
 from geoalchemy2.shape import to_shape
-from pydantic import ConfigDict, BaseModel
+from pydantic import BaseModel, ConfigDict, Field
+
+from sqlalchemy.inspection import inspect as sa_inspect
 
 from app import models
+from app.state_key import parse_state_key_column
 
 
 class State(Enum):
@@ -31,6 +34,16 @@ class State(Enum):
 
 class School(BaseModel):
     id: str
+    state_key: Optional[State] = Field(
+        default=None,
+        description=(
+            "ISO 3166-2:DE subdivision code without the DE- prefix (same vocabulary as the `state` "
+            "query filter). Stored in the database from the Land scraper that produced the row "
+            "(same code used when composing `id`); identifies which JedeSchule feed produced the row "
+            "(not coordinates). Aligns with pipeline-declared `state_key` for split-by-Land jobs. "
+            "Omitted when unset or not a known code."
+        ),
+    )
     name: str
     address: str
     address2: Optional[str] = None
@@ -52,12 +65,17 @@ class School(BaseModel):
 
     @staticmethod
     def from_db(db_entry: models.School) -> School:
-        if not db_entry.location:
-            return School.model_validate(db_entry)
-        shape = to_shape(db_entry.location)
-        school = School.model_validate(db_entry)
-        school.latitude = shape.y
-        school.longitude = shape.x
+        # Validate from a plain column dict so the stored state_key text can be
+        # normalised first: unknown codes become None instead of failing validation.
+        insp = sa_inspect(db_entry)
+        cols = {c.key: getattr(db_entry, c.key) for c in insp.mapper.column_attrs}
+        cols.pop("location", None)
+        cols["state_key"] = parse_state_key_column(cols.get("state_key"))
+        school = School.model_validate(cols)
+        if db_entry.location:
+            shape = to_shape(db_entry.location)
+            school.latitude = shape.y
+            school.longitude = shape.x
         return school
 
 
diff --git a/app/state_key.py b/app/state_key.py
new file mode 100644
index 0000000..5ebb152
--- /dev/null
+++ b/app/state_key.py
@@ -0,0 +1,29 @@
+"""Validate `schools.state_key` from the database (set by jedeschule-scraper per Land)."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+    from app.schemas import State
+
+
+def parse_state_key_column(raw: Optional[str]) -> Optional[State]:
+    """Map stored ``state_key`` text to :class:`State`, or ``None`` if missing/invalid.
+
+    Surrounding whitespace is ignored. Anything that is not exactly two
+    characters after stripping, or is not a ``State`` value, yields ``None``.
+    """
+    # Imported here rather than at module level: app.schemas imports this
+    # module while it is being loaded, so a top-level import would be circular.
+    from app.schemas import State as StateEnum
+
+    if raw is None:
+        return None
+    s = raw.strip()
+    if len(s) != 2:
+        return None
+    try:
+        return StateEnum(s)
+    except ValueError:
+        return None
diff --git a/test/factory.py b/test/factory.py
index 895b869..3bc7d49 100644
--- a/test/factory.py
+++ b/test/factory.py
@@ -16,6 +16,8 @@ def _get_state(n):
         return states[n % len(states)]
 
     id = factory.Sequence(lambda n: "{}-{}".format(SchoolFactory._get_state(n), n))
+    # Same Land code the jedeschule-scraper sets on each spider (matches id prefix).
+    state_key = factory.Sequence(lambda n: SchoolFactory._get_state(n))
     name = factory.Sequence(lambda n: 'School %d' % n)
     address = factory.Faker('address')
 
@@ -23,6 +25,7 @@ def _get_state(n):
 def get_full_school():
     return School(
         id="NW-112586",
+        state_key="NW",
         name="Städt. Gem. Grundschule - Primarstufe -",
         address="Pfälzer Str. 30-34",
         city="Köln",
diff --git a/test/test_api.py b/test/test_api.py
index a34376f..54cec91 100644
--- a/test/test_api.py
+++ b/test/test_api.py
@@ -10,6 +10,8 @@
 from app.main import app, get_db
 from app.database import Base
 from app.models import School
+from app.schemas import State
+from app.state_key import parse_state_key_column
 from test.factory import SchoolFactory, get_full_school
 
 engine = create_engine(os.environ.get("DATABASE_URL_TEST"))
@@ -43,6 +45,14 @@ def client() -> Generator:
         yield c
 
 
+def test_parse_state_key_column():
+    assert parse_state_key_column("NW") == State.NW
+    assert parse_state_key_column(" NW ") == State.NW
+    assert parse_state_key_column("XX") is None
+    assert parse_state_key_column(None) is None
+    assert parse_state_key_column("") is None
+
+
 class TestStats:
     def test_stats(self, client, db):
         ni_schools = [School(id=f"NI-{i}", update_timestamp=datetime(2025, 1, 1)) for i in range(10)]
@@ -176,6 +186,7 @@ def test_schools_json_default_without_raw(self, client, db):
         assert response.json() == [
             {
                 "id": "NW-112586",
+                "state_key": "NW",
                 "name": "Städt. Gem. Grundschule - Primarstufe -",
                 "address": "Pfälzer Str. 30-34",
                 "city": "Köln",
@@ -201,6 +212,7 @@ def test_schools_json_with_raw(self, client, db):
         assert response.json() == [
             {
                 "id": "NW-112586",
+                "state_key": "NW",
                 "name": "Städt. Gem. Grundschule - Primarstufe -",
                 "address": "Pfälzer Str. 30-34",
                 "city": "Köln",
@@ -244,6 +256,7 @@ def test_schools_json_exposes_lat_and_lon(self, client, db):
         # Arrange
         school = SchoolFactory(
             id=f"NW-100010",
+            state_key="NW",
             location="SRID=4326;POINT(6.897017373118707 50.94217152830834)",
             name="Gymnasium Claudia Agrippina Privat schule als priv.Ersatzsch. d. Sek.I u.II im Aufbau d. CAPS Privatschu gGmbH",
             address="Stolberger Str. 112",
@@ -258,6 +271,7 @@ def test_schools_json_exposes_lat_and_lon(self, client, db):
         # Assert
         assert response.json() == {
             "id": "NW-100010",
+            "state_key": "NW",
             "name": "Gymnasium Claudia Agrippina Privat schule als priv.Ersatzsch. d. Sek.I u.II im Aufbau d. CAPS Privatschu gGmbH",
             "address": "Stolberger Str. 112",
             "latitude": 50.94217152830834,
@@ -380,3 +394,24 @@ def test_get_single_no_result(self, client, db):
         # Assert
         assert response.status_code == 404
 
+    def test_school_unknown_state_key_is_omitted(self, client, db):
+        # A stored code outside the State enum must not leak into the response.
+        db.add(SchoolFactory(id="XX-12345", name="Unknown prefix", state_key="XX"))
+        db.commit()
+
+        response = client.get("/schools/XX-12345")
+
+        assert response.status_code == 200
+        body = response.json()
+        assert body["id"] == "XX-12345"
+        assert "state_key" not in body
+
+    def test_school_null_state_key_is_omitted(self, client, db):
+        db.add(SchoolFactory(id="nohyphen", name="No dash", state_key=None))
+        db.commit()
+
+        response = client.get("/schools/nohyphen")
+
+        assert response.status_code == 200
+        assert "state_key" not in response.json()
+