Switch the Berlin and Brandenburg spiders from WFS/GML (XML) to GeoJSON.

Both spiders now request outputFormat=application/json and delegate to the
shared parse_geojson_features() helper. Unit tests for both spiders are added
and run in CI through unittest discovery. The helper tolerates features whose
geometry, coordinates or properties are null/missing instead of aborting the
crawl.

NOTE(review): indentation and blank context lines in this patch were re-flowed
from a whitespace-collapsed copy; confirm they match the target files before
applying. Blob ids on the "index" lines are those of the original patch.

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 24c5c20..7b1d098 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -40,7 +40,13 @@ jobs:
           PGUSER: postgres
       - run: |
           set -e
+          echo "🏗 Running alembic migrations..."
           uv run alembic upgrade head
+
+          echo "🧪 Running test_models.py..."
           uv run python test_models.py
+
+          echo "🔎 Discovering and running unit tests..."
+          uv run python -m unittest discover -s test -p "test_*.py" -v
         env:
           DATABASE_URL: postgresql://postgres:postgres@0.0.0.0:5432/jedeschule_test
diff --git a/jedeschule/spiders/berlin.py b/jedeschule/spiders/berlin.py
index 055cc60..cda1111 100644
--- a/jedeschule/spiders/berlin.py
+++ b/jedeschule/spiders/berlin.py
@@ -1,42 +1,19 @@
-import xml.etree.ElementTree as ET
-
-from jedeschule.items import School
 from scrapy import Item
 
+from jedeschule.items import School
 from jedeschule.spiders.school_spider import SchoolSpider
+from jedeschule.wfs_basic_parsers import parse_geojson_features
 
 
 class BerlinSpider(SchoolSpider):
     name = "berlin"
     start_urls = [
-        "https://gdi.berlin.de/services/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&srsname=EPSG:4326&typename=fis:schulen"
+        "https://gdi.berlin.de/services/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&srsname=EPSG:4326"
+        "&typename=fis:schulen&outputFormat=application/json"
     ]
 
-    def parse(self, response):
-        tree = ET.fromstring(response.body)
-
-        namespaces = {
-            "gml": "http://www.opengis.net/gml",
-            "fis": "http://www.berlin.de/broker",
-        }
-        for school in tree.find("gml:featureMembers", namespaces).findall(
-            "{schulen}schulen", namespaces
-        ):
-            data_elem = {}
-            for entry in school:
-                if entry.tag == "{http://www.opengis.net/gml}boundedBy":
-                    continue
-                if entry.tag == "{schulen}geom":
-                    # This nested entry contains the coordinates that we would like to expand
-                    lon, lat = entry.findtext(
-                        "gml:Point/gml:pos", namespaces=namespaces
-                    ).split(" ")
-                    data_elem["lat"] = lat
-                    data_elem["lon"] = lon
-                    continue
-                # strip the namespace before returning
-                data_elem[entry.tag.split("}", 1)[1]] = entry.text
-            yield data_elem
+    def parse(self, response, **kwargs):
+        yield from parse_geojson_features(response)
 
     @staticmethod
     def normalize(item: Item) -> School:
diff --git a/jedeschule/spiders/brandenburg.py b/jedeschule/spiders/brandenburg.py
index 1e1c67c..90e5b7f 100644
--- a/jedeschule/spiders/brandenburg.py
+++ b/jedeschule/spiders/brandenburg.py
@@ -1,39 +1,20 @@
-import xml.etree.ElementTree as ET
-
 from scrapy import Item
 
 from jedeschule.items import School
 from jedeschule.spiders.school_spider import SchoolSpider
+from jedeschule.wfs_basic_parsers import parse_geojson_features
 
 
 class BrandenburgSpider(SchoolSpider):
     name = "brandenburg"
 
     start_urls = [
-        "https://schullandschaft.brandenburg.de/edugis/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=ms:Schul_Standorte&srsname=epsg:4326"
+        "https://schullandschaft.brandenburg.de/edugis/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=ms:Schul_Standorte"
+        "&srsname=epsg:4326&outputFormat=application/json"
     ]
 
-    def parse(self, response):
-        tree = ET.fromstring(response.body)
-
-        namespaces = {
-            "gml": "http://www.opengis.net/gml",
-            "ms": "http://mapserver.gis.umn.edu/mapserver",
-        }
-        for school in tree.findall("gml:featureMember", namespaces):
-            data_elem = {}
-            for entry in school[0]:
-                if entry.tag == "{http://mapserver.gis.umn.edu/mapserver}msGeometry":
-                    # This nested entry contains the coordinates that we would like to expand
-                    lat, lon = entry.findtext(
-                        "gml:Point/gml:pos", namespaces=namespaces
-                    ).split(" ")
-                    data_elem["lat"] = lat
-                    data_elem["lon"] = lon
-                    continue
-                # strip the namespace before returning
-                data_elem[entry.tag.split("}", 1)[1]] = entry.text
-            yield data_elem
+    def parse(self, response, **kwargs):
+        yield from parse_geojson_features(response)
 
     @staticmethod
     def normalize(item: Item) -> School:
diff --git a/jedeschule/wfs_basic_parsers.py b/jedeschule/wfs_basic_parsers.py
new file mode 100644
index 0000000..9ce3bcf
--- /dev/null
+++ b/jedeschule/wfs_basic_parsers.py
@@ -0,0 +1,27 @@
+import json
+from typing import Iterator
+
+from scrapy.http import TextResponse
+
+
+def parse_geojson_features(response: TextResponse) -> Iterator[dict]:
+    """Yield one flat dict per feature of a GeoJSON FeatureCollection.
+
+    Each dict is the feature's ``properties`` extended with ``lon`` and
+    ``lat``, taken from the first two entries of ``geometry.coordinates``.
+    Features without usable point coordinates are still yielded, with
+    ``lon`` and ``lat`` set to ``None``.
+    """
+    geojson = json.loads(response.text)
+
+    for feature in geojson.get("features", []):
+        # GeoJSON allows ``null`` for both members; ``.get(key, {})`` would
+        # return that ``None`` as-is, so fall back with ``or`` instead.
+        properties = feature.get("properties") or {}
+        coords = (feature.get("geometry") or {}).get("coordinates") or []
+
+        lon, lat = coords[:2] if len(coords) >= 2 else (None, None)
+        properties["lon"] = lon
+        properties["lat"] = lat
+
+        yield properties
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/test_berlin.py b/test/test_berlin.py
new file mode 100644
index 0000000..fae2a20
--- /dev/null
+++ b/test/test_berlin.py
@@ -0,0 +1,89 @@
+import unittest
+from scrapy.http import TextResponse
+from jedeschule.spiders.berlin import BerlinSpider
+
+
+class TestBerlinSpider(unittest.TestCase):
+    def test_parse(self):
+        json_response = """
+        {
+          "type": "FeatureCollection",
+          "features": [
+            {
+              "type": "Feature",
+              "id": "schulen.01A04",
+              "geometry": {
+                "type": "Point",
+                "coordinates": [13.33391576, 52.52672359]
+              },
+              "geometry_name": "geom",
+              "properties": {
+                "bsn": "01A04",
+                "schulname": "Berlin-Kolleg",
+                "schulart": "Kolleg",
+                "traeger": "öffentlich",
+                "schultyp": "Andere Schule",
+                "bezirk": "Mitte",
+                "ortsteil": "Moabit",
+                "plz": "10551",
+                "strasse": "Turmstraße",
+                "hausnr": "75",
+                "telefon": "+49 30 901838210",
+                "fax": "+49 30 901838222",
+                "email": "sekretariat@berlin-kolleg.de",
+                "internet": "https://www.berlin-kolleg.de",
+                "schuljahr": "2024/25"
+              },
+              "bbox": [
+                13.33391576,
+                52.52672359,
+                13.33391576,
+                52.52672359
+              ]
+            }
+          ],
+          "totalFeatures": 925,
+          "numberMatched": 925,
+          "numberReturned": 1,
+          "timeStamp": "2025-06-13T14:59:35.045Z",
+          "crs": {
+            "type": "name",
+            "properties": {
+              "name": "urn:ogc:def:crs:EPSG::4326"
+            }
+          },
+          "bbox": [
+            13.33391576,
+            52.52672359,
+            13.33391576,
+            52.52672359
+          ]
+        }
+        """
+
+        spider = BerlinSpider()
+        response = TextResponse(
+            url="http://test_webserver.com",
+            body=json_response.encode("utf-8"),
+            encoding="utf-8",
+        )
+
+        schools = list(spider.parse(response))
+        self.assertEqual(len(schools), 1)
+
+        school = schools[0]
+        self.assertAlmostEqual(school["lon"], 13.33391576)
+        self.assertAlmostEqual(school["lat"], 52.52672359)
+        self.assertEqual(school["bsn"], "01A04")
+        self.assertEqual(school["schulname"], "Berlin-Kolleg")
+        self.assertEqual(school["plz"], "10551")
+        self.assertEqual(school["strasse"], "Turmstraße")
+        self.assertEqual(school["hausnr"], "75")
+        self.assertEqual(school["telefon"], "+49 30 901838210")
+        self.assertEqual(school["fax"], "+49 30 901838222")
+        self.assertEqual(school["email"], "sekretariat@berlin-kolleg.de")
+        self.assertEqual(school["internet"], "https://www.berlin-kolleg.de")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/test_brandenburg.py b/test/test_brandenburg.py
new file mode 100644
index 0000000..91c8538
--- /dev/null
+++ b/test/test_brandenburg.py
@@ -0,0 +1,79 @@
+import unittest
+from scrapy.http import TextResponse
+import json
+
+from jedeschule.spiders.brandenburg import BrandenburgSpider
+
+
+class TestBrandenburgSpider(unittest.TestCase):
+
+    def test_parse(self):
+        json_text = json.dumps({
+            "type": "FeatureCollection",
+            "name": "Schul_Standorte",
+            "features": [
+                {
+                    "type": "Feature",
+                    "properties": {
+                        "schul_nr": "100020",
+                        "schulname": "Grundschule Forst Mitte",
+                        "strasse_hausnr": "Max-Fritz-Hammer-Straße 15",
+                        "plz": "03149",
+                        "ort": "Forst (Lausitz)",
+                        "telefonnummer": "(03562) 7163",
+                        "faxnummer": "(03562) 691288",
+                        "dienst_email": "s100020@schulen.brandenburg.de",
+                        "homepage": "http://www.grundschule-forst-mitte.de",
+                        "schulamtname": "Staatliches Schulamt Cottbus",
+                        "kreis": "Spree-Neiße",
+                        "schulform_kurzbez": "G",
+                        "schulform": "Grundschule",
+                        "traeger": "Gemeinde",
+                        "schultraeger_grp": "o",
+                        "schueler": "288 (Stand: 2022)",
+                        "besonderheiten_sl": "(763),(561),(132),(201)",
+                        "besonderheiten": [
+                            "Einstiegsphase Startchancen",
+                            "Schule mit Nutzung Schul-Cloud Brandenburg",
+                            "verlässliche Halbtagsschule und Hort",
+                            "FLEX - Optimierung des Schulanfangs"
+                        ],
+                        "studienseminar": "2",
+                        "fremdsprachen": ["Englisch"],
+                        "fremdsprachen_sl": "(EN)",
+                        "fremdsprachen_timestmp": "(Schuljahr: 2020/2021)"
+                    },
+                    "geometry": {
+                        "type": "Point",
+                        "coordinates": [14.651148207215728, 51.74023651973522]
+                    }
+                }
+            ]
+        })
+
+        spider = BrandenburgSpider()
+        response = TextResponse(
+            url="http://test_webserver.com",
+            body=json_text.encode("utf-8"),
+            encoding="utf-8",
+        )
+
+        results = list(spider.parse(response))
+
+        self.assertEqual(len(results), 1)
+        school = results[0]
+
+        self.assertAlmostEqual(school["lat"], 51.74023651973522)
+        self.assertAlmostEqual(school["lon"], 14.651148207215728)
+
+        self.assertEqual(school["schul_nr"], "100020")
+        self.assertEqual(school["schulname"], "Grundschule Forst Mitte")
+        self.assertEqual(school["plz"], "03149")
+        self.assertEqual(school["ort"], "Forst (Lausitz)")
+        self.assertEqual(school["dienst_email"], "s100020@schulen.brandenburg.de")
+        self.assertEqual(school["schulform"], "Grundschule")
+        self.assertEqual(school["traeger"], "Gemeinde")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test_changes.sh b/test_changes.sh
index 235af33..229fc34 100755
--- a/test_changes.sh
+++ b/test_changes.sh
@@ -2,9 +2,10 @@
 
 set -euxo pipefail
 
-if [ $CI ]
-then
-    HEAD_REF=${GITHUB_REF}
+git fetch origin main
+
+if [ "${CI:-}" = "true" ]; then
+    HEAD_REF="${GITHUB_SHA}"
 else
     HEAD_REF="HEAD"
 fi