Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,13 @@ jobs:
PGUSER: postgres
- run: |
set -e
echo "🏗 Running alembic migrations..."
uv run alembic upgrade head

echo "🧪 Running test_models.py..."
uv run python test_models.py

echo "🔎 Discovering and running unit tests..."
uv run python -m unittest discover -s test -p "*.py" -v
env:
DATABASE_URL: postgresql://postgres:postgres@0.0.0.0:5432/jedeschule_test
35 changes: 6 additions & 29 deletions jedeschule/spiders/berlin.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,19 @@
import xml.etree.ElementTree as ET

from jedeschule.items import School
from scrapy import Item

from jedeschule.items import School
from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.wfs_basic_parsers import parse_geojson_features


class BerlinSpider(SchoolSpider):
name = "berlin"
start_urls = [
"https://gdi.berlin.de/services/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&srsname=EPSG:4326&typename=fis:schulen"
"https://gdi.berlin.de/services/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&srsname=EPSG:4326"
"&typename=fis:schulen&outputFormat=application/json"
]

def parse(self, response):
tree = ET.fromstring(response.body)

namespaces = {
"gml": "http://www.opengis.net/gml",
"fis": "http://www.berlin.de/broker",
}
for school in tree.find("gml:featureMembers", namespaces).findall(
"{schulen}schulen", namespaces
):
data_elem = {}
for entry in school:
if entry.tag == "{http://www.opengis.net/gml}boundedBy":
continue
if entry.tag == "{schulen}geom":
# This nested entry contains the coordinates that we would like to expand
lon, lat = entry.findtext(
"gml:Point/gml:pos", namespaces=namespaces
).split(" ")
data_elem["lat"] = lat
data_elem["lon"] = lon
continue
# strip the namespace before returning
data_elem[entry.tag.split("}", 1)[1]] = entry.text
yield data_elem
def parse(self, response, **kwargs):
yield from parse_geojson_features(response)

@staticmethod
def normalize(item: Item) -> School:
Expand Down
29 changes: 5 additions & 24 deletions jedeschule/spiders/brandenburg.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,20 @@
import xml.etree.ElementTree as ET

from scrapy import Item

from jedeschule.items import School
from jedeschule.spiders.school_spider import SchoolSpider
from jedeschule.wfs_basic_parsers import parse_geojson_features


class BrandenburgSpider(SchoolSpider):
name = "brandenburg"

start_urls = [
"https://schullandschaft.brandenburg.de/edugis/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=ms:Schul_Standorte&srsname=epsg:4326"
"https://schullandschaft.brandenburg.de/edugis/wfs/schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=ms:Schul_Standorte"
"&srsname=epsg:4326&outputFormat=application/json"
]

def parse(self, response):
tree = ET.fromstring(response.body)

namespaces = {
"gml": "http://www.opengis.net/gml",
"ms": "http://mapserver.gis.umn.edu/mapserver",
}
for school in tree.findall("gml:featureMember", namespaces):
data_elem = {}
for entry in school[0]:
if entry.tag == "{http://mapserver.gis.umn.edu/mapserver}msGeometry":
# This nested entry contains the coordinates that we would like to expand
lat, lon = entry.findtext(
"gml:Point/gml:pos", namespaces=namespaces
).split(" ")
data_elem["lat"] = lat
data_elem["lon"] = lon
continue
# strip the namespace before returning
data_elem[entry.tag.split("}", 1)[1]] = entry.text
yield data_elem
def parse(self, response, **kwargs):
yield from parse_geojson_features(response)

@staticmethod
def normalize(item: Item) -> School:
Expand Down
17 changes: 17 additions & 0 deletions jedeschule/wfs_basic_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json

from scrapy.http import Response


def parse_geojson_features(response: Response):
    """Yield one flat dict per feature of a GeoJSON ``FeatureCollection``.

    Each yielded dict is the feature's ``properties`` object extended with two
    keys, ``lon`` and ``lat``, taken from the first two entries of
    ``geometry.coordinates`` (GeoJSON positions are ordered longitude,
    latitude).

    GeoJSON permits ``"geometry": null`` and ``"properties": null``. Such
    features are still yielded; ``lon`` and ``lat`` are ``None`` when no
    coordinate pair is available, so a single unlocated feature does not
    abort parsing of the whole response.

    Args:
        response: Response whose ``text`` is a GeoJSON document.

    Yields:
        dict: The feature's properties plus ``lon`` and ``lat``.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    geojson = json.loads(response.text)

    # ``or`` instead of a ``.get`` default: a key may be present but null,
    # in which case ``.get(key, default)`` would hand back ``None``.
    for feature in geojson.get("features") or []:
        properties = feature.get("properties") or {}
        geometry = feature.get("geometry") or {}
        coords = geometry.get("coordinates") or []

        # NOTE(review): assumes Point geometries ([lon, lat, ...]); other
        # geometry types would carry nested lists here - confirm upstream.
        has_pair = len(coords) >= 2
        properties["lon"] = coords[0] if has_pair else None
        properties["lat"] = coords[1] if has_pair else None

        yield properties
Empty file added test/__init__.py
Empty file.
89 changes: 89 additions & 0 deletions test/test_berlin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import unittest
from scrapy.http import TextResponse
from jedeschule.spiders.berlin import BerlinSpider


class TestBerlinSpider(unittest.TestCase):
    """Exercises ``BerlinSpider.parse`` against a canned GeoJSON response."""

    def test_parse(self):
        """A one-feature collection yields one item with lon/lat and its properties."""
        payload = """
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"id": "schulen.01A04",
"geometry": {
"type": "Point",
"coordinates": [13.33391576, 52.52672359]
},
"geometry_name": "geom",
"properties": {
"bsn": "01A04",
"schulname": "Berlin-Kolleg",
"schulart": "Kolleg",
"traeger": "öffentlich",
"schultyp": "Andere Schule",
"bezirk": "Mitte",
"ortsteil": "Moabit",
"plz": "10551",
"strasse": "Turmstraße",
"hausnr": "75",
"telefon": "+49 30 901838210",
"fax": "+49 30 901838222",
"email": "sekretariat@berlin-kolleg.de",
"internet": "https://www.berlin-kolleg.de",
"schuljahr": "2024/25"
},
"bbox": [
13.33391576,
52.52672359,
13.33391576,
52.52672359
]
}
],
"totalFeatures": 925,
"numberMatched": 925,
"numberReturned": 1,
"timeStamp": "2025-06-13T14:59:35.045Z",
"crs": {
"type": "name",
"properties": {
"name": "urn:ogc:def:crs:EPSG::4326"
}
},
"bbox": [
13.33391576,
52.52672359,
13.33391576,
52.52672359
]
}
"""

        berlin = BerlinSpider()
        fake_response = TextResponse(
            url="http://test_webserver.com",
            body=payload.encode("utf-8"),
            encoding="utf-8",
        )

        parsed = list(berlin.parse(fake_response))
        self.assertEqual(len(parsed), 1)

        item = parsed[0]

        # Coordinates are floats, so compare them approximately.
        self.assertAlmostEqual(item["lon"], 13.33391576)
        self.assertAlmostEqual(item["lat"], 52.52672359)

        # Plain string properties must be passed through unchanged.
        expected_fields = {
            "bsn": "01A04",
            "schulname": "Berlin-Kolleg",
            "plz": "10551",
            "strasse": "Turmstraße",
            "hausnr": "75",
            "telefon": "+49 30 901838210",
            "fax": "+49 30 901838222",
            "email": "sekretariat@berlin-kolleg.de",
            "internet": "https://www.berlin-kolleg.de",
        }
        for key, value in expected_fields.items():
            self.assertEqual(item[key], value)


if __name__ == "__main__":
unittest.main()
79 changes: 79 additions & 0 deletions test/test_brandenburg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import unittest
from scrapy.http import TextResponse
import json

from jedeschule.spiders.brandenburg import BrandenburgSpider


class TestBrandenburgSpider(unittest.TestCase):
    """Exercises ``BrandenburgSpider.parse`` against a canned GeoJSON response."""

    def test_parse(self):
        """A one-feature collection yields one item with lat/lon and its properties."""
        school_properties = {
            "schul_nr": "100020",
            "schulname": "Grundschule Forst Mitte",
            "strasse_hausnr": "Max-Fritz-Hammer-Straße 15",
            "plz": "03149",
            "ort": "Forst (Lausitz)",
            "telefonnummer": "(03562) 7163",
            "faxnummer": "(03562) 691288",
            "dienst_email": "s100020@schulen.brandenburg.de",
            "homepage": "http://www.grundschule-forst-mitte.de",
            "schulamtname": "Staatliches Schulamt Cottbus",
            "kreis": "Spree-Neiße",
            "schulform_kurzbez": "G",
            "schulform": "Grundschule",
            "traeger": "Gemeinde",
            "schultraeger_grp": "o",
            "schueler": "288 (Stand: 2022)",
            "besonderheiten_sl": "(763),(561),(132),(201)",
            "besonderheiten": [
                "Einstiegsphase Startchancen",
                "Schule mit Nutzung Schul-Cloud Brandenburg",
                "verlässliche Halbtagsschule und Hort",
                "FLEX - Optimierung des Schulanfangs",
            ],
            "studienseminar": "2",
            "fremdsprachen": ["Englisch"],
            "fremdsprachen_sl": "(EN)",
            "fremdsprachen_timestmp": "(Schuljahr: 2020/2021)",
        }
        feature_collection = {
            "type": "FeatureCollection",
            "name": "Schul_Standorte",
            "features": [
                {
                    "type": "Feature",
                    "properties": school_properties,
                    "geometry": {
                        "type": "Point",
                        "coordinates": [14.651148207215728, 51.74023651973522],
                    },
                }
            ],
        }
        json_text = json.dumps(feature_collection)

        brandenburg = BrandenburgSpider()
        fake_response = TextResponse(
            url="http://test_webserver.com",
            body=json_text.encode("utf-8"),
            encoding="utf-8",
        )

        parsed = list(brandenburg.parse(fake_response))

        self.assertEqual(len(parsed), 1)
        item = parsed[0]

        # Coordinates are floats, so compare them approximately.
        self.assertAlmostEqual(item["lat"], 51.74023651973522)
        self.assertAlmostEqual(item["lon"], 14.651148207215728)

        # Plain string properties must be passed through unchanged.
        expected_fields = {
            "schul_nr": "100020",
            "schulname": "Grundschule Forst Mitte",
            "plz": "03149",
            "ort": "Forst (Lausitz)",
            "dienst_email": "s100020@schulen.brandenburg.de",
            "schulform": "Grundschule",
            "traeger": "Gemeinde",
        }
        for key, value in expected_fields.items():
            self.assertEqual(item[key], value)


if __name__ == '__main__':
unittest.main()
7 changes: 4 additions & 3 deletions test_changes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

set -euxo pipefail

if [ $CI ]
then
HEAD_REF=${GITHUB_REF}
git fetch origin main

if [ "${CI:-}" = "true" ]; then
HEAD_REF="${GITHUB_SHA}"
else
HEAD_REF="HEAD"
fi
Expand Down