Skip to content

Commit 3208f39

Browse files
committed
Reworked hamburg spider + test
1 parent fd46106 commit 3208f39

3 files changed

Lines changed: 117 additions & 27 deletions

File tree

jedeschule/spiders/hamburg.py

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,30 @@
1-
import xml.etree.ElementTree as ET
2-
31
from scrapy import Item
42

5-
from jedeschule.spiders.school_spider import SchoolSpider
63
from jedeschule.items import School
4+
from jedeschule.spiders.school_spider import SchoolSpider
5+
from jedeschule.wfs_basic_parsers import parse_geojson_features
76

87

98
class HamburgSpider(SchoolSpider):
109
name = "hamburg"
1110

1211
start_urls = [
13-
"https://geodienste.hamburg.de/HH_WFS_Schulen?SERVICE=WFS&VERSION=1.1.0&REQUEST=GetFeature&typename=de.hh.up:nicht_staatliche_schulen,de.hh.up:staatliche_schulen&srsname=EPSG:4326"
12+
"https://api.hamburg.de/datasets/v1/schulen/collections/staatliche_schulen/items"
13+
"?crs=http://www.opengis.net/def/crs/EPSG/0/4326"
14+
"&limit=1000",
15+
"https://api.hamburg.de/datasets/v1/schulen/collections/nicht_staatliche_schulen/items"
16+
"?crs=http://www.opengis.net/def/crs/EPSG/0/4326"
17+
"&limit=1000"
1418
]
1519

16-
17-
def parse(self, response):
18-
namespaces = {
19-
"gml": "http://www.opengis.net/gml",
20+
custom_settings = {
21+
"DEFAULT_REQUEST_HEADERS": {
22+
"Accept": "application/geo+json, application/json, */*"
2023
}
24+
}
2125

22-
elem = ET.fromstring(response.body)
23-
24-
for member in elem:
25-
data_elem = {}
26-
for attr in member[0]:
27-
if attr.tag == "{https://registry.gdi-de.org/id/de.hh.up}the_geom":
28-
# This nested entry contains the coordinates that we would like to expand
29-
lon, lat = attr.findtext(
30-
"gml:Point/gml:pos", namespaces=namespaces
31-
).split(" ")
32-
data_elem["lat"] = lat
33-
data_elem["lon"] = lon
34-
continue
35-
# strip the namespace before returning
36-
data_elem[attr.tag.split("}", 1)[1]] = attr.text
37-
yield data_elem
26+
def parse(self, response, **kwargs):
    """Yield one raw record per GeoJSON feature found in ``response``.

    Delegates to ``parse_geojson_features`` with ``invert=True``, which
    stores the first coordinate of each feature as ``lat`` and the second
    as ``lon``. Extra keyword arguments are accepted but not used.
    """
    for record in parse_geojson_features(response, invert=True):
        yield record
3828

3929
@staticmethod
4030
def normalize(item: Item) -> School:

jedeschule/wfs_basic_parsers.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,19 @@
33
from scrapy.http import Response
44

55

6-
def parse_geojson_features(response: Response, invert: bool = False):
    """Yield the ``properties`` mapping of every feature in a GeoJSON response.

    Each yielded dict is the feature's own ``properties`` object (mutated in
    place) with two extra keys, ``lon`` and ``lat``, taken from the first two
    entries of the feature's ``geometry.coordinates``.

    Args:
        response: Object whose ``text`` attribute holds a GeoJSON
            FeatureCollection document.
        invert: When false (the default) the first coordinate is stored as
            ``lon`` and the second as ``lat``. When true the first
            coordinate is stored as ``lat`` and the second as ``lon``, for
            sources that list latitude first.

    Yields:
        One dict per feature. ``lon`` and ``lat`` are ``None`` when the
        feature has no geometry or fewer than two coordinates, so a single
        incomplete feature no longer aborts the whole run.
    """
    geojson = json.loads(response.text)

    for feature in geojson.get("features") or []:
        # A key that is present with a JSON null makes dict.get(key, default)
        # return None rather than the default, hence the explicit `or`.
        properties = feature.get("properties") or {}
        coords = (feature.get("geometry") or {}).get("coordinates") or []

        # Only the first two entries matter; a third (elevation) is ignored.
        first, second = coords[:2] if len(coords) >= 2 else (None, None)
        if invert:
            first, second = second, first

        properties["lon"] = first
        properties["lat"] = second

        yield properties

test/test_hamburg.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import unittest
2+
3+
from scrapy.http import TextResponse
4+
5+
from jedeschule.spiders.hamburg import HamburgSpider
6+
7+
8+
class TestHamburgSpider(unittest.TestCase):
    """Exercises ``HamburgSpider.parse`` against a canned GeoJSON response."""

    def test_parse(self):
        """A one-feature collection yields one record with coordinates and contact fields."""
        # Sample FeatureCollection with a single feature; its coordinates
        # are listed latitude first.
        json_response = """
        {
          "type": "FeatureCollection",
          "numberReturned": 1,
          "numberMatched": 453,
          "timeStamp": "2025-07-13T16:55:57Z",
          "crs": "http://www.opengis.net/def/crs/EPSG/0/4326",
          "features": [
            {
              "type": "Feature",
              "geometry": {
                "type": "Point",
                "coordinates": [
                  53.601522503676144,
                  10.047106063058099
                ]
              },
              "properties": {
                "abschluss": "Allgemeine Hochschulreife|erster allgemeinbildender Schulabschluss|Erweiterter erster allgemeinbildender Schulabschluss|mittlerer Schulabschluss|schulischer Teil der Fachhochschulreife",
                "adresse_ort": "22307 Hamburg",
                "adresse_strasse_hausnr": "Benzenbergweg 2",
                "ansprechp_klasse_5": "Nadine Kalsow",
                "ansprechp_buero": "Janka Gierck",
                "anzahl_schueler": 996,
                "anzahl_schueler_gesamt": "1261 an 2 Standorten",
                "bezirk": "Hamburg-Nord",
                "fax": "+49 40 428 88 15 22",
                "fremdsprache": "Englisch|Französisch|Spanisch|Spanisch",
                "fremdsprache_mit_klasse": "Englisch ab Klasse 5|Französisch ab Klasse 7|Spanisch ab Klasse 11|Spanisch ab Klasse 7",
                "ganztagsform": "GTS teilweise gebunden",
                "is_rebbz": "true",
                "kapitelbezeichnung": "Stadtteilschulen",
                "lgv_standortk_erwachsenenbildung": "No",
                "name_schulleiter": "Bianca Thies",
                "name_stellv_schulleiter": "Christian Pape",
                "name_oberstufenkoordinator": "Frau Scheuermann-Andersen *49 40 428 88 15-61",
                "name_verwaltungsleitung": "Grit Sobottka",
                "rebbz_homepage": "http://rebbz-winterhude.hamburg.de/",
                "rechtsform": "staatlich",
                "schueleranzahl_schuljahr": "2024",
                "schul_email": "stadtteilschule-helmuth-huebener@bsb.hamburg.de",
                "schul_homepage": "https://helmuthhuebener.de",
                "schul_id": "5043-0",
                "schul_telefonnr": "+49 40 428 88 15 0",
                "schulaufsicht": "Christine Zopff",
                "schulform": "Stadtteilschule",
                "schulinspektion_link": "https://www.hamburg.de/politik-und-verwaltung/behoerden/schulbehoerde/themen/schulaufsicht/inspektionsberichte/weiterfuehrende-schulen-hamburg-nord",
                "schulname": "Stadtteilschule Helmuth Hübener",
                "schultyp": "Hauptstandort",
                "sozialindex": "Stufe 2",
                "stadtteil": "Barmbek-Nord",
                "standort_id": "431",
                "zuegigkeit_kl_5": "7",
                "zustaendiges_rebbz": "ReBBZ Winterhude"
              },
              "id": 874864
            }
          ],
          "links": []
        }
        """

        payload = json_response.encode("utf-8")
        response = TextResponse(
            url="http://test_webserver.com",
            body=payload,
            encoding="utf-8",
        )
        spider = HamburgSpider()

        schools = list(spider.parse(response))
        self.assertEqual(len(schools), 1)
        school = schools[0]

        # The spider must map the latitude-first pair onto the right keys.
        self.assertAlmostEqual(school["lon"], 10.047106063058099)
        self.assertAlmostEqual(school["lat"], 53.601522503676144)

        # Plain properties must pass through unchanged.
        expected_fields = {
            "schul_id": "5043-0",
            "schulname": "Stadtteilschule Helmuth Hübener",
            "adresse_ort": "22307 Hamburg",
            "adresse_strasse_hausnr": "Benzenbergweg 2",
            "schul_telefonnr": "+49 40 428 88 15 0",
            "fax": "+49 40 428 88 15 22",
            "schul_email": "stadtteilschule-helmuth-huebener@bsb.hamburg.de",
            "schul_homepage": "https://helmuthhuebener.de",
        }
        for key, expected in expected_fields.items():
            self.assertEqual(school[key], expected)
93+
94+
95+
if __name__ == "__main__":
96+
unittest.main()

0 commit comments

Comments
 (0)