-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathgtfs_stops_to_geojson.py
More file actions
125 lines (103 loc) · 4 KB
/
gtfs_stops_to_geojson.py
File metadata and controls
125 lines (103 loc) · 4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
import sys
import logging
from collections import defaultdict
from csv_cache import CsvCache, ROUTES_FILE, TRIPS_FILE, STOP_TIMES_FILE, STOPS_FILE
from gtfs import stop_txt_is_lat_log_required
from shared.helpers.runtime_metrics import track_metrics
from shared.helpers.transform import get_safe_float
logger = logging.getLogger(__name__)
def create_routes_map(routes_data):
    """Builds a route_id -> route-row lookup from routes.txt rows.

    Rows without a truthy route_id are ignored; on duplicate route_ids
    the last row wins.
    """
    return {
        row["route_id"]: row
        for row in routes_data
        if row.get("route_id")
    }
def build_stop_to_routes(stop_times_data, trips_data):
    """Maps each stop_id to the set of route_ids whose trips serve it."""
    # trip_id -> route_id, keeping only trips where both ids are present.
    trip_to_route = {
        trip["trip_id"]: trip["route_id"]
        for trip in trips_data
        if trip.get("trip_id") and trip.get("route_id")
    }
    # Accumulate the routes observed at every stop.
    stop_to_routes = defaultdict(set)
    for stop_time in stop_times_data:
        route_id = trip_to_route.get(stop_time.get("trip_id"))
        stop_id = stop_time.get("stop_id")
        if route_id and stop_id:
            stop_to_routes[stop_id].add(route_id)
    return stop_to_routes
@track_metrics(metrics=("time", "memory", "cpu"))
def convert_stops_to_geojson(csv_cache: CsvCache, output_file):
    """Converts GTFS stops data to a GeoJSON FeatureCollection file.

    Args:
        csv_cache: CsvCache providing the parsed GTFS files
            (routes, trips, stop_times, stops).
        output_file: Path of the GeoJSON file to write.

    Stops without a stop_id or without valid numeric coordinates are
    skipped; a warning is logged only when coordinates are required for
    that stop (per stop_txt_is_lat_log_required).
    """
    routes_map = create_routes_map(csv_cache.get_file(ROUTES_FILE))
    stop_to_routes = build_stop_to_routes(
        csv_cache.get_file(STOP_TIMES_FILE), csv_cache.get_file(TRIPS_FILE)
    )
    features = []
    for row in csv_cache.get_file(STOPS_FILE):
        stop_id = row.get("stop_id")
        if not stop_id:
            continue
        # Parse coordinates exactly once; get_safe_float yields None for
        # missing, blank, or non-numeric values, so no second float()
        # round-trip is needed.
        stop_lat = get_safe_float(row, "stop_lat")
        stop_lon = get_safe_float(row, "stop_lon")
        if stop_lat is None or stop_lon is None:
            if stop_txt_is_lat_log_required(row):
                # Lazy %-style args so formatting is skipped when the
                # level is disabled; no braces around the id.
                logger.warning(
                    "Missing coordinates for stop_id %s, skipping.", stop_id
                )
            continue
        # Routes serving this stop, sorted for deterministic output.
        route_ids = sorted(stop_to_routes.get(stop_id, []))
        route_colors = [
            routes_map[r].get("route_color", "") for r in route_ids if r in routes_map
        ]
        features.append(
            {
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    # GeoJSON coordinate order is [longitude, latitude].
                    "coordinates": [stop_lon, stop_lat],
                },
                "properties": {
                    "stop_id": stop_id,
                    "stop_code": row.get("stop_code", ""),
                    "stop_name": row.get("stop_name", ""),
                    "stop_desc": row.get("stop_desc", ""),
                    "zone_id": row.get("zone_id", ""),
                    "stop_url": row.get("stop_url", ""),
                    "wheelchair_boarding": row.get("wheelchair_boarding", ""),
                    "location_type": row.get("location_type", ""),
                    "route_ids": route_ids,
                    "route_colors": route_colors,
                },
            }
        )
    geojson = {"type": "FeatureCollection", "features": features}
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(geojson, f, indent=2, ensure_ascii=False)
    logger.info(
        "✅ GeoJSON file saved to %s with %d stops", output_file, len(features)
    )
if __name__ == "__main__":
    # CLI entry point: expects four GTFS input paths plus the output path.
    # NOTE(review): this passes five positional path strings, but
    # convert_stops_to_geojson is declared with only (csv_cache, output_file)
    # — as written the call raises TypeError. The file paths presumably need
    # to be loaded into a CsvCache first; confirm the intended CLI wiring
    # against the CsvCache constructor before relying on this script form.
    if len(sys.argv) != 6:
        # Usage errors arguably belong on stderr (logger.error/print), but
        # behavior is preserved here.
        logger.info(
            "Usage: python script.py stops stop_times trips routes output.geojson"
        )
        sys.exit(1)
    convert_stops_to_geojson(
        sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5]
    )