|
| 1 | +import io |
| 2 | +import logging |
| 3 | +from typing import Tuple, Optional |
| 4 | + |
| 5 | +import flask |
| 6 | +import pandas as pd |
| 7 | +import requests |
| 8 | +from jsonpath_ng import parse |
| 9 | + |
| 10 | + |
def parse_request_parameters(
    request: flask.Request,
) -> Tuple[pd.DataFrame, str, Optional[str], str, str]:
    """
    Parse the request parameters and load the location data they point to.

    The JSON body must be an object carrying ``stable_id`` plus at least one
    data source: ``stops_url`` together with ``dataset_id``, or
    ``station_information_url``, or ``vehicle_status_url``. The optional
    ``data_type`` field selects the parser and defaults to ``"gtfs"``.

    @:param request: The incoming request; its body is read with ``get_json``.
    @:returns Tuple: A tuple containing the locations DataFrame, the stable ID,
        the dataset ID (``None`` for GBFS), the data type and the source URL.
    @:raises ValueError: If the body is not a JSON object, a required
        parameter is missing, or ``data_type`` is not 'gtfs' or 'gbfs'.
    """
    logging.info("Parsing request parameters.")
    request_json = request.get_json(silent=True)
    logging.info(f"Request JSON: {request_json}")

    # get_json() can return any JSON value (list, string, number, ...). Only an
    # object can carry named parameters, so anything else is rejected here
    # instead of crashing later on `.get`.
    has_required_parameters = (
        isinstance(request_json, dict)
        and "stable_id" in request_json
        and (
            ("stops_url" in request_json and "dataset_id" in request_json)
            or "station_information_url" in request_json
            or "vehicle_status_url" in request_json
        )
    )
    if not has_required_parameters:
        raise ValueError(
            "Missing required parameters: [stops_url, dataset_id | station_information_url | vehicle_status_url], "
            "stable_id."
        )

    data_type = request_json.get("data_type", "gtfs")
    logging.info(f"Data type: {data_type}")
    if data_type == "gtfs":
        df, stable_id, dataset_id, url = parse_request_parameters_gtfs(request_json)
    elif data_type == "gbfs":
        df, stable_id, dataset_id, url = parse_request_parameters_gbfs(request_json)
    else:
        raise ValueError(
            f"Invalid data_type '{data_type}'. Supported types are 'gtfs' and 'gbfs'."
        )
    return df, stable_id, dataset_id, data_type, url
| 47 | + |
| 48 | + |
def parse_request_parameters_gtfs(request_json: dict) -> Tuple[pd.DataFrame, str, Optional[str], str]:
    """
    Parse the request parameters for GTFS data and download the stops file.

    @:param request_json: Request body; must contain 'stops_url', 'stable_id'
        and 'dataset_id'.
    @:returns Tuple: A tuple containing the stops DataFrame, the stable ID,
        the dataset ID and the stops URL.
    @:raises ValueError: If a required parameter is missing, or the stops file
        cannot be downloaded or parsed as CSV.
    """
    if (
        not request_json
        or "stops_url" not in request_json
        or "stable_id" not in request_json
        or "dataset_id" not in request_json
    ):
        raise ValueError(
            "Invalid request: missing 'stops_url', 'dataset_id' or 'stable_id' parameter."
        )

    stable_id = request_json["stable_id"]
    dataset_id = request_json["dataset_id"]
    stops_url = request_json["stops_url"]

    # Read the stops from the URL
    try:
        response = requests.get(stops_url)
        # Fail on HTTP error statuses; otherwise the error page would be
        # handed to the CSV parser as if it were the stops file.
        response.raise_for_status()
        stops_df = pd.read_csv(io.StringIO(response.content.decode("utf-8")))
    except Exception as e:
        # Every download/parse failure is reported as ValueError; the original
        # exception stays attached as the cause for debugging.
        raise ValueError(
            f"Error reading stops from URL {stops_url}: {e}"
        ) from e
    return stops_df, stable_id, dataset_id, stops_url
| 73 | + |
| 74 | + |
def parse_station_information_url(station_information_url) -> pd.DataFrame:
    """
    Download a station information feed and return its stations as a DataFrame.

    @:param station_information_url: URL of the station information JSON feed.
    @:returns DataFrame with the columns 'station_id', 'stop_lat' and
        'stop_lon', one row per station that provides all three values.
    @:raises requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(station_information_url)
    response.raise_for_status()
    data = response.json()

    # Read every station as one record. Querying lat, lon and station_id with
    # three separate expressions and zipping the results pairs values that
    # belong to different stations as soon as one station lacks a field.
    station_expr = parse('data.stations[*]')

    stations_info = []
    skipped = 0
    for match in station_expr.find(data):
        station = match.value
        if not isinstance(station, dict):
            skipped += 1
            continue
        sid = station.get("station_id")
        lat = station.get("lat")
        lon = station.get("lon")
        # Compare against None rather than truthiness: 0 is a valid coordinate.
        if sid is None or lat is None or lon is None:
            skipped += 1
            continue
        stations_info.append({"station_id": sid, "stop_lat": lat, "stop_lon": lon})

    if skipped:
        logging.warning(f"Skipped {skipped} stations without station_id, lat or lon.")

    # Explicit columns keep the frame's schema stable even when it has no rows.
    return pd.DataFrame(stations_info, columns=["station_id", "stop_lat", "stop_lon"])
| 94 | + |
| 95 | + |
def parse_vehicle_status_url(vehicle_status_url) -> pd.DataFrame:
    """
    Download a vehicle status feed and return its vehicles as a DataFrame.

    @:param vehicle_status_url: URL of the vehicle status JSON feed.
    @:returns DataFrame with the columns 'vehicle_id', 'stop_lat' and
        'stop_lon', one row per vehicle that provides all three values.
    @:raises requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(vehicle_status_url)
    response.raise_for_status()
    data = response.json()

    # Read every vehicle as one record. Querying lat, lon and vehicle_id with
    # three separate expressions and zipping the results pairs values that
    # belong to different vehicles as soon as one vehicle lacks a field.
    vehicle_expr = parse('data.vehicles[*]')

    vehicles_info = []
    skipped = 0
    for match in vehicle_expr.find(data):
        vehicle = match.value
        if not isinstance(vehicle, dict):
            skipped += 1
            continue
        vid = vehicle.get("vehicle_id")
        lat = vehicle.get("lat")
        lon = vehicle.get("lon")
        # Compare against None rather than truthiness: 0 is a valid coordinate.
        if vid is None or lat is None or lon is None:
            skipped += 1
            continue
        vehicles_info.append({"vehicle_id": vid, "stop_lat": lat, "stop_lon": lon})

    if skipped:
        logging.warning(f"Skipped {skipped} vehicles without vehicle_id, lat or lon.")

    # Explicit columns keep the frame's schema stable even when it has no rows.
    return pd.DataFrame(vehicles_info, columns=["vehicle_id", "stop_lat", "stop_lon"])
| 116 | + |
| 117 | + |
def parse_request_parameters_gbfs(request_json: dict) -> Tuple[pd.DataFrame, str, Optional[str], str]:
    """
    Parse the request parameters for GBFS data and download the referenced feed.

    The station information feed is used when its URL is given; the vehicle
    status feed is only used as a fallback.

    @:param request_json: Request body; must contain 'stable_id' and a
        non-empty 'station_information_url' or 'vehicle_status_url'.
    @:returns Tuple: A tuple containing the locations DataFrame, the stable ID,
        ``None`` (GBFS requests carry no dataset ID) and the URL that was read.
    @:raises ValueError: If 'stable_id' or both feed URLs are missing or empty.
    """
    params = request_json or {}
    station_information_url = params.get("station_information_url")
    vehicle_status_url = params.get("vehicle_status_url")

    # Validate the values, not just the keys: a key that is present but null or
    # empty would otherwise fall through to a download of `None`.
    if not (station_information_url or vehicle_status_url) or "stable_id" not in params:
        raise ValueError(
            "Invalid request: missing ['station_information_url' | 'vehicle_status_url'] or 'stable_id' "
            "parameter."
        )

    stable_id = params["stable_id"]
    if station_information_url:
        logging.info('Parsing station information URL')
        stops_df = parse_station_information_url(station_information_url)
    else:
        logging.info('Parsing vehicle status URL')
        stops_df = parse_vehicle_status_url(vehicle_status_url)
    return stops_df, stable_id, None, station_information_url or vehicle_status_url
| 140 | + |
0 commit comments