|
| 1 | +-- SpatialBench queries (DuckDB dialect), from sedona-spatialbench DuckDBSpatialBenchBenchmark |
| 2 | +-- (spatialbench-queries/print_queries.py). Query logic is unchanged, only reformatted for readability |
| 3 | +-- and numbered Q1..Q12 (canonical order). The harness splits the file on semicolons, so a comment |
| 4 | +-- must never contain one. |
| 5 | + |
| 6 | +-- Q1: trips whose pickup lies within 50km of the Sedona city center point (0.45 threshold, presumably degrees), nearest first with trip key as tie-break. |
| 7 | +SELECT |
| 8 | + trips.t_tripkey, |
| 9 | + ST_X(trips.t_pickuploc) AS pickup_lon, |
| 10 | + ST_Y(trips.t_pickuploc) AS pickup_lat, |
| 11 | + trips.t_pickuptime, |
| 12 | + ST_Distance(trips.t_pickuploc, ST_GeomFromText('POINT (-111.7610 34.8697)')) AS distance_to_center |
| 13 | +FROM trip AS trips |
| 14 | +WHERE ST_DWithin(trips.t_pickuploc, ST_GeomFromText('POINT (-111.7610 34.8697)'), 0.45) |
| 15 | +ORDER BY distance_to_center, trips.t_tripkey; |
| 16 | + |
| 17 | +-- Q2: number of trips whose pickup point intersects the boundary of the zone named Coconino County (Arizona), using the first matching zone row. |
| 18 | +SELECT |
| 19 | + COUNT(*) AS trip_count_in_coconino_county |
| 20 | +FROM trip AS trips |
| 21 | +WHERE ST_Intersects( |
| 22 | + trips.t_pickuploc, |
| 23 | + (SELECT county.z_boundary FROM zone AS county WHERE county.z_name = 'Coconino County' LIMIT 1)); |
| 24 | + |
| 25 | +-- Q3: per-month trip count and average distance, duration and fare for pickups within 15km of Sedona city center (10km bbox + 5km buffer, expressed as a polygon and a 0.045 threshold). |
| 26 | +SELECT |
| 27 | + DATE_TRUNC('month', trips.t_pickuptime) AS pickup_month, |
| 28 | + COUNT(trips.t_tripkey) AS total_trips, |
| 29 | + AVG(trips.t_distance) AS avg_distance, |
| 30 | + AVG(trips.t_dropofftime - trips.t_pickuptime) AS avg_duration, |
| 31 | + AVG(trips.t_fare) AS avg_fare |
| 32 | +FROM trip AS trips |
| 33 | +WHERE ST_DWithin( |
| 34 | + trips.t_pickuploc, |
| 35 | + ST_GeomFromText('POLYGON((-111.9060 34.7347, -111.6160 34.7347, -111.6160 35.0047, -111.9060 35.0047, -111.9060 34.7347))'), |
| 36 | + 0.045 |
| 37 | +) |
| 38 | +GROUP BY DATE_TRUNC('month', trips.t_pickuptime) |
| 39 | +ORDER BY pickup_month; |
| 40 | + |
| 41 | +-- Q4: for the 1000 trips with the highest tip (ties broken by trip key), count how many pickups fall inside each zone. |
| 42 | +SELECT zones.z_zonekey, zones.z_name, COUNT(*) AS trip_count |
| 43 | +FROM zone AS zones |
| 44 | +INNER JOIN ( |
| 45 | + SELECT trips.t_pickuploc |
| 46 | + FROM trip AS trips |
| 47 | + ORDER BY trips.t_tip DESC, trips.t_tripkey |
| 48 | + LIMIT 1000 |
| 49 | +) AS top_trips ON ST_Within(top_trips.t_pickuploc, zones.z_boundary) |
| 50 | +GROUP BY zones.z_zonekey, zones.z_name |
| 51 | +ORDER BY trip_count DESC, zones.z_zonekey; |
| 52 | + |
| 53 | +-- Q5: per customer and pickup month, area of the convex hull of dropoff locations, keeping only groups with more than 5 trips (repeat customers). |
| 54 | +SELECT |
| 55 | + cust.c_custkey, |
| 56 | + cust.c_name AS customer_name, |
| 57 | + DATE_TRUNC('month', trips.t_pickuptime) AS pickup_month, |
| 58 | + ST_Area(ST_ConvexHull(ST_Collect(ARRAY_AGG(trips.t_dropoffloc)))) AS monthly_travel_hull_area, |
| 59 | + COUNT(*) AS dropoff_count |
| 60 | +FROM trip AS trips |
| 61 | +INNER JOIN customer AS cust ON cust.c_custkey = trips.t_custkey |
| 62 | +GROUP BY cust.c_custkey, cust.c_name, pickup_month |
| 63 | +HAVING COUNT(*) > 5 |
| 64 | +ORDER BY dropoff_count DESC, cust.c_custkey; |
| 65 | + |
| 65 | +-- Q6: pickup statistics per zone, limited to zones whose boundary intersects a fixed bounding box. NOTE(review): avg_distance averages t_totalamount, left as is because the file header says query logic is unchanged. |
| 66 | +SELECT |
| 67 | + zones.z_zonekey, |
| 68 | + zones.z_name, |
| 69 | + COUNT(trips.t_tripkey) AS total_pickups, |
| 70 | + AVG(trips.t_totalamount) AS avg_distance, |
| 71 | + AVG(trips.t_dropofftime - trips.t_pickuptime) AS avg_duration |
| 72 | +FROM trip AS trips |
| 73 | +INNER JOIN zone AS zones ON ST_Within(trips.t_pickuploc, zones.z_boundary) |
| 74 | +WHERE ST_Intersects( |
| 75 | + ST_GeomFromText('POLYGON((-112.2110 34.4197, -111.3110 34.4197, -111.3110 35.3197, -112.2110 35.3197, -112.2110 34.4197))'), |
| 76 | + zones.z_boundary |
| 77 | +) |
| 78 | +GROUP BY zones.z_zonekey, zones.z_name |
| 79 | +ORDER BY total_pickups DESC, zones.z_zonekey; |
| 81 | + |
| 82 | +-- Q7: detour detection, ratio of reported trip distance to straight pickup-to-dropoff line length (line length divided by 0.000009, presumably converting degrees to meters). |
| 83 | +WITH trip_lengths AS ( |
| 84 | + SELECT |
| 85 | + trips.t_tripkey, |
| 86 | + trips.t_distance AS reported_distance_m, |
| 87 | + ST_Length(ST_MakeLine(trips.t_pickuploc, trips.t_dropoffloc)) / 0.000009 AS line_distance_m |
| 88 | + FROM trip AS trips |
| 89 | +) |
| 90 | +SELECT |
| 91 | + lens.t_tripkey, |
| 92 | + lens.reported_distance_m, |
| 93 | + lens.line_distance_m, |
| 94 | + lens.reported_distance_m / NULLIF(lens.line_distance_m, 0) AS detour_ratio |
| 95 | +FROM trip_lengths AS lens |
| 96 | +ORDER BY detour_ratio DESC NULLS LAST, lens.reported_distance_m DESC, lens.t_tripkey; |
| 97 | + |
| 98 | +-- Q8: per building, number of trip pickups within 0.0045 of its boundary (~500m). |
| 99 | +SELECT bldg.b_buildingkey, bldg.b_name, COUNT(*) AS nearby_pickup_count |
| 100 | +FROM trip AS trips |
| 101 | +INNER JOIN building AS bldg ON ST_DWithin(trips.t_pickuploc, bldg.b_boundary, 0.0045) |
| 102 | +GROUP BY bldg.b_buildingkey, bldg.b_name |
| 103 | +ORDER BY nearby_pickup_count DESC, bldg.b_buildingkey; |
| 104 | + |
| 105 | +-- Q9: building conflation, intersection-over-union (IoU) for every pair of distinct buildings whose boundaries intersect, to spot duplicates and overlaps. |
| 106 | +WITH bldg_a AS ( |
| 107 | + SELECT b_buildingkey AS id, b_boundary AS geom FROM building |
| 108 | +), |
| 109 | +bldg_b AS ( |
| 110 | + SELECT b_buildingkey AS id, b_boundary AS geom FROM building |
| 111 | +), |
| 112 | +overlaps AS ( |
| 113 | + SELECT |
| 114 | + bldg_a.id AS building_1, |
| 115 | + bldg_b.id AS building_2, |
| 116 | + ST_Area(bldg_a.geom) AS area1, |
| 117 | + ST_Area(bldg_b.geom) AS area2, |
| 118 | + ST_Area(ST_Intersection(bldg_a.geom, bldg_b.geom)) AS overlap_area |
| 119 | + FROM bldg_a |
| 120 | + INNER JOIN bldg_b ON bldg_a.id < bldg_b.id AND ST_Intersects(bldg_a.geom, bldg_b.geom) |
| 121 | +) |
| 122 | +SELECT |
| 123 | + building_1, |
| 124 | + building_2, |
| 125 | + area1, |
| 126 | + area2, |
| 127 | + overlap_area, |
| 128 | + CASE |
| 129 | + WHEN overlap_area = 0 THEN 0.0 |
| 130 | + WHEN (area1 + area2 - overlap_area) = 0 THEN 1.0 |
| 131 | + ELSE overlap_area / (area1 + area2 - overlap_area) |
| 132 | + END AS iou |
| 133 | +FROM overlaps |
| 134 | +ORDER BY iou DESC, building_1, building_2; |
| 135 | + |
| 136 | +-- Q10: per zone, average duration, average distance and count of trips whose pickup lies within the zone. Zones without trips are kept by the LEFT JOIN with a zero count. |
| 137 | +SELECT |
| 138 | + zones.z_zonekey, |
| 139 | + zones.z_name AS pickup_zone, |
| 140 | + AVG(trips.t_dropofftime - trips.t_pickuptime) AS avg_duration, |
| 141 | + AVG(trips.t_distance) AS avg_distance, |
| 142 | + COUNT(trips.t_tripkey) AS num_trips |
| 143 | +FROM zone AS zones |
| 144 | +LEFT JOIN trip AS trips ON ST_Within(trips.t_pickuploc, zones.z_boundary) |
| 145 | +GROUP BY zones.z_zonekey, zones.z_name |
| 146 | +ORDER BY avg_duration DESC NULLS LAST, zones.z_zonekey; |
| 147 | + |
| 148 | +-- Q11: number of trips whose pickup zone and dropoff zone differ (counted once per matching pickup/dropoff zone pair). |
| 149 | +SELECT COUNT(*) AS cross_zone_trip_count |
| 150 | +FROM trip AS trips |
| 151 | +INNER JOIN zone AS origin ON ST_Within(trips.t_pickuploc, origin.z_boundary) |
| 152 | +INNER JOIN zone AS dest ON ST_Within(trips.t_dropoffloc, dest.z_boundary) |
| 153 | +WHERE origin.z_zonekey <> dest.z_zonekey; |
| 154 | + |
| 155 | +-- Q12: five nearest buildings per trip pickup (CROSS JOIN LATERAL, since DuckDB spatial has no ST_KNN). NOTE(review): equal distances have no tie-break inside the lateral subquery and the final ordering has no trip key. |
| 156 | +SELECT |
| 157 | + trips.t_tripkey, |
| 158 | + trips.t_pickuploc, |
| 159 | + nearest.b_buildingkey, |
| 160 | + nearest.building_name, |
| 161 | + nearest.distance_to_building |
| 162 | +FROM trip AS trips |
| 163 | +CROSS JOIN LATERAL ( |
| 164 | + SELECT |
| 165 | + bldg.b_buildingkey, |
| 166 | + bldg.b_name AS building_name, |
| 167 | + ST_Distance(trips.t_pickuploc, bldg.b_boundary) AS distance_to_building |
| 168 | + FROM building AS bldg |
| 169 | + ORDER BY distance_to_building |
| 170 | + LIMIT 5 |
| 171 | +) AS nearest |
| 172 | +ORDER BY nearest.distance_to_building, nearest.b_buildingkey; |
0 commit comments