1+ """
2+ Exploratory data viz script for OSM observations.
3+
4+ This script:
5+ 1. Reads in the OSM observations from a CSV file.
6+ 2. Creates time series plots of the observations, showing how many remain open over time.
7+ """
8+
9+ import numpy as np
10+ import pandas as pd
11+ from pathlib import Path
12+ import plotnine as gg
13+
14+ # ----------------------------------------------------------------------------------------
15+ # Configuration constants
16+ # ----------------------------------------------------------------------------------------
17+
# Snapshot identifier; also the name of the sub-directory holding this run's files.
DATA_VERSION = "20260129"

# Directory the observations CSV is read from and the figure is written to.
SAVE_DIR = Path("~", "data", "openpois", DATA_VERSION).expanduser()

# OSM top-level keys of interest.
OSM_KEYS = [
    "amenity",
    "shop",
    "healthcare",
    "leisure",
]

# Tag whose observations are analysed; it selects the input and output file names.
TAG_KEY = "name"

# Reference end date (timezone-aware, UTC).
END_DATE = pd.Timestamp("2025-12-31", tz="UTC")

# Length of the plotted time axis in days (ten 365-day years).
max_days = 10 * 365
25+
26+ # ----------------------------------------------------------------------------------------
27+ # Main workflow
28+ # ----------------------------------------------------------------------------------------
29+
def interval_counts(lower, upper, n_days):
    """Count, for each day, the rows whose half-open interval contains that day.

    For every integer day ``d`` in ``0 .. n_days - 1`` the result holds the number
    of rows with ``lower <= d < upper``. This gives the same numbers as evaluating
    that comparison once per day, but touches each row only once by recording
    where intervals start and stop and taking a running sum.

    Args:
        lower: Inclusive lower bounds in days (scalar or array-like; ``-inf`` allowed).
        upper: Exclusive upper bounds in days (scalar or array-like; ``inf`` allowed).
        n_days: Number of days on the axis.

    Returns:
        Integer array of length ``n_days``.
    """
    lower, upper = np.broadcast_arrays(
        np.atleast_1d(np.asarray(lower, dtype=float)),
        np.atleast_1d(np.asarray(upper, dtype=float)),
    )
    # A NaN bound makes both comparisons false, so such rows never count.
    comparable = ~(np.isnan(lower) | np.isnan(upper))
    # First day inside each interval and first day past it, clamped to the axis.
    first_in = np.clip(np.ceil(lower[comparable]), 0, n_days).astype(np.int64)
    first_out = np.clip(np.ceil(upper[comparable]), 0, n_days).astype(np.int64)
    # Intervals that contain no day on the axis must not contribute at all.
    nonempty = first_in < first_out
    entering = np.bincount(first_in[nonempty], minlength=n_days + 1)
    leaving = np.bincount(first_out[nonempty], minlength=n_days + 1)
    return np.cumsum(entering - leaving)[:n_days]


def state_counts_by_day(t1, t2, t3, n_days):
    """Count tags in each state on every day since they were set.

    Args:
        t1: Days until the final confirmation of the tag value.
        t2: Days until the tag value was observed changed (``inf`` if never).
        t3: Upper bound of the "changed" period in days (``inf`` if open-ended).
        n_days: Number of days on the axis.

    Returns:
        Dict with keys ``'yes'`` (``day < t1``), ``'unknown'`` (``t1 <= day < t2``)
        and ``'no'`` (``t2 <= day < t3``), each an integer array of length
        ``n_days``.
    """
    return {
        'yes': interval_counts(-np.inf, t1, n_days),
        'unknown': interval_counts(t1, t2, n_days),
        'no': interval_counts(t2, t3, n_days),
    }


def percent_ticks(n_intervals=4):
    """Return evenly spaced breaks on [0, 1] and matching percent labels.

    Both end points are included, so the default yields 0%, 25%, 50%, 75%, 100%.
    """
    breaks = np.linspace(0, 1, n_intervals + 1)
    labels = [f"{value * 100:.0f}%" for value in breaks]
    return breaks, labels


def main():
    """Read the observations CSV, derive per-tag durations, and save the plot."""
    # Read observations, dropping rows that lack any of the timestamps
    timestamp_cols = ['obs_timestamp', 'last_obs_timestamp', 'last_tag_timestamp']
    observations_df = (
        pd.read_csv(SAVE_DIR / f"osm_observations_{TAG_KEY}.csv")
        .dropna(subset=timestamp_cols)
    )
    for timestamp_col in timestamp_cols:
        observations_df[timestamp_col] = pd.to_datetime(observations_df[timestamp_col])

    # Flag (1/0) the rows carrying the highest 'version' within each 'id' grouping.
    # The built-in 'max' transform avoids calling a Python lambda once per group.
    max_version = observations_df.groupby('id')['version'].transform('max')
    observations_df['latest_version'] = (
        observations_df['version'].eq(max_version).astype(int)
    )

    # Prepare timediffs in days:
    #   t1: Time elapsed until the final confirmation of the previous tag
    #   t2: Time elapsed from previous tag to changed tag
    #   t3: End of the "changed" period; currently open-ended (infinite).
    # NOTE(review): the infinite values look like placeholders for censoring at
    # END_DATE (END_DATE - last_tag_timestamp) -- confirm before enabling.
    changed_tags = (
        observations_df
        .query('changed == 1')
        .assign(
            t1=(pd.col('last_obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
            t2=(pd.col('obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
            t3=np.inf,
        )
    )
    unchanged_tags = (
        observations_df
        .query('(changed == 0) & (latest_version == 1)')
        .assign(
            t1=(pd.col('obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
            t2=np.inf,
            t3=np.inf,
        )
    )
    to_plot_df = pd.concat([changed_tags, unchanged_tags])

    # Per-day counts of each state, then the ribbon bounds as proportions
    state_counts = state_counts_by_day(
        to_plot_df['t1'], to_plot_df['t2'], to_plot_df['t3'], n_days=max_days
    )
    reshaped_df = (
        pd.DataFrame(state_counts)
        .assign(
            all=pd.col('yes') + pd.col('no') + pd.col('unknown'),
            ymin=pd.col('yes') / pd.col('all'),
            ymax=(pd.col('yes') + pd.col('unknown')) / pd.col('all'),
            year=np.arange(max_days) / 365,
        )
    )

    # Create the plot: the ribbon spans the "unknown" share between the lower
    # bound (confirmed unchanged) and the upper bound (not yet seen changed).
    y_breaks, y_labels = percent_ticks()
    fig = (
        gg.ggplot(reshaped_df, gg.aes(x='year', ymin='ymin', ymax='ymax'))
        + gg.geom_ribbon(fill='blue', alpha=0.4)
        + gg.geom_line(gg.aes(y='ymin'), color='black', alpha=0.5)
        + gg.geom_line(gg.aes(y='ymax'), color='black', alpha=0.5)
        + gg.labs(
            x="Years from tag",
            y="Proportion remaining unchanged",
            title=f"Proportion of `{TAG_KEY}` tags unchanged over time",
        )
        # Limits reach slightly past 1 so the 100% line is not clipped.
        + gg.scale_y_continuous(limits=(0, 1.01), breaks=y_breaks, labels=y_labels)
        + gg.theme_bw()
    )
    fig.save(
        SAVE_DIR / f"osm_observations_{TAG_KEY}.png",
        width=10,
        height=6,
        units='in',
        dpi=300,
    )


if __name__ == "__main__":
    main()
0 commit comments