Skip to content

Commit 3d6761e

Browse files
committed
Visualize proportion of a tag remaining unchanged after X years.
1 parent 3d58862 commit 3d6761e

1 file changed

Lines changed: 110 additions & 0 deletions

File tree

exploratory/osm_data_viz.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""
2+
Exploratory data viz script for OSM observations.
3+
4+
This script:
5+
1. Reads in the OSM observations from a CSV file.
6+
2. Creates a time series plot showing the proportion of tags that remain unchanged over time.
7+
"""
8+
9+
import numpy as np
10+
import pandas as pd
11+
from pathlib import Path
12+
import plotnine as gg
13+
14+
# ----------------------------------------------------------------------------------------
15+
# Configuration constants
16+
# ----------------------------------------------------------------------------------------
17+
18+
# Snapshot identifier; used as the sub-folder name under the data root.
DATA_VERSION = "20260129"
# Folder the observations CSV is read from and the figure is written to.
SAVE_DIR = Path("~/data/openpois").expanduser() / DATA_VERSION
# NOTE(review): not referenced by the workflow below — presumably kept for
# parity with sibling scripts; confirm before removing.
OSM_KEYS = ["amenity", "shop", "healthcare", "leisure"]
# Tag whose observations are plotted; selects `osm_observations_{TAG_KEY}.csv`.
TAG_KEY = "name"
# NOTE(review): only referenced by a disabled alternative for the t2/t3 cutoffs
# in the workflow below; currently has no effect on the output.
END_DATE = pd.Timestamp("2025-12-31", tz="UTC")

# Number of daily steps on the x-axis (ten 365-day years).
max_days = 365 * 10
25+
26+
# ----------------------------------------------------------------------------------------
27+
# Main workflow
28+
# ----------------------------------------------------------------------------------------
29+
30+
def load_observations(csv_path):
    """Read the observations CSV and flag the latest version of each element.

    Rows missing any of the three timestamp columns are dropped, the timestamp
    columns are parsed to datetimes, and a ``latest_version`` column is added
    that is 1 where ``version`` equals the maximum ``version`` within its
    ``id`` group and 0 otherwise.

    Parameters
    ----------
    csv_path : path-like
        Location of the observations CSV.

    Returns
    -------
    pandas.DataFrame
    """
    timestamp_cols = ["obs_timestamp", "last_obs_timestamp", "last_tag_timestamp"]
    observations_df = pd.read_csv(csv_path).dropna(subset=timestamp_cols)
    for timestamp_col in timestamp_cols:
        observations_df[timestamp_col] = pd.to_datetime(observations_df[timestamp_col])
    # The built-in "max" reduction avoids calling a Python lambda once per id.
    max_version = observations_df.groupby("id")["version"].transform("max")
    observations_df["latest_version"] = (
        observations_df["version"] == max_version
    ).astype(int)
    return observations_df


def compute_tag_intervals(observations_df):
    """Attach the day offsets ``t1``, ``t2`` and ``t3`` to each plotted row.

    Offsets are whole days measured from ``last_tag_timestamp``:

    * ``t1``: time elapsed until the final confirmation of the previous tag.
    * ``t2``: time elapsed from the previous tag to the changed tag.
    * ``t3``: end of the "changed" interval.

    Rows with ``changed == 1`` get a finite ``t1`` and ``t2``. Rows with
    ``changed == 0`` are kept only for the latest version of each ``id`` and
    get ``t2 = inf``, so they never count as changed. ``t3`` is always ``inf``;
    the END_DATE-based cutoff that appeared as a disabled alternative is not
    applied.

    Returns
    -------
    pandas.DataFrame
        Changed rows followed by unchanged rows, with the three new columns.
    """
    changed_tags = observations_df.query("changed == 1").assign(
        t1=(pd.col("last_obs_timestamp") - pd.col("last_tag_timestamp")).dt.days,
        t2=(pd.col("obs_timestamp") - pd.col("last_tag_timestamp")).dt.days,
        t3=np.inf,
    )
    unchanged_tags = observations_df.query(
        "(changed == 0) & (latest_version == 1)"
    ).assign(
        t1=(pd.col("obs_timestamp") - pd.col("last_tag_timestamp")).dt.days,
        t2=np.inf,
        t3=np.inf,
    )
    return pd.concat([changed_tags, unchanged_tags])


def compute_state_proportions(t1, t2, t3, n_days):
    """Count rows in each state for every day offset and derive plot bounds.

    For each ``day`` in ``range(n_days)`` a row counts as:

    * ``yes``     when ``day < t1``
    * ``unknown`` when ``t1 <= day < t2``
    * ``no``      when ``t2 <= day < t3``

    Parameters
    ----------
    t1, t2, t3 : array-like of numbers
        Per-row day offsets; ``inf`` is allowed.
    n_days : int
        Number of daily steps to evaluate, starting at day 0.

    Returns
    -------
    pandas.DataFrame
        One row per day with columns ``yes``, ``unknown``, ``no``, ``all``
        (sum of the three counts), ``ymin`` (``yes / all``), ``ymax``
        (``(yes + unknown) / all``) and ``year`` (``day / 365``).
    """
    # Convert once so the per-day comparisons run on plain NumPy arrays
    # instead of rebuilding pandas Series on every iteration.
    t1 = np.asarray(t1, dtype=float)
    t2 = np.asarray(t2, dtype=float)
    t3 = np.asarray(t3, dtype=float)
    days = range(n_days)
    counts = pd.DataFrame({
        "yes": [int((day < t1).sum()) for day in days],
        "unknown": [int(((t1 <= day) & (day < t2)).sum()) for day in days],
        "no": [int(((t2 <= day) & (day < t3)).sum()) for day in days],
    })
    total = counts["yes"] + counts["no"] + counts["unknown"]
    return counts.assign(
        all=total,
        ymin=counts["yes"] / total,
        ymax=(counts["yes"] + counts["unknown"]) / total,
        year=np.arange(n_days) / 365,
    )


def percent_breaks(step=0.25):
    """Return y-axis breaks from 0 to 1 inclusive and their percent labels.

    ``np.arange`` excludes its stop value, so the stop is pushed half a step
    past 1 to keep the 100% break on the axis.
    """
    breaks = np.arange(0, 1 + step / 2, step)
    labels = [f"{x*100:.0f}%" for x in breaks]
    return breaks, labels


def build_plot(reshaped_df, tag_key):
    """Build the ribbon plot of the proportion of tags remaining unchanged.

    The ribbon spans ``ymin`` to ``ymax`` against ``year``; both edges are
    also drawn as lines.
    """
    y_breaks, y_labels = percent_breaks()
    return (
        gg.ggplot(reshaped_df, gg.aes(x="year", ymin="ymin", ymax="ymax"))
        + gg.geom_ribbon(fill="blue", alpha=0.4)
        + gg.geom_line(gg.aes(y="ymin"), color="black", alpha=0.5)
        + gg.geom_line(gg.aes(y="ymax"), color="black", alpha=0.5)
        + gg.labs(
            x="Years from tag",
            y="Proportion remaining unchanged",
            title=f"Proportion of `{tag_key}` tags unchanged over time",
        )
        + gg.scale_y_continuous(
            limits=(0, 1.01),
            breaks=y_breaks,
            labels=y_labels,
        )
        + gg.theme_bw()
    )


def main():
    """Read the observations, compute daily proportions, and save the figure."""
    observations_df = load_observations(
        SAVE_DIR / f"osm_observations_{TAG_KEY}.csv"
    )
    to_plot_df = compute_tag_intervals(observations_df)
    reshaped_df = compute_state_proportions(
        to_plot_df["t1"], to_plot_df["t2"], to_plot_df["t3"], max_days
    )
    fig = build_plot(reshaped_df, TAG_KEY)
    fig.save(
        SAVE_DIR / f"osm_observations_{TAG_KEY}.png",
        width=10,
        height=6,
        units="in",
        dpi=300,
    )


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)