Skip to content

Commit 0b33d64

Browse files
committed
Update exploratory script to use central functions.
1 parent ca6dc1e commit 0b33d64

1 file changed

Lines changed: 72 additions & 54 deletions

File tree

exploratory/osm_data_viz.py

Lines changed: 72 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,57 @@
99
import numpy as np
1010
import pandas as pd
1111
from pathlib import Path
12+
13+
import matplotlib
14+
matplotlib.use("Agg")
1215
import plotnine as gg
1316

17+
from openpois.osm.change_plots import change_plot_create, change_multiplot_create
18+
1419
# ----------------------------------------------------------------------------------------
1520
# Configuration constants
1621
# ----------------------------------------------------------------------------------------
1722

1823
DATA_VERSION = "20260129"
1924
SAVE_DIR = Path("~/data/openpois").expanduser() / DATA_VERSION
25+
VIZ_DIR = SAVE_DIR / "viz"
2026
OSM_KEYS = ["amenity", "shop", "healthcare", "leisure"]
2127
TAG_KEY = "name"
2228
END_DATE = pd.Timestamp('2025-12-31', tz = 'UTC')
2329

24-
max_days = 365*10
30+
max_days = 365 * 10
31+
VIZ_DIR.mkdir(parents = True, exist_ok = True)
32+
33+
# ----------------------------------------------------------------------------------------
34+
# Plotting functions
35+
# ----------------------------------------------------------------------------------------
36+
37+
def fig_save(
38+
fig: gg.ggplot, stub: str, width: float = 10, height: float = 6, **kwargs
39+
) -> None:
40+
"""
41+
Helper function to save a ggplot figure
42+
"""
43+
fig.save(
44+
filename = VIZ_DIR / f"{stub}.png",
45+
width = width,
46+
height = height,
47+
units = 'in',
48+
dpi = 300,
49+
verbose = False,
50+
**kwargs
51+
)
52+
return None
53+
2554

2655
# ----------------------------------------------------------------------------------------
2756
# Main workflow
2857
# ----------------------------------------------------------------------------------------
2958

3059
if __name__ == "__main__":
3160
# Read observations
61+
# Drop the first observation for each POI (when the POI was first added) - the last
62+
# observation timestamp will be missing for these rows
3263
timestamp_cols = ['obs_timestamp', 'last_obs_timestamp', 'last_tag_timestamp']
3364
observations_df = (pd.read_csv(SAVE_DIR / f"osm_observations_{TAG_KEY}.csv")
3465
.dropna(subset = timestamp_cols)
@@ -42,69 +73,56 @@
4273
).astype(int)
4374
)
4475
# Prepare timediffs in days:
45-
# t1: Time elapsed until the final confirmation of the previous tag
46-
# t2: Time elapsed from previous tag to changed tag
76+
# no_change: Time elapsed until the final confirmation of the previous tag
77+
# change: Time elapsed from previous tag to changed tag
78+
# final_obs: Time elapsed from previous tag to data download
4779
changed_tags = (observations_df
4880
.query('changed == 1')
4981
.assign(
50-
t1 = (pd.col('last_obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
51-
t2 = (pd.col('obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
52-
t3 = np.inf # (END_DATE - pd.col('last_tag_timestamp')).dt.days
82+
no_change = (
83+
pd.col('last_obs_timestamp') - pd.col('last_tag_timestamp')
84+
).dt.days,
85+
change = (pd.col('obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
86+
final_obs = (END_DATE - pd.col('last_tag_timestamp')).dt.days
5387
)
5488
)
5589
unchanged_tags = (observations_df
5690
.query('(changed == 0) & (latest_version == 1)')
5791
.assign(
58-
t1 = (pd.col('obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
59-
t2 = np.inf, # (END_DATE - pd.col('last_tag_timestamp')).dt.days,
60-
t3 = np.inf
92+
no_change = (pd.col('obs_timestamp') - pd.col('last_tag_timestamp')).dt.days,
93+
change = np.inf,
94+
final_obs = (END_DATE - pd.col('last_tag_timestamp')).dt.days
6195
)
6296
)
6397
# Format changes
6498
to_plot_df = pd.concat([changed_tags, unchanged_tags])
65-
# Create a plot
66-
reshaped_df = (
67-
pd.DataFrame({
68-
'yes': [np.sum(day_i < to_plot_df['t1']) for day_i in range(max_days)],
69-
'unknown': [
70-
np.sum((to_plot_df['t1'] <= day_i) & (day_i < to_plot_df['t2']))
71-
for day_i in range(max_days)
72-
],
73-
'no': [
74-
np.sum((to_plot_df['t2'] <= day_i) & (day_i < to_plot_df['t3']))
75-
for day_i in range(max_days)
76-
],
77-
})
78-
.assign(
79-
all = pd.col('yes') + pd.col('no') + pd.col('unknown'),
80-
ymin = pd.col('yes') / pd.col('all'),
81-
ymax = (pd.col('yes') + pd.col('unknown')) / pd.col('all'),
82-
year = np.arange(max_days) / 365,
83-
)
99+
# Create a plot for all tags
100+
fig = change_plot_create(
101+
observations = to_plot_df,
102+
no_change_col = 'no_change',
103+
change_col = 'change',
104+
final_observation_col = 'final_obs',
105+
day_range = max_days,
106+
title = f"Stability of the `{TAG_KEY}` tag over time",
107+
x_label = "Years since tag",
108+
y_label = "Proportion remaining unchanged",
84109
)
85-
fig = (
86-
gg.ggplot(
87-
reshaped_df,
88-
gg.aes(x = 'year', ymin = 'ymin', ymax = 'ymax')) +
89-
gg.geom_ribbon(fill = 'blue', alpha = 0.4) +
90-
gg.geom_line(gg.aes(y = 'ymin'), color = 'black', alpha = 0.5) +
91-
gg.geom_line(gg.aes(y = 'ymax'), color = 'black', alpha = 0.5) +
92-
gg.labs(
93-
x = "Years from tag",
94-
y = "Proportion remaining unchanged",
95-
title = f"Proportion of `{TAG_KEY}` tags unchanged over time"
96-
) +
97-
gg.scale_y_continuous(
98-
limits = (0, 1.01),
99-
breaks = np.arange(0, 1, 0.25),
100-
labels = [f"{x*100:.0f}%" for x in np.arange(0, 1, 0.25)]
101-
) +
102-
gg.theme_bw()
103-
)
104-
fig.save(
105-
SAVE_DIR / f"osm_observations_{TAG_KEY}.png",
106-
width = 10,
107-
height = 6,
108-
units = 'in',
109-
dpi = 300,
110-
)
110+
fig_save(fig, stub = f"osm_changes_{TAG_KEY}_all")
111+
112+
# Create multi-panel plots for the top tags in each OSM category
113+
for subtype in OSM_KEYS:
114+
fig = change_multiplot_create(
115+
observations = to_plot_df,
116+
col = subtype,
117+
top_n = 9,
118+
no_change_col = 'no_change',
119+
change_col = 'change',
120+
final_observation_col = 'final_obs',
121+
day_range = max_days,
122+
)
123+
fig_save(
124+
fig = fig,
125+
stub = f"osm_changes_{TAG_KEY}_{subtype}",
126+
height = 12,
127+
width = 12
128+
)

0 commit comments

Comments
 (0)