Skip to content

Commit c0c80d9

Browse files
committed
Add simple pytorch model for time-invariant lambda.
1 parent 89c3503 commit c0c80d9

1 file changed

Lines changed: 115 additions & 0 deletions

File tree

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""
2+
PyTorch model testing
3+
4+
Created February 12, 2026
5+
Purpose: Explore a simple empirical Bayes PyTorch model framework for change data
6+
7+
Reads data prepared in `osm/format_tabular.py`
8+
"""
9+
10+
import numpy as np
11+
import pandas as pd
12+
import torch
13+
import torchmin
14+
import plotnine as gg
15+
from pathlib import Path
16+
17+
# Globals: dated version tags select the input-data and model-output folders
DATA_VERSION = "20260129"
MODEL_VERSION = "20260212"
# Both folders sit under data/openpois in the current user's home directory
DATA_DIR = Path.home() / "data" / "openpois" / DATA_VERSION
MODEL_DIR = Path.home() / "data" / "openpois" / MODEL_VERSION
# Tag key interpolated into the input and output file names
TAG_KEY = "name"
23+
24+
# Read the observation table for the selected tag key
observations_df = pd.read_csv(
    DATA_DIR.joinpath(f"osm_observations_{TAG_KEY}.csv")
)

# Create the model output folder (and any missing parents) if it is absent
MODEL_DIR.mkdir(parents=True, exist_ok=True)
29+
30+
# Device setup: float64 tensors; run on the GPU when CUDA is available
DTYPE = torch.float64
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Running on", DEVICE)
# Tensors created without an explicit device are placed on DEVICE
torch.set_default_device(DEVICE)
35+
36+
37+
## Input data preparation --------------------------------------------------------------->

# Parse the timestamp columns into datetimes
timestamp_cols = ['obs_timestamp', 'last_obs_timestamp', 'last_tag_timestamp']
for timestamp_col in timestamp_cols:
    parsed_timestamps = observations_df[timestamp_col]
    observations_df[timestamp_col] = pd.to_datetime(parsed_timestamps)
del parsed_timestamps

# Elapsed time between the observation and the last tag timestamp, in whole days
# and in years (365-day years). `assign` evaluates keywords in order, so
# `tag_years` can read the freshly created `tag_days`.
observations_df = observations_df.assign(
    tag_days=lambda df: (df['obs_timestamp'] - df['last_tag_timestamp']).dt.days,
    tag_years=lambda df: df['tag_days'] / 365,
)

# Modelling subset: both model inputs present and a strictly positive elapsed time
obs_sub = (
    observations_df
    .dropna(subset=['tag_years', 'changed'])
    .loc[lambda df: df['tag_years'] > 1e-6]
)
50+
51+
52+
## Define model ------------------------------------------------------------------------->
53+
54+
# Only parameters need requires_grad=True; data tensors must not, or memory explodes.
# Convert through an explicit float64 NumPy array first: a `changed` column parsed
# from CSV as booleans mixed with missing values has object dtype (and keeps it
# after dropna), and torch.tensor() cannot convert object arrays.
X = torch.tensor(
    obs_sub[['tag_years']].to_numpy(dtype=np.float64),  # 2-D: one column
    dtype=DTYPE,
    device=DEVICE,
)
y = torch.tensor(
    obs_sub['changed'].to_numpy(dtype=np.float64),
    dtype=DTYPE,
    device=DEVICE,
)

# Free parameter: omega = log(lambda). nll_torchmin exponentiates it, so the rate
# lambda = exp(omega) is always positive; the start value 0.0 means lambda = 1.
omega = torch.tensor(
    [0.0],
    dtype=DTYPE,
    device=DEVICE,
    requires_grad=True,
)
65+
66+
# Small epsilon to avoid log(0) and log(1-p) = -inf -> NaN
67+
def nll_torchmin(params, y, X, DELTA=1e-6, EPSILON=1e-7):
    """
    Negative Bernoulli log-likelihood with a constant change rate.

    Each observation has probability p = 1 - exp(-lambda * x) of y = 1, where
    lambda = exp(params[0]).

    Args:
        params: Tensor whose first element is log(lambda). It is clamped to
            [-20, 20] before exponentiation.
        y: Tensor of outcomes, multiplied element-wise with log(p); the
            caller in this file passes 0/1 values.
        X: Tensor of exposure values (the caller passes shape (n, 1)); values
            below DELTA are raised to DELTA.
        DELTA: Lower bound applied to X so that p stays above zero.
        EPSILON: p is clamped to [EPSILON, 1 - EPSILON] so both logarithms
            stay finite.

    Returns:
        Scalar tensor: the negative log-likelihood summed over observations.
    """
    log_lambda = params[0].clamp(-20.0, 20.0)  # keep lambda in [2e-9, 5e8]
    lambda_ = torch.exp(log_lambda)
    # Ensure positive exposure so p is in (0, 1)
    x = X.clamp(min=DELTA)
    # -expm1(-z) equals 1 - exp(-z) but avoids cancellation when z is small
    p = (
        (-torch.expm1(-lambda_ * x))
        .squeeze(-1)
        .clamp(min=EPSILON, max=1.0 - EPSILON)
    )
    # log1p(-p) equals log(1 - p) but keeps full precision when p is small
    ll = torch.sum(y * torch.log(p) + (1.0 - y) * torch.log1p(-p))
    return -ll
79+
80+
# Minimize the negative log-likelihood with L-BFGS, starting from omega;
# y and X are bound from the surrounding script
model_fit = torchmin.minimize(
    lambda params: nll_torchmin(params, y, X),
    omega,
    method='l-bfgs',
    tol=1e-5,
    disp=True,
)
87+
88+
# Prepare model results
# Standard errors: square root of the diagonal of the inverse Hessian of the
# negative log-likelihood, evaluated at the fitted parameters
hessian_ = torch.autograd.functional.hessian(
    lambda params: nll_torchmin(params, y, X),
    model_fit.x,
)
se_torch_ = torch.linalg.inv(hessian_).diagonal().sqrt()

# Row 1: the fitted parameter on the log scale
m1 = pd.DataFrame({
    'parameter': ['log_lambda'],
    'estimate': model_fit.x.detach().cpu().numpy(),
    'std_err': se_torch_.detach().cpu().numpy(),
})
# Row 2: the same parameter on the natural scale. `assign` evaluates keywords in
# order, so `std_err` sees the already-exponentiated `estimate`, i.e. it is
# lambda * se(log_lambda).
m2 = m1.assign(
    parameter='lambda',
    estimate=lambda df: np.exp(df['estimate']),
    std_err=lambda df: df['estimate'] * df['std_err'],
)
model_results = pd.concat([m1, m2])
110+
111+
112+
## Save results ------------------------------------------------------------------------->

# Parameter table as CSV, plus the full optimizer result object via torch.save
model_results.to_csv(MODEL_DIR.joinpath(f"fitted_params_{TAG_KEY}.csv"), index=False)
torch.save(model_fit, MODEL_DIR.joinpath(f"fitted_params_{TAG_KEY}.pt"))

0 commit comments

Comments
 (0)