1+ """
2+ PyTorch model testing
3+
4+ Created February 12, 2026
5+ Purpose: Explore a simple empirical Bayes PyTorch model framework for change data
6+
7+ Reads data prepared in `osm/format_tabular.py`
8+ """
9+
10+ import numpy as np
11+ import pandas as pd
12+ import torch
13+ import torchmin
14+ import plotnine as gg
15+ from pathlib import Path
16+
# Globals
# Dated version tags select the input-data and model-output subdirectories.
DATA_VERSION = "20260129"
MODEL_VERSION = "20260212"
DATA_DIR = Path("~/data/openpois").expanduser() / DATA_VERSION
MODEL_DIR = Path("~/data/openpois").expanduser() / MODEL_VERSION
# Tag key used to build the input and output file names.
TAG_KEY = "name"

# Load data
# The file name is "osm_observations_<TAG_KEY>.csv" with no whitespace before the extension.
observations_df = pd.read_csv(DATA_DIR / f"osm_observations_{TAG_KEY}.csv")

# Ensure model directory exists
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Device setup
# float64 is used for every tensor; tensors are created on the GPU when one is available.
DTYPE = torch.float64
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on", DEVICE)
torch.set_default_device(DEVICE)
35+
36+
## Input data preparation --------------------------------------------------------------->

timestamp_cols = ['obs_timestamp', 'last_obs_timestamp', 'last_tag_timestamp']

# Parse every timestamp column to datetime
observations_df = observations_df.assign(**{
    name: pd.to_datetime(observations_df[name]) for name in timestamp_cols
})

# Time elapsed between the observation and the last tag timestamp, in whole days and in
# (365-day) years. `tag_years` refers to the `tag_days` column created just before it.
elapsed = pd.col('obs_timestamp') - pd.col('last_tag_timestamp')
observations_df = observations_df.assign(
    tag_days=elapsed.dt.days,
    tag_years=pd.col('tag_days') / 365,
)

# Keep rows with a known outcome and a strictly positive elapsed time. A missing
# `tag_years` compares as False, so those rows are dropped by the same mask.
keep_row = observations_df['changed'].notna() & (observations_df['tag_years'] > 1e-6)
obs_sub = observations_df.loc[keep_row]
50+
51+
## Define model ------------------------------------------------------------------------->

# Data tensors are constants: they are built without requires_grad so that autograd only
# tracks the parameter vector.
tag_years_values = obs_sub[['tag_years']].values  # 2-D: one column of elapsed years
changed_values = obs_sub['changed'].values        # 1-D: one outcome per row
X = torch.tensor(tag_years_values, dtype=DTYPE, device=DEVICE)
y = torch.tensor(changed_values, dtype=DTYPE, device=DEVICE)

# Initial parameter vector. Its single entry is log(lambda): optimising on the log scale
# keeps the rate lambda = exp(omega) positive. A start of 0 corresponds to lambda = 1.
omega = torch.zeros(1, dtype=DTYPE, device=DEVICE, requires_grad=True)
65+
def nll_torchmin(params, y, X, DELTA=1e-6, EPSILON=1e-7):
    """
    Negative log-likelihood of binary outcomes under a constant-rate model.

    Each observation with elapsed time x has outcome probability
    p = 1 - exp(-lambda * x), where lambda = exp(params[0]).

    Args:
        params: 1-D tensor; its first element is log(lambda). It is clamped to
            [-20, 20], which keeps lambda roughly within [2e-9, 5e8].
        y: Tensor of outcomes (expected to be 0/1), one per observation.
        X: Tensor of elapsed times with a trailing axis of size 1, i.e. (n, 1);
            that axis is squeezed away before combining with `y`.
        DELTA: Lower bound applied to X so that p stays above zero.
        EPSILON: p is clamped to [EPSILON, 1 - EPSILON] so neither logarithm
            evaluates to -inf (which would produce NaN gradients).

    Returns:
        0-dim tensor holding minus the summed Bernoulli log-likelihood.
    """
    log_lambda = params[0].clamp(-20.0, 20.0)
    lambda_ = torch.exp(log_lambda)
    x = X.clamp(min=DELTA)
    # -expm1(-z) equals 1 - exp(-z) but does not suffer cancellation when z is tiny
    p = (
        (-torch.expm1(-lambda_ * x))
        .squeeze(-1)
        .clamp(min=EPSILON, max=1.0 - EPSILON)
    )
    # log1p(-p) equals log(1 - p) and stays accurate for small p
    ll = torch.sum(y * torch.log(p) + (1.0 - y) * torch.log1p(-p))
    return -ll
79+
# Fit log(lambda) by minimising the negative log-likelihood with L-BFGS
model_fit = torchmin.minimize(
    fun=lambda params: nll_torchmin(params=params, y=y, X=X),
    x0=omega,
    method='l-bfgs',
    tol=1e-5,
    disp=True,
)
# The standard errors computed below are only meaningful at a converged optimum
if not getattr(model_fit, 'success', True):
    print(
        "WARNING: optimizer did not report convergence:",
        getattr(model_fit, 'message', ''),
    )

# Prepare model results
# Hessian of the NLL at the fitted parameters; the diagonal of its inverse gives the
# squared standard errors.
hessian_ = torch.autograd.functional.hessian(
    lambda params: nll_torchmin(params, y, X),
    model_fit.x,
)
se_torch_ = torch.sqrt(torch.linalg.diagonal(torch.linalg.inv(hessian_)))

# Row for the fitted parameter on the log scale
m1 = pd.DataFrame({
    'parameter': ['log_lambda'],
    'estimate': model_fit.x.detach().cpu().numpy(),
    'std_err': se_torch_.detach().cpu().numpy(),
})
# Row on the natural scale. Delta method: se(lambda) ~= lambda * se(log_lambda).
lambda_hat = np.exp(m1['estimate'])
m2 = m1.assign(
    parameter='lambda',
    estimate=lambda_hat,
    std_err=lambda_hat * m1['std_err'],
)
model_results = pd.concat([m1, m2], ignore_index=True)
110+
111+
## Save results ------------------------------------------------------------------------->

# Parameter table; file name is "fitted_params_<TAG_KEY>.csv" (no whitespace before the
# extension).
model_results.to_csv(MODEL_DIR / f"fitted_params_{TAG_KEY}.csv", index=False)
# NOTE(review): this pickles the optimizer's whole result object rather than a plain
# tensor; torch.load will presumably need weights_only=False to read it back - confirm.
torch.save(model_fit, MODEL_DIR / f"fitted_params_{TAG_KEY}.pt")
0 commit comments