Skip to content

Commit bbe6caa

Browse files
committed
Create a BaseModel class to estimate change probabilities for particular POIs over time.
1 parent c0c80d9 commit bbe6caa

2 files changed

Lines changed: 225 additions & 0 deletions

File tree

src/openpois/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

src/openpois/models/base_model.py

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
# -------------------------------------------------------------
2+
# Copyright (c) Henry Spatial Analysis. All rights reserved.
3+
# Licensed under the MIT License. See LICENSE in project root for information.
4+
# -------------------------------------------------------------
5+
6+
"""
7+
This module contains the base model for OSM POI change data, which is extended by
8+
particular models that make assumptions about the form of the change process.
9+
"""
10+
11+
import torch
12+
import torchmin
13+
import pandas as pd
14+
15+
class EventRate:
16+
"""
17+
Stores and calculates event rates (lambda) for a given model of a Poisson process.
18+
If the event rate is constant, the event rate is a scalar, and change probabilities
19+
can be calculated directly. If the event rate is time-varying, then change
20+
probabilities have to be integrated over the time period.
21+
"""
22+
23+
VALID_TYPES = ['constant', 'varying']
24+
25+
def __init__(
26+
self,
27+
type: str = 'constant',
28+
fun: callable = None,
29+
delta: float = 0.02
30+
):
31+
"""
32+
Args:
33+
type: Type of event rate. Must be one of {self.VALID_TYPES}.
34+
fun: Function to calculate the event rate. If `type` is "constant", this
35+
function will return a scalar. If `type` is "varying", this function will
36+
return a function `f(t)` that gives the event rate at time `t`.
37+
delta: Time step for a time-varying event rate function. Only used if `type`
38+
is "varying".
39+
"""
40+
if type not in self.VALID_TYPES:
41+
raise ValueError(
42+
f"Invalid event rate type: {type}. Must be one of {self.VALID_TYPES}"
43+
)
44+
self.type = type
45+
self.fun = fun
46+
self.delta = delta
47+
48+
def calculate_change_constant(self, t1: torch.Tensor, t2: torch.Tensor, **kwargs):
49+
"""
50+
Calculate the change probability for a constant event rate. Given lambda and a
51+
time period (t1, t2], the change probability is `lambda * (t2 - t1)`
52+
"""
53+
return self.fun(**kwargs) * (t2 - t1)
54+
55+
def calculate_change_varying(self, t1: torch.Tensor, t2: torch.Tensor, **kwargs):
56+
"""
57+
Calculate the change probability for a varying event rate. Given a function `f(t)`
58+
that gives the event rate at time `t`, the change probability is the approximate
59+
integral from t1 to t2 of `f(t)`
60+
"""
61+
f = self.fun(**kwargs)
62+
t_range = torch.linspace(t1, t2, steps = max(2, int((t2 - t1) / self.delta) + 1))
63+
return torch.trapz(y = f(t_range), x = t_range)
64+
65+
def calculate_change(self, t1: torch.Tensor, t2: torch.Tensor, **kwargs):
66+
"""
67+
Calculate the change probability between two time periods for this event rate.
68+
"""
69+
if self.type == 'constant':
70+
return self.calculate_change_constant(t1 = t1, t2 = t2, **kwargs)
71+
elif self.type == 'varying':
72+
return self.calculate_change_varying(t1 = t1, t2 = t2, **kwargs)
73+
else:
74+
raise ValueError(f"Invalid event rate type: {self.type}")
75+
76+
77+
class BaseModel:
78+
"""
79+
Base class for OpenPOI change rate models.
80+
"""
81+
82+
# Small epsilon to avoid log(0) and log(1-p) = -inf -> NaN
83+
EPSILON = 1e-7
84+
# Model fit tolerance
85+
TOLERANCE = 1e-5
86+
# Optimizer type
87+
OPTIMIZER = 'l-bfgs'
88+
89+
def __init__(
90+
self,
91+
event_rate: EventRate,
92+
params: torch.Tensor,
93+
covariates: torch.Tensor,
94+
target: torch.Tensor,
95+
t1: torch.Tensor,
96+
t2: torch.Tensor,
97+
verbose: bool = False
98+
):
99+
"""
100+
Args:
101+
observations: DataFrame of observations.
102+
event_rate: EventRate object.
103+
params: Parameters to fit.
104+
covariates: Covariates to use in the model.
105+
target: Target variable to predict.
106+
t1: Start time of the time period.
107+
t2: End time of the time period.
108+
"""
109+
self.event_rate = event_rate
110+
self.starting_params = params
111+
self.covariates = covariates
112+
self.target = target
113+
self.t1 = t1
114+
self.t2 = t2
115+
self.verbose = verbose
116+
self.model_run = False
117+
self.model_fit = None
118+
self.fitted_params = None
119+
self.fitted_probs = None
120+
self.hessian = None
121+
self.se = None
122+
self.params_table = None
123+
124+
def calculate_change_rates(
125+
self,
126+
params: torch.Tensor,
127+
covariates: torch.Tensor = None,
128+
t1: torch.Tensor = None,
129+
t2: torch.Tensor = None,
130+
**kwargs
131+
):
132+
if t1 is None: t1 = self.t1
133+
if t2 is None: t2 = self.t2
134+
if covariates is None: covariates = self.covariates
135+
return self.event_rate.calculate_change(
136+
t1 = t1,
137+
t2 = t2,
138+
params = params,
139+
covariates = covariates,
140+
**kwargs,
141+
)
142+
143+
def calculate_probs(self, params: torch.Tensor, **kwargs):
144+
change_rates = self.calculate_change_rates(params = params, **kwargs)
145+
probs = (
146+
(1.0 - torch.exp(-1.0 * change_rates))
147+
.squeeze(-1)
148+
.clamp(min = self.EPSILON, max = 1.0 - self.EPSILON)
149+
)
150+
return probs
151+
152+
def calculate_nll(self, params: torch.Tensor, **kwargs):
153+
probs = self.calculate_probs(params = params, **kwargs)
154+
ll = torch.sum(
155+
self.target * torch.log(probs) +
156+
(1.0 - self.target) * torch.log(1.0 - probs)
157+
)
158+
return -1.0 * ll
159+
160+
def prepare_results(self):
161+
if not self.model_fit:
162+
raise ValueError("Run fit() first")
163+
# Prepare table of fitted parameters
164+
self.hessian = torch.autograd.functional.hessian(
165+
self.calculate_nll,
166+
self.fitted_params,
167+
)
168+
self.se = torch.sqrt(torch.linalg.diagonal(torch.linalg.inv(self.hessian)))
169+
self.params_table = pd.DataFrame({
170+
'estimate': self.fitted_params.data.cpu().numpy(),
171+
'std_err': self.se.data.cpu().numpy(),
172+
})
173+
174+
def fit(self):
175+
self.model_fit = torchmin.minimize(
176+
fun = self.calculate_nll,
177+
x0 = self.starting_params,
178+
method = self.OPTIMIZER,
179+
tol = self.TOLERANCE,
180+
disp = self.verbose,
181+
)
182+
self.fitted_params = self.model_fit.x
183+
self.fitted_probs = self.calculate_probs(params = self.fitted_params)
184+
self.model_fit = True
185+
self.prepare_results()
186+
187+
def get_results(self):
188+
return self.params_table
189+
190+
def predict(
191+
self,
192+
t1: torch.Tensor = None,
193+
t2: torch.Tensor = None,
194+
covariates: torch.Tensor = None,
195+
**kwargs
196+
):
197+
if t1 is None and t2 is None and covariates is None:
198+
t1 = self.t1
199+
t2 = self.t2
200+
covariates = self.covariates
201+
elif t1 is None:
202+
t1 = torch.zeros_like(t2)
203+
change_rates = self.calculate_change_rates(
204+
params = self.fitted_params,
205+
t1 = t1,
206+
t2 = t2,
207+
covariates = covariates,
208+
**kwargs,
209+
)
210+
probs = self.calculate_probs(
211+
params = self.fitted_params,
212+
t1 = t1,
213+
t2 = t2,
214+
covariates = covariates,
215+
**kwargs,
216+
)
217+
return pd.DataFrame(
218+
{
219+
't1': t1.data.cpu().numpy(),
220+
't2': t2.data.cpu().numpy(),
221+
'probs': probs.data.cpu().numpy(),
222+
'change_rates': change_rates.data.cpu().numpy(),
223+
}
224+
)

0 commit comments

Comments
 (0)