|
| 1 | +# ------------------------------------------------------------- |
| 2 | +# Copyright (c) Henry Spatial Analysis. All rights reserved. |
| 3 | +# Licensed under the MIT License. See LICENSE in project root for information. |
| 4 | +# ------------------------------------------------------------- |
| 5 | + |
| 6 | +""" |
| 7 | +This module contains the base model for OSM POI change data, which is extended by |
| 8 | +particular models that make assumptions about the form of the change process. |
| 9 | +""" |
| 10 | + |
| 11 | +import torch |
| 12 | +import torchmin |
| 13 | +import pandas as pd |
| 14 | + |
| 15 | +class EventRate: |
| 16 | + """ |
| 17 | + Stores and calculates event rates (lambda) for a given model of a Poisson process. |
| 18 | + If the event rate is constant, the event rate is a scalar, and change probabilities |
| 19 | + can be calculated directly. If the event rate is time-varying, then change |
| 20 | + probabilities have to be integrated over the time period. |
| 21 | + """ |
| 22 | + |
| 23 | + VALID_TYPES = ['constant', 'varying'] |
| 24 | + |
| 25 | + def __init__( |
| 26 | + self, |
| 27 | + type: str = 'constant', |
| 28 | + fun: callable = None, |
| 29 | + delta: float = 0.02 |
| 30 | + ): |
| 31 | + """ |
| 32 | + Args: |
| 33 | + type: Type of event rate. Must be one of {self.VALID_TYPES}. |
| 34 | + fun: Function to calculate the event rate. If `type` is "constant", this |
| 35 | + function will return a scalar. If `type` is "varying", this function will |
| 36 | + return a function `f(t)` that gives the event rate at time `t`. |
| 37 | + delta: Time step for a time-varying event rate function. Only used if `type` |
| 38 | + is "varying". |
| 39 | + """ |
| 40 | + if type not in self.VALID_TYPES: |
| 41 | + raise ValueError( |
| 42 | + f"Invalid event rate type: {type}. Must be one of {self.VALID_TYPES}" |
| 43 | + ) |
| 44 | + self.type = type |
| 45 | + self.fun = fun |
| 46 | + self.delta = delta |
| 47 | + |
| 48 | + def calculate_change_constant(self, t1: torch.Tensor, t2: torch.Tensor, **kwargs): |
| 49 | + """ |
| 50 | + Calculate the change probability for a constant event rate. Given lambda and a |
| 51 | + time period (t1, t2], the change probability is `lambda * (t2 - t1)` |
| 52 | + """ |
| 53 | + return self.fun(**kwargs) * (t2 - t1) |
| 54 | + |
| 55 | + def calculate_change_varying(self, t1: torch.Tensor, t2: torch.Tensor, **kwargs): |
| 56 | + """ |
| 57 | + Calculate the change probability for a varying event rate. Given a function `f(t)` |
| 58 | + that gives the event rate at time `t`, the change probability is the approximate |
| 59 | + integral from t1 to t2 of `f(t)` |
| 60 | + """ |
| 61 | + f = self.fun(**kwargs) |
| 62 | + t_range = torch.linspace(t1, t2, steps = max(2, int((t2 - t1) / self.delta) + 1)) |
| 63 | + return torch.trapz(y = f(t_range), x = t_range) |
| 64 | + |
| 65 | + def calculate_change(self, t1: torch.Tensor, t2: torch.Tensor, **kwargs): |
| 66 | + """ |
| 67 | + Calculate the change probability between two time periods for this event rate. |
| 68 | + """ |
| 69 | + if self.type == 'constant': |
| 70 | + return self.calculate_change_constant(t1 = t1, t2 = t2, **kwargs) |
| 71 | + elif self.type == 'varying': |
| 72 | + return self.calculate_change_varying(t1 = t1, t2 = t2, **kwargs) |
| 73 | + else: |
| 74 | + raise ValueError(f"Invalid event rate type: {self.type}") |
| 75 | + |
| 76 | + |
| 77 | +class BaseModel: |
| 78 | + """ |
| 79 | + Base class for OpenPOI change rate models. |
| 80 | + """ |
| 81 | + |
| 82 | + # Small epsilon to avoid log(0) and log(1-p) = -inf -> NaN |
| 83 | + EPSILON = 1e-7 |
| 84 | + # Model fit tolerance |
| 85 | + TOLERANCE = 1e-5 |
| 86 | + # Optimizer type |
| 87 | + OPTIMIZER = 'l-bfgs' |
| 88 | + |
| 89 | + def __init__( |
| 90 | + self, |
| 91 | + event_rate: EventRate, |
| 92 | + params: torch.Tensor, |
| 93 | + covariates: torch.Tensor, |
| 94 | + target: torch.Tensor, |
| 95 | + t1: torch.Tensor, |
| 96 | + t2: torch.Tensor, |
| 97 | + verbose: bool = False |
| 98 | + ): |
| 99 | + """ |
| 100 | + Args: |
| 101 | + observations: DataFrame of observations. |
| 102 | + event_rate: EventRate object. |
| 103 | + params: Parameters to fit. |
| 104 | + covariates: Covariates to use in the model. |
| 105 | + target: Target variable to predict. |
| 106 | + t1: Start time of the time period. |
| 107 | + t2: End time of the time period. |
| 108 | + """ |
| 109 | + self.event_rate = event_rate |
| 110 | + self.starting_params = params |
| 111 | + self.covariates = covariates |
| 112 | + self.target = target |
| 113 | + self.t1 = t1 |
| 114 | + self.t2 = t2 |
| 115 | + self.verbose = verbose |
| 116 | + self.model_run = False |
| 117 | + self.model_fit = None |
| 118 | + self.fitted_params = None |
| 119 | + self.fitted_probs = None |
| 120 | + self.hessian = None |
| 121 | + self.se = None |
| 122 | + self.params_table = None |
| 123 | + |
| 124 | + def calculate_change_rates( |
| 125 | + self, |
| 126 | + params: torch.Tensor, |
| 127 | + covariates: torch.Tensor = None, |
| 128 | + t1: torch.Tensor = None, |
| 129 | + t2: torch.Tensor = None, |
| 130 | + **kwargs |
| 131 | + ): |
| 132 | + if t1 is None: t1 = self.t1 |
| 133 | + if t2 is None: t2 = self.t2 |
| 134 | + if covariates is None: covariates = self.covariates |
| 135 | + return self.event_rate.calculate_change( |
| 136 | + t1 = t1, |
| 137 | + t2 = t2, |
| 138 | + params = params, |
| 139 | + covariates = covariates, |
| 140 | + **kwargs, |
| 141 | + ) |
| 142 | + |
| 143 | + def calculate_probs(self, params: torch.Tensor, **kwargs): |
| 144 | + change_rates = self.calculate_change_rates(params = params, **kwargs) |
| 145 | + probs = ( |
| 146 | + (1.0 - torch.exp(-1.0 * change_rates)) |
| 147 | + .squeeze(-1) |
| 148 | + .clamp(min = self.EPSILON, max = 1.0 - self.EPSILON) |
| 149 | + ) |
| 150 | + return probs |
| 151 | + |
| 152 | + def calculate_nll(self, params: torch.Tensor, **kwargs): |
| 153 | + probs = self.calculate_probs(params = params, **kwargs) |
| 154 | + ll = torch.sum( |
| 155 | + self.target * torch.log(probs) + |
| 156 | + (1.0 - self.target) * torch.log(1.0 - probs) |
| 157 | + ) |
| 158 | + return -1.0 * ll |
| 159 | + |
| 160 | + def prepare_results(self): |
| 161 | + if not self.model_fit: |
| 162 | + raise ValueError("Run fit() first") |
| 163 | + # Prepare table of fitted parameters |
| 164 | + self.hessian = torch.autograd.functional.hessian( |
| 165 | + self.calculate_nll, |
| 166 | + self.fitted_params, |
| 167 | + ) |
| 168 | + self.se = torch.sqrt(torch.linalg.diagonal(torch.linalg.inv(self.hessian))) |
| 169 | + self.params_table = pd.DataFrame({ |
| 170 | + 'estimate': self.fitted_params.data.cpu().numpy(), |
| 171 | + 'std_err': self.se.data.cpu().numpy(), |
| 172 | + }) |
| 173 | + |
| 174 | + def fit(self): |
| 175 | + self.model_fit = torchmin.minimize( |
| 176 | + fun = self.calculate_nll, |
| 177 | + x0 = self.starting_params, |
| 178 | + method = self.OPTIMIZER, |
| 179 | + tol = self.TOLERANCE, |
| 180 | + disp = self.verbose, |
| 181 | + ) |
| 182 | + self.fitted_params = self.model_fit.x |
| 183 | + self.fitted_probs = self.calculate_probs(params = self.fitted_params) |
| 184 | + self.model_fit = True |
| 185 | + self.prepare_results() |
| 186 | + |
| 187 | + def get_results(self): |
| 188 | + return self.params_table |
| 189 | + |
| 190 | + def predict( |
| 191 | + self, |
| 192 | + t1: torch.Tensor = None, |
| 193 | + t2: torch.Tensor = None, |
| 194 | + covariates: torch.Tensor = None, |
| 195 | + **kwargs |
| 196 | + ): |
| 197 | + if t1 is None and t2 is None and covariates is None: |
| 198 | + t1 = self.t1 |
| 199 | + t2 = self.t2 |
| 200 | + covariates = self.covariates |
| 201 | + elif t1 is None: |
| 202 | + t1 = torch.zeros_like(t2) |
| 203 | + change_rates = self.calculate_change_rates( |
| 204 | + params = self.fitted_params, |
| 205 | + t1 = t1, |
| 206 | + t2 = t2, |
| 207 | + covariates = covariates, |
| 208 | + **kwargs, |
| 209 | + ) |
| 210 | + probs = self.calculate_probs( |
| 211 | + params = self.fitted_params, |
| 212 | + t1 = t1, |
| 213 | + t2 = t2, |
| 214 | + covariates = covariates, |
| 215 | + **kwargs, |
| 216 | + ) |
| 217 | + return pd.DataFrame( |
| 218 | + { |
| 219 | + 't1': t1.data.cpu().numpy(), |
| 220 | + 't2': t2.data.cpu().numpy(), |
| 221 | + 'probs': probs.data.cpu().numpy(), |
| 222 | + 'change_rates': change_rates.data.cpu().numpy(), |
| 223 | + } |
| 224 | + ) |
0 commit comments