-
Notifications
You must be signed in to change notification settings - Fork 57
Expand file tree
/
Copy pathoptimize.py
More file actions
310 lines (244 loc) · 14.2 KB
/
optimize.py
File metadata and controls
310 lines (244 loc) · 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import numpy as np
import pandas as pd
import cvxpy as cvx
from scipy.optimize import minimize, differential_evolution
class Optimize(object):
    '''
    This class is where the Synthetic Control weights are solved for.
    More precisely, it contains all methods for parameter estimation, such as:

    W: a (1 x n_controls) matrix
        containing the weights assigned to each control unit in the synthetic
        control. W is constrained to be convex, that is sum(W)==1 and
        ∀w∈W, w≥0: each weight is non-negative and all weights sum to one.
        Referred to as W in Abadie, Diamond, Hainmueller.

    V: a (len(covariates) x len(covariates)) matrix
        representing the relative importance of each covariate. V is
        constrained to be diagonal, positive semi-definite. Practically, this
        means that the products V.control_covariates and V.treated_covariates
        will always be non-negative. Further, we constrain sum(V)==1,
        otherwise there would be an infinite number of solutions V*c, where c
        is a scalar, that assign equal relative importance to each covariate.
        Referred to as V in Abadie, Diamond, Hainmueller.

    pen: float
        Coefficient representing the relative importance of minimizing
        differences between control units and the treated unit BEFORE
        weighting them (i.e. pairwise differences between control units and
        the treated unit) as compared to AFTER weighting them (synthetic
        control vs. treated unit). A higher value means that pairwise
        differences are more important. If pen==0, pairwise differences do
        not matter; thus, if pen==0, optimize will solve for a normal
        synthetic control.

    constant: float
        Differenced synthetic controls allow for a constant offset
        (difference) between the treated unit and the synthetic control;
        this constant is solved for here.
    '''
def optimize(self,
             treated_outcome, treated_covariates,
             control_outcome, control_covariates,
             pairwise_difference,
             data,
             placebo,
             pen, steps=8,
             verbose=False):
    '''
    Solves the nested optimization problem of finding the optimal synthetic control.

    Runs L-BFGS-B over the covariate-importance diagonal v from `steps`
    different initializations; each candidate v is scored by total_loss,
    which solves for the optimal weights w*(v) and records the best
    solution found so far on `data`.

    placebo: bool or str
        indicates whether the optimization is run for finding the real
        synthetic control or as part of a placebo-style validity test
        ("in-time" / "in-space"). If truthy, only placebo class attributes
        are affected.
    steps: int
        the number of different initializations of v_0 the gradient-based
        optimization is run for. Higher values mean longer running time but
        a higher chance of finding a globally optimal solution.
    verbose: bool, default=False
        if True, prints additional detail regarding the state of the
        optimization.
    '''
    # Extra arguments forwarded by minimize to total_loss
    # (order must match total_loss's signature after v_0).
    args = (treated_outcome, treated_covariates,
            control_outcome, control_covariates,
            pairwise_difference,
            pen, placebo, data)

    for step in range(steps):
        # Approach for selecting the initial v matrix:
        # first time, a uniform v assigning equal weight to all covariates;
        # subsequent times, a random pmf sampled from a Dirichlet distribution.
        if step == 0:
            v_0 = np.full(data.n_covariates, 1 / data.n_covariates)
            if pen == "auto":
                # pen=="auto" adds the penalty coefficient as one extra
                # parameter to optimize over.
                v_0 = np.append(v_0, 0)
        else:
            # Dirichlet returns a valid pmf over n_covariates states.
            v_0 = self.original_data.rng.dirichlet(np.ones(data.n_covariates))
            if pen == "auto":
                # Still experimenting with what distribution is appropriate.
                v_0 = np.append(v_0, self.original_data.rng.lognormal(1.5, 1))

        # Each covariate weight is bounded to [0, 1]; when pen=="auto" the
        # appended penalty parameter is bounded to [0, 20] (non-negative).
        bnds = tuple((0, 1) for _ in range(data.n_covariates))
        if pen == "auto":
            bnds += ((0, 20),)

        # Optimize
        res = minimize(self.total_loss, v_0, args=args,
                       method='L-BFGS-B', bounds=bnds,
                       options={'gtol': 1e-8, 'disp': 3, 'iprint': 3})

        if verbose:
            print("Successful:", res.success)
            print(res.message)

        # If the optimizer did not converge, retry once before admitting
        # defeat. (Bug fix: the previous `try: res.x / except:` guard could
        # never fire, since scipy's OptimizeResult always exposes .x, and the
        # retry call forwarded *args in total_loss order to a method with a
        # different signature.)
        if not res.success:
            data.fail_count += 1
            if data.fail_count <= 1:
                self.optimize(treated_outcome, treated_covariates,
                              control_outcome, control_covariates,
                              pairwise_difference,
                              data, placebo, pen, steps, verbose)
                return

    # DiffSynth runs on differenced data; transcribe results back onto the
    # original dataset.
    if self.method == "DSC":
        self._update_original_data(placebo)
    return
def total_loss(self, v_0,
               treated_outcome, treated_covariates,
               control_outcome, control_covariates,
               pairwise_difference,
               pen, placebo, data):
    '''
    Solves for the w*(v) that minimizes loss function 1 given v,
    then returns the loss from loss function 2 with w=w*(v).

    v_0: np.array
        candidate diagonal of V; if pen=="auto" the last element is the
        penalty coefficient and the remaining elements form the diagonal.
    placebo: bool or str
        indicates whether the optimization is run for finding the real
        synthetic control or as part of a placebo-style validity test.
        If truthy, only placebo class attributes on `data` are affected.
    '''
    assert placebo in [False, "in-time", "in-space"], "TypeError: Placebo must False, 'in-time' or 'in-space'"

    n_controls = control_outcome.shape[1]

    # Split off the penalty coefficient when it is itself being optimized.
    if pen == "auto":
        V = np.diag(v_0[:-1])
        pen_coef = v_0[-1]
    else:
        V = np.diag(v_0)
        pen_coef = pen

    # Construct the problem - constrain weights to be non-negative
    w = cvx.Variable((n_controls, 1), nonneg=True)

    # Define the objective.
    # PROBLEM: treated_synth_difference = cvx.sum(V @ cvx.square(treated_covariates.T - control_covariates @ w))
    # runs better for normal sc, but it doesn't work at all for in-time placebos; this probably means the
    # dimensionality is off somewhere in the processing. This is a work-around that works, but it ain't pretty.
    if placebo == 'in-time':
        treated_synth_difference = cvx.sum(V @ cvx.square(treated_covariates - control_covariates @ w))
    else:
        treated_synth_difference = cvx.sum(V @ cvx.square(treated_covariates.T - control_covariates @ w))
    pairwise_difference = cvx.sum(V @ (cvx.square(pairwise_difference) @ w))
    objective = cvx.Minimize(treated_synth_difference + pen_coef*pairwise_difference)

    # Add constraint: the weights must sum to one.
    constraints = [cvx.sum(w) == 1]

    # Solve problem
    problem = cvx.Problem(objective, constraints)

    try:
        # Try solving with the current value of V; if the solver fails,
        # return infinite loss so this candidate V is discarded.
        # (Bug fix: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt and SystemExit.)
        result = problem.solve(verbose=False)
        loss = (treated_outcome - control_outcome @ w.value).T @ (treated_outcome - control_outcome @ w.value)
    except Exception:
        return float(np.inf)

    # If loss is smaller than the previous minimum, update loss, w and v.
    if not placebo:
        if loss < data.min_loss:
            data.min_loss = loss
            data.w = w.value
            # Normalize v (the optimizers sometimes diverge from the bounds).
            data.v = np.diagonal(V) / np.sum(np.diagonal(V))
            data.pen = pen_coef
            # Transpose to make it (n_periods x 1)
            data.synth_outcome = data.w.T @ data.control_outcome_all.T
            data.synth_covariates = data.control_covariates @ data.w
    elif placebo == "in-space":
        data.in_space_placebo_w = w.value
    elif placebo == "in-time":
        data.in_time_placebo_w = w.value

    # Return loss
    return loss
def _get_dsc_outcome(self, w, control_outcome, periods_pre_treatment, treated_pretreatment_outcome):
    '''Method used only by DiffSynth (DSC).

    Arguments:
        w: np.array
            Weight matrix (n_controls x 1)
        control_outcome: np.array
            Outcome matrix for all control units over all time periods, with
            time periods as rows and control units as columns
            (n_periods_all x n_controls) — this orientation is what the row
            slice `control_outcome[:periods_pre_treatment]` relies on.
        periods_pre_treatment: int
            Number of time periods before treatment
        treated_pretreatment_outcome: np.array
            Outcome matrix for the treated unit in the pre-treatment period
            (1 x periods_pre_treatment)

    Approach:
        1. Compute the differenced synthetic control, without the constant.
        2. Solve for the constant as the average pre-treatment difference
           between the treated unit and (1.).
        3. Add the constant to every time period of (1.); this is the outcome
           of the differenced synthetic control.

    Returns:
        (constant, synth_outcome), where synth_outcome is (1 x n_periods_all).
    '''
    # 1. Raw synthetic outcome, before the constant offset.
    # (X @ w).T is identical to w.T @ X.T for 2-D arrays.
    raw_synth = (control_outcome @ w).T
    raw_synth_pre = (control_outcome[:periods_pre_treatment] @ w).T

    # 2. Constant: mean pre-treatment gap between treated unit and raw synth.
    constant = np.mean(treated_pretreatment_outcome - raw_synth_pre)

    # 3. Shift the whole synthetic series by the constant.
    return constant, raw_synth + constant
def _update_original_data(self, placebo):
    '''
    Used only in DiffSynth / DSC.

    Called at the end of the optimization procedure: transcribes the relevant
    results from the modified (differenced) dataset back onto the original
    dataset, depending on which kind of run just finished (real, in-space
    placebo, or in-time placebo).
    '''
    src = self.modified_data
    dst = self.original_data
    if not placebo:
        # Real synthetic control: copy solution and rebuild the DSC outcome.
        dst.w = src.w
        dst.v = src.v
        dst.pen = src.pen
        dst.synth_constant, dst.synth_outcome = self._get_dsc_outcome(
            dst.w,
            dst.control_outcome_all,
            dst.periods_pre_treatment,
            dst.treated_outcome)
    elif placebo == 'in-space':
        dst.in_space_placebo_w = src.in_space_placebo_w
        dst.pre_post_rmspe_ratio = src.pre_post_rmspe_ratio
        dst.in_space_placebos = src.in_space_placebos
    else:
        # In-time placebo.
        dst.placebo_treatment_period = src.placebo_treatment_period
        dst.placebo_periods_pre_treatment = src.placebo_periods_pre_treatment
        dst.in_time_placebo_w = src.in_time_placebo_w
        _, dst.in_time_placebo_outcome = self._get_dsc_outcome(
            dst.in_time_placebo_w,
            dst.control_outcome_all,
            dst.placebo_periods_pre_treatment,
            dst.treated_outcome[:dst.placebo_periods_pre_treatment])
    return
##########################################################
## ALTERNATE OPTIMIZATION METHODS -- NOT CURRENTLY USED ##
##########################################################
def diffevo_optimize(self):
    '''Uses the differential evolution optimizer from scipy to solve for synthetic control'''
    # NOTE(review): this method is in the "NOT CURRENTLY USED" section above.
    # total_loss currently takes (v_0, treated_outcome, ..., data), but here
    # it is both passed to differential_evolution without those extra args and
    # called as self.total_loss(self.v, False) expecting a (w, loss) pair —
    # so this appears stale relative to total_loss's current signature;
    # confirm and update before re-enabling.
    bounds = [(0,1) for _ in range(self.n_covariates)]
    result = differential_evolution(self.total_loss, bounds)
    self.v = result.x
    self.w, loss = self.total_loss(self.v, False)
    return self.w, loss
def random_optimize(self, steps=10**4):
    '''
    Random-search fallback optimizer ("throw spaghetti at the wall and see
    what sticks").

    Samples valid v matrices from a Dirichlet distribution, computes the
    resulting w*(v) and the total loss associated with each, and returns the
    w*(v) that minimizes total loss, together with that loss.

    NOTE(review): this method is in the "NOT CURRENTLY USED" section above;
    total_loss currently takes (v_0, treated_outcome, ..., data), so the
    self.total_loss(v, False) call below appears stale relative to that
    signature — confirm and update before re-enabling.
    '''
    # Track the best w*(v) found so far.
    best_w, min_loss = None, float(np.inf)

    # Print progress every 10% of the way through (guard against steps < 10).
    # (Bug fix: `(i+1)%steps/10 == 0` parsed as `((i+1)%steps)/10 == 0`, so
    # the message only ever fired once, at the last step, with a wrong value.)
    tenth = steps // 10

    for i in range(steps):
        # Dirichlet returns a valid pmf over n_covariates states.
        v = np.random.dirichlet(np.ones(self.n_covariates), size=1)

        if tenth and (i + 1) % tenth == 0:
            print('{}%'.format(100 * (i + 1) // steps))

        # Compute w*(v) and the loss for this v.
        w, loss = self.total_loss(v, False)

        # Keep w*(v) if it results in a lower loss.
        if loss < min_loss:
            best_w, min_loss = w, loss

    # Store and return the best solution found.
    self.w = best_w
    return best_w, min_loss