@@ -50,9 +50,7 @@ from fixedeffect.iv import iv2sls, ivgmm, ivtest
5050from fixedeffect.fe import fixedeffect, did, getfe
5151from fixedeffect.utils.panel_dgp import gen_data
5252```
53- gen_data is the function we use to simulate data. The function above generated
54- a balanced panel data set with number of cross-sectional id equals 100 and time
55- id equals 10.
53+ *gen_data* is the function we use to simulate data.
5654
5755### Data
5856
@@ -73,8 +71,11 @@ In the above simulated dataset, "beta" are true coefficients,
7371#### Instrumental variables estimation
7472We include two functions, "iv2sls" and "ivgmm", for instrumental variable regression.
7573##### iv2sls
76- This function return two-stage least square estimation results.
77- The estimation is achieved by:
74+ This function returns two-stage least squares estimation results. Define *y* as
75+ the dependent variable, *x_1* as the exogenous variable, *x_2* as the endogenous variable,
76+ and *x_3* and *x_4* as the instrumental variables. *id* and *time* are the cross-sectional
77+ ID and time ID.
78+ An IV two-way fixed effect model is estimated by two-stage least squares using:
7879``` python
7980formula = ' y ~ x_1|id+time|0|(x_2~x_3+x_4)'
8081model_iv2sls = iv2sls(data_df = df,
@@ -99,7 +100,7 @@ model_iv2sls = iv2sls(data_df = df,
99100result = model_iv2sls.fit()
100101result.summary()
101102```
102- You can obtain estimation result using either grammar above .
103+ The two syntaxes above yield identical results.
103104We provide specification tests for IV models:
104105``` python
105106ivtest(result1)
@@ -108,10 +109,10 @@ Three tests are included: weak iv test (Cragg-Donald statistics + Stock and Yog
108109over-identification test (Sargan/Basmann test), and endogeneity test (Durbin test).
109110
110111##### ivgmm
111- This function returns one-step gmm estimation result.
112- The estimation is achieved by:
112+ This function returns one-step GMM estimation results. With the same variable definitions,
113+ estimation is achieved by:
113114``` python
114- formula = ' y ~ x_1|id|0|(x_2~x_3+x_4)'
115+ formula = ' y ~ x_1|id+time |0|(x_2~x_3+x_4)'
115116
116117model_ivgmm = ivgmm(data_df = df,
117118 formula = formula)
@@ -137,7 +138,11 @@ result.summary()
137138```
138139#### Fixed Effect Model
139140This function returns fixed effect model estimation result.
140- The estimation is achieved by:
141+ Define *y* as
142+ the dependent variable and *x_1* as the independent variable; *id* and *time* are the cross-sectional
143+ ID and time ID.
144+ The following code estimates a two-way fixed effect model with two-way clustered
145+ standard errors:
141146``` python
142147formula = ' y ~ x_1|id+time|id+time|0'
143148
@@ -147,9 +152,7 @@ model_fe = fixedeffect(data_df = df,
147152result = model_fe.fit()
148153result.summary()
149154```
150- Sample code above estimate a two-way fixed effect model with cluster standard
151- error clustering at the individual and time level.
152- You can also achieve the same estimation results by:
155+ or
153156``` python
154157exog_x = [' x_1' ]
155158y = [' y' ]
@@ -167,7 +170,7 @@ result = model_fe.fit()
167170result.summary()
168171```
169172#### Difference in Difference
170- DID is simply a specific fixed effect model. We provide a function of DID to help
173+ DID is simply a specific type of fixed effect model. We provide a DID function to help
171174simplify the estimation process. A regular DID estimation is achieved using
172175the following command:
173176``` python
@@ -182,7 +185,7 @@ model_did = did(data_df = df,
182185result = model_did.fit()
183186result.summary()
184187```
185- "exp_date" is the first date that the experiment begins, "treatment" is the
188+ "*exp_date*" is the first date on which the experiment begins, and "*treatment*" is the
186189column name of the treatment variable. This command estimates the equation below:
187190
188191<img src="https://latex.codecogs.com/svg.image?y_{it}&space;=&space;Treat_i&space;Post_t&space;\beta_1&space;&plus;&space;&space;Treat_i\beta_2&space;&plus;&space;Post_t&space;\beta_3&space;&plus;&space;\varepsilon_{it}" title="y_{it} = Treat_i Post_t \beta_1 + Treat_i\beta_2 + Post_t \beta_3 + \varepsilon_{it}" />
@@ -330,97 +333,77 @@ Return a test result table of iv tests.
330333
331334``` python
332335# need to install from kuaishou product base
333- from FixedEffectModel.api import *
334- from utils.panel_dgp import gen_data
336+ import numpy as np
337+ import pandas as pd
338+ from fixedeffect.iv import iv2sls, ivgmm,ivtest
339+ from fixedeffect.fe import fixedeffect, did,getfe
340+ from fixedeffect.utils.panel_dgp import gen_data
341+ from fixedeffect.iv import ivtest
335342
336343N = 100
337344T = 10
338- beta = [- 3 ,- 1.5 ,1 ,2 ,3 ,4 ,5 ]
339- alpha = 0.9
340- ate = 1
341- exp_date = 2
345+ beta = [- 3 ,1 ,2 ,3 ,4 ]
346+ ate = 1
347+ exp_date = 5
342348
343349# generate sample data
344350df = gen_data(N, T, beta, ate, exp_date)
345351
346- # define model
347- # you can define the model through defining formula like 'dependent variable ~ continuous variable|fixed_effect|clusters|(endogenous variables ~ instrument variables)'
348- formula_without_iv = ' y~x_1+x_2|id+time|id+time'
349- formula_without_cluster = ' y~x_1+x_2|id+time|0|(x_3|x_4~x_5+x_6)'
350- formula = ' y~x_1+x_2|id+time|id+time|(x_3|x_4~x_5+x_6)'
351- result1 = ols_high_d_category(df,
352- formula = formula,
353- robust = False ,
354- c_method = ' cgm' ,
355- epsilon = 1e-8 ,
356- psdef = True ,
357- max_iter = 1e6 )
358-
359- # or you can define the model through defining each part
360- consist_input = [' x_1' ,' x_2' ]
361- out_input = [' y' ]
362- category_input = [' id' ,' time' ]
363- cluster_input = [' id' ,' time' ]
364- endo_input = [' x_3' ,' x_4' ]
365- iv_input = [' x_5' ,' x_6' ]
366- result1 = ols_high_d_category(df,
367- consist_input,
368- out_input,
369- category_input,
370- cluster_input,
371- endo_input,
372- iv_input,
373- formula = None ,
374- robust = False ,
375- c_method = ' cgm' ,
376- epsilon = 1e-8 ,
377- max_iter = 1e6 )
378-
379- # show result
380- result1.summary()
381-
382- # get fixed effects
383- getfe(result1)
352+ # ------------------------------#
353+ # define instrumental variable model
354+ # iv2sls
355+ formula = ' y ~ x_1|id+time|0|(x_2~x_3+x_4)'
356+ model_iv2sls = iv2sls(data_df = df,
357+ formula = formula)
358+ result = model_iv2sls.fit()
359+ result.summary()
384360
361+ # ivgmm
362+ formula = ' y ~ x_1|id|0|(x_2~x_3+x_4)'
385363
364+ model_ivgmm = ivgmm(data_df = df,
365+ formula = formula)
366+ result = model_ivgmm.fit()
367+ result.summary()
386368
387- ```
388- You can also do DID with treatment_input option:
389- ``` python
390- # need to install from kuaishou product base
391- from FixedEffectModel.api import *
392- from utils.panel_dgp import gen_data
369+ # obtain iv test results
370+ ivtest(result)
393371
394- N = 100
395- T = 10
396- beta = [- 3 , - 1.5 , 1 , 2 , 3 , 4 , 5 ]
397- alpha = 0.9
398- ate = 1
399- exp_date = 2
372+ # ------------------------------#
373+ # define fixed effect model
374+ exog_x = [' x_1 ' ]
375+ y = [ ' y ' ]
376+ category = [ ' id ' , ' time ' ]
377+ cluster = [ ' id ' , ' time ' ]
400378
401- # generate sample data
402- df = gen_data(N, T, beta, ate, exp_date)
403379
404- # did wrt group effect
405- formula = ' y~0|id+time|0|0'
406- result = ols_high_d_category(data_df,
407- formula = formula,
408- treatment_input = {' treatment_col' :' treatment' ,
409- ' exp_date' :5 ,
410- ' effect' :' group' })
411- result.summary()
380+ model_fe = fixedeffect(data_df = df,
381+ dependent = y,
382+ exog_x = exog_x,
383+ category = category,
384+ cluster = cluster)
412385
413- # did wrt individual effect
414- formula = ' y~0|id+time|0|0'
415- result = ols_high_d_category(data_df,
416- formula = formula,
417- treatment_input = {' treatment_col' :' treatment' ,
418- ' exp_date' :5 ,
419- ' effect' :' individual' })
386+ result = model_fe.fit()
420387result.summary()
421388
389+ # obtain fixed effect
390+ getfe(result)
391+
392+ # ------------------------------#
393+ # define DID model
394+ formula = ' y ~ 0|0|0|0'
395+
396+ model_did = did(data_df = df,
397+ formula = formula,
398+ treatment = [' treatment' ],
399+ csid = [' id' ],
400+ tsid = [' time' ],
401+ exp_date = 2 )
402+ result = model_did.fit()
403+ result.summary()
422404```
423405
406+
424407# Requirements
425408- Python 3.6+
426409- Pandas and its dependencies (Numpy, etc.)
0 commit comments