Skip to content

Commit 7d894ec

Browse files
committed
added julia files
1 parent 9a50561 commit 7d894ec

3 files changed

Lines changed: 228 additions & 0 deletions

File tree

puf_stage2/dataprep.jl

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Keep `x` when `keep` is true, otherwise a zero of the same type as `x`.
_masked(keep, x) = keep ? x : zero(x)

# Positive part of `x`: `x` when it is greater than zero, otherwise zero.
_positive_part(x) = x > 0 ? x : zero(x)

# Magnitude of the negative part of `x`: `-x` when `x` is below zero, otherwise zero.
_negative_part(x) = x < 0 ? -x : zero(x)

# True when `x` lies in the half-open interval (lo, hi]; `nothing` means "no bound".
_in_bin(x, lo, hi) = (lo === nothing || x > lo) && (hi === nothing || x <= hi)

"""
    Dataprep(puf, Stage_I_factors, Stage_II_targets, year)

Build the stage-2 coefficient matrices and target gaps for `year`.

# Arguments
- `puf`: table indexable by column name (`puf["e00200"]`), one entry per record.
- `Stage_I_factors`: indexed as `Stage_I_factors[string(year)][factor_name]`.
- `Stage_II_targets`: indexed as `Stage_II_targets[string(year)][target_name]`.
- `year`: the year to prepare; converted with `string(year)` for the lookups.

# Returns
A tuple `(A1, A2, b)`:
- `A1`: 27×N matrix (N = number of records). Rows, in order: single, joint and
  head-of-household returns, returns with social security, dependent exemptions,
  interest, dividends, business income, business loss, capital gains, pensions and
  annuities, Schedule E income, Schedule E loss, social security income,
  unemployment compensation, then the twelve wage bins.
- `A2`: `-A1`.
- `b`: length-27 vector, target minus the current weighted total for each row of `A1`.
"""
function Dataprep(puf, Stage_I_factors, Stage_II_targets, year)
    println("Preparing coefficient matrix for $year .....")

    factors = Stage_I_factors[string(year)]
    targets = Stage_II_targets[string(year)]

    is_filer = puf["filer"] .== 1
    mars = puf["mars"]
    gross_ss = puf["e02400"]
    agi = puf["e00100"]
    wages = puf["e00200"]
    base_weight = puf["s006"]

    # Records with social security income grow with APOPSNR, all others with ARETS.
    s006 = ifelse.(gross_ss .> 0,
                   base_weight .* factors["APOPSNR"] ./ 100,
                   base_weight .* factors["ARETS"] ./ 100)

    # Return counts: the record weight where the condition holds, zero elsewhere.
    single_return = _masked.((mars .== 1) .& is_filer, s006)
    joint_return = _masked.(((mars .== 2) .| (mars .== 3)) .& is_filer, s006)
    hh_return = _masked.((mars .== 4) .& is_filer, s006)
    return_w_SS = _masked.((gross_ss .> 0) .& is_filer, s006)

    # Weighted amounts; every operation is broadcast so plain vectors work too.
    dependent_exempt_num =
        (puf["xocah"] .+ puf["xocawh"] .+ puf["xoodep"] .+ puf["xopar"]) .* s006
    interest = puf["e00300"] .* s006
    dividend = puf["e00600"] .* s006
    biz_income = _positive_part.(puf["e00900"]) .* s006
    biz_loss = _negative_part.(puf["e00900"]) .* s006
    cap_gain = _positive_part.(puf["p23250"] .+ puf["p22250"]) .* s006
    annuity_pension = puf["e01700"] .* s006
    sch_e_income = _positive_part.(puf["e02000"]) .* s006
    sch_e_loss = _negative_part.(puf["e02000"]) .* s006
    ss_income = _masked.(is_filer, gross_ss) .* s006
    unemployment_comp = puf["e02300"] .* s006

    # Wage distribution: e00200 split by e00100 into twelve (lo, hi] bins. The first
    # bin has no lower bound and the last has no upper bound.
    wage_edges = (0, 10000, 20000, 30000, 40000, 50000,
                  75000, 100000, 200000, 500000, 1000000)
    lower_bounds = (nothing, wage_edges...)
    upper_bounds = (wage_edges..., nothing)
    wage_rows = [_masked.(_in_bin.(agi, lo, hi), wages) .* s006
                 for (lo, hi) in zip(lower_bounds, upper_bounds)]

    # Set up the matrix: one row per constraint, one column per record.
    rows = [single_return, joint_return, hh_return, return_w_SS,
            dependent_exempt_num, interest, dividend,
            biz_income, biz_loss, cap_gain, annuity_pension,
            sch_e_income, sch_e_loss, ss_income, unemployment_comp,
            wage_rows...]
    One_half_LHS = permutedims(reduce(hcat, rows))

    # Coefficients for r and s
    A1 = One_half_LHS
    A2 = -One_half_LHS

    println("Preparing targets for year $year .....")

    # Count targets are compared with the weighted totals directly.
    count_targets = (
        "Single Returns" => single_return,
        "Joint Returns" => joint_return,
        "Head of Household Returns" => hh_return,
        "Number of Returns w/ Gross Security Income" => return_w_SS,
        "Number of Dependent Exemptions" => dependent_exempt_num,
    )
    count_gaps = [targets[name] - sum(row) for (name, row) in count_targets]

    # Dollar targets: (target name, population factor, growth factor, weighted row).
    # Social security is the only one scaled by APOPSNR instead of APOPN.
    amount_targets = (
        ("Taxable Interest Income", "APOPN", "AINTS", interest),
        ("Ordinary Dividends", "APOPN", "ADIVS", dividend),
        ("Business Income (Schedule C)", "APOPN", "ASCHCI", biz_income),
        ("Business Loss (Schedule C)", "APOPN", "ASCHCL", biz_loss),
        ("Net Capital Gains in AGI", "APOPN", "ACGNS", cap_gain),
        ("Taxable Pensions and Annuities", "APOPN", "ATXPY", annuity_pension),
        ("Supplemental Income (Schedule E)", "APOPN", "ASCHEI", sch_e_income),
        ("Supplemental Loss (Schedule E)", "APOPN", "ASCHEL", sch_e_loss),
        ("Gross Social Security Income", "APOPSNR", "ASOCSEC", ss_income),
        ("Unemployment Compensation", "APOPN", "AUCOMP", unemployment_comp),
    )
    amount_gaps = [targets[name] * factors[pop] / factors[growth] * 1000 - sum(row)
                   for (name, pop, growth, row) in amount_targets]

    wage_target_names = (
        "Wages and Salaries: Zero or Less",
        "Wages and Salaries: \$1 Less Than \$10,000",
        "Wages and Salaries: \$10,000 Less Than \$20,000",
        "Wages and Salaries: \$20,000 Less Than \$30,000",
        "Wages and Salaries: \$30,000 Less Than \$40,000",
        "Wages and Salaries: \$40,000 Less Than \$50,000",
        "Wages and Salaries: \$50,000 Less Than \$75,000",
        "Wages and Salaries: \$75,000 Less Than \$100,000",
        "Wages and Salaries: \$100,000 Less Than \$200,000",
        "Wages and Salaries: \$200,000 Less Than \$500,000",
        "Wages and Salaries: \$500,000 Less Than \$1 Million",
        "Wages and Salaries: \$1 Million and Over",
    )
    APOPN = factors["APOPN"]
    AWAGE = factors["AWAGE"]
    # NOTE(review): wage targets are scaled by 100 while every other dollar target
    # above uses 1000. Kept as written in the original -- confirm which is intended.
    wage_gaps = [targets[name] * APOPN / AWAGE * 100 - sum(row)
                 for (name, row) in zip(wage_target_names, wage_rows)]

    # Same order as the rows of A1.
    b = vcat(count_gaps, amount_gaps, wage_gaps)

    return A1, A2, b
end

puf_stage2/solver.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
""" rename `solve_lp_for_year.jl` """
2+
3+
using #insert solver

puf_stage2/stage2.jl

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
using Pandas
2+
include("dataprep.jl")
3+
# include("solver.jl")
4+
5+
# Inputs: the matched PUF records and the two stage-1 tables, all located relative to
# this file. The stage-1 tables use their first column as the index.
puf = read_csv(joinpath(@__DIR__, "..", "puf_data", "cps-matched-puf.csv"))
Stage_I_factors = read_csv(
    joinpath(@__DIR__, "..", "puf_stage1", "Stage_I_factors_transpose.csv");
    index_col=0
)
Stage_II_targets = read_csv(
    joinpath(@__DIR__, "..", "puf_stage1", "Stage_II_targets.csv");
    index_col=0
)

# Use the matched_weight variable in CPS as the final weight (scaled by 100).
puf["s006"] = puf["matched_weight"] * 100

# Output table; its first column, WT2011, is a copy of the scaled weight.
z = DataFrame()
z["WT2011"] = puf["s006"]

# Execute stage2 logic for each year using a year-specific LP tolerance.
# Not implemented yet; only the column-name line exists so far:
# function create_weights(puf, Stage_I_factors, Stage_II_targets, year, tol)
#     col_name = string("WT", year)
# end

# Nineteen tolerances and nineteen years (2012 through 2030), in matching order.
tol_list = [
    0.40, 0.38, 0.35, 0.33, 0.30, 0.37, 0.38, 0.38, 0.39, 0.39,
    0.38, 0.40, 0.39, 0.41, 0.41, 0.42, 0.42, 0.42, 0.42,
]
year_list = collect(2012:1:2030)
26+
27+
"""
    test_func(puf, Stage_I_factors, Stage_II_targets, year, tol)

Forward `puf`, `Stage_I_factors`, `Stage_II_targets` and `year` to `Dataprep` and
return whatever it returns. `tol` is accepted but not used.
"""
function test_func(puf, Stage_I_factors, Stage_II_targets, year, tol)
    return Dataprep(puf, Stage_I_factors, Stage_II_targets, year)
end
30+
31+
# Planned: run the function for every year with its own tolerance level, in
# sequential order, by broadcasting over year_list and tol_list:
32+
# create_weights.(puf, Stage_I_factors, Stage_II_targets, year_list, tol_list)
33+
34+
# NOTE(review): a dotted call broadcasts over every argument, so the three tables
# probably need wrapping in Ref(...) before this is enabled -- confirm.
# test_func.(puf, Stage_I_factors, Stage_II_targets, year_list, tol_list)
35+
# Current smoke test: a single call for 2012 with tolerance 0.4.
test_func(puf, Stage_I_factors, Stage_II_targets, 2012, 0.4)
36+
37+
38+
# Python original, still to be ported:
# z = z.round(0).astype('int64') # python code
39+
40+
# The rounding can be done before the values are added to the data frame, using
# round.(Int64, [array])
41+
# ^^^ *** NOTE THE BROADCASTING OPERATOR (the dot after `round`) ***
42+
43+
# Final export, disabled for now. The Julia literal `false` must be lowercase.
# to_csv(z, joinpath(@__DIR__, "puf_weights.csv.gz"), index=false, compression = "gzip") # "false" MUST be LOWERcase

0 commit comments

Comments
 (0)