Skip to content

Commit 83bfd65

Browse files
committed
Merge branch 'main' of https://github.com/npr99/intersect-community-data into main
2 parents 4c6aac2 + e392cba commit 83bfd65

4 files changed

Lines changed: 272 additions & 753 deletions

File tree

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "a3b0c51a-7e85-41bb-ad72-4cdc103dadd4",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import warnings\n",
11+
"warnings.filterwarnings(\"ignore\")\n",
12+
"# warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
13+
"warnings.filterwarnings(\"ignore\", message=\".*Pyarrow will become a required dependency.*\")\n",
14+
"import pandas as pd\n",
15+
"import geopandas as gpd\n",
16+
"import numpy as np\n",
17+
"import sys # For displaying package versions\n",
18+
"import os # For managing directories and file paths if drive is mounted\n",
19+
"import random\n",
20+
"import json\n",
21+
"# pd.set_option('display.max_columns', None)\n",
22+
"# pd.set_option('display.max_rows', None)"
23+
]
24+
},
25+
{
26+
"cell_type": "code",
27+
"execution_count": null,
28+
"id": "2961a9a0",
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"# Enable IPython's autoreload extension so that edits to imported modules are picked up without restarting the kernel.\n",
33+
"# %load_ext autoreload loads the extension, which makes the %autoreload magic command available.\n",
34+
"# %autoreload 2 reloads all modules (except those excluded by %aimport) every time before the code typed in a cell is executed.\n",
35+
"# Mode \"2\" means \"reload all modules\"; mode \"1\" would reload only the modules registered with %aimport.\n",
36+
"\n",
37+
"%load_ext autoreload\n",
38+
"%autoreload 2"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": null,
44+
"id": "4e9cf45c-b973-44c7-b8f9-5a2a852de736",
45+
"metadata": {},
46+
"outputs": [],
47+
"source": [
48+
"# Extra third-party packages (presumably used by the lodes_* helper modules - confirm).\n",
49+
"# %pip, unlike !pip, installs into the environment of the running kernel.\n",
50+
"%pip install wget us\n",
51+
"%pip install seaborn fpdf2"
52+
]
53+
},
54+
{
55+
"cell_type": "code",
56+
"execution_count": null,
57+
"id": "ebe817eb-e2ef-4144-bbf0-9d46f82b48f7",
58+
"metadata": {},
59+
"outputs": [],
60+
"source": [
61+
"from _lodes_data_structure import all_ods\n",
62+
"from _lodes_data_structure import all_segparts\n",
63+
"from _lodes_data_structure import all_charstems\n",
64+
"from _lodes_data_structure import all_stems\n",
65+
"from _lodes_data_structure import all_jobtypes\n",
66+
"from _lodes_data_structure import all_mxjobtypes\n",
67+
"from _lodes_data_structure import all_segstems\n",
68+
"from lodes_datautil import *\n",
69+
"from lodes_fullloop import *\n",
70+
"from lodes_mcmcsa_util import *"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"id": "2b272042-4612-4141-b354-5b6cd56188b3",
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"arise_county_fips = dict(johnson=\"20091\",\n",
81+
"                         wyandotte=\"20209\",\n",
82+
"                         finney=\"20055\",\n",
83+
"                         ford=\"20057\",\n",
84+
"                         seward=\"20175\")"
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": null,
90+
"id": "ae28f3ef-58b7-4af4-b453-bc658a59f59b",
91+
"metadata": {},
92+
"outputs": [],
93+
"source": [
94+
"target_county = \"ford\""
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"execution_count": null,
100+
"id": "d0502145",
101+
"metadata": {},
102+
"outputs": [],
103+
"source": [
104+
"county_fips= arise_county_fips[target_county]"
105+
]
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": null,
110+
"id": "2a07e15f-fe79-4103-bcdb-df512d98a0ea",
111+
"metadata": {},
112+
"outputs": [],
113+
"source": [
114+
"stacked_df = obtain_lodes_county_loop([county_fips], \n",
115+
" ['2020'], \n",
116+
" outputfoldername = \"output\",\n",
117+
" ods = all_ods,\n",
118+
" segparts = all_segparts,\n",
119+
" jobtypes = all_jobtypes,\n",
120+
" mxjobtypes = all_mxjobtypes,\n",
121+
" segstems = all_segstems,\n",
122+
" blocklist = '')"
123+
]
124+
},
125+
{
126+
"cell_type": "code",
127+
"execution_count": null,
128+
"id": "e08ddfe1-2485-4a04-9b5a-f495288128b6",
129+
"metadata": {},
130+
"outputs": [],
131+
"source": [
132+
"work_block_list = get_county_work_block_list(stacked_df, county_fips_code=county_fips, year='2020', od='od', seg='na')\n",
133+
"home_block_list = get_county_home_block_list(stacked_df, county_fips_code=county_fips, year='2020', od='od', seg='na')\n",
134+
"print(\"work block list: \", len(work_block_list))\n",
135+
"print(\"home block list: \", len(home_block_list))"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"id": "4faedc22-ac94-4416-ab1b-2e93de1f6b1a",
142+
"metadata": {},
143+
"outputs": [],
144+
"source": [
145+
"# for block in work_block_list[0:10]:\n",
146+
"# out_of_state_rac_blocks_df = out_of_state_rac_blocks(work_block=block, \n",
147+
"# years= ['2020'], \n",
148+
"# outputfoldername = \"output\",\n",
149+
"# stacked_df=stacked_df,\n",
150+
"# segstems = ['SE','SI','SA'])\n",
151+
"# print(len(out_of_state_rac_blocks_df))"
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": null,
157+
"id": "62a095d5",
158+
"metadata": {},
159+
"outputs": [],
160+
"source": [
161+
"seed_value = 1234  # base seed; incremented once per block inside the loop\n",
162+
"years = ['2020']\n",
163+
"os.makedirs('output2', exist_ok=True)  # to_csv does not create missing directories\n",
164+
"for od in [\"wac\", \"rac\"]:\n",
165+
"    # \"wac\" walks the work block list, \"rac\" the home block list\n",
166+
"    block_list = work_block_list if od == \"wac\" else home_block_list\n",
167+
"    block_frames = []  # per-block frames, concatenated once per od below\n",
168+
"    year = years[-1]  # fallback for the county file name when nothing is returned\n",
169+
"    \n",
170+
"    for block in block_list:\n",
171+
"        seed_value += 1\n",
172+
"        joblist = wac_rac_block_to_joblist(stacked_df = stacked_df, \n",
173+
"                                block_fips= block ,\n",
174+
"                                years = years,\n",
175+
"                                seed_value = seed_value,\n",
176+
"                                outputfoldername = \"output\",\n",
177+
"                                od = od,\n",
178+
"                                reshape_vars = {'CE' : 'Earnings',\n",
179+
"                                                'CNS': 'IndustryCode',\n",
180+
"                                                'CA' : 'Age',\n",
181+
"                                                'CR' : 'Race',\n",
182+
"                                                'CT' : 'Ethnicity',\n",
183+
"                                                'CD' : 'Education',\n",
184+
"                                                'CS' : 'Sex'},\n",
185+
"                                segstems = ['SE','SI','SA'])\n",
186+
"        \n",
187+
"        for (year, joblist_od), inner_dict in joblist.items():\n",
188+
"            for key, df in inner_dict.items():\n",
189+
"                # NOTE(review): the file name ignores `key`, so several keys would overwrite one file - confirm\n",
190+
"                df.to_csv(f'output2/{joblist_od}_{block}_joblist_{year}.csv', index=False)\n",
191+
"                block_frames.append(df)\n",
192+
"        print(\"*********************************************************************\")\n",
193+
"\n",
194+
"    # pd.concat raises on an empty list, so fall back to an empty frame\n",
195+
"    joblist_df = pd.concat(block_frames, ignore_index=True) if block_frames else pd.DataFrame()\n",
196+
"    # `od` is the outer loop value here; the inner loop unpacks into `joblist_od` so it is not overwritten\n",
197+
"    joblist_df.to_csv(f'{od}_{target_county}_county_joblist_{year}.csv', index=False)\n",
198+
"    # display(wac_joblist_df)"
199+
]
200+
},
201+
{
202+
"cell_type": "code",
203+
"execution_count": null,
204+
"id": "b43529e0-f739-47a3-9459-7d21df043fcf",
205+
"metadata": {},
206+
"outputs": [],
207+
"source": []
208+
},
209+
{
210+
"cell_type": "code",
211+
"execution_count": null,
212+
"id": "dca9888a-dc7e-4591-b0b9-1cc9b7bd0b91",
213+
"metadata": {},
214+
"outputs": [],
215+
"source": []
216+
},
217+
{
218+
"cell_type": "code",
219+
"execution_count": null,
220+
"id": "2803df4f-2e89-4b17-a226-21c05e6edf17",
221+
"metadata": {},
222+
"outputs": [],
223+
"source": []
224+
},
225+
{
226+
"cell_type": "code",
227+
"execution_count": null,
228+
"id": "f3cd9ed6-6352-4861-8aea-7bd4a983447e",
229+
"metadata": {},
230+
"outputs": [],
231+
"source": []
232+
}
233+
],
234+
"metadata": {
235+
"kernelspec": {
236+
"display_name": "Python 3 (ipykernel)",
237+
"language": "python",
238+
"name": "python3"
239+
},
240+
"language_info": {
241+
"codemirror_mode": {
242+
"name": "ipython",
243+
"version": 3
244+
},
245+
"file_extension": ".py",
246+
"mimetype": "text/x-python",
247+
"name": "python",
248+
"nbconvert_exporter": "python",
249+
"pygments_lexer": "ipython3",
250+
"version": "3.11.4"
251+
}
252+
},
253+
"nbformat": 4,
254+
"nbformat_minor": 5
255+
}

0 commit comments

Comments
 (0)