npr99
diff --git a/‎pyncoda/99_SandboxCode/SandboxAE/Career Inventory.ipynb‎
Lines changed: 255 additions & 0 deletions b/‎pyncoda/99_SandboxCode/SandboxAE/Career Inventory.ipynb‎
Lines changed: 255 additions & 0 deletions
@@ -0,0 +1,255 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3b0c51a-7e85-41bb-ad72-4cdc103dadd4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "# warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
+    "warnings.filterwarnings(\"ignore\", message=\".*Pyarrow will become a required dependency.*\")\n",
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "import numpy as np\n",
+    "import sys # For displaying package versions\n",
+    "import os # For managing directories and file paths if drive is mounted\n",
+    "import random\n",
+    "import json\n",
+    "# pd.set_option('display.max_columns', None)\n",
+    "# pd.set_option('display.max_rows', None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2961a9a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# To reload submodules need to use this magic command to set autoreload on\n",
+    "# This command loads the autoreload extension, enabling the use of the %autoreload magic command.\n",
+    "# This sets autoreload to automatically reload all modules (both modules you've imported and your own modules) before executing a new line. \n",
+    "# The value \"2\" means \"always reload,\"\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e9cf45c-b973-44c7-b8f9-5a2a852de736",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install wget\n",
+    "!pip install us\n",
+    "!pip install seaborn\n",
+    "!pip install fpdf2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ebe817eb-e2ef-4144-bbf0-9d46f82b48f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from _lodes_data_structure import all_ods\n",
+    "from _lodes_data_structure import all_segparts\n",
+    "from _lodes_data_structure import all_charstems\n",
+    "from _lodes_data_structure import all_stems\n",
+    "from _lodes_data_structure import all_jobtypes\n",
+    "from _lodes_data_structure import all_mxjobtypes\n",
+    "from _lodes_data_structure import all_segstems\n",
+    "from lodes_datautil import *\n",
+    "from lodes_fullloop import *\n",
+    "from lodes_mcmcsa_util import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b272042-4612-4141-b354-5b6cd56188b3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "arise_county_fips = {\"johnson\" : \"20091\",\n",
+    "                     \"wyandotte\" : \"20209\",\n",
+    "                     \"finney\" : \"20055\",\n",
+    "                     \"ford\" : \"20057\",\n",
+    "                     \"seward\" : \"20175\"}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ae28f3ef-58b7-4af4-b453-bc658a59f59b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_county = \"ford\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0502145",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "county_fips= arise_county_fips[target_county]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a07e15f-fe79-4103-bcdb-df512d98a0ea",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stacked_df = obtain_lodes_county_loop([county_fips], \n",
+    "                    ['2020'], \n",
+    "                    outputfoldername = \"output\",\n",
+    "                    ods = all_ods,\n",
+    "                    segparts = all_segparts,\n",
+    "                    jobtypes = all_jobtypes,\n",
+    "                    mxjobtypes = all_mxjobtypes,\n",
+    "                    segstems = all_segstems,\n",
+    "                    blocklist = '')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e08ddfe1-2485-4a04-9b5a-f495288128b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "work_block_list = get_county_work_block_list(stacked_df, county_fips_code=county_fips, year='2020', od='od', seg='na')\n",
+    "home_block_list = get_county_home_block_list(stacked_df, county_fips_code=county_fips, year='2020', od='od', seg='na')\n",
+    "print(\"work block list: \", len(work_block_list))\n",
+    "print(\"home block list: \", len(home_block_list))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4faedc22-ac94-4416-ab1b-2e93de1f6b1a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for block in work_block_list[0:10]:\n",
+    "#     out_of_state_rac_blocks_df = out_of_state_rac_blocks(work_block=block, \n",
+    "#                             years= ['2020'], \n",
+    "#                             outputfoldername = \"output\",\n",
+    "#                             stacked_df=stacked_df,\n",
+    "#                             segstems = ['SE','SI','SA'])\n",
+    "#     print(len(out_of_state_rac_blocks_df))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62a095d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seed_value = 1234\n",
+    "counter=1\n",
+    "for od in [\"wac\", \"rac\"]:\n",
+    "    joblist_df = pd.DataFrame()\n",
+    "    if od== \"wac\":\n",
+    "        block_list = work_block_list\n",
+    "    elif od == \"rac\":\n",
+    "        block_list = home_block_list\n",
+    "    \n",
+    "    for block in block_list:\n",
+    "        seed_value += 1\n",
+    "        joblist = wac_rac_block_to_joblist(stacked_df = stacked_df, \n",
+    "                                            block_fips= block ,\n",
+    "                                            years = ['2020'],\n",
+    "                                            seed_value = seed_value,\n",
+    "                                            outputfoldername = \"output\",\n",
+    "                                            od = od,\n",
+    "                                            reshape_vars = {'CE' : 'Earnings',\n",
+    "                                                                'CNS': 'IndustryCode',\n",
+    "                                                                'CA' : 'Age',\n",
+    "                                                                'CR' : 'Race',\n",
+    "                                                                'CT' : 'Ethnicity',\n",
+    "                                                                'CD' : 'Education',\n",
+    "                                                                'CS' : 'Sex'},\n",
+    "                                                            segstems = ['SE','SI','SA'])\n",
+    "        \n",
+    "        for (year, od), inner_dict in joblist.items():\n",
+    "            for key, df in inner_dict.items():\n",
+    "                df.to_csv(f'output2/{od}_{block}_joblist_{year}.csv', index=False)\n",
+    "                if counter == 1:\n",
+    "                    joblist_df = df.copy()  # Create a new DataFrame\n",
+    "                else:\n",
+    "                    joblist_df = pd.concat([joblist_df, df], ignore_index=True)   \n",
+    "        counter += 1\n",
+    "        print(\"*********************************************************************\")\n",
+    "\n",
+    "    joblist_df.to_csv(f'{od}_{target_county}_county_joblist_{year}.csv', index=False)\n",
+    "    # display(wac_joblist_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b43529e0-f739-47a3-9459-7d21df043fcf",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dca9888a-dc7e-4591-b0b9-1cc9b7bd0b91",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2803df4f-2e89-4b17-a226-21c05e6edf17",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3cd9ed6-6352-4861-8aea-7bd4a983447e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}