Skip to content

Commit 4d42f69

Browse files
authored
Merge pull request #62 from npr99/syspath
Major revisions
2 parents 24618ec + fd1cc61 commit 4d42f69

20 files changed

Lines changed: 7772 additions & 6854 deletions

Archive/ncoda_07fv1_HUA_PREC_NSI_Brazos.ipynb

Lines changed: 7522 additions & 0 deletions
Large diffs are not rendered by default.
4.07 MB
Binary file not shown.
Binary file not shown.

ncoda_07fv1_HUA_PREC_NSI.ipynb

Lines changed: 88 additions & 6695 deletions
Large diffs are not rendered by default.

pyncoda/CommunitySourceData/api_census_gov/acg_01a_BaseInventory.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ def get_data_based_on_varstems_and_roots(state_county: str,
203203
df_return = df_return.drop(columns=['precode'])
204204

205205
# Save File as CSV
206-
savefile = sys.path[0]+"/"+csv_filepath
206+
savefile = os.path.join(os.getcwd(), csv_filepath)
207207
df_return.to_csv(savefile, index=False)
208208

209209
return df_return
@@ -318,7 +318,7 @@ def get_apidata(state_county: str,
318318
col_list = primary_key_list + foreign_keys + char_vars
319319
df = df[col_list]
320320

321-
savefile = sys.path[0]+"/"+csv_filepath
321+
savefile = os.path.join(os.getcwd(), csv_filepath)
322322
df.to_csv(savefile, index=False)
323323

324324
return df
@@ -941,7 +941,7 @@ def graft_on_new_char(base_inventory: pd.DataFrame,
941941
new_char+" set 1 by greater than counter "+newchar_var
942942
expanded_hui_recombine.loc[conditions, new_char+'_flagset'] = 1
943943

944-
savefile = sys.path[0]+"/"+csv_filepath
944+
savefile = os.path.join(os.getcwd(), csv_filepath)
945945
expanded_hui_recombine.to_csv(savefile, index=False)
946946

947947
return expanded_hui_recombine

pyncoda/CommunitySourceData/api_census_gov/acg_02a_add_categorical_char.py

Lines changed: 58 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,9 @@ def add_geovarid(self, input_df):
168168
column_list = list(add_geovarid_df.columns)
169169

170170
# Check if blockid in column list
171-
if ('blockid' in column_list) & \
172-
('block' not in column_list):
171+
if ('blockid' in column_list) and ('block' not in column_list):
173172
print("Adding Block2010 to column list")
174-
# create column in input df
173+
# create column in input df
175174
# Version 2.0 of HUI renames Block2010 to blockid
176175
add_geovarid_df['Block2010'] = add_geovarid_df['blockid']
177176

@@ -180,119 +179,99 @@ def add_geovarid(self, input_df):
180179

181180
# Set geoid FIPS code by concatenating state, county, census geography ids
182181
# Check if geocodes are strings
183-
geo_levels = {'State': {'length' : 2, 'total_len' : 2, 'required' : ['state'] },
184-
'County': {'length' : 3, 'total_len' : 5, 'required' : ['state','county']},
185-
'Tract': {'length' : 6, 'total_len' : 11, 'required' : ['state','county','tract']},
186-
'BlockGroup' : {'length' : 1, 'total_len' : 12, 'required' : ['state','county','tract','blockgroup'],
187-
'notes' :'Block Group code is first digit of block id'},
188-
'Block': {'length' : 4, 'total_len' : 15, 'required' : ['state','county','tract','block']}
189-
}
182+
geo_levels = {
183+
'State': {'length': 2, 'total_len': 2, 'required': ['state']},
184+
'County': {'length': 3, 'total_len': 5, 'required': ['state', 'county']},
185+
'Tract': {'length': 6, 'total_len': 11, 'required': ['state', 'county', 'tract']},
186+
'BlockGroup': {'length': 1, 'total_len': 12, 'required': ['state', 'county', 'tract', 'blockgroup'],
187+
'notes': 'Block Group code is first digit of block id'},
188+
'Block': {'length': 4, 'total_len': 15, 'required': ['state', 'county', 'tract', 'block']}
189+
}
190190

191191
# Name of Geovar to add
192-
geovarid = self.geolevel+self.geovintage
192+
geovarid = self.geolevel + self.geovintage
193193

194194
# Check to see what geolevels are available
195195
geolevels_available = []
196196
# Check to see what geovarids are available
197197
geovarids_available = []
198-
# Make sure all input variable are correctly zero padded and saved as strings
198+
199+
# Make sure all input variables are correctly zero-padded and saved as strings
199200
for geo_level in geo_levels:
200201
# Geo level needs to be all lower case to match api variables
201202
geo_level_lower = geo_level.lower()
202203
if geo_level_lower in column_list:
203204
# Each geolevel is a zero padded string
204205
length = geo_levels[geo_level]['length']
205-
print("Check length of",geo_level_lower,"expected length",length)
206+
print("Check length of", geo_level_lower, "expected length", length)
206207
check_length = self.check_var_length(
207-
input_df = add_geovarid_df,
208-
var = geo_level_lower,
209-
expected_length = length)
208+
input_df=add_geovarid_df,
209+
var=geo_level_lower,
210+
expected_length=length)
211+
210212
if (check_length == "Match") or \
211-
(check_length == "Possible match with zero pad"):
213+
(check_length == "Possible match with zero pad"):
212214
# Check variable type
213-
# Issue with typ converting to int or float
214215
geo_level_type = add_geovarid_df[geo_level_lower].dtypes
215-
print(geo_level_lower,"is type",geo_level_type)
216-
add_geovarid_df.loc[:,geo_level_lower] = \
217-
add_geovarid_df[geo_level_lower].\
218-
apply(lambda x: str(x).zfill(length))
216+
print(geo_level_lower, "is type", geo_level_type)
217+
add_geovarid_df[geo_level_lower] = add_geovarid_df[geo_level_lower].astype(str).str.zfill(length)
219218
geolevels_available.append(geo_level_lower)
220219
geo_level_type = add_geovarid_df[geo_level_lower].dtypes
221-
print("after update",geo_level_lower,"is type",geo_level_type)
220+
print("after update", geo_level_lower, "is type", geo_level_type)
221+
222222
# Check to see what geovarids are available
223-
geovarid_test = geo_level+self.geovintage
223+
geovarid_test = geo_level + self.geovintage
224224
if geovarid_test in column_list:
225225
total_length_of_geovar = geo_levels[geo_level]['total_len']
226226
check_length = self.check_var_length(
227-
add_geovarid_df,geovarid_test,total_length_of_geovar)
227+
add_geovarid_df, geovarid_test, total_length_of_geovar)
228228
if (check_length == "Match") or \
229-
(check_length == "Possible match with zero pad"):
230-
add_geovarid_df.loc[:,geovarid_test] = \
231-
add_geovarid_df[geovarid_test].apply(lambda x: str(x).\
232-
zfill(total_length_of_geovar))
229+
(check_length == "Possible match with zero pad"):
230+
add_geovarid_df[geovarid_test] = add_geovarid_df[geovarid_test].astype(str).str.zfill(total_length_of_geovar)
233231
geovarids_available.append(geovarid_test)
234232
elif (check_length == "Possible convert to float"):
235233
print("Possible convert to float")
236-
add_geovarid_df.loc[:,geovarid_test] = \
237-
add_geovarid_df[geovarid_test].apply(lambda x: str(x)[:-2].\
238-
zfill(total_length_of_geovar))
234+
add_geovarid_df[geovarid_test] = add_geovarid_df[geovarid_test].astype(str).apply(lambda x: str(x)[:-2].zfill(total_length_of_geovar))
239235
geovarids_available.append(geovarid_test)
240-
print('Geolevels available',geolevels_available)
241-
print('Geolvarids available',geovarids_available)
236+
237+
print('Geolevels available', geolevels_available)
238+
print('Geolvarids available', geovarids_available)
239+
242240
# Generate Geovarid based on available columns
243-
# What is the total length expected for the geolevel
244241
total_length_of_geovar = geo_levels[self.geolevel]['total_len']
245-
# What are the required input variables
246242
required_vars = geo_levels[self.geolevel]['required']
247243

248-
print('Adding',geovarid,'expected length',total_length_of_geovar)
249-
# Check to make sure that all columns needed are in list
244+
print('Adding', geovarid, 'expected length', total_length_of_geovar)
245+
250246
if all(cols in column_list for cols in geolevels_available) and \
251-
all(cols in column_list for cols in required_vars) and \
252-
(geolevels_available == required_vars) and \
253-
(geolevels_available != []):
254-
print('Dataframe has required geo levels',geolevels_available)
255-
# Set geovarid to empty
256-
add_geovarid_df.loc[:,geovarid] = ''
247+
all(cols in column_list for cols in required_vars) and \
248+
(geolevels_available == required_vars) and \
249+
(geolevels_available != []):
250+
print('Dataframe has required geo levels', geolevels_available)
251+
add_geovarid_df[geovarid] = ''
257252
for geo_level in required_vars:
258253
geo_level_type = add_geovarid_df[geo_level.lower()].dtypes
259-
print(geo_level.lower(),"is type",geo_level_type)
260-
# Add geo level to geovarid
261-
add_geovarid_df.loc[:,geovarid] = add_geovarid_df[geovarid] + \
262-
add_geovarid_df[geo_level.lower()]
263-
# If geolevel columns are not in list check if block id is in list
264-
elif 'Block'+self.geovintage in geovarids_available:
265-
print('Dataframe has Block',self.geovintage,'for new geovar',geovarid)
266-
# Check that the block id is a zero padded 15 digit string
267-
# The geovarid is the first x characters
268-
add_geovarid_df.loc[:,geovarid] = add_geovarid_df['Block'+self.geovintage].\
269-
apply(lambda x : str(int(x)).zfill(15)[0:total_length_of_geovar])
270-
elif 'Tract'+self.geovintage in geovarids_available:
271-
print('Dataframe has Tract',self.geovintage,'for new geovar',geovarid)
272-
# Check that the tract id is a zero padded 11 digit string
273-
# The geovarid is the first x characters
274-
#print('Before update confirm',geovarid,'has expected length.')
275-
#self.check_var_length(add_geovarid_df,geovarid,total_length_of_geovar)
276-
add_geovarid_df.loc[:,geovarid] = add_geovarid_df['Tract'+self.geovintage].\
277-
apply(lambda x : str(int(x)).zfill(11)[0:total_length_of_geovar])
278-
#print('After update confirm',geovarid,'has expected length.')
279-
#self.check_var_length(add_geovarid_df,geovarid,total_length_of_geovar)
254+
print(geo_level.lower(), "is type", geo_level_type)
255+
add_geovarid_df[geovarid] += add_geovarid_df[geo_level.lower()]
256+
elif 'Block' + self.geovintage in geovarids_available:
257+
print('Dataframe has Block', self.geovintage, 'for new geovar', geovarid)
258+
add_geovarid_df[geovarid] = add_geovarid_df['Block' + self.geovintage].astype(str).str.zfill(15).str[:total_length_of_geovar]
259+
elif 'Tract' + self.geovintage in geovarids_available:
260+
print('Dataframe has Tract', self.geovintage, 'for new geovar', geovarid)
261+
add_geovarid_df[geovarid] = add_geovarid_df['Tract' + self.geovintage].astype(str).str.zfill(11).str[:total_length_of_geovar]
280262
elif 'GEO_ID' in column_list:
281-
# GEO_ID has the FIPS code data using the substring
282-
print('Dataframe has GEO_ID for new geovar',geovarid)
283-
add_geovarid_df.loc[:,geovarid] = add_geovarid_df['GEO_ID'].\
284-
apply(lambda x : str(x).zfill(11)[x.find("US")+2:\
285-
total_length_of_geovar+x.find("US")+2])
263+
print('Dataframe has GEO_ID for new geovar', geovarid)
264+
add_geovarid_df[geovarid] = add_geovarid_df['GEO_ID'].astype(str).apply(lambda x: x.zfill(11)[x.find("US") + 2:total_length_of_geovar + x.find("US") + 2])
286265
else:
287-
print('Warning: Column list does not have required columns to make',geovarid)
266+
print('Warning: Column list does not have required columns to make', geovarid)
288267

289268
# Update column list to move geovarid to front
290269
columnlist = [col for col in add_geovarid_df if col != geovarid]
291-
new_columnlist = [geovarid]+ columnlist
292-
# Confirm geovarid is set correctly
293-
print('Confirming',geovarid,'has expected length.')
270+
new_columnlist = [geovarid] + columnlist
271+
272+
print('Confirming', geovarid, 'has expected length.')
294273
check_length = self.check_var_length(
295-
add_geovarid_df,geovarid,total_length_of_geovar)
274+
add_geovarid_df, geovarid, total_length_of_geovar)
296275
return add_geovarid_df[new_columnlist]
297276

298277

@@ -969,9 +948,9 @@ def run_random_merge_2dfs(self, rounds):
969948

970949
if self.savefiles == True:
971950
print("Save primary and secondary files with all columns")
972-
savefile = sys.path[0]+"/"+csv_filepath_primary
951+
savefile = os.path.join(os.getcwd(), csv_filepath_primary)
973952
output_df['primary'].to_csv(savefile, index=False)
974-
savefile = sys.path[0]+"/"+csv_filepath_secondary
953+
savefile = os.path.join(os.getcwd(), csv_filepath_secondary)
975954
output_df['secondary'].to_csv(savefile, index=False)
976955

977956
return output_df
@@ -1008,10 +987,10 @@ def run_random_merge_2dfs(self, rounds):
1008987
print("Check primary and secondary files to understand why merge is not complete")
1009988
if self.savefiles == True:
1010989
csv_filepath_primary_almost = self.outputfolder+"/"+csv_filename_primary+'_almost.csv'
1011-
savefile = sys.path[0]+"/"+csv_filepath_primary_almost
990+
savefile = os.path.join(os.getcwd(), csv_filepath_primary_almost)
1012991
output_df['primary'].to_csv(savefile, index=False)
1013992
csv_filepath_secondary_almost = self.outputfolder+"/"+csv_filename_secondary+'_almost.csv'
1014-
savefile = sys.path[0]+"/"+csv_filepath_secondary_almost
993+
savefile = os.path.join(os.getcwd(), csv_filepath_secondary_almost)
1015994
output_df['secondary'].to_csv(savefile, index=False)
1016995
return output_df
1017996

pyncoda/CommunitySourceData/api_census_gov/acg_05a_hui_functions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def final_polish_hui(self, input_df):
248248

249249
if self.savefiles == True:
250250
csv_filepath = self.outputfolders['top']+"/"+self.output_filename+'.csv'
251-
savefile = sys.path[0]+"/"+csv_filepath
251+
savefile = os.path.join(os.getcwd(), csv_filepath)
252252
hui_df.to_csv(savefile, index=False)
253253
print("File saved:",savefile)
254254

@@ -331,7 +331,7 @@ def save_incore_version2(self, input_df):
331331
print("***************************************\n")
332332

333333
csv_filepath = self.outputfolders['top']+"/"+output_filename+'.csv'
334-
savefile = sys.path[0]+"/"+csv_filepath
334+
savefile = os.path.join(os.getcwd(), csv_filepath)
335335
output_df.to_csv(savefile, index=False)
336336
print("File saved:",savefile)
337337
return output_df

pyncoda/CommunitySourceData/api_census_gov/acg_05b_prec_functions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,8 @@ def run_prec_workflow(self, savelog=True):
212212
print("***************************************\n")
213213

214214
csv_filepath = self.outputfolders['top']+"/"+output_filename+'.csv'
215-
savefile = sys.path[0]+"/"+csv_filepath
215+
216+
savefile = os.path.join(os.getcwd(), csv_filepath)
216217
prec_df.to_csv(savefile, index=False)
217218
print("File saved:",savefile)
218219

@@ -288,7 +289,8 @@ def final_polish_prec(self, input_df):
288289

289290
if self.savefiles == True:
290291
csv_filepath = self.outputfolders['top']+"/"+self.output_filename+'.csv'
291-
savefile = sys.path[0]+"/"+csv_filepath
292+
293+
savefile = os.path.join(os.getcwd(), csv_filepath)
292294
prec_df.to_csv(savefile, index=False)
293295
print("File saved:",savefile)
294296

pyncoda/CommunitySourceData/nces_ed_gov/nces_01a_downloadfiles.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,22 @@
88
import *
99

1010
def setup_directory():
11-
# Create output directory if it does not exist
12-
output_sourcedata = 'Outputdata\\00_SourceData'
13-
output_directory = 'Outputdata\\00_SourceData\\nces_ed_gov'
14-
# Make directory to save output
15-
if not os.path.exists(output_sourcedata):
16-
print("Making new directory to save output: ",
17-
output_sourcedata)
18-
os.mkdir(output_sourcedata)
19-
if not os.path.exists(output_directory):
20-
print("Making new directory to save output: ",
21-
output_directory)
22-
os.mkdir(output_directory)
23-
else:
24-
print("Directory",output_directory,"Already exists.")
11+
# Define output directories
12+
output_sourcedata = os.path.join('OutputData', '00_SourceData')
13+
output_directory = os.path.join(output_sourcedata, 'nces_ed_gov')
14+
unzipped_output_directory = os.path.join(output_directory, 'unzipped')
2515

26-
unzipped_output_directory = output_directory+'\\unzipped'
27-
# Make directory to save output
28-
if not os.path.exists(unzipped_output_directory):
29-
print("Making unzipped_output_directory directory"+
30-
" to save output: ",unzipped_output_directory)
31-
os.mkdir(unzipped_output_directory)
32-
else:
33-
print("Directory",unzipped_output_directory,
34-
"Already exists.")
16+
# Create output directory if it does not exist
17+
def create_directory(path):
18+
if not os.path.exists(path):
19+
print(f"Making new directory to save output: {path}")
20+
os.makedirs(path)
21+
else:
22+
print(f"Directory {path} already exists.")
23+
24+
create_directory(output_sourcedata)
25+
create_directory(output_directory)
26+
create_directory(unzipped_output_directory)
3527

3628
return output_directory, unzipped_output_directory
3729

pyncoda/CommunitySourceData/nsi_sec_usace_army_mil/nsi_01a_downloadfiles.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,26 @@
1313

1414

1515
def setup_nsi_directory():
16+
# Define output directories
17+
output_folder = 'OutputData'
18+
output_sourcedata = os.path.join(output_folder, '00_SourceData')
19+
output_directory = os.path.join(output_sourcedata, 'nsi_sec_usace_army_mil')
20+
1621
# Create output directory if it does not exist
17-
output_folder = 'Outputdata'
18-
output_sourcedata = 'Outputdata\\00_SourceData'
19-
output_directory = 'Outputdata\\00_SourceData\\nsi_sec_usace_army_mil'
20-
# Make directory to save output
21-
if not os.path.exists(output_folder):
22-
print("Making new directory to save output: ",
23-
output_folder)
24-
os.mkdir(output_folder)
25-
if not os.path.exists(output_sourcedata):
26-
print("Making new directory to save output: ",
27-
output_sourcedata)
28-
os.mkdir(output_sourcedata)
29-
if not os.path.exists(output_directory):
30-
print("Making new directory to save output: ",
31-
output_directory)
32-
os.mkdir(output_directory)
33-
else:
34-
print("Directory",output_directory,"Already exists.")
22+
def create_directory(path):
23+
if not os.path.exists(path):
24+
print(f"Making new directory to save output: {path}")
25+
os.makedirs(path)
26+
else:
27+
print(f"Directory {path} already exists.")
28+
29+
create_directory(output_folder)
30+
create_directory(output_sourcedata)
31+
create_directory(output_directory)
3532

3633
return output_directory
3734

35+
3836
def download_nsi_files(county_fips,
3937
unique_id = 'fd_id_bid',
4038
unique_id_vars = ['fd_id','bid'],

0 commit comments

Comments
 (0)