npr99
diff --git a/‎Archive/ncoda_07fv1_HUA_PREC_NSI_Brazos.ipynb‎
Lines changed: 7522 additions & 0 deletions b/‎Archive/ncoda_07fv1_HUA_PREC_NSI_Brazos.ipynb‎
Lines changed: 7522 additions & 0 deletions
diff --git a/‎Readings/Rosenheim_pyncoda_overview_2024-06-19.pdf‎
4.07 MB b/‎Readings/Rosenheim_pyncoda_overview_2024-06-19.pdf‎
4.07 MB
diff --git a/‎Readings/_Rosenheim_2021_HousingUnitInventory_Overview_2021-06-30.pdf‎
485 KB b/‎Readings/_Rosenheim_2021_HousingUnitInventory_Overview_2021-06-30.pdf‎
485 KB
diff --git a/‎ncoda_07fv1_HUA_PREC_NSI.ipynb‎
Lines changed: 88 additions & 6695 deletions b/‎ncoda_07fv1_HUA_PREC_NSI.ipynb‎
Lines changed: 88 additions & 6695 deletions
diff --git a/‎pyncoda/CommunitySourceData/api_census_gov/acg_01a_BaseInventory.py‎
Lines changed: 3 additions & 3 deletions b/‎pyncoda/CommunitySourceData/api_census_gov/acg_01a_BaseInventory.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎pyncoda/CommunitySourceData/api_census_gov/acg_02a_add_categorical_char.py‎
Lines changed: 58 additions & 79 deletions b/‎pyncoda/CommunitySourceData/api_census_gov/acg_02a_add_categorical_char.py‎
Lines changed: 58 additions & 79 deletions
diff --git a/‎pyncoda/CommunitySourceData/api_census_gov/acg_05a_hui_functions.py‎
Lines changed: 2 additions & 2 deletions b/‎pyncoda/CommunitySourceData/api_census_gov/acg_05a_hui_functions.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pyncoda/CommunitySourceData/api_census_gov/acg_05b_prec_functions.py‎
Lines changed: 4 additions & 2 deletions b/‎pyncoda/CommunitySourceData/api_census_gov/acg_05b_prec_functions.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎pyncoda/CommunitySourceData/nces_ed_gov/nces_01a_downloadfiles.py‎
Lines changed: 15 additions & 23 deletions b/‎pyncoda/CommunitySourceData/nces_ed_gov/nces_01a_downloadfiles.py‎
Lines changed: 15 additions & 23 deletions
diff --git a/‎pyncoda/CommunitySourceData/nsi_sec_usace_army_mil/nsi_01a_downloadfiles.py‎
Lines changed: 16 additions & 18 deletions b/‎pyncoda/CommunitySourceData/nsi_sec_usace_army_mil/nsi_01a_downloadfiles.py‎
Lines changed: 16 additions & 18 deletions
@@ -203,7 +203,7 @@ def get_data_based_on_varstems_and_roots(state_county: str,
         df_return = df_return.drop(columns=['precode'])
 
         # Save File as CSV
-        savefile = sys.path[0]+"/"+csv_filepath
+        savefile = os.path.join(os.getcwd(), csv_filepath)
         df_return.to_csv(savefile, index=False)
 
         return df_return
@@ -318,7 +318,7 @@ def get_apidata(state_county: str,
             col_list = primary_key_list + foreign_keys + char_vars
             df = df[col_list]
 
-        savefile = sys.path[0]+"/"+csv_filepath
+        savefile = os.path.join(os.getcwd(), csv_filepath)
         df.to_csv(savefile, index=False)
 
         return df
@@ -941,7 +941,7 @@ def graft_on_new_char(base_inventory: pd.DataFrame,
                 new_char+" set 1 by greater than counter "+newchar_var
             expanded_hui_recombine.loc[conditions, new_char+'_flagset'] = 1
 
-        savefile = sys.path[0]+"/"+csv_filepath
+        savefile = os.path.join(os.getcwd(), csv_filepath)
         expanded_hui_recombine.to_csv(savefile, index=False)
 
         return expanded_hui_recombine
 
@@ -168,10 +168,9 @@ def add_geovarid(self, input_df):
         column_list = list(add_geovarid_df.columns)
 
         # Check if blockid in column list
-        if ('blockid' in column_list) & \
-            ('block' not in column_list):
+        if ('blockid' in column_list) and ('block' not in column_list):
             print("Adding Block2010 to column list")
-            # create  column in input df
+            # create column in input df
             # Version 2.0 of HUI renames Block2010 to blockid
             add_geovarid_df['Block2010'] = add_geovarid_df['blockid']
 
@@ -180,119 +179,99 @@ def add_geovarid(self, input_df):
 
         # Set geoid FIPS code by concatenating state, county, census geography ids
         # Check if geocodes are strings
-        geo_levels = {'State':  {'length' : 2, 'total_len' : 2,  'required' : ['state'] },
-                      'County': {'length' : 3, 'total_len' : 5,  'required' : ['state','county']},
-                      'Tract':  {'length' : 6, 'total_len' : 11, 'required' : ['state','county','tract']},
-                      'BlockGroup' : {'length' : 1, 'total_len' : 12, 'required' : ['state','county','tract','blockgroup'],
-                                      'notes' :'Block Group code is first digit of block id'},
-                      'Block':  {'length' : 4, 'total_len' : 15, 'required' : ['state','county','tract','block']}
-                      }
+        geo_levels = {
+            'State': {'length': 2, 'total_len': 2, 'required': ['state']},
+            'County': {'length': 3, 'total_len': 5, 'required': ['state', 'county']},
+            'Tract': {'length': 6, 'total_len': 11, 'required': ['state', 'county', 'tract']},
+            'BlockGroup': {'length': 1, 'total_len': 12, 'required': ['state', 'county', 'tract', 'blockgroup'],
+                        'notes': 'Block Group code is first digit of block id'},
+            'Block': {'length': 4, 'total_len': 15, 'required': ['state', 'county', 'tract', 'block']}
+        }
 
         # Name of Geovar to add
-        geovarid = self.geolevel+self.geovintage
+        geovarid = self.geolevel + self.geovintage
 
         # Check to see what geolevels are available 
         geolevels_available = []
         # Check to see what geovarids are available
         geovarids_available = []
-        # Make sure all input variable are correctly zero padded and saved as strings
+        
+        # Make sure all input variables are correctly zero-padded and saved as strings
         for geo_level in geo_levels:
             # Geo level needs to be all lower case to match api variables
             geo_level_lower = geo_level.lower()
             if geo_level_lower in column_list:
                 # Each geolevel is a zero padded string
                 length = geo_levels[geo_level]['length']
-                print("Check length of",geo_level_lower,"expected length",length)
+                print("Check length of", geo_level_lower, "expected length", length)
                 check_length = self.check_var_length(
-                    input_df = add_geovarid_df,
-                    var = geo_level_lower,
-                    expected_length = length)
+                    input_df=add_geovarid_df,
+                    var=geo_level_lower,
+                    expected_length=length)
+                
                 if (check_length == "Match") or \
-                    (check_length == "Possible match with zero pad"):
+                (check_length == "Possible match with zero pad"):
                     # Check variable type
-                    # Issue with typ converting to int or float
                     geo_level_type = add_geovarid_df[geo_level_lower].dtypes
-                    print(geo_level_lower,"is type",geo_level_type)
-                    add_geovarid_df.loc[:,geo_level_lower] =  \
-                        add_geovarid_df[geo_level_lower].\
-                            apply(lambda x: str(x).zfill(length))
+                    print(geo_level_lower, "is type", geo_level_type)
+                    add_geovarid_df[geo_level_lower] = add_geovarid_df[geo_level_lower].astype(str).str.zfill(length)
                     geolevels_available.append(geo_level_lower)
                     geo_level_type = add_geovarid_df[geo_level_lower].dtypes
-                    print("after update",geo_level_lower,"is type",geo_level_type)
+                    print("after update", geo_level_lower, "is type", geo_level_type)
+            
             # Check to see what geovarids are available
-            geovarid_test = geo_level+self.geovintage
+            geovarid_test = geo_level + self.geovintage
             if geovarid_test in column_list:
                 total_length_of_geovar = geo_levels[geo_level]['total_len']
                 check_length = self.check_var_length(
-                    add_geovarid_df,geovarid_test,total_length_of_geovar)
+                    add_geovarid_df, geovarid_test, total_length_of_geovar)
                 if (check_length == "Match") or \
-                   (check_length == "Possible match with zero pad"):
-                    add_geovarid_df.loc[:,geovarid_test] =  \
-                        add_geovarid_df[geovarid_test].apply(lambda x: str(x).\
-                            zfill(total_length_of_geovar))
+                (check_length == "Possible match with zero pad"):
+                    add_geovarid_df[geovarid_test] = add_geovarid_df[geovarid_test].astype(str).str.zfill(total_length_of_geovar)
                     geovarids_available.append(geovarid_test)
                 elif (check_length == "Possible convert to float"):
                     print("Possible convert to float")
-                    add_geovarid_df.loc[:,geovarid_test] =  \
-                        add_geovarid_df[geovarid_test].apply(lambda x: str(x)[:-2].\
-                            zfill(total_length_of_geovar))
+                    add_geovarid_df[geovarid_test] = add_geovarid_df[geovarid_test].astype(str).apply(lambda x: str(x)[:-2].zfill(total_length_of_geovar))
                     geovarids_available.append(geovarid_test)
-        print('Geolevels available',geolevels_available)
-        print('Geolvarids available',geovarids_available)
+        
+        print('Geolevels available', geolevels_available)
+        print('Geolvarids available', geovarids_available)
+        
         # Generate Geovarid based on available columns
-        # What is the total length expected for the geolevel 
         total_length_of_geovar = geo_levels[self.geolevel]['total_len']
-        # What are the required input variables
         required_vars = geo_levels[self.geolevel]['required']
 
-        print('Adding',geovarid,'expected length',total_length_of_geovar)
-        # Check to make sure that all columns needed are in list
+        print('Adding', geovarid, 'expected length', total_length_of_geovar)
+        
         if all(cols in column_list for cols in geolevels_available) and \
-            all(cols in column_list for cols in required_vars) and \
-            (geolevels_available == required_vars) and \
-            (geolevels_available != []):
-            print('Dataframe has required geo levels',geolevels_available)
-            # Set geovarid to empty
-            add_geovarid_df.loc[:,geovarid] = ''
+        all(cols in column_list for cols in required_vars) and \
+        (geolevels_available == required_vars) and \
+        (geolevels_available != []):
+            print('Dataframe has required geo levels', geolevels_available)
+            add_geovarid_df[geovarid] = ''
             for geo_level in required_vars:
                 geo_level_type = add_geovarid_df[geo_level.lower()].dtypes
-                print(geo_level.lower(),"is type",geo_level_type)
-                # Add geo level to geovarid
-                add_geovarid_df.loc[:,geovarid] = add_geovarid_df[geovarid] + \
-                    add_geovarid_df[geo_level.lower()]
-        # If geolevel columns are not in list check if block id is in list
-        elif 'Block'+self.geovintage in geovarids_available:
-            print('Dataframe has Block',self.geovintage,'for new geovar',geovarid)
-            # Check that the block id is a zero padded 15 digit string
-            # The geovarid is the first x characters
-            add_geovarid_df.loc[:,geovarid] = add_geovarid_df['Block'+self.geovintage].\
-                apply(lambda x : str(int(x)).zfill(15)[0:total_length_of_geovar])
-        elif 'Tract'+self.geovintage in geovarids_available:
-            print('Dataframe has Tract',self.geovintage,'for new geovar',geovarid)
-            # Check that the tract id is a zero padded 11 digit string
-            # The geovarid is the first x characters
-            #print('Before update confirm',geovarid,'has expected length.')
-            #self.check_var_length(add_geovarid_df,geovarid,total_length_of_geovar)
-            add_geovarid_df.loc[:,geovarid] = add_geovarid_df['Tract'+self.geovintage].\
-                apply(lambda x : str(int(x)).zfill(11)[0:total_length_of_geovar])
-            #print('After update confirm',geovarid,'has expected length.')
-            #self.check_var_length(add_geovarid_df,geovarid,total_length_of_geovar)
+                print(geo_level.lower(), "is type", geo_level_type)
+                add_geovarid_df[geovarid] += add_geovarid_df[geo_level.lower()]
+        elif 'Block' + self.geovintage in geovarids_available:
+            print('Dataframe has Block', self.geovintage, 'for new geovar', geovarid)
+            add_geovarid_df[geovarid] = add_geovarid_df['Block' + self.geovintage].astype(str).str.zfill(15).str[:total_length_of_geovar]
+        elif 'Tract' + self.geovintage in geovarids_available:
+            print('Dataframe has Tract', self.geovintage, 'for new geovar', geovarid)
+            add_geovarid_df[geovarid] = add_geovarid_df['Tract' + self.geovintage].astype(str).str.zfill(11).str[:total_length_of_geovar]
         elif 'GEO_ID' in column_list:
-            # GEO_ID has the FIPS code data using the substring
-            print('Dataframe has GEO_ID for new geovar',geovarid)
-            add_geovarid_df.loc[:,geovarid] = add_geovarid_df['GEO_ID'].\
-                apply(lambda x : str(x).zfill(11)[x.find("US")+2:\
-                    total_length_of_geovar+x.find("US")+2])        
+            print('Dataframe has GEO_ID for new geovar', geovarid)
+            add_geovarid_df[geovarid] = add_geovarid_df['GEO_ID'].astype(str).apply(lambda x: x.zfill(11)[x.find("US") + 2:total_length_of_geovar + x.find("US") + 2])
         else:
-            print('Warning: Column list does not have required columns to make',geovarid)
+            print('Warning: Column list does not have required columns to make', geovarid)
 
         # Update column list to move geovarid to front
         columnlist = [col for col in add_geovarid_df if col != geovarid]
-        new_columnlist = [geovarid]+ columnlist
-        # Confirm geovarid is set correctly
-        print('Confirming',geovarid,'has expected length.')
+        new_columnlist = [geovarid] + columnlist
+
+        print('Confirming', geovarid, 'has expected length.')
         check_length = self.check_var_length(
-            add_geovarid_df,geovarid,total_length_of_geovar)
+            add_geovarid_df, geovarid, total_length_of_geovar)
         return add_geovarid_df[new_columnlist]
 
 
@@ -969,9 +948,9 @@ def run_random_merge_2dfs(self, rounds):
 
                         if self.savefiles == True:
                             print("Save primary and secondary files with all columns")
-                            savefile = sys.path[0]+"/"+csv_filepath_primary
+                            savefile = os.path.join(os.getcwd(), csv_filepath_primary)
                             output_df['primary'].to_csv(savefile, index=False)
-                            savefile = sys.path[0]+"/"+csv_filepath_secondary
+                            savefile = os.path.join(os.getcwd(), csv_filepath_secondary)
                             output_df['secondary'].to_csv(savefile, index=False)
 
                         return output_df
@@ -1008,10 +987,10 @@ def run_random_merge_2dfs(self, rounds):
         print("Check primary and secondary files to understand why merge is not complete")
         if self.savefiles == True:
             csv_filepath_primary_almost = self.outputfolder+"/"+csv_filename_primary+'_almost.csv'
-            savefile = sys.path[0]+"/"+csv_filepath_primary_almost
+            savefile = os.path.join(os.getcwd(), csv_filepath_primary_almost)
             output_df['primary'].to_csv(savefile, index=False)
             csv_filepath_secondary_almost = self.outputfolder+"/"+csv_filename_secondary+'_almost.csv'
-            savefile = sys.path[0]+"/"+csv_filepath_secondary_almost
+            savefile = os.path.join(os.getcwd(), csv_filepath_secondary_almost)
             output_df['secondary'].to_csv(savefile, index=False)
         return output_df
 
 
@@ -248,7 +248,7 @@ def final_polish_hui(self, input_df):
 
         if self.savefiles == True:
             csv_filepath = self.outputfolders['top']+"/"+self.output_filename+'.csv'
-            savefile = sys.path[0]+"/"+csv_filepath
+            savefile = os.path.join(os.getcwd(), csv_filepath)
             hui_df.to_csv(savefile, index=False)
             print("File saved:",savefile)
 
@@ -331,7 +331,7 @@ def save_incore_version2(self, input_df):
         print("***************************************\n")
 
         csv_filepath = self.outputfolders['top']+"/"+output_filename+'.csv'
-        savefile = sys.path[0]+"/"+csv_filepath
+        savefile = os.path.join(os.getcwd(), csv_filepath)
         output_df.to_csv(savefile, index=False)
         print("File saved:",savefile)       
         return output_df    
 
@@ -212,7 +212,8 @@ def run_prec_workflow(self, savelog=True):
         print("***************************************\n")
 
         csv_filepath = self.outputfolders['top']+"/"+output_filename+'.csv'
-        savefile = sys.path[0]+"/"+csv_filepath
+
+        savefile = os.path.join(os.getcwd(), csv_filepath)
         prec_df.to_csv(savefile, index=False)
         print("File saved:",savefile)
 
@@ -288,7 +289,8 @@ def final_polish_prec(self, input_df):
 
         if self.savefiles == True:
             csv_filepath = self.outputfolders['top']+"/"+self.output_filename+'.csv'
-            savefile = sys.path[0]+"/"+csv_filepath
+
+            savefile = os.path.join(os.getcwd(), csv_filepath)
             prec_df.to_csv(savefile, index=False)
             print("File saved:",savefile)
 
 
@@ -8,30 +8,22 @@
     import *
 
 def setup_directory():
-    # Create output directory if it does not exist
-    output_sourcedata = 'Outputdata\\00_SourceData'
-    output_directory = 'Outputdata\\00_SourceData\\nces_ed_gov'
-    # Make directory to save output
-    if not os.path.exists(output_sourcedata):
-        print("Making new directory to save output: ",
-            output_sourcedata)
-        os.mkdir(output_sourcedata)
-    if not os.path.exists(output_directory):
-        print("Making new directory to save output: ",
-            output_directory)
-        os.mkdir(output_directory)
-    else:
-        print("Directory",output_directory,"Already exists.")
+    # Define output directories
+    output_sourcedata = os.path.join('OutputData', '00_SourceData')
+    output_directory = os.path.join(output_sourcedata, 'nces_ed_gov')
+    unzipped_output_directory = os.path.join(output_directory, 'unzipped')
 
-    unzipped_output_directory = output_directory+'\\unzipped'
-    # Make directory to save output
-    if not os.path.exists(unzipped_output_directory):
-        print("Making unzipped_output_directory directory"+
-            " to save output: ",unzipped_output_directory)
-        os.mkdir(unzipped_output_directory)
-    else:
-        print("Directory",unzipped_output_directory,
-            "Already exists.")
+    # Create output directory if it does not exist
+    def create_directory(path):
+        if not os.path.exists(path):
+            print(f"Making new directory to save output: {path}")
+            os.makedirs(path)
+        else:
+            print(f"Directory {path} already exists.")
+    
+    create_directory(output_sourcedata)
+    create_directory(output_directory)
+    create_directory(unzipped_output_directory)
 
     return output_directory, unzipped_output_directory
 
 
@@ -13,28 +13,26 @@
 
 
 def setup_nsi_directory():
+    # Define output directories
+    output_folder = 'OutputData'
+    output_sourcedata = os.path.join(output_folder, '00_SourceData')
+    output_directory = os.path.join(output_sourcedata, 'nsi_sec_usace_army_mil')
+
     # Create output directory if it does not exist
-    output_folder = 'Outputdata'
-    output_sourcedata = 'Outputdata\\00_SourceData'
-    output_directory = 'Outputdata\\00_SourceData\\nsi_sec_usace_army_mil'
-    # Make directory to save output
-    if not os.path.exists(output_folder):
-        print("Making new directory to save output: ",
-            output_folder)
-        os.mkdir(output_folder)
-    if not os.path.exists(output_sourcedata):
-        print("Making new directory to save output: ",
-            output_sourcedata)
-        os.mkdir(output_sourcedata)
-    if not os.path.exists(output_directory):
-        print("Making new directory to save output: ",
-            output_directory)
-        os.mkdir(output_directory)
-    else:
-        print("Directory",output_directory,"Already exists.")
+    def create_directory(path):
+        if not os.path.exists(path):
+            print(f"Making new directory to save output: {path}")
+            os.makedirs(path)
+        else:
+            print(f"Directory {path} already exists.")
+
+    create_directory(output_folder)
+    create_directory(output_sourcedata)
+    create_directory(output_directory)
 
     return output_directory
 
+
 def download_nsi_files(county_fips,
                        unique_id = 'fd_id_bid',
                        unique_id_vars = ['fd_id','bid'],