new functions added

mfonsecaOEF · web-flow · commit 43f98e3ebd0f · 2023-09-13T20:32:52.000+03:00
diff --git a/Marine_Ecosystem_Credits/Marine_Biodiversity/MBU_Methodology/MBU_utils.py b/Marine_Ecosystem_Credits/Marine_Biodiversity/MBU_Methodology/MBU_utils.py
@@ -179,7 +179,7 @@ def count_overlapping_geometries(gdf):
 
 #---------------------------------------------------------------------------------------------------------------------
 
-#This function calculates the sum of all values of an interest colum of overlapping geometries 
+#This function calculates a specific algebra operation of all values of a column of interest of overlapping geometries
 def map_algebra(gdf, gdf_col_name, operation):
     """
     This function calculates the sum of all values of an interest colum of overlapping geometries 
@@ -323,24 +323,27 @@ def shannon(MPA, gdf, grid_gdf):
     gdf = gpd.clip(gdf.set_crs(epsg=4326, allow_override=True), MPA.set_crs(epsg=4326, allow_override=True))
     
     #Spatial join of gdf and grid_gdf
-    pointInPolys = sjoin(gdf, grid_gdf, how='inner')
+    pointInPolys = sjoin(gdf, grid_gdf, how='inner', op='intersects')
     
     # 'individualCount' refers to the number of individual organisms observed or sampled 
     # for a particular species at a particular location and time.
     pointInPolys = pointInPolys.dropna(subset='individualCount')
     pointInPolys['individualCount'] = pointInPolys['individualCount'].astype(float).astype(int)
     
-    #To calculate the total number of species
+    # To calculate the total number of individuals (sum of 'individualCount') by grid
     N = pd.DataFrame()
     N['N'] = pointInPolys.groupby('Grid_ID').apply(lambda x: x['individualCount'].sum())
     
     new = pd.merge(pointInPolys, N, on='Grid_ID')
     
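+    # keep only the first row of each Grid_ID (NOTE(review): done before 'pi' is computed, so the other rows of the grid are ignored - confirm)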
+    new = new.drop_duplicates(subset = 'Grid_ID', keep = 'first', inplace = False)
+    
     #Calculate the Shanoon index with the information available
     new['pi'] = new['individualCount']/new['N']
     new['shannon'] = (-1)*new['pi']*np.log(new['pi'])
     
-    new = new.dissolve(by='Grid_ID', aggfunc={'shannon': 'sum'})
+    new = new.dissolve(by='Grid_ID')
         
     new = new.drop(['geometry'], axis = 1)
         
@@ -390,6 +393,9 @@ def simpson(MPA, gdf, grid_gdf):
         
     #Merge the datasets based on the Grid_ID
     new = pd.merge(pointInPolys, N, on='Grid_ID')
+    
+    # keep only the first row of each Grid_ID
+    new = new.drop_duplicates(subset = 'Grid_ID', keep = 'first', inplace = False)
         
     #Calculate the Simpson index
     new['simpson'] = 1-((new['num'])/(new['N']*(new['N']-1)))
@@ -662,11 +668,11 @@ def habitat_accounting(MPA, grid_gdf, path_EFG):
 #Modulating Factor Functions
 #---------------------------------------------------------------------------------------------------------------------
 
-def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
+def mbu_shannon_index(MPA, gdf, grid_gdf, source):
     """
-    This functions combines the Shannon Index and Simpson Index to calculate a Biodiversity Score per grid or 
-    given area and converts these numbers into MBUs.
-    It calls internally the Shannon and Simpson functions to do the calculations
+    This function uses the Shannon Index to calculate marine species diversity and richness per grid or given area and
+    converts these numbers into MBUs.
+    It calls internally the Shannon Index function to do the calculations
     
     input(s):
     MPA <shapely polygon in CRS WGS84:EPSG 4326>: Marine Proteted Area of interest
@@ -679,7 +685,7 @@ def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
     crs_transformation_kms: coordinate reference system transformation applied to the MPA in meters 
     
     output(s):
-    gdf <geopandas dataframe>: with an additional column ('mbu_biodiversity_score') containing the calculation of MBUs with this                                
+    gdf <geopandas dataframe>: with an additional column ('mbu_shannon_index') containing the calculation of MBUs with this                                
                              :factor information per grid or geometry
     """
     
@@ -688,25 +694,68 @@ def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
         #Shannon Index calculation
         df1 = shannon(MPA, gdf, grid_gdf)
         
-        #Simpson Index calculation
-        df2 = simpson(MPA, gdf, grid_gdf)
-        
         #Normalization factor
         Norm_factor1 = df1['shannon']/df1['shannon'].max()
-        Norm_factor2 = df2['simpson']/df2['simpson'].max()
         
         #Convert area from degrees to square kilometers
-        df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
+        #df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
+
+        #Calculate the MBUS from this MF
+        df1['mbu_shannon_index'] = Norm_factor1
+        
+    elif source == 'IUCN':
+        print('The Shannon Index - Modulating Factor is not available to IUCN data')
+        
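+        #No value for IUCN: flag the column as 'NA' (NOTE(review): df1 looks assigned only in the 'OBIS' branch, likely NameError here - confirm)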
+        df1['mbu_shannon_index'] = 'NA'
+        
+    else:
+        raise ValueError("Unsupported source: {}".format(source))
+
+    return df1
+
+#---------------------------------------------------------------------------------------------------------------------
+
+def mbu_simpson_index(MPA, gdf, grid_gdf, source):
+    """
+    This function uses the Simpson Index to quantify biodiversity in a specific habitat; in this case the quantification is per grid or
+    given area, and it converts these numbers into MBUs.
+    It calls internally the Simpson function to do the calculations
+    
+    input(s):
+    MPA <shapely polygon in CRS WGS84:EPSG 4326>: Marine Protected Area of interest
+    gdf <geopandas dataframe>: contains at least the name of the species and either
+                            i) the distribution polygons of each of them (presumably from IUCN or local surveys), or
+                            ii) points denoting the observations of each species - repeated observations for the same species
+    grid_gdf <geopandas dataframe>: consists of polygons of grids typically generated by the gridding function
+                                  : contains at least a geometry column and a unique grid_id
+    source <str>: if the data is from OBIS or from IUCN
+                : the index is only computed for 'OBIS'; values other than 'OBIS' or 'IUCN' raise ValueError
+    
+    output(s):
+    gdf <geopandas dataframe>: with an additional column ('mbu_simpson_index') containing the calculation of MBUs with this
+                             :factor information per grid or geometry
+    """
+    
+    if source == 'OBIS':
+        
+        #Simpson Index calculation
+        df1 = simpson(MPA, gdf, grid_gdf)
         
-        #Add colums
-        #df['shannon'] = df1['shannon']
-        df1['simpson'] = df2['simpson']
+        #Normalization factor
+        Norm_factor1 = df1['simpson']/df1['simpson'].max()
+        
+        #Convert area from degrees to square kilometers
+        #df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
 
         #Calculate the MBUS from this MF
-        df1['mbu_biodiversity_score'] = Norm_factor1*df1['area_sqkm'] + Norm_factor2*df1['area_sqkm']
+        df1['mbu_simpson_index'] = Norm_factor1
         
     elif source == 'IUCN':
-        print('The Biodiversity Score - Modulating Factor is not available to IUCN data')
+        print('The Simpsion Index - Modulating Factor is not available to IUCN data')
+        
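+        #No value for IUCN: flag the column as 'NA' (NOTE(review): df1 is only assigned in the 'OBIS' branch, and this key is spelled 'mbu_simpsion_index')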
+        df1['mbu_simpsion_index'] = 'NA'
         
     else:
         raise ValueError("Unsupported source: {}".format(source))
@@ -715,7 +764,7 @@ def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
 
 #---------------------------------------------------------------------------------------------------------------------
 
-def mbu_species_richness(MPA, gdf, grid_gdf, crs_transformation_kms):
+def mbu_species_richness(MPA, gdf, grid_gdf):
     """
     This function calculates the amount of MBUs from the species richness metric and converts these 
     numbers into MBUs in a given area in sqd kms.
@@ -743,16 +792,16 @@ def mbu_species_richness(MPA, gdf, grid_gdf, crs_transformation_kms):
     Norm_factor1 = df1['species_richness']/df1['species_richness'].max()
         
     #Convert area from degrees to square kilometers
-    df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
+    #df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
 
     #Calculate the MBUS from this MF
-    df1['mbu_species_richness'] = Norm_factor1*df1['area_sqkm']
+    df1['mbu_species_richness'] = Norm_factor1
         
     return df1
     
 #---------------------------------------------------------------------------------------------------------------------
     
-def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
+def mbu_endemism(MPA, gdf, grid_gdf, source):
     """
     This function calculates the amount of MBUs from the Endemic index and converts these numbers into 
     MBUs in a given area in sqd kms.
@@ -777,7 +826,7 @@ def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
         print('Endemic Modulating Factor is not available to OBIS data')
         
         #Calculate the MBUS from this MF
-        df1['mbu_endemism'] = 0
+        df1['mbu_endemism'] = 'NA'
         
     elif source == 'IUCN':
 
@@ -788,10 +837,10 @@ def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
         Norm_factor1 = df1['endemism']/df1['endemism'].max()
 
         #Convert area from degrees to square kilometers
-        df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
+        #df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
 
         #Calculate the MBUS from this MF
-        df1['mbu_endemism'] = Norm_factor1*df1['area_sqkm']
+        df1['mbu_endemism'] = Norm_factor1
         
     else:
         raise ValueError("Unsupported source: {}".format(source))
@@ -800,7 +849,7 @@ def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
 
 #--------------------------------------------------------------------------------------------------------------------- 
    
-def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
+def mbu_wege(MPA, gdf, grid_gdf, source):
     """
     This function calculates the amount of MBUs from the WEGE index and converts these numbers into MBUs in a 
     given area in sqd kms.
@@ -824,7 +873,7 @@ def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
         print('WEGE Modulating Factor is not available to OBIS data')
         
         #Calculate the MBUS from this MF
-        df1['mbu_endemism'] = 0
+        df1['mbu_endemism'] = 'NA'
         
     elif source == 'IUCN':
     
@@ -835,10 +884,10 @@ def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
         Norm_factor1 = df1['wege']/df1['wege'].max()
 
         #Convert area from degrees to square kilometers
-        df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
+        #df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
 
         #Calculate the MBUS from this MF
-        df1['mbu_wege'] = Norm_factor1*df1['area_sqkm']
+        df1['mbu_wege'] = Norm_factor1
         
     else:
         raise ValueError("Unsupported source: {}".format(source))
@@ -847,7 +896,7 @@ def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
 
 #---------------------------------------------------------------------------------------------------------------------
 
-def mbu_habitats_survey(MPA, grid_gdf, path_EFG, crs_transformation_kms):
+def mbu_habitats_survey(MPA, grid_gdf, path_EFG):
     """
     This function calculates the amount of MBUs from the Habitats Survey calculation and converts these numbers into MBUs in 
     a given area in sqd kms.
@@ -871,23 +920,24 @@ def mbu_habitats_survey(MPA, grid_gdf, path_EFG, crs_transformation_kms):
     Norm_factor1 = df1['habitat_accounting']/df1['habitat_accounting'].max()
     
     #Convert area from degrees to square kilometers
-    df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
+    #df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
 
     #Calculate the MBUS from this MF
-    df1['mbu_habitats_survey'] = Norm_factor1*df1['area_sqkm']
+    df1['mbu_habitats_survey'] = Norm_factor1
     
     return df1
 
 #---------------------------------------------------------------------------------------------------------------------
 #General MBU function
 #---------------------------------------------------------------------------------------------------------------------
 
-def give_mbu_score(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, crs_transformation_kms):
+def weighted_MFs(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, weights):
     """
     input(s):
     modulating_factor_names: list of names of the modulating factors, e.g. ["species_richness", "habitats_survey"]
     modulating_factor_names:
-                - biodiversity_score
+                - shannon_index
+                - simpson_index
                 - species_richness
                 - endemism
                 - wege
@@ -902,45 +952,67 @@ def give_mbu_score(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg,
     grid_shape <str>: either "square" or "hexagonal"
     path_EFG <list>: consist in a list with path location of each EFG file 
     source <str>: if the data is from OBIS or from IUCN
-    crs_transformation_kms: coordinate reference system transformation applied to the MPA in meters 
+    weights <dict>: it contains the weight value chosen for each modulating factor, keyed by factor name; each factor is multiplied by 1/weight
     
     output(s):
         gdf <geopandas dataframe>: with an additional columns with the MBUs from each MF chosen and the Total_Number_MBUs 
-                                 :per grid or geometry
+                                 : per grid or geometry
     """
     if not isinstance(modulating_factor_names, (np.ndarray, list)):
         print('A list of modulating factors to calculate MBUs is needed')
     
     elif isinstance(modulating_factor_names, (np.ndarray, list)):
         grid = create_grid(MPA, grid_shape, grid_size_deg)
     
-        if 'biodiversity_score' in modulating_factor_names:
+        if 'shannon_index' in modulating_factor_names:
+            print('Calculating Shannon Index Modulating Factor')
             if source == 'OBIS':
-                grid['mbu_biodiversity_score'] = mbu_biodiversity_score(MPA, gdf, grid, source, crs_transformation_kms)['mbu_biodiversity_score']
+                grid['mbu_shannon_index'] = mbu_shannon_index(MPA, gdf, grid, source)['mbu_shannon_index']*(1/weights.get('shannon_index'))
             elif source == 'IUCN':
-                grid['mbu_biodiversity_score'] = 0
-
-        if 'species_richness' in modulating_factor_names:
-            grid['mbu_species_richness'] = mbu_species_richness(MPA, gdf, grid, crs_transformation_kms)['mbu_species_richness']
-
-        if 'endemism' in modulating_factor_names:
+                grid['mbu_shannon_index'] = 0
+                
+        if 'habitats_survey' in modulating_factor_names:
+            print('Calculating Habitats Survey Modulating Factor')
+            if not isinstance(path_EFG, np.ndarray):
+                grid['mbu_habitats_survey'] = 0
+            elif isinstance(path_EFG, np.ndarray):
+                grid['mbu_habitats_survey'] = mbu_habitats_survey(MPA, grid, path_EFG)['mbu_habitats_survey']*(1/weights.get('habitats_survey'))
+                
+        if 'wege' in modulating_factor_names:
+            print('Calculating WEGE Modulating Factor')
             if source == 'OBIS':
-                grid['mbu_endemism'] = 0
+                grid['mbu_wege'] = 0
             elif source == 'IUCN':
-                grid['mbu_endemism'] = mbu_endemism(MPA, gdf, grid, source, crs_transformation_kms)['mbu_endemism']     
+                grid['mbu_wege'] = mbu_wege(MPA, gdf, grid, source)['mbu_wege']*(1/weights.get('wege'))
 
-        if 'wege' in modulating_factor_names:
+        if 'simpson_index' in modulating_factor_names:
+            print('Calculating Simpson Index Modulating Factor')
             if source == 'OBIS':
-                grid['mbu_wege'] = 0
+                grid['mbu_simpson_index'] = mbu_simpson_index(MPA, gdf, grid, source)['mbu_simpson_index']*(1/weights.get('simpson_index'))
             elif source == 'IUCN':
-                grid['mbu_wege'] = mbu_wege(MPA, gdf, grid, source, crs_transformation_kms)['mbu_wege']
+                grid['mbu_simpson_index'] = 0
 
-        if 'habitats_survey' in modulating_factor_names:
-            if not isinstance(path_EFG, np.ndarray):
-                grid['mbu_habitats_survey'] = 0
-            elif isinstance(path_EFG, np.ndarray):
-                grid['mbu_habitats_survey'] = mbu_habitats_survey(MPA, grid, path_EFG, crs_transformation_kms)['mbu_habitats_survey']
+        if 'species_richness' in modulating_factor_names:
+            print('Calculating Species Richness Modulating Factor')
+            grid['mbu_species_richness'] = mbu_species_richness(MPA, gdf, grid)['mbu_species_richness']*(1/weights.get('species_richness'))
+        
+    grid = grid.drop('species_richness', axis=1)
+
+    return grid
 
-        grid['Total_MBUs'] = grid['mbu_species_richness'] + grid['mbu_biodiversity_score'] + grid['mbu_endemism'] + grid['mbu_wege'] + grid['mbu_habitats_survey']
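+def total_mbu(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, weights, baseline_value):
+    """Sum every column whose name contains 'mbu' into 'mbu_sum' and set 'result' to 1 when it is within 5 of baseline_value, else 0.
+    """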
+    grid = weighted_MFs(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, weights)
     
-    return grid
+    # filter columns that contain 'mbu' in their name
+    mbu_columns = [col for col in grid.columns if 'mbu' in col]
+
+    # calculate the sum of 'mbu' columns for each row and add as a new column
+    grid['mbu_sum'] = grid[mbu_columns].sum(axis=1)
+
+    # check if each value in 'mbu_sum' is within ±5 of the baseline value
+    grid['result'] = grid['mbu_sum'].apply(lambda x: 1 if abs(x - baseline_value) <= 5 else 0)
+    
+    return grid
+