Skip to content

Commit 43f98e3

Browse files
authored
new functions added
1 parent 3733e27 commit 43f98e3

1 file changed

Lines changed: 129 additions & 57 deletions

File tree

  • Marine_Ecosystem_Credits/Marine_Biodiversity/MBU_Methodology

Marine_Ecosystem_Credits/Marine_Biodiversity/MBU_Methodology/MBU_utils.py

Lines changed: 129 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def count_overlapping_geometries(gdf):
179179

180180
#---------------------------------------------------------------------------------------------------------------------
181181

182-
#This function calculates the sum of all values of an interest colum of overlapping geometries
182+
#This function calculates a specific algebra operation of all values of an interest colum of overlapping geometries
183183
def map_algebra(gdf, gdf_col_name, operation):
184184
"""
185185
This function calculates the sum of all values of an interest colum of overlapping geometries
@@ -323,24 +323,27 @@ def shannon(MPA, gdf, grid_gdf):
323323
gdf = gpd.clip(gdf.set_crs(epsg=4326, allow_override=True), MPA.set_crs(epsg=4326, allow_override=True))
324324

325325
#Spatial join of gdf and grid_gdf
326-
pointInPolys = sjoin(gdf, grid_gdf, how='inner')
326+
pointInPolys = sjoin(gdf, grid_gdf, how='inner', op='intersects')
327327

328328
# 'individualCount' refers to the number of individual organisms observed or sampled
329329
# for a particular species at a particular location and time.
330330
pointInPolys = pointInPolys.dropna(subset='individualCount')
331331
pointInPolys['individualCount'] = pointInPolys['individualCount'].astype(float).astype(int)
332332

333-
#To calculate the total number of species
333+
# To calculate the total number of species by grid
334334
N = pd.DataFrame()
335335
N['N'] = pointInPolys.groupby('Grid_ID').apply(lambda x: x['individualCount'].sum())
336336

337337
new = pd.merge(pointInPolys, N, on='Grid_ID')
338338

339+
# delete duplicate rows
340+
new = new.drop_duplicates(subset = 'Grid_ID', keep = 'first', inplace = False)
341+
339342
#Calculate the Shanoon index with the information available
340343
new['pi'] = new['individualCount']/new['N']
341344
new['shannon'] = (-1)*new['pi']*np.log(new['pi'])
342345

343-
new = new.dissolve(by='Grid_ID', aggfunc={'shannon': 'sum'})
346+
new = new.dissolve(by='Grid_ID')
344347

345348
new = new.drop(['geometry'], axis = 1)
346349

@@ -390,6 +393,9 @@ def simpson(MPA, gdf, grid_gdf):
390393

391394
#Merge the datasets based on the Grid_ID
392395
new = pd.merge(pointInPolys, N, on='Grid_ID')
396+
397+
# delete duplicate rows
398+
new = new.drop_duplicates(subset = 'Grid_ID', keep = 'first', inplace = False)
393399

394400
#Calculate the Simpson index
395401
new['simpson'] = 1-((new['num'])/(new['N']*(new['N']-1)))
@@ -662,11 +668,11 @@ def habitat_accounting(MPA, grid_gdf, path_EFG):
662668
#Modulating Factor Functions
663669
#---------------------------------------------------------------------------------------------------------------------
664670

665-
def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
671+
def mbu_shannon_index(MPA, gdf, grid_gdf, source):
666672
"""
667-
This functions combines the Shannon Index and Simpson Index to calculate a Biodiversity Score per grid or
668-
given area and converts these numbers into MBUs.
669-
It calls internally the Shannon and Simpson functions to do the calculations
673+
This functions uses the Shannon Index to calculate marine species diversity and richness per grid or given area and c
674+
onverts these numbers into MBUs.
675+
It calls internally the Shannon Index function to do the calculations
670676
671677
input(s):
672678
MPA <shapely polygon in CRS WGS84:EPSG 4326>: Marine Proteted Area of interest
@@ -679,7 +685,7 @@ def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
679685
crs_transformation_kms: coordinate reference system transformation applied to the MPA in meters
680686
681687
output(s):
682-
gdf <geopandas dataframe>: with an additional column ('mbu_biodiversity_score') containing the calculation of MBUs with this
688+
gdf <geopandas dataframe>: with an additional column ('mbu_shannon_index') containing the calculation of MBUs with this
683689
:factor information per grid or geometry
684690
"""
685691

@@ -688,25 +694,68 @@ def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
688694
#Shannon Index calculation
689695
df1 = shannon(MPA, gdf, grid_gdf)
690696

691-
#Simpson Index calculation
692-
df2 = simpson(MPA, gdf, grid_gdf)
693-
694697
#Normalization factor
695698
Norm_factor1 = df1['shannon']/df1['shannon'].max()
696-
Norm_factor2 = df2['simpson']/df2['simpson'].max()
697699

698700
#Convert area from degrees to square kilometers
699-
df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
701+
#df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
702+
703+
#Calculate the MBUS from this MF
704+
df1['mbu_shannon_index'] = Norm_factor1
705+
706+
elif source == 'IUCN':
707+
print('The Shannon Index - Modulating Factor is not available to IUCN data')
708+
709+
#Calculate the MBUS from this MF
710+
df1['mbu_shannon_index'] = 'NA'
711+
712+
else:
713+
raise ValueError("Unsupported source: {}".format(source))
714+
715+
return df1
716+
717+
#---------------------------------------------------------------------------------------------------------------------
718+
719+
def mbu_simpson_index(MPA, gdf, grid_gdf, source):
720+
"""
721+
This function uses the Simpson Index to quantify biodiversity in a specific habitat, in this the quantification is per grid or
722+
given area, and converts these numbers into MBUs.
723+
It calls internally the Simpson function to do the calculations
724+
725+
input(s):
726+
MPA <shapely polygon in CRS WGS84:EPSG 4326>: Marine Proteted Area of interest
727+
gdf <geopandas dataframe>: contains at least the name of the species and either
728+
i) the distribution polygons of each of them or (presumbaly from IUCN or local surveys),
729+
ii) points denoting the observations of each species - repeated observations for the same species
730+
grid_gdf <geopandas dataframe>: consists of polygons of grids typically generated by the gridding function
731+
: containts at least a geometry column and a unique grid_id
732+
source <str>: if the data is from OBIS or from IUCN
733+
crs_transformation_kms: coordinate reference system transformation applied to the MPA in meters
734+
735+
output(s):
736+
gdf <geopandas dataframe>: with an additional column ('mbu_biodiversity_score') containing the calculation of MBUs with this
737+
:factor information per grid or geometry
738+
"""
739+
740+
if source == 'OBIS':
741+
742+
#Simpson Index calculation
743+
df1 = simpson(MPA, gdf, grid_gdf)
700744

701-
#Add colums
702-
#df['shannon'] = df1['shannon']
703-
df1['simpson'] = df2['simpson']
745+
#Normalization factor
746+
Norm_factor1 = df1['simpson']/df1['simpson'].max()
747+
748+
#Convert area from degrees to square kilometers
749+
#df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
704750

705751
#Calculate the MBUS from this MF
706-
df1['mbu_biodiversity_score'] = Norm_factor1*df1['area_sqkm'] + Norm_factor2*df1['area_sqkm']
752+
df1['mbu_simpson_index'] = Norm_factor1
707753

708754
elif source == 'IUCN':
709-
print('The Biodiversity Score - Modulating Factor is not available to IUCN data')
755+
print('The Simpsion Index - Modulating Factor is not available to IUCN data')
756+
757+
#Calculate the MBUS from this MF
758+
df1['mbu_simpsion_index'] = 'NA'
710759

711760
else:
712761
raise ValueError("Unsupported source: {}".format(source))
@@ -715,7 +764,7 @@ def mbu_biodiversity_score(MPA, gdf, grid_gdf, source, crs_transformation_kms):
715764

716765
#---------------------------------------------------------------------------------------------------------------------
717766

718-
def mbu_species_richness(MPA, gdf, grid_gdf, crs_transformation_kms):
767+
def mbu_species_richness(MPA, gdf, grid_gdf):
719768
"""
720769
This function calculates the amount of MBUs from the species richness metric and converts these
721770
numbers into MBUs in a given area in sqd kms.
@@ -743,16 +792,16 @@ def mbu_species_richness(MPA, gdf, grid_gdf, crs_transformation_kms):
743792
Norm_factor1 = df1['species_richness']/df1['species_richness'].max()
744793

745794
#Convert area from degrees to square kilometers
746-
df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
795+
#df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
747796

748797
#Calculate the MBUS from this MF
749-
df1['mbu_species_richness'] = Norm_factor1*df1['area_sqkm']
798+
df1['mbu_species_richness'] = Norm_factor1
750799

751800
return df1
752801

753802
#---------------------------------------------------------------------------------------------------------------------
754803

755-
def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
804+
def mbu_endemism(MPA, gdf, grid_gdf, source):
756805
"""
757806
This function calculates the amount of MBUs from the Endemic index and converts these numbers into
758807
MBUs in a given area in sqd kms.
@@ -777,7 +826,7 @@ def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
777826
print('Endemic Modulating Factor is not available to OBIS data')
778827

779828
#Calculate the MBUS from this MF
780-
df1['mbu_endemism'] = 0
829+
df1['mbu_endemism'] = 'NA'
781830

782831
elif source == 'IUCN':
783832

@@ -788,10 +837,10 @@ def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
788837
Norm_factor1 = df1['endemism']/df1['endemism'].max()
789838

790839
#Convert area from degrees to square kilometers
791-
df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
840+
#df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
792841

793842
#Calculate the MBUS from this MF
794-
df1['mbu_endemism'] = Norm_factor1*df1['area_sqkm']
843+
df1['mbu_endemism'] = Norm_factor1
795844

796845
else:
797846
raise ValueError("Unsupported source: {}".format(source))
@@ -800,7 +849,7 @@ def mbu_endemism(MPA, gdf, grid_gdf, source, crs_transformation_kms):
800849

801850
#---------------------------------------------------------------------------------------------------------------------
802851

803-
def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
852+
def mbu_wege(MPA, gdf, grid_gdf, source):
804853
"""
805854
This function calculates the amount of MBUs from the WEGE index and converts these numbers into MBUs in a
806855
given area in sqd kms.
@@ -824,7 +873,7 @@ def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
824873
print('WEGE Modulating Factor is not available to OBIS data')
825874

826875
#Calculate the MBUS from this MF
827-
df1['mbu_endemism'] = 0
876+
df1['mbu_endemism'] = 'NA'
828877

829878
elif source == 'IUCN':
830879

@@ -835,10 +884,10 @@ def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
835884
Norm_factor1 = df1['wege']/df1['wege'].max()
836885

837886
#Convert area from degrees to square kilometers
838-
df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
887+
#df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
839888

840889
#Calculate the MBUS from this MF
841-
df1['mbu_wege'] = Norm_factor1*df1['area_sqkm']
890+
df1['mbu_wege'] = Norm_factor1
842891

843892
else:
844893
raise ValueError("Unsupported source: {}".format(source))
@@ -847,7 +896,7 @@ def mbu_wege(MPA, gdf, grid_gdf, source, crs_transformation_kms):
847896

848897
#---------------------------------------------------------------------------------------------------------------------
849898

850-
def mbu_habitats_survey(MPA, grid_gdf, path_EFG, crs_transformation_kms):
899+
def mbu_habitats_survey(MPA, grid_gdf, path_EFG):
851900
"""
852901
This function calculates the amount of MBUs from the Habitats Survey calculation and converts these numbers into MBUs in
853902
a given area in sqd kms.
@@ -871,23 +920,24 @@ def mbu_habitats_survey(MPA, grid_gdf, path_EFG, crs_transformation_kms):
871920
Norm_factor1 = df1['habitat_accounting']/df1['habitat_accounting'].max()
872921

873922
#Convert area from degrees to square kilometers
874-
df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
923+
#df1['area_sqkm'] = (df1.to_crs(crs=crs_transformation_kms).area)*10**(-6)
875924

876925
#Calculate the MBUS from this MF
877-
df1['mbu_habitats_survey'] = Norm_factor1*df1['area_sqkm']
926+
df1['mbu_habitats_survey'] = Norm_factor1
878927

879928
return df1
880929

881930
#---------------------------------------------------------------------------------------------------------------------
882931
#General MBU function
883932
#---------------------------------------------------------------------------------------------------------------------
884933

885-
def give_mbu_score(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, crs_transformation_kms):
934+
def weighted_MFs(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, weights):
886935
"""
887936
input(s):
888937
modulating_factor_names: list of names of the modulating factors, e.g. ["species_richness", "habitats_survey"]
889938
modulating_factor_names:
890-
- biodiversity_score
939+
- shannon_index
940+
- simpson_index
891941
- species_richness
892942
- endemism
893943
- wege
@@ -902,45 +952,67 @@ def give_mbu_score(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg,
902952
grid_shape <str>: either "square" or "hexagonal"
903953
path_EFG <list>: consist in a list with path location of each EFG file
904954
source <str>: if the data is from OBIS or from IUCN
905-
crs_transformation_kms: coordinate reference system transformation applied to the MPA in meters
955+
weights <dic>: it contains the weight values chosen for each modulation factor
906956
907957
output(s):
908958
gdf <geopandas dataframe>: with an additional columns with the MBUs from each MF chosen and the Total_Number_MBUs
909-
:per grid or geometry
959+
: per grid or geometry
910960
"""
911961
if not isinstance(modulating_factor_names, (np.ndarray, list)):
912962
print('A list of modulating factors to calculate MBUs is needed')
913963

914964
elif isinstance(modulating_factor_names, (np.ndarray, list)):
915965
grid = create_grid(MPA, grid_shape, grid_size_deg)
916966

917-
if 'biodiversity_score' in modulating_factor_names:
967+
if 'shannon_index' in modulating_factor_names:
968+
print('Calculating Shannon Index Modulating Factor')
918969
if source == 'OBIS':
919-
grid['mbu_biodiversity_score'] = mbu_biodiversity_score(MPA, gdf, grid, source, crs_transformation_kms)['mbu_biodiversity_score']
970+
grid['mbu_shannon_index'] = mbu_shannon_index(MPA, gdf, grid, source)['mbu_shannon_index']*(1/weights.get('shannon_index'))
920971
elif source == 'IUCN':
921-
grid['mbu_biodiversity_score'] = 0
922-
923-
if 'species_richness' in modulating_factor_names:
924-
grid['mbu_species_richness'] = mbu_species_richness(MPA, gdf, grid, crs_transformation_kms)['mbu_species_richness']
925-
926-
if 'endemism' in modulating_factor_names:
972+
grid['mbu_shannon_index'] = 0
973+
974+
if 'habitats_survey' in modulating_factor_names:
975+
print('Calculating Habitats Survey Modulating Factor')
976+
if not isinstance(path_EFG, np.ndarray):
977+
grid['mbu_habitats_survey'] = 0
978+
elif isinstance(path_EFG, np.ndarray):
979+
grid['mbu_habitats_survey'] = mbu_habitats_survey(MPA, grid, path_EFG)['mbu_habitats_survey']*(1/weights.get('habitats_survey'))
980+
981+
if 'wege' in modulating_factor_names:
982+
print('Calculating WEGE Modulating Factor')
927983
if source == 'OBIS':
928-
grid['mbu_endemism'] = 0
984+
grid['mbu_wege'] = 0
929985
elif source == 'IUCN':
930-
grid['mbu_endemism'] = mbu_endemism(MPA, gdf, grid, source, crs_transformation_kms)['mbu_endemism']
986+
grid['mbu_wege'] = mbu_wege(MPA, gdf, grid, source)['mbu_wege']*(1/weights.get('wege'))
931987

932-
if 'wege' in modulating_factor_names:
988+
if 'simpson_index' in modulating_factor_names:
989+
print('Calculating Simpson Index Modulating Factor')
933990
if source == 'OBIS':
934-
grid['mbu_wege'] = 0
991+
grid['mbu_simpson_index'] = mbu_simpson_index(MPA, gdf, grid, source)['mbu_simpson_index']*(1/weights.get('simpson_index'))
935992
elif source == 'IUCN':
936-
grid['mbu_wege'] = mbu_wege(MPA, gdf, grid, source, crs_transformation_kms)['mbu_wege']
993+
grid['mbu_simpson_index'] = 0
937994

938-
if 'habitats_survey' in modulating_factor_names:
939-
if not isinstance(path_EFG, np.ndarray):
940-
grid['mbu_habitats_survey'] = 0
941-
elif isinstance(path_EFG, np.ndarray):
942-
grid['mbu_habitats_survey'] = mbu_habitats_survey(MPA, grid, path_EFG, crs_transformation_kms)['mbu_habitats_survey']
995+
if 'species_richness' in modulating_factor_names:
996+
print('Calculating Species Richness Modulating Factor')
997+
grid['mbu_species_richness'] = mbu_species_richness(MPA, gdf, grid)['mbu_species_richness']*(1/weights.get('species_richness'))
998+
999+
grid = grid.drop('species_richness', axis=1)
1000+
1001+
return grid
9431002

944-
grid['Total_MBUs'] = grid['mbu_species_richness'] + grid['mbu_biodiversity_score'] + grid['mbu_endemism'] + grid['mbu_wege'] + grid['mbu_habitats_survey']
1003+
def total_mbu(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, weights, baseline_value):
1004+
"""
1005+
"""
1006+
grid = weighted_MFs(modulating_factor_names, MPA, gdf, grid_shape, grid_size_deg, path_EFG, source, weights)
9451007

946-
return grid
1008+
# filter columns that contain 'mbu' in their name
1009+
mbu_columns = [col for col in grid.columns if 'mbu' in col]
1010+
1011+
# calculate the sum of 'mbu' columns for each row and add as a new column
1012+
grid['mbu_sum'] = grid[mbu_columns].sum(axis=1)
1013+
1014+
# check if each value in 'mbu_sum' is within ±5 of the baseline value
1015+
grid['result'] = grid['mbu_sum'].apply(lambda x: 1 if abs(x - baseline_value) <= 5 else 0)
1016+
1017+
return grid
1018+

0 commit comments

Comments
 (0)