Skip to content

Commit af58f3a

Browse files
committed
uncertainty add ability to load library covariance
The uncertainty tool currently assumes all entries within a library are uncorrelated with other entries in that same library, but in some cases, such as BEEF-vdW DFT, we have correlation information. This commit allows the uncertainty tool to load a .npy of the library's covariance matrix and use a .pickle file to map the species onto the species for your mechanism.
1 parent b9860ae commit af58f3a

1 file changed

Lines changed: 119 additions & 1 deletion

File tree

rmgpy/tools/uncertainty.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,11 +320,12 @@ class Uncertainty(object):
320320
for a single RMG-generated mechanism.
321321
"""
322322

323-
def __init__(self, species_list=None, reaction_list=None, output_directory=''):
323+
def __init__(self, species_list=None, reaction_list=None, output_directory='', thermo_corr_dir=None):
324324
"""
325325
`species_list`: list of RMG species objects
326326
`reaction_list`: list of RMG reaction objects
327327
`outputDirectoy`: directory path for saving output files from the analyses
328+
`thermo_corr_dir`: directory path for saving thermo correlation files
328329
"""
329330
self.database = None
330331
self.species_list = species_list
@@ -339,6 +340,8 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''):
339340
self.kinetic_covariance_matrix = None
340341
self.overall_covariance_matrix = None
341342
self.output_directory = output_directory if output_directory else os.getcwd()
343+
self.thermo_corr_dir = thermo_corr_dir
344+
self.thermo_corr_data = {} # a dictionary of available thermo covariance matrices for each library
342345

343346
# For extra species needed for correlated analysis but not in model
344347
self.extra_species = []
@@ -904,6 +907,121 @@ def get_thermo_covariance_matrix(self):
904907
for source_i in self.thermo_input_uncertainties[i].keys():
905908
if source_i in self.thermo_input_uncertainties[j].keys():
906909
self.thermo_covariance_matrix[i, j] += self.thermo_input_uncertainties[i][source_i] * self.thermo_input_uncertainties[j][source_i]
910+
911+
# load correlations between adsorbates calculated with BEEF vdW
912+
# load the saved species ensemble data: this includes a numpy matrix of dG values for each species and a pickle of the species adjacency list
913+
if self.thermo_corr_dir is not None:
914+
def get_i_spec(spec, species_list):
915+
for i in range(len(species_list)):
916+
if spec.is_isomorphic(species_list[i]):
917+
return i
918+
return None
919+
920+
def get_i_group(group, group_list):
921+
for i in range(len(group_list)):
922+
if group.is_identical(group_list[i]):
923+
return i
924+
return None
925+
926+
# load the ensemble data
927+
# TODO - generalize this
928+
my_library = 'surfaceThermoPt111'
929+
corr_thermo_data_file = os.path.join(self.thermo_corr_dir, f'{my_library}_cov.npy')
930+
self.thermo_corr_data = {my_library: np.load(corr_thermo_data_file) / 4.18 / 4.18} # convert from kJ^2/mol^2 to kcal^2/mol^2
931+
molecules_file = os.path.join(os.path.join(self.thermo_corr_dir, f'{my_library}_molecules.pickle'))
932+
with open(molecules_file, 'rb') as f:
933+
# this should be a list of molecule adjacency lists
934+
molecules_data = pickle.load(f)
935+
correlated_species = []
936+
for i in range(len(molecules_data)):
937+
sp = Species().from_adjacency_list(molecules_data[i])
938+
correlated_species.append(sp)
939+
assert len(correlated_species) == self.thermo_corr_data[my_library].shape[0]
940+
941+
assert 'adsorptionPt111' in self.database.thermo.groups, 'BEEF adsorption corrections require adsorptionPt111 group in the thermo database'
942+
db_ads_group_items = [self.database.thermo.groups['adsorptionPt111'].entries[key].item for key in self.database.thermo.groups['adsorptionPt111'].entries]
943+
944+
def get_species_indices_in_group(group):
945+
species_used = []
946+
for j in range(len(correlated_species)):
947+
if correlated_species[j].molecule[0].is_subgraph_isomorphic(group, generate_initial_map=True):
948+
species_used.append(j)
949+
return species_used
950+
951+
def get_cov_sp_lists(sp_list_i, sp_list_j):
952+
if len(sp_list_i) == 0 or len(sp_list_j) == 0:
953+
return 0
954+
955+
cov = 0
956+
for i in sp_list_i:
957+
for j in sp_list_j:
958+
cov += self.thermo_corr_data[my_library][i, j]
959+
cov /= len(sp_list_i) * len(sp_list_j)
960+
return cov
961+
962+
# now splice this into the big thermo covariance matrix
963+
for i_spc in range(len(self.species_list)):
964+
for j_spc in range(len(self.species_list)):
965+
966+
# does species use surfaceThermoPt111 lin?
967+
i_uses_pt111_lib = False
968+
j_uses_pt111_lib = False
969+
if 'surfaceThermoPt111' in self.species_list[i_spc].thermo.comment:
970+
i_uses_pt111_lib = True
971+
if 'surfaceThermoPt111' in self.species_list[j_spc].thermo.comment:
972+
j_uses_pt111_lib = True
973+
974+
# does species use adsorptionPt111 group?
975+
i_ads_group = None
976+
j_ads_group = None
977+
if 'ADS' in self.species_sources_dict[self.species_list[i_spc]]:
978+
i_ads_group = self.species_sources_dict[self.species_list[i_spc]]['ADS']['adsorptionPt111'][0][0].item
979+
if 'ADS' in self.species_sources_dict[self.species_list[j_spc]]:
980+
j_ads_group = self.species_sources_dict[self.species_list[j_spc]]['ADS']['adsorptionPt111'][0][0].item
981+
982+
# two libraries
983+
if i_uses_pt111_lib and j_uses_pt111_lib:
984+
i_spc_beef = get_i_spec(self.species_list[i_spc], correlated_species)
985+
j_spc_beef = get_i_spec(self.species_list[j_spc], correlated_species)
986+
if i_spc_beef is not None and j_spc_beef is not None:
987+
self.thermo_covariance_matrix[i_spc, j_spc] += self.thermo_corr_data[my_library][i_spc_beef, j_spc_beef]
988+
else:
989+
# TODO add default value here??
990+
pass
991+
# two groups
992+
elif i_ads_group is not None and j_ads_group is not None:
993+
i_beef = get_i_group(i_ads_group, db_ads_group_items)
994+
j_beef = get_i_group(j_ads_group, db_ads_group_items)
995+
if i_beef is not None and j_beef is not None:
996+
species_list_i = get_species_indices_in_group(db_ads_group_items[i_beef])
997+
species_list_j = get_species_indices_in_group(db_ads_group_items[j_beef])
998+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
999+
else:
1000+
# TODO add default value here??
1001+
pass
1002+
# one group and one library
1003+
elif i_uses_pt111_lib and j_ads_group is not None:
1004+
i_spc_beef = get_i_spec(self.species_list[i_spc], correlated_species)
1005+
if i_spc_beef is not None:
1006+
species_list_i = [i_spc_beef]
1007+
j_beef = get_i_group(j_ads_group, db_ads_group_items)
1008+
if j_beef is not None:
1009+
species_list_j = get_species_indices_in_group(db_ads_group_items[j_beef])
1010+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1011+
else:
1012+
# TODO add default value here??
1013+
pass
1014+
elif j_uses_pt111_lib and i_ads_group is not None:
1015+
j_spc_beef = get_i_spec(self.species_list[j_spc], correlated_species)
1016+
if j_spc_beef is not None:
1017+
species_list_j = [j_spc_beef]
1018+
i_beef = get_i_group(i_ads_group, db_ads_group_items)
1019+
if i_beef is not None:
1020+
species_list_i = get_species_indices_in_group(db_ads_group_items[i_beef])
1021+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1022+
else:
1023+
# TODO add default value here??
1024+
pass
9071025

9081026
return self.thermo_covariance_matrix
9091027

0 commit comments

Comments
 (0)