Skip to content

Commit 99e9c48

Browse files
committed
uncertainty add ability to load library covariance
The uncertainty tool currently assumes all entries within a library are uncorrelated with other entries in that same library, but in some cases, such as BEEF-vdW DFT, we have correlation information. This commit allows the uncertainty tool to load a .npy of the library's covariance matrix and use a .pickle file to map the species onto the species for your mechanism.
1 parent a576560 commit 99e9c48

1 file changed

Lines changed: 119 additions & 1 deletion

File tree

rmgpy/tools/uncertainty.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,11 +332,12 @@ class Uncertainty(object):
332332
for a single RMG-generated mechanism.
333333
"""
334334

335-
def __init__(self, species_list=None, reaction_list=None, output_directory=''):
335+
def __init__(self, species_list=None, reaction_list=None, output_directory='', thermo_corr_dir=None):
336336
"""
337337
`species_list`: list of RMG species objects
338338
`reaction_list`: list of RMG reaction objects
339339
`outputDirectoy`: directory path for saving output files from the analyses
340+
`thermo_corr_dir`: directory path for saving thermo correlation files
340341
"""
341342
self.database = None
342343
self.species_list = species_list
@@ -351,6 +352,8 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''):
351352
self.kinetic_covariance_matrix = None
352353
self.overall_covariance_matrix = None
353354
self.output_directory = output_directory if output_directory else os.getcwd()
355+
self.thermo_corr_dir = thermo_corr_dir
356+
self.thermo_corr_data = {} # a dictionary of available thermo covariance matrices for each library
354357

355358
# For extra species needed for correlated analysis but not in model
356359
self.extra_species = []
@@ -941,6 +944,121 @@ def get_thermo_covariance_matrix(self):
941944
for source_i in self.thermo_input_uncertainties[i].keys():
942945
if source_i in self.thermo_input_uncertainties[j].keys():
943946
self.thermo_covariance_matrix[i, j] += self.thermo_input_uncertainties[i][source_i] * self.thermo_input_uncertainties[j][source_i]
947+
948+
# load correlations between adsorbates calculated with BEEF vdW
949+
# load the saved species ensemble data: this includes a numpy matrix of dG values for each species and a pickle of the species adjacency list
950+
if self.thermo_corr_dir is not None:
951+
def get_i_spec(spec, species_list):
952+
for i in range(len(species_list)):
953+
if spec.is_isomorphic(species_list[i]):
954+
return i
955+
return None
956+
957+
def get_i_group(group, group_list):
958+
for i in range(len(group_list)):
959+
if group.is_identical(group_list[i]):
960+
return i
961+
return None
962+
963+
# load the ensemble data
964+
# TODO - generalize this
965+
my_library = 'surfaceThermoPt111'
966+
corr_thermo_data_file = os.path.join(self.thermo_corr_dir, f'{my_library}_cov.npy')
967+
self.thermo_corr_data = {my_library: np.load(corr_thermo_data_file) / 4.18 / 4.18} # convert from kJ^2/mol^2 to kcal^2/mol^2
968+
molecules_file = os.path.join(os.path.join(self.thermo_corr_dir, f'{my_library}_molecules.pickle'))
969+
with open(molecules_file, 'rb') as f:
970+
# this should be a list of molecule adjacency lists
971+
molecules_data = pickle.load(f)
972+
correlated_species = []
973+
for i in range(len(molecules_data)):
974+
sp = Species().from_adjacency_list(molecules_data[i])
975+
correlated_species.append(sp)
976+
assert len(correlated_species) == self.thermo_corr_data[my_library].shape[0]
977+
978+
assert 'adsorptionPt111' in self.database.thermo.groups, 'BEEF adsorption corrections require adsorptionPt111 group in the thermo database'
979+
db_ads_group_items = [self.database.thermo.groups['adsorptionPt111'].entries[key].item for key in self.database.thermo.groups['adsorptionPt111'].entries]
980+
981+
def get_species_indices_in_group(group):
982+
species_used = []
983+
for j in range(len(correlated_species)):
984+
if correlated_species[j].molecule[0].is_subgraph_isomorphic(group, generate_initial_map=True):
985+
species_used.append(j)
986+
return species_used
987+
988+
def get_cov_sp_lists(sp_list_i, sp_list_j):
989+
if len(sp_list_i) == 0 or len(sp_list_j) == 0:
990+
return 0
991+
992+
cov = 0
993+
for i in sp_list_i:
994+
for j in sp_list_j:
995+
cov += self.thermo_corr_data[my_library][i, j]
996+
cov /= len(sp_list_i) * len(sp_list_j)
997+
return cov
998+
999+
# now splice this into the big thermo covariance matrix
1000+
for i_spc in range(len(self.species_list)):
1001+
for j_spc in range(len(self.species_list)):
1002+
1003+
# does species use surfaceThermoPt111 lin?
1004+
i_uses_pt111_lib = False
1005+
j_uses_pt111_lib = False
1006+
if 'surfaceThermoPt111' in self.species_list[i_spc].thermo.comment:
1007+
i_uses_pt111_lib = True
1008+
if 'surfaceThermoPt111' in self.species_list[j_spc].thermo.comment:
1009+
j_uses_pt111_lib = True
1010+
1011+
# does species use adsorptionPt111 group?
1012+
i_ads_group = None
1013+
j_ads_group = None
1014+
if 'ADS' in self.species_sources_dict[self.species_list[i_spc]]:
1015+
i_ads_group = self.species_sources_dict[self.species_list[i_spc]]['ADS']['adsorptionPt111'][0][0].item
1016+
if 'ADS' in self.species_sources_dict[self.species_list[j_spc]]:
1017+
j_ads_group = self.species_sources_dict[self.species_list[j_spc]]['ADS']['adsorptionPt111'][0][0].item
1018+
1019+
# two libraries
1020+
if i_uses_pt111_lib and j_uses_pt111_lib:
1021+
i_spc_beef = get_i_spec(self.species_list[i_spc], correlated_species)
1022+
j_spc_beef = get_i_spec(self.species_list[j_spc], correlated_species)
1023+
if i_spc_beef is not None and j_spc_beef is not None:
1024+
self.thermo_covariance_matrix[i_spc, j_spc] += self.thermo_corr_data[my_library][i_spc_beef, j_spc_beef]
1025+
else:
1026+
# TODO add default value here??
1027+
pass
1028+
# two groups
1029+
elif i_ads_group is not None and j_ads_group is not None:
1030+
i_beef = get_i_group(i_ads_group, db_ads_group_items)
1031+
j_beef = get_i_group(j_ads_group, db_ads_group_items)
1032+
if i_beef is not None and j_beef is not None:
1033+
species_list_i = get_species_indices_in_group(db_ads_group_items[i_beef])
1034+
species_list_j = get_species_indices_in_group(db_ads_group_items[j_beef])
1035+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1036+
else:
1037+
# TODO add default value here??
1038+
pass
1039+
# one group and one library
1040+
elif i_uses_pt111_lib and j_ads_group is not None:
1041+
i_spc_beef = get_i_spec(self.species_list[i_spc], correlated_species)
1042+
if i_spc_beef is not None:
1043+
species_list_i = [i_spc_beef]
1044+
j_beef = get_i_group(j_ads_group, db_ads_group_items)
1045+
if j_beef is not None:
1046+
species_list_j = get_species_indices_in_group(db_ads_group_items[j_beef])
1047+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1048+
else:
1049+
# TODO add default value here??
1050+
pass
1051+
elif j_uses_pt111_lib and i_ads_group is not None:
1052+
j_spc_beef = get_i_spec(self.species_list[j_spc], correlated_species)
1053+
if j_spc_beef is not None:
1054+
species_list_j = [j_spc_beef]
1055+
i_beef = get_i_group(i_ads_group, db_ads_group_items)
1056+
if i_beef is not None:
1057+
species_list_i = get_species_indices_in_group(db_ads_group_items[i_beef])
1058+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1059+
else:
1060+
# TODO add default value here??
1061+
pass
9441062

9451063
return self.thermo_covariance_matrix
9461064

0 commit comments

Comments
 (0)