Skip to content

Commit 486dd7d

Browse files
committed
uncertainty add ability to load library covariance
The uncertainty tool currently assumes all entries within a library are uncorrelated with other entries in that same library, but in some cases, such as BEEF-vdW DFT, we have correlation information. This commit allows the uncertainty tool to load a .npy of the library's covariance matrix and use a .pickle file to map the species onto the species for your mechanism.
1 parent 7b1d05c commit 486dd7d

1 file changed

Lines changed: 119 additions & 1 deletion

File tree

rmgpy/tools/uncertainty.py

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,11 +332,12 @@ class Uncertainty(object):
332332
for a single RMG-generated mechanism.
333333
"""
334334

335-
def __init__(self, species_list=None, reaction_list=None, output_directory=''):
335+
def __init__(self, species_list=None, reaction_list=None, output_directory='', thermo_corr_dir=None):
336336
"""
337337
`species_list`: list of RMG species objects
338338
`reaction_list`: list of RMG reaction objects
339339
`outputDirectoy`: directory path for saving output files from the analyses
340+
`thermo_corr_dir`: directory path for saving thermo correlation files
340341
"""
341342
self.database = None
342343
self.species_list = species_list
@@ -351,6 +352,8 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''):
351352
self.kinetic_covariance_matrix = None
352353
self.overall_covariance_matrix = None
353354
self.output_directory = output_directory if output_directory else os.getcwd()
355+
self.thermo_corr_dir = thermo_corr_dir
356+
self.thermo_corr_data = {} # a dictionary of available thermo covariance matrices for each library
354357

355358
# For extra species needed for correlated analysis but not in model
356359
self.extra_species = []
@@ -923,6 +926,121 @@ def get_thermo_covariance_matrix(self):
923926
for source_i in self.thermo_input_uncertainties[i].keys():
924927
if source_i in self.thermo_input_uncertainties[j].keys():
925928
self.thermo_covariance_matrix[i, j] += self.thermo_input_uncertainties[i][source_i] * self.thermo_input_uncertainties[j][source_i]
929+
930+
# load correlations between adsorbates calculated with BEEF vdW
931+
# load the saved species ensemble data: this includes a numpy matrix of dG values for each species and a pickle of the species adjacency list
932+
if self.thermo_corr_dir is not None:
933+
def get_i_spec(spec, species_list):
934+
for i in range(len(species_list)):
935+
if spec.is_isomorphic(species_list[i]):
936+
return i
937+
return None
938+
939+
def get_i_group(group, group_list):
940+
for i in range(len(group_list)):
941+
if group.is_identical(group_list[i]):
942+
return i
943+
return None
944+
945+
# load the ensemble data
946+
# TODO - generalize this
947+
my_library = 'surfaceThermoPt111'
948+
corr_thermo_data_file = os.path.join(self.thermo_corr_dir, f'{my_library}_cov.npy')
949+
self.thermo_corr_data = {my_library: np.load(corr_thermo_data_file) / 4.18 / 4.18} # convert from kJ^2/mol^2 to kcal^2/mol^2
950+
molecules_file = os.path.join(os.path.join(self.thermo_corr_dir, f'{my_library}_molecules.pickle'))
951+
with open(molecules_file, 'rb') as f:
952+
# this should be a list of molecule adjacency lists
953+
molecules_data = pickle.load(f)
954+
correlated_species = []
955+
for i in range(len(molecules_data)):
956+
sp = Species().from_adjacency_list(molecules_data[i])
957+
correlated_species.append(sp)
958+
assert len(correlated_species) == self.thermo_corr_data[my_library].shape[0]
959+
960+
assert 'adsorptionPt111' in self.database.thermo.groups, 'BEEF adsorption corrections require adsorptionPt111 group in the thermo database'
961+
db_ads_group_items = [self.database.thermo.groups['adsorptionPt111'].entries[key].item for key in self.database.thermo.groups['adsorptionPt111'].entries]
962+
963+
def get_species_indices_in_group(group):
964+
species_used = []
965+
for j in range(len(correlated_species)):
966+
if correlated_species[j].molecule[0].is_subgraph_isomorphic(group, generate_initial_map=True):
967+
species_used.append(j)
968+
return species_used
969+
970+
def get_cov_sp_lists(sp_list_i, sp_list_j):
971+
if len(sp_list_i) == 0 or len(sp_list_j) == 0:
972+
return 0
973+
974+
cov = 0
975+
for i in sp_list_i:
976+
for j in sp_list_j:
977+
cov += self.thermo_corr_data[my_library][i, j]
978+
cov /= len(sp_list_i) * len(sp_list_j)
979+
return cov
980+
981+
# now splice this into the big thermo covariance matrix
982+
for i_spc in range(len(self.species_list)):
983+
for j_spc in range(len(self.species_list)):
984+
985+
# does species use surfaceThermoPt111 lin?
986+
i_uses_pt111_lib = False
987+
j_uses_pt111_lib = False
988+
if 'surfaceThermoPt111' in self.species_list[i_spc].thermo.comment:
989+
i_uses_pt111_lib = True
990+
if 'surfaceThermoPt111' in self.species_list[j_spc].thermo.comment:
991+
j_uses_pt111_lib = True
992+
993+
# does species use adsorptionPt111 group?
994+
i_ads_group = None
995+
j_ads_group = None
996+
if 'ADS' in self.species_sources_dict[self.species_list[i_spc]]:
997+
i_ads_group = self.species_sources_dict[self.species_list[i_spc]]['ADS']['adsorptionPt111'][0][0].item
998+
if 'ADS' in self.species_sources_dict[self.species_list[j_spc]]:
999+
j_ads_group = self.species_sources_dict[self.species_list[j_spc]]['ADS']['adsorptionPt111'][0][0].item
1000+
1001+
# two libraries
1002+
if i_uses_pt111_lib and j_uses_pt111_lib:
1003+
i_spc_beef = get_i_spec(self.species_list[i_spc], correlated_species)
1004+
j_spc_beef = get_i_spec(self.species_list[j_spc], correlated_species)
1005+
if i_spc_beef is not None and j_spc_beef is not None:
1006+
self.thermo_covariance_matrix[i_spc, j_spc] += self.thermo_corr_data[my_library][i_spc_beef, j_spc_beef]
1007+
else:
1008+
# TODO add default value here??
1009+
pass
1010+
# two groups
1011+
elif i_ads_group is not None and j_ads_group is not None:
1012+
i_beef = get_i_group(i_ads_group, db_ads_group_items)
1013+
j_beef = get_i_group(j_ads_group, db_ads_group_items)
1014+
if i_beef is not None and j_beef is not None:
1015+
species_list_i = get_species_indices_in_group(db_ads_group_items[i_beef])
1016+
species_list_j = get_species_indices_in_group(db_ads_group_items[j_beef])
1017+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1018+
else:
1019+
# TODO add default value here??
1020+
pass
1021+
# one group and one library
1022+
elif i_uses_pt111_lib and j_ads_group is not None:
1023+
i_spc_beef = get_i_spec(self.species_list[i_spc], correlated_species)
1024+
if i_spc_beef is not None:
1025+
species_list_i = [i_spc_beef]
1026+
j_beef = get_i_group(j_ads_group, db_ads_group_items)
1027+
if j_beef is not None:
1028+
species_list_j = get_species_indices_in_group(db_ads_group_items[j_beef])
1029+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1030+
else:
1031+
# TODO add default value here??
1032+
pass
1033+
elif j_uses_pt111_lib and i_ads_group is not None:
1034+
j_spc_beef = get_i_spec(self.species_list[j_spc], correlated_species)
1035+
if j_spc_beef is not None:
1036+
species_list_j = [j_spc_beef]
1037+
i_beef = get_i_group(i_ads_group, db_ads_group_items)
1038+
if i_beef is not None:
1039+
species_list_i = get_species_indices_in_group(db_ads_group_items[i_beef])
1040+
self.thermo_covariance_matrix[i_spc, j_spc] += get_cov_sp_lists(species_list_i, species_list_j)
1041+
else:
1042+
# TODO add default value here??
1043+
pass
9261044

9271045
return self.thermo_covariance_matrix
9281046

0 commit comments

Comments
 (0)