2828###############################################################################
2929
3030import os
31- import re
3231
3332import numpy as np
3433
34+ import rmgpy .data .thermo
3535import rmgpy .util as util
3636from rmgpy .species import Species
3737from rmgpy .tools .data import GenericData
@@ -43,7 +43,7 @@ class ThermoParameterUncertainty(object):
4343 This class is an engine that generates the species uncertainty based on its thermo sources.
4444 """
4545
46- def __init__ (self , dG_library = 1.5 , dG_QM = 3.0 , dG_GAV = 1.5 , dG_group = 0.7159 , dG_ADS_correction = 6.918 , dG_surf_lib = 6.918 ):
46+ def __init__ (self , dG_library = 1.5 , dG_QM = 3.0 , dG_GAV = 1.5 , dG_group = 0.7159 , dG_ADS_correction = 6.918 , dG_surf_lib = 6.918 , other_covariances = None ):
4747 """
4848 Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values
4949 in units of kcal/mol.
@@ -57,6 +57,7 @@ def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.7159, dG_AD
5757 self .dG_group = dG_group
5858 self .dG_ADS_correction = dG_ADS_correction
5959 self .dG_surf_lib = dG_surf_lib
60+ self .other_covariances = other_covariances # storage of covariances as a dict. Keys are sorted tuples of parameter labels and values are covariances
6061
6162 def get_uncertainty_value (self , source ):
6263 """
@@ -66,7 +67,14 @@ def get_uncertainty_value(self, source):
6667 if 'Library' in source :
6768 varG += self .dG_library * self .dG_library
6869 if 'Surface_Library' in source :
69- varG += self .dG_surf_lib * self .dG_surf_lib
70+ surf_lib_varG = self .dG_surf_lib * self .dG_surf_lib
71+ # covariance libraries should overrule the default uncertainties when available
72+ if self .other_covariances is not None :
73+ label = f'Surface_Library { source ["Surface_Library" ]} ' # match the covariance dict label format
74+ cov_label = (label , label )
75+ if cov_label in self .other_covariances :
76+ surf_lib_varG = self .other_covariances [cov_label ]
77+ varG += surf_lib_varG # Add the variance of the surface library parameter if covariance is not specified in the covariance libraries
7078 if 'QM' in source :
7179 varG += self .dG_QM * self .dG_QM
7280 if 'GAV' in source :
@@ -172,6 +180,12 @@ def _get_covariance_qq(self, q_label1, q_label2):
172180 if corr_type1 is None or corr_type2 is None :
173181 raise ValueError (f'Could not determine the type of the correlated parameters from their labels { q_label1 } and { q_label2 } ' )
174182
183+ if self .other_covariances is not None :
184+ # check if covariance is specified in other_covariances dict
185+ sorted_labels = tuple (sorted ([q_label1 , q_label2 ]))
186+ if sorted_labels in self .other_covariances :
187+ return self .other_covariances [sorted_labels ]
188+
175189 if corr_type1 != corr_type2 :
176190 return 0
177191 elif q_label1 == q_label2 :
@@ -394,11 +408,15 @@ class Uncertainty(object):
394408 for a single RMG-generated mechanism.
395409 """
396410
397- def __init__ (self , species_list = None , reaction_list = None , output_directory = '' ):
411+ def __init__ (self , species_list = None , reaction_list = None , output_directory = '' , thermo_covariance_libraries = None ,
412+ kinetic_covariance_libraries = None , thermo_covariance_groups = None ):
398413 """
399414 `species_list`: list of RMG species objects
400415 `reaction_list`: list of RMG reaction objects
401416 `outputDirectoy`: directory path for saving output files from the analyses
417+ `thermo_covariance_libraries`: list of library paths to pull additional thermo covariances from
418+ `kinetic_covariance_libraries`: list of library paths to pull additional kinetic covariances from
419+ `thermo_covariance_groups`: list of groups to get additional thermo covariances from
402420 """
403421 self .database = None
404422 self .species_list = species_list
@@ -418,6 +436,11 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''):
418436 self .all_thermo_intermediates = None # list of labels of underlying thermo parameters
419437 self .all_kinetics_intermediates = None # list of labels of underlying kinetic parameters
420438 self .output_directory = output_directory if output_directory else os .getcwd ()
439+ self .thermo_covariance_libraries = thermo_covariance_libraries
440+ self .thermo_covariance_groups = thermo_covariance_groups
441+ self .kinetic_covariance_libraries = kinetic_covariance_libraries
442+ self .thermo_covariances_dict = {} # dictionary to store covariances from covariance libraries
443+ self .kinetic_covariances_dict = {} # dictionary to store covariances from covariance libraries
421444
422445 # For extra species needed for correlated analysis but not in model
423446 self .extra_species = []
@@ -507,6 +530,154 @@ def retrieve_saturated_species_from_list(self, species):
507530 else :
508531 raise Exception ('Could not retrieve saturated species form of {0} from the species list' .format (species ))
509532
533+ def load_thermo_covariances_from_libraries (self ):
534+ from rmgpy .chemkin import load_species_dictionary
535+ assert self .database is not None , 'Must load database before loading covariance libraries, since we need the path to the covariance libraries from the database'
536+ if self .thermo_covariance_libraries is not None :
537+ for cov_lib in self .thermo_covariance_libraries :
538+ library_name = os .path .basename (cov_lib )
539+ if library_name in self .database .thermo .libraries :
540+ library = self .database .thermo .libraries [library_name ]
541+ else :
542+ raise ValueError (f'Thermo covariance library { library_name } not found in the loaded database' )
543+
544+ covariance_file = os .path .join (cov_lib , 'covariance.npy' )
545+ covariance_species = os .path .join (cov_lib , 'species_dictionary.txt' )
546+ if not os .path .isfile (covariance_file ):
547+ raise ValueError (f'Thermo covariance file { covariance_file } not found in library { cov_lib } ' )
548+ if not os .path .isfile (covariance_species ):
549+ raise ValueError (f'Thermo species file { covariance_species } not found in library { cov_lib } ' )
550+ cov_data = np .load (covariance_file )
551+ cov_species_dict = load_species_dictionary (covariance_species )
552+ cov_specs = [item for _ , item in cov_species_dict .items ()]
553+
554+ # quick check to make sure the covariance data and molecule data are consistent with each other
555+ if cov_data .shape [0 ] != len (cov_specs ):
556+ raise ValueError (f'Covariance data and molecule data in library { cov_lib } are inconsistent: covariance data has shape { cov_data .shape } but molecule data has length { len (cov_specs )} ' )
557+
558+ # load the labels, but only include species in the model
559+ subset_indices = [] # keep track of indices relevant to the model
560+ for i_lib , lib_species in enumerate (cov_specs ):
561+ i_sp = get_i_thing (lib_species , self .species_list )
562+ if i_sp < 0 :
563+ continue
564+
565+ # make sure the species actually comes from this library, otherwise skip
566+ result = self .database .thermo .get_thermo_data_from_library (lib_species , library )
567+ if result is not None :
568+ surface_prefix = 'Surface_' if lib_species .contains_surface_site () else ''
569+ # match the label as constructed in assign_intermediate_uncertainties,
570+ # where the number corresponds to the index of the species in species_list
571+ try :
572+ label = f'{ surface_prefix } Library { self .species_list [i_sp ].to_chemkin ()} '
573+ except IndexError :
574+ label = f'{ surface_prefix } Library { self .extra_species [i_sp - len (self .species_list )].to_chemkin ()} '
575+ lib_species .label = label
576+ subset_indices .append (i_lib )
577+
578+ # fill in the dictionary of covariances from the covariance libraries,
579+ # with keys being sorted tuples of the labels of the correlated parameters
580+ # and values being the covariance between those parameters
581+ tolerance = 1e-12 # consider anything with covariance less than this to be uncorrelated
582+ for i , index_i in enumerate (subset_indices ):
583+ for j in range (i , len (subset_indices )):
584+ index_j = subset_indices [j ]
585+ if cov_data [index_i , index_j ] > tolerance :
586+ label1 = cov_specs [index_i ].label
587+ label2 = cov_specs [index_j ].label
588+ covariance = cov_data [index_i , index_j ]
589+ self .thermo_covariances_dict [tuple (sorted ([label1 , label2 ]))] = covariance
590+
591+ def load_thermo_covariances_from_groups (self ):
592+ # assumes there might also be covariances associated with library entries
593+
594+ # associated library is hardcoded for now
595+ associated_libraries = {
596+ 'adsorptionPt111' : 'surfaceThermoPt111' ,
597+ }
598+ associated_library = None
599+
600+ assert self .database is not None , 'Must load database before loading covariance groups, since we need the path to the covariance groups/libraries from the database'
601+ if self .thermo_covariance_groups is not None :
602+ for cov_group_tree in self .thermo_covariance_groups :
603+ cov_group_tree_name = os .path .basename (cov_group_tree )
604+ if cov_group_tree_name in self .database .thermo .groups :
605+ grouptree = self .database .thermo .groups [cov_group_tree_name ]
606+ else :
607+ raise ValueError (f'Thermo covariance library { cov_group_tree_name } not found in the loaded database' )
608+ if cov_group_tree_name in associated_libraries :
609+ library_name = associated_libraries [cov_group_tree_name ]
610+ if library_name in self .database .thermo .libraries :
611+ associated_library = self .database .thermo .libraries [library_name ]
612+ else :
613+ raise ValueError (f'Associated library { library_name } for covariance group { cov_group_tree_name } not found in the loaded database' )
614+
615+ covariance_file = os .path .join (cov_group_tree , 'covariance.npy' )
616+ group_database_file = os .path .join (cov_group_tree , 'groups.py' )
617+ group_database = rmgpy .data .thermo .ThermoGroups ()
618+ group_database .load (group_database_file )
619+
620+ covariance_molecules = os .path .join (cov_group_tree , 'molecules.pickle' )
621+ if not os .path .isfile (covariance_file ):
622+ raise ValueError (f'Thermo covariance file { covariance_file } not found in { cov_group_tree } ' )
623+ if not os .path .isfile (covariance_molecules ):
624+ raise ValueError (f'Thermo molecules file { covariance_molecules } not found in { cov_group_tree } ' )
625+ cov_data = np .load (covariance_file )
626+
627+ # reconstruct the groups and molecules stored in the molecules.pickle file
628+
629+ with open (covariance_molecules , 'rb' ) as f :
630+ adj_lists = pickle .load (f )
631+ reconstructed_items = []
632+ for i in range (len (adj_lists )):
633+ try :
634+ item = rmgpy .molecule .Molecule ().from_adjacency_list (adj_lists [i ])
635+ except ValueError :
636+ item = rmgpy .molecule .Group ().from_adjacency_list (adj_lists [i ])
637+ reconstructed_items .append (item )
638+ assert len (cov_data ) == len (reconstructed_items )
639+
640+ n_groups = np .sum ([isinstance (mol , rmgpy .molecule .Group ) for mol in reconstructed_items ])
641+ n_mols = np .sum ([isinstance (mol , rmgpy .molecule .Molecule ) for mol in reconstructed_items ])
642+
643+
644+ # quick check to make sure the covariance data and molecule data are consistent with each other
645+ if cov_data .shape [0 ] != len (cov_specs ):
646+ raise ValueError (f'Covariance data and molecule data in library { cov_lib } are inconsistent: covariance data has shape { cov_data .shape } but molecule data has length { len (cov_specs )} ' )
647+
648+ # load the labels, but only include species in the model
649+ subset_indices = [] # keep track of indices relevant to the model
650+ for i_lib , lib_species in enumerate (cov_specs ):
651+ i_sp = get_i_thing (lib_species , self .species_list )
652+ if i_sp < 0 :
653+ continue
654+
655+ # make sure the species actually comes from this library, otherwise skip
656+ result = self .database .thermo .get_thermo_data_from_library (lib_species , library )
657+ if result is not None :
658+ surface_prefix = 'Surface_' if lib_species .contains_surface_site () else ''
659+ # match the label as constructed in assign_intermediate_uncertainties,
660+ # where the number corresponds to the index of the species in species_list
661+ try :
662+ label = f'{ surface_prefix } Library { self .species_list [i_sp ].to_chemkin ()} '
663+ except IndexError :
664+ label = f'{ surface_prefix } Library { self .extra_species [i_sp - len (self .species_list )].to_chemkin ()} '
665+ lib_species .label = label
666+ subset_indices .append (i_lib )
667+
668+ # fill in the dictionary of covariances from the covariance libraries,
669+ # with keys being sorted tuples of the labels of the correlated parameters
670+ # and values being the covariance between those parameters
671+ tolerance = 1e-12 # consider anything with covariance less than this to be uncorrelated
672+ for i , index_i in enumerate (subset_indices ):
673+ for j in range (i , len (subset_indices )):
674+ index_j = subset_indices [j ]
675+ if cov_data [index_i , index_j ] > tolerance :
676+ label1 = cov_specs [index_i ].label
677+ label2 = cov_specs [index_j ].label
678+ covariance = cov_data [index_i , index_j ]
679+ self .thermo_covariances_dict [tuple (sorted ([label1 , label2 ]))] = covariance
680+
510681 def extract_sources_from_model (self ):
511682 """
512683 Extract the source data from the model using its comments.
@@ -626,6 +797,9 @@ def extract_sources_from_model(self):
626797 for spc in self .extra_species :
627798 self .species_list .remove (spc )
628799
800+ # -------------------- load covariance libraries ------------------------#
801+ self .load_thermo_covariances_from_libraries ()
802+
629803 def compile_all_sources (self ):
630804 """
631805 Compile two dictionaries composed of all the thermo and kinetic sources. Must
@@ -721,7 +895,7 @@ def assign_parameter_uncertainties(self, g_param_engine=None, k_param_engine=Non
721895 Assign uncertainties based on the sources of the species thermo and reaction kinetics.
722896 """
723897 if g_param_engine is None :
724- g_param_engine = ThermoParameterUncertainty ()
898+ g_param_engine = ThermoParameterUncertainty (other_covariances = self . thermo_covariances_dict )
725899 if k_param_engine is None :
726900 k_param_engine = KineticParameterUncertainty ()
727901
@@ -730,7 +904,15 @@ def assign_parameter_uncertainties(self, g_param_engine=None, k_param_engine=Non
730904
731905 for species in self .species_list :
732906 if not correlated :
733- dG = g_param_engine .get_uncertainty_value (self .species_sources_dict [species ])
907+ entry = self .species_sources_dict [species ]
908+ if 'Surface_Library' in entry : # preconditioning for covariance
909+ # this is an ugly workaround to handle covariances: because get_uncertainty_value needs the species chemkin string to get the covariance
910+ # but the source dictionary only has the index of the surface library entry
911+ entry_copy = entry .copy ()
912+ entry_copy ['Surface_Library' ] = self .species_list [entry_copy ['Surface_Library' ]].to_chemkin ()
913+ dG = g_param_engine .get_uncertainty_value (entry_copy )
914+ else :
915+ dG = g_param_engine .get_uncertainty_value (self .species_sources_dict [species ])
734916 self .thermo_input_uncertainties .append (dG )
735917 else :
736918 source = self .species_sources_dict [species ]
@@ -854,7 +1036,7 @@ def assign_intermediate_uncertainties(self, g_param_engine=None, k_param_engine=
8541036 But instead of assuming all underlying parameters are independent, here we can allow for dependence as long as we have the covariance
8551037 """
8561038 if g_param_engine is None :
857- g_param_engine = ThermoParameterUncertainty ()
1039+ g_param_engine = ThermoParameterUncertainty (other_covariances = self . thermo_covariances_dict )
8581040 if k_param_engine is None :
8591041 k_param_engine = KineticParameterUncertainty ()
8601042
@@ -863,7 +1045,15 @@ def assign_intermediate_uncertainties(self, g_param_engine=None, k_param_engine=
8631045
8641046 for species in self .species_list :
8651047 if not correlated :
866- dG = g_param_engine .get_uncertainty_value (self .species_sources_dict [species ])
1048+ entry = self .species_sources_dict [species ]
1049+ if 'Surface_Library' in entry : # preconditioning for covariance
1050+ # this is an ugly workaround to handle covariances: because get_uncertainty_value needs the species chemkin string to get the covariance
1051+ # but the source dictionary only has the index of the surface library entry
1052+ entry_copy = entry .copy ()
1053+ entry_copy ['Surface_Library' ] = self .species_list [entry_copy ['Surface_Library' ]].to_chemkin ()
1054+ dG = g_param_engine .get_uncertainty_value (entry_copy )
1055+ else :
1056+ dG = g_param_engine .get_uncertainty_value (self .species_sources_dict [species ])
8671057 self .thermo_intermediate_uncertainties .append (dG ) # in the uncorrelated case, the intermediate is just the uncertainty value itself, since there is only one parameter that contributes to the uncertainty
8681058 else :
8691059 source = self .species_sources_dict [species ]
@@ -1248,7 +1438,7 @@ def get_thermo_covariance_matrix(self, g_param_engine=None):
12481438 self .thermo_covariance_matrix = np .zeros ((len (self .species_list ), len (self .species_list )))
12491439
12501440 if g_param_engine is None :
1251- g_param_engine = ThermoParameterUncertainty ()
1441+ g_param_engine = ThermoParameterUncertainty (other_covariances = self . thermo_covariances_dict )
12521442
12531443 for i in range (len (self .species_list )):
12541444 for j in range ((len (self .species_list ))):
@@ -1308,7 +1498,7 @@ def _get_intermediate_thermo_covariance_matrix(self, g_param_engine=None, subset
13081498 return self .Sigma_ww_thermo
13091499
13101500 if g_param_engine is None :
1311- g_param_engine = ThermoParameterUncertainty ()
1501+ g_param_engine = ThermoParameterUncertainty (other_covariances = self . thermo_covariances_dict )
13121502
13131503 self .all_thermo_intermediates = set ()
13141504 for sp_idx in subset_indices :
@@ -1455,3 +1645,9 @@ def process_local_results(results, sensitive_species, number=10):
14551645 output += '================================================================================\n \n '
14561646
14571647 return processed_results , output
1648+
1649+ def get_i_thing (thing , thing_list ):
1650+ for i in range (len (thing_list )):
1651+ if thing .is_isomorphic (thing_list [i ]):
1652+ return i
1653+ return - 1
0 commit comments