add ability to load BEEF correlations for UQ

sevyharris · sevyharris · commit 1ea52ddb12a0 · 2026-05-10T18:25:47.000-04:00
diff --git a/rmgpy/tools/uncertainty.py b/rmgpy/tools/uncertainty.py
@@ -28,7 +28,7 @@
 ###############################################################################
 
 import os
-import re
+import pickle
 
 import numpy as np
 
@@ -43,7 +43,7 @@ class ThermoParameterUncertainty(object):
     This class is an engine that generates the species uncertainty based on its thermo sources.
     """
 
-    def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.7159, dG_ADS_correction=6.918, dG_surf_lib=6.918):
+    def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.7159, dG_ADS_correction=6.918, dG_surf_lib=6.918, other_covariances=None):
         """
         Initialize the different uncertainties dG_library, dG_QM, dG_GAV, and dG_other with set values
         in units of kcal/mol.
@@ -57,6 +57,7 @@ def __init__(self, dG_library=1.5, dG_QM=3.0, dG_GAV=1.5, dG_group=0.7159, dG_AD
         self.dG_group = dG_group
         self.dG_ADS_correction = dG_ADS_correction
         self.dG_surf_lib = dG_surf_lib
+        self.other_covariances = other_covariances  # storage of covariances as a dict. Keys are sorted tuples of parameter labels and values are covariances
 
     def get_uncertainty_value(self, source):
         """
@@ -172,6 +173,12 @@ def _get_covariance_qq(self, q_label1, q_label2):
         if corr_type1 is None or corr_type2 is None:
             raise ValueError(f'Could not determine the type of the correlated parameters from their labels {q_label1} and {q_label2}')
 
+        if self.other_covariances is not None:
+            # check if covariance is specified in other_covariances dict
+            sorted_labels = tuple(sorted([q_label1, q_label2]))
+            if sorted_labels in self.other_covariances:
+                return self.other_covariances[sorted_labels]
+
         if corr_type1 != corr_type2:
             return 0
         elif q_label1 == q_label2:
@@ -394,11 +401,13 @@ class Uncertainty(object):
     for a single RMG-generated mechanism.
     """
 
-    def __init__(self, species_list=None, reaction_list=None, output_directory=''):
+    def __init__(self, species_list=None, reaction_list=None, output_directory='', thermo_covariance_libraries=None, kinetic_covariance_libraries=None):
         """
         `species_list`: list of RMG species objects
         `reaction_list`: list of RMG reaction objects
         `outputDirectoy`: directory path for saving output files from the analyses
+        `thermo_covariance_libraries`: list of library paths to pull additional thermo covariances from
+        `kinetic_covariance_libraries`: list of library paths to pull additional kinetic covariances from
         """
         self.database = None
         self.species_list = species_list
@@ -418,6 +427,10 @@ def __init__(self, species_list=None, reaction_list=None, output_directory=''):
         self.all_thermo_intermediates = None            # list of labels of underlying thermo parameters
         self.all_kinetics_intermediates = None          # list of labels of underlying kinetic parameters
         self.output_directory = output_directory if output_directory else os.getcwd()
+        self.thermo_covariance_libraries = thermo_covariance_libraries
+        self.kinetic_covariance_libraries = kinetic_covariance_libraries
+        self.thermo_covariances_dict = {}  # dictionary to store covariances from covariance libraries
+        self.kinetic_covariances_dict = {}  # dictionary to store covariances from covariance libraries
 
         # For extra species needed for correlated analysis but not in model
         self.extra_species = []
@@ -507,6 +520,63 @@ def retrieve_saturated_species_from_list(self, species):
         else:
             raise Exception('Could not retrieve saturated species form of {0} from the species list'.format(species))
 
+    def load_thermo_covariances_from_libraries(self):
+        assert self.database is not None, 'Must load database before loading covariance libraries, since we need the path to the covariance libraries from the database'
+        if self.thermo_covariance_libraries is not None:
+            for cov_lib in self.thermo_covariance_libraries:
+                library_name = os.path.basename(cov_lib)
+                if library_name in self.database.thermo.libraries:
+                    library = self.database.thermo.libraries[library_name]
+                else:
+                    raise ValueError(f'Thermo covariance library {library_name} not found in the loaded database')
+
+                covariance_file = os.path.join(cov_lib, 'covariance.npy')
+                covariance_molecules = os.path.join(cov_lib, 'molecules.pickle')
+                if not os.path.isfile(covariance_file):
+                    raise ValueError(f'Thermo covariance file {covariance_file} not found in library {cov_lib}')
+                if not os.path.isfile(covariance_molecules):
+                    raise ValueError(f'Thermo molecules file {covariance_molecules} not found in library {cov_lib}')
+                cov_data = np.load(covariance_file)
+                with open(covariance_molecules, 'rb') as f:
+                    sp_adj_lists = pickle.load(f)
+                    cov_specs = [Species().from_adjacency_list(adj_list) for adj_list in sp_adj_lists]
+                
+                # quick check to make sure the covariance data and molecule data are consistent with each other
+                if cov_data.shape[0] != len(cov_specs):
+                    raise ValueError(f'Covariance data and molecule data in library {cov_lib} are inconsistent: covariance data has shape {cov_data.shape} but molecule data has length {len(cov_specs)}')
+
+                # load the labels, but only include species in the model
+                subset_indices = []  # keep track of indices relevant to the model
+                for i_lib, lib_species in enumerate(cov_specs):
+                    i_sp = get_i_thing(lib_species, self.species_list)
+                    if i_sp < 0:
+                        continue
+
+                    # make sure the species actually comes from this library, otherwise skip
+                    result = self.database.thermo.get_thermo_data_from_library(lib_species, library)
+                    if result is not None:
+                        # match the label as constructed in assign_intermediate_uncertainties,
+                        # where the number corresponds to the index of the species in species_list
+                        try:
+                            label = 'Library {}'.format(self.species_list[i_sp].to_chemkin())
+                        except IndexError:
+                            label = 'Library {}'.format(self.extra_species[i_sp - len(self.species_list)].to_chemkin())
+                        lib_species.label = label
+                        subset_indices.append(i_lib)
+
+                # fill in the dictionary of covariances from the covariance libraries,
+                # with keys being sorted tuples of the labels of the correlated parameters
+                # and values being the covariance between those parameters
+                tolerance = 1e-12  # consider anything with covariance less than this to be uncorrelated
+                for i, index_i in enumerate(subset_indices):
+                    for j in range(i, len(subset_indices)):
+                        index_j = subset_indices[j]
+                        if cov_data[index_i, index_j] > tolerance:
+                            label1 = cov_specs[index_i].label
+                            label2 = cov_specs[index_j].label
+                            covariance = cov_data[index_i, index_j]
+                            self.covariances_dict[tuple(sorted([label1, label2]))] = covariance
+
     def extract_sources_from_model(self):
         """
         Extract the source data from the model using its comments.
@@ -626,6 +696,10 @@ def extract_sources_from_model(self):
         for spc in self.extra_species:
             self.species_list.remove(spc)
 
+        # -------------------- load covariance libraries ------------------------#
+        self.load_thermo_covariances_from_libraries()
+        self.load_kinetic_covariances_from_libraries()
+
     def compile_all_sources(self):
         """
         Compile two dictionaries composed of all the thermo and kinetic sources.  Must
@@ -1455,3 +1529,9 @@ def process_local_results(results, sensitive_species, number=10):
         output += '================================================================================\n\n'
 
     return processed_results, output
+
+def get_i_thing(thing, thing_list):
+    for i in range(len(thing_list)):
+        if thing.is_isomorphic(thing_list[i]):
+            return i
+    return -1