@@ -1643,7 +1643,7 @@ def correct_binding_energy(self, thermo, species, metal_to_scale_from=None, meta
16431643 thermo .comment += f" Binding energy corrected by LSR ({ '+' .join (comments )} ) from { metal_to_scale_from } (H={ change_in_binding_energy / 1e3 :+.0f} kJ/mol)"
16441644 return thermo
16451645
1646- def get_thermo_data_for_surface_species (self , species ):
1646+ def get_thermo_data_for_surface_species (self , species , return_resonance_data = False ):
16471647 """
16481648 Get the thermo data for an adsorbed species,
16491649 by desorbing it, finding the thermo of the gas-phase
@@ -1652,6 +1652,9 @@ def get_thermo_data_for_surface_species(self, species):
16521652 Does not apply linear scaling relationship.
16531653
16541654 Returns a :class:`ThermoData` object, with no Cp0 or CpInf
1655+
1656+ option to return resonance data if return_resonance_data is True,
1657+ which is useful for identifying the specific molecule chosen
16551658 """
16561659
16571660 # define the comparison function to find the lowest energy
@@ -1741,7 +1744,7 @@ def species_enthalpy(species):
17411744 atom .label = label
17421745
17431746 # a tuple of molecule and its thermo
1744- resonance_data .append ((molecule , thermo ))
1747+ resonance_data .append ((molecule , thermo , gas_phase_species ))
17451748
17461749 # Get the enthalpy of formation of every adsorbate at 298 K and
17471750 # determine the resonance structure with the lowest enthalpy of formation.
@@ -1751,7 +1754,7 @@ def species_enthalpy(species):
17511754 resonance_data = sorted (resonance_data , key = lambda x : x [1 ].H298 .value_si )
17521755
17531756 # reorder the resonance structures (molecules) by their H298
1754- species .molecule = [mol for mol , thermo in resonance_data ]
1757+ species .molecule = [mol for mol , thermo , gas_phase_species in resonance_data ]
17551758
17561759 thermo = resonance_data [0 ][1 ]
17571760
@@ -1760,6 +1763,9 @@ def species_enthalpy(species):
17601763
17611764 find_cp0_and_cpinf (species , thermo )
17621765
1766+ if return_resonance_data is True : # is True makes this a little more error-proof in case user accidentally provides another argument that evaluates to True
1767+ return thermo , resonance_data
1768+
17631769 return thermo
17641770
17651771 def _add_adsorption_correction (self , adsorption_thermo , adsorption_groups , molecule , surface_sites ):
@@ -2854,82 +2860,9 @@ def get_ring_groups_from_comments(self, thermo_data):
28542860
28552861 return ring_groups , polycyclic_groups
28562862
2857- def extract_source_from_comments (self , species ):
2858- """
2859- `species`: A species object containing thermo data and thermo data comments
2860-
2861- Parses the verbose string of comments from the thermo data of the species object,
2862- and extracts the thermo sources.
2863-
2864- Returns a dictionary with keys of 'Library', 'QM', 'ADS', and/or 'GAV'.
2865- Commonly, species thermo are estimated using only one of these sources.
2866- However, a radical can be estimated with more than one type of source, for
2867- instance a saturated library value and a GAV HBI correction, or a QM saturated value
2868- and a GAV HBI correction. Adsorbates can be estimated using a single library
2869- for the adsorbate or a combination of a gas phase library for the
2870- gas phase portion and an adsorption correction.
2871-
2872- source = {'Library': String_Name_of_Library_Used,
2873- 'QM': String_of_Method_Used,
2874- 'GAV': Dictionary_of_Groups_Used,
2875- 'ADS': Dictionary_of_Adsorption_Group_Used,
2876- }
2877-
2878- The Dictionary_of_Groups_Used looks like
2879- {'groupType':[List of tuples containing (Entry, Weight)]
2880- """
2881- comment = species .thermo .comment
2882- tokens = comment .split ()
2883-
2884- source = {}
2885-
2886- if comment .startswith ('Thermo library' ):
2887- # Store name of the library source, which is the 3rd token in the comments
2888- source ['Library' ] = tokens [2 ]
2889-
2890- elif comment .startswith ('QM' ):
2891- # Store the level of the calculation, which is the 2nd token in the comments
2892- source ['QM' ] = tokens [1 ]
2893-
2894- elif comment .startswith ('Gas phase thermo' ):
2895- # Handle adsorption correction thermo data of the following format:
2896- # Library example
2897- # Gas phase thermo for C(T) from Thermo library: primaryThermoLibrary.
2898- # Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(Cq*)
2899-
2900- # GAV example
2901- # Gas phase thermo for [CH]CC from Thermo group additivity estimation: group(Cs-CsCsHH) + group(Cs-CsHHH) + group(Cs-CsHHH) + radical(CCJ2_triplet).
2902- # Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C=*RCR3)"
2903-
2904- comment = comment .replace (r'\n' , ' ' )
2905- comment = comment .replace ('\n ' , ' ' )
2906- if 'Adsorption correction:' not in comment :
2907- raise ValueError (f'adsorption correction in unrecognized format { comment } ' )
2908-
2909- # Handle the gas-phase portion first
2910- gas_comment = comment .split ('Adsorption correction: + ' )[0 ].strip ()
2911- if gas_comment .endswith ('.' ):
2912- gas_comment = gas_comment [:- 1 ] # delete the . at the end if it exists
2913- gas_comment = gas_comment [gas_comment .find ('from ' , len ('Gas phase thermo for ' )) + len ('from ' ):]
2914- dummy_gas_phase_species = Species ()
2915- dummy_gas_phase_species .thermo = NASA ()
2916- dummy_gas_phase_species .thermo .comment = gas_comment
2917- source = self .extract_source_from_comments (dummy_gas_phase_species )
2918-
2919- # This is an adsorption correction
2920- # comment is split into two parts: the gas phase, and the surface adsorption correction
2921- ads_correction_comment = comment .split ('Adsorption correction: +' )[- 1 ].strip ()
2922- dummy_adsorption_correction_species = Species ()
2923- dummy_adsorption_correction_species .thermo = NASA ()
2924- dummy_adsorption_correction_species .thermo .comment = ads_correction_comment
2925- source ['ADS' ] = self .extract_source_from_comments (dummy_adsorption_correction_species )['GAV' ]
2926-
2927- return source
2928-
2929- # Check for group additivity contributions to the thermo in this species
2930-
2931- # The contribution of the groups can be either additive or subtracting
2932- # after changes to the polycyclic algorithm
2863+ def _parse_gav_groups (self , comment ):
2864+ """Extract the groups from the comment"""
2865+ groups = {}
29332866
29342867 comment = comment .replace (' + ' , ' +' )
29352868 comment = comment .replace (' - ' , ' -' )
@@ -2939,10 +2872,12 @@ def extract_source_from_comments(self, species):
29392872 # groups are still split by spaces
29402873 comment = comment .replace (')\n +' , ') +' )
29412874 comment = comment .replace (')\n -' , ') -' )
2875+ # `Thermo group additivity estimation:\nadsorptionPt111(...)` shows up in
2876+ # adsorbate comments - keep the trailing colon separated from the group token.
2877+ comment = comment .replace (':\n ' , ': ' )
29422878 comment = comment .replace ('\n ' , '' )
29432879 tokens = comment .split (' ' )
29442880
2945- groups = {}
29462881 group_types = list (self .groups .keys ())
29472882
29482883 regex = r"\((.*)\)" # only hit outermost parentheses
@@ -2970,14 +2905,180 @@ def extract_source_from_comments(self, species):
29702905
29712906 if groups :
29722907 # Indicate that group additivity is used when it is either an HBI correction
2973- # onto a thermo library or QM value, or if the entire molecule is estimated using group additivity
2908+ # onto a thermo library or QM value, or if the entire molecule is estimated using group additivity
29742909 # Save the groups into the source dictionary
29752910
2976- # Convert groups back into tuples
2911+ # Convert groups back into tuples
29772912 for groupType , groupDict in groups .items ():
29782913 groups [groupType ] = list (groupDict .items ())
29792914
2980- source ['GAV' ] = groups
2915+ return groups
2916+
2917+ def _parse_library_source (self , comment , library_species ):
2918+ # handle the library source comment, which looks like "Thermo library: library_name"
2919+ # we then need to retrieve the specific library entry given the species
2920+ # but may have unfortunate line breaks in the middle
2921+
2922+ # trim the comment down to just the library portion so it starts with Thermo library:
2923+ split_loc = comment .find ('Thermo library:' )
2924+ if split_loc == - 1 :
2925+ raise ValueError (f"Expected 'Thermo library:' in comment, got { comment } " )
2926+
2927+ comment = comment [split_loc :]
2928+
2929+ # library name is the token that comes immediately after 'Thermo library:'
2930+ assert 'Thermo library:' in comment , f"Expected 'Thermo library:' in comment, got { comment } "
2931+ tokens = comment .split ()
2932+ library_name = tokens [2 ]
2933+
2934+ results = self .get_thermo_data_from_library (library_species , self .libraries [library_name ])
2935+ if results is None :
2936+ raise DatabaseError (f"Could not find a library match for { library_species } in library { library_name } " )
2937+
2938+ data , thermo_library , library_entry = results
2939+ return (library_name , library_entry )
2940+
2941+ def _parse_adsorption_correction (self , comment ):
2942+ # handle the adsorption correction comment, which looks like
2943+ # "Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C-XR2CR3)"
2944+ # but may have unfortunate line breaks in the middle
2945+
2946+ # check that the number of tokens matches our expectation for an adsorption correction
2947+ # should be 8, maybe 9 if there was a weird line break
2948+ tokens = comment .split ()
2949+ if len (tokens ) not in [8 , 9 ]:
2950+ raise ValueError (f"Expected 8 or 9 tokens in adsorption correction comment, got { len (tokens )} : { comment } " )
2951+
2952+ ADS = self ._parse_gav_groups (comment )
2953+
2954+ if len (ADS ) > 1 :
2955+ raise ValueError ("Only adsorption corrections should be present in the adsorption correction portion of the comment. Found: {}" .format (ADS ))
2956+
2957+ return ADS
2958+
2959+ def extract_source_from_comments (self , species ):
2960+ """
2961+ `species`: A species object containing thermo data and thermo data comments
2962+
2963+ Parses the verbose string of comments from the thermo data of the species object,
2964+ and extracts the thermo sources.
2965+
2966+ Returns a dictionary with keys of 'Library', 'QM', 'ADS', and/or 'GAV'.
2967+ Commonly, species thermo are estimated using only one of these sources.
2968+ However, a radical can be estimated with more than one type of source, for
2969+ instance a saturated library value and a GAV HBI correction, or a QM saturated value
2970+ and a GAV HBI correction. Adsorbates can be estimated using a single library
2971+ for the adsorbate or a combination of a gas phase library for the
2972+ gas phase portion and an adsorption correction.
2973+
2974+ source = {'Library': (String_Name_of_Library_Used, Library_Entry_Used),
2975+ 'QM': (String_of_Method_Used, Molecule_Used_for_QM),
2976+ 'GAV': Dictionary_of_Groups_Used,
2977+ 'ADS': Dictionary_of_Adsorption_Group_Used,
2978+ }
2979+
2980+ The Dictionary_of_Groups_Used looks like
2981+ {'groupType':[List of tuples containing (Entry, Weight)]
2982+ """
2983+
2984+ # TODO: solvent, electrocat, LSR
2985+ source = {}
2986+
2987+ comment = species .thermo .comment
2988+ tokens = comment .split ()
2989+
2990+ ads_correction = 'Gas phase thermo' in comment and 'Adsorption correction:' in comment
2991+ library = 'Thermo library' in comment
2992+ QM = 'QM' in tokens
2993+ GAV = 'Thermo group additivity estimation:' in comment # ambiguous since ads correction looks identical to group
2994+
2995+ # the biggest thing to split on first is the adsorption correction
2996+ if ads_correction :
2997+ # The source options here are:
2998+ # (Library(gas-phase species), Adsorption correction)
2999+ # (QM(gas-phase species), Adsorption correction) <--- not really, QM is dead/sleeping
3000+ # (GAV(gas-phase species), Adsorption correction)
3001+ # (Library(gas-phase species), GAV(radical correction), Adsorption correction)
3002+ # (QM(gas-phase species), GAV(radical correction), Adsorption correction) <--- not really, QM is dead/sleeping
3003+
3004+ # split the comment into the gas phase thermo portion and the adsorption correction portion
3005+ split_loc = comment .find ('Adsorption correction:' )
3006+ if split_loc == - 1 :
3007+ raise ValueError (f"Expected 'Adsorption correction:' in comment, got { comment } " )
3008+ gas_comment = comment [:split_loc ].strip ()
3009+ if gas_comment .endswith ('.' ):
3010+ gas_comment = gas_comment [:- 1 ] # the period that closed the gas-phase sentence
3011+ ads_correction_comment = comment [split_loc :].strip ()
3012+ source ['ADS' ] = self ._parse_adsorption_correction (ads_correction_comment )
3013+
3014+ # get the desorbed gas species
3015+ thermo , resonance_data = self .get_thermo_data_for_surface_species (species , return_resonance_data = True )
3016+ desorbed_gas_species = resonance_data [0 ][2 ]
3017+
3018+ groups = self ._parse_gav_groups (gas_comment )
3019+ if groups : # (Library/QM + GAV + ADS) or (GAV + ADS)
3020+ source ['GAV' ] = groups # handle the GAV portion
3021+
3022+ if library or QM : # (Library/QM + GAV + ADS)
3023+ # this means the library/QM species is the desorbed, saturated gas-phase version of the adsorbate
3024+
3025+ assert desorbed_gas_species .molecule [0 ].is_radical (), "Method only valid for radicals."
3026+ molecule = desorbed_gas_species .molecule [0 ] # no need to deepcopy again since get_desorbed_molecules already does deepcopy
3027+ molecule .saturate_radicals () # note, this returns a dictionary instead of the Molecule object, but it modifies the molecule in place, so we can just ignore the returned dictionary
3028+ saturated_desorbed_gas_species = Species (molecule = [molecule ])
3029+
3030+ if library : # (Library(gas-phase species), GAV(radical correction), Adsorption correction)
3031+ source ['Library' ] = self ._parse_library_source (gas_comment , saturated_desorbed_gas_species )
3032+ if QM : # (QM(gas-phase species), GAV(radical correction), Adsorption correction)
3033+ # whatever token comes immediately after 'QM' is the method used
3034+ source ['QM' ] = (tokens [tokens .index ('QM' ) + 1 ], saturated_desorbed_gas_species )
3035+
3036+ else :
3037+ # no groups, so this is (Library/QM + ADS)
3038+ # in this case, the library/QM species is the desorbed gas-phase molecule of the adsorbate
3039+ if library :
3040+ source ['Library' ] = self ._parse_library_source (gas_comment , desorbed_gas_species )
3041+ if QM :
3042+ # whatever token comes immediately after 'QM' is the method used
3043+ source ['QM' ] = (tokens [tokens .index ('QM' ) + 1 ], desorbed_gas_species )
3044+
3045+ else :
3046+ # gas phase only, source options are:
3047+ # (Library)
3048+ # (QM)
3049+ # (GAV)
3050+ # (Library, GAV)
3051+ # (QM, GAV)
3052+ groups = self ._parse_gav_groups (comment )
3053+ GAV = 'Thermo group additivity estimation:' in comment
3054+ if GAV and not groups :
3055+ raise ValueError ("No groups were found in the comments but 'Thermo group additivity estimation:' was in the comment. Comment: {}" .format (comment ))
3056+ elif not GAV and groups :
3057+ if 'radical' not in groups .keys ():
3058+ raise ValueError ("Groups were found in the comments but 'Thermo group additivity estimation:' was not in the comment. Comment: {}" .format (comment ))
3059+
3060+ if groups :
3061+ # Get groups first
3062+ source ['GAV' ] = groups
3063+ if library or QM : # (Library/QM, GAV)
3064+ # get the saturated species for the library source
3065+ if 'radical' not in groups .keys ():
3066+ raise ValueError ("Method only valid for radicals, but no radical groups were found. Comment: {}" .format (comment ))
3067+ molecule = deepcopy (species .molecule [0 ])
3068+ assert molecule .is_radical (), "Method only valid for radicals."
3069+ molecule .saturate_radicals () # note, this returns a dictionary instead of the Molecule object, but it modifies the molecule in place, so we can just ignore the returned dictionary
3070+ saturated_species = Species (molecule = [molecule ])
3071+ if library : # (Library, GAV)
3072+ source ['Library' ] = self ._parse_library_source (comment , saturated_species )
3073+ if QM : # (QM, GAV)
3074+ # whatever token comes immediately after 'QM' is the method used
3075+ source ['QM' ] = (tokens [tokens .index ('QM' ) + 1 ], saturated_species )
3076+ else : # (Library) or (QM)
3077+ if library :
3078+ source ['Library' ] = self ._parse_library_source (comment , species )
3079+ if QM :
3080+ # whatever token comes immediately after 'QM' is the method used
3081+ source ['QM' ] = (tokens [tokens .index ('QM' ) + 1 ], species )
29813082
29823083 # Perform a sanity check that this molecule is estimated by at least one method
29833084 if not list (source .keys ()):
0 commit comments