diff --git a/arc/job/adapters/xtb_adapter.py b/arc/job/adapters/xtb_adapter.py index 31b4879c14..7328a2ce4c 100644 --- a/arc/job/adapters/xtb_adapter.py +++ b/arc/job/adapters/xtb_adapter.py @@ -328,7 +328,9 @@ def opt_ts(self): 'futurewarning:', 'userwarning:', 'deprecationwarning:', - 'warnings.warn(' + 'warnings.warn(', + 'pjrt_executable.cc', + 'cpu_aot_loader.cc', ] real_errors = [] for line in stderr: diff --git a/arc/level.py b/arc/level.py index 50d2062d2a..947c4b8828 100644 --- a/arc/level.py +++ b/arc/level.py @@ -39,6 +39,7 @@ class Level(object): args (Dict[Dict[str, str]], optional): Additional arguments provided to the software. Different than the ``args`` in ``LevelOfTheory``. compatible_ess (list, optional): Entries are names of compatible ESS. Not in ``LevelOfTheory``. + year (int, optional): Optional 4-digit year suffix for differentiating methods such as b97d3/b97d32023. """ def __init__(self, @@ -56,6 +57,7 @@ def __init__(self, solvent: Optional[str] = None, solvation_scheme_level: Optional['Level'] = None, args: Optional[Union[Dict[str, str], Iterable, str]] = None, + year: Optional[int] = None, ): self.repr = repr self.method = method @@ -88,6 +90,12 @@ def __init__(self, 'Both solvation method and solvent must be defined together, or both must be None. ' f'Got solvation method = "{self.solvation_method}", solvent = "{self.solvent}".' ) + if year is not None: + self.year = int(year) + if self.year < 1000 or self.year > 9999: + raise ValueError(f'year must be a 4-digit integer (1000-9999), got {self.year}.') + else: + self.year = None self.args = args or {'keyword': dict(), 'block': dict()} if self.repr is not None: @@ -122,6 +130,8 @@ def __str__(self) -> str: str_ = self.method if self.basis is not None: str_ += f'/{self.basis}' + if self.year is not None: + str_ += f', year: {self.year}' if self.auxiliary_basis is not None: str_ += f', auxiliary_basis: {self.auxiliary_basis}' if self.dispersion is not None: @@ -165,6 +175,8 @@ def simple(self) -> str: str_ = self.method if self.basis is not None: str_ += f'/{self.basis}' + if self.year is not None: + str_ += f' ({self.year})' return str_ def as_dict(self) -> dict: @@ -196,7 +208,8 @@ def build(self): 'solvation_method': None, 'solvent': None, 'solvation_scheme_level': None, - 'args': None} + 'args': None, + 'year': None} allowed_keys = list(level_dict.keys()) if isinstance(self.repr, str): @@ -304,6 +317,20 @@ def lower(self): self.args = args + def warn_if_year_set(self, attr_name: str): + """ + Warn if ``year`` is set on this Level. The ``year`` attribute only affects Arkane + database matching and is ignored on non-``arkane_level_of_theory`` levels. + + Args: + attr_name (str): The name of the attribute this Level is assigned to (for the warning message). + """ + if self.year is not None: + logger.warning( + f'The "year" attribute on {attr_name} ({self.simple()}) has no effect. ' + f'Year is only used for Arkane database matching via arkane_level_of_theory.' + ) + def deduce_method_type(self): """ Determine the model chemistry type: diff --git a/arc/level_test.py b/arc/level_test.py index 8a39542db6..b3e538a086 100644 --- a/arc/level_test.py +++ b/arc/level_test.py @@ -119,6 +119,27 @@ def test_build(self): "dlpno-ccsd(t)/def2-tzvp, auxiliary_basis: def2-tzvp/c, solvation_method: smd, " "solvent: water, solvation_scheme_level: 'apfd/def2-tzvp, software: gaussian', software: orca") + def test_year_validation(self): + """Test year validation for Level""" + with self.assertRaises(ValueError): + Level(method='b97d3', basis='def2tzvp', year=23) + level = Level(method='b97d3', basis='def2tzvp', year=2023) + self.assertEqual(level.year, 2023) + + def test_warn_if_year_set(self): + """Test that warn_if_year_set logs a warning when year is set.""" + level_with_year = Level(method='b97d3', basis='def2tzvp', year=2023) + with self.assertLogs('arc', level='WARNING') as cm: + level_with_year.warn_if_year_set('sp_level') + self.assertEqual(len(cm.output), 1) + self.assertIn('sp_level', cm.output[0]) + self.assertIn('has no effect', cm.output[0]) + + level_no_year = Level(method='b97d3', basis='def2tzvp') + with self.assertRaises(AssertionError): + with self.assertLogs('arc', level='WARNING'): + level_no_year.warn_if_year_set('opt_level') + def test_ess_methods_yml(self): """Test reading the ess_methods.yml file""" ess_methods = read_yaml_file(path=os.path.join(ARC_PATH, 'data', 'ess_methods.yml')) diff --git a/arc/main.py b/arc/main.py index 7c302a9892..01a26b0598 100644 --- a/arc/main.py +++ b/arc/main.py @@ -406,6 +406,7 @@ def __init__(self, self.job_types['opt'] = True # Run the optimizations, self.fine_only will make sure that they are fine. self.set_levels_of_theory() # All level of theories should be Level types after this call. + self._warn_year_on_non_arkane_levels() if self.thermo_adapter == 'arkane': self.check_arkane_level_of_theory() @@ -1155,6 +1156,17 @@ def process_level_of_theory(self): self.level_of_theory = '' # Reset the level_of_theory argument to avoid conflicts upon restarting ARC. + def _warn_year_on_non_arkane_levels(self): + """ + Warn if ``year`` was specified on any Level other than ``arkane_level_of_theory``. + The ``year`` attribute only affects Arkane database matching and is ignored elsewhere. + """ + for attr_name in ('sp_level', 'opt_level', 'freq_level', 'scan_level', 'irc_level', + 'conformer_opt_level', 'conformer_sp_level', 'orbitals_level'): + level = getattr(self, attr_name, None) + if isinstance(level, Level): + level.warn_if_year_set(attr_name) + def check_arkane_level_of_theory(self): """ Check that the level of theory has AEC in Arkane. diff --git a/arc/main_test.py b/arc/main_test.py index 8251079306..4034c55cd5 100644 --- a/arc/main_test.py +++ b/arc/main_test.py @@ -340,6 +340,24 @@ def test_determine_model_chemistry_for_job_types(self): self.assertEqual(arc14.freq_level.simple(), 'pm6') self.assertEqual(arc14.sp_level.simple(), 'amber') + # Test explicit year in arkane_level_of_theory dictionary + arc15 = ARC(project='test', + sp_level='wb97xd/def2tzvp', + opt_level='wb97xd/def2tzvp', + arkane_level_of_theory={'method': 'wb97xd', 'basis': 'def2tzvp', 'year': 2023}, + bac_type=None, + calc_freq_factor=False, compute_thermo=False) + self.assertEqual(arc15.arkane_level_of_theory.year, 2023) + + # Test warning when year is specified on sp_level instead of arkane_level_of_theory + arc16 = ARC(project='test', + sp_level={'method': 'wb97xd', 'basis': 'def2tzvp', 'year': 2023}, + opt_level='wb97xd/def2tzvp', + calc_freq_factor=False, compute_thermo=False) + with open(os.path.join(arc16.project_directory, 'arc.log'), 'r') as f: + log_content = f.read() + self.assertIn('"year" attribute on sp_level', log_content) + def test_determine_unique_species_labels(self): """Test the determine_unique_species_labels method""" spc0 = ARCSpecies(label='spc0', smiles='CC', compute_thermo=False) diff --git a/arc/statmech/arkane.py b/arc/statmech/arkane.py index d34de61129..173690a7e8 100644 --- a/arc/statmech/arkane.py +++ b/arc/statmech/arkane.py @@ -545,7 +545,8 @@ def run_arkane(statmech_dir: str) -> bool: ignorable_phrases = [ "Open Babel Warning", "Accepted unusual valence", - "==============================" + "==============================", + "pjrt_executable.cc", ] real_errors = [] @@ -653,14 +654,15 @@ def create_statmech_dir(calcs_directory: str, return statmech_dir -def _extract_section(file_path: str, section_start: str, section_end: str) -> Optional[str]: +def _extract_section(file_path: str, section_start: str, section_end: Optional[str] = None) -> Optional[str]: """ Extract a section from a file between section_start and section_end. Args: file_path (str): Path to the file to read. section_start (str): String marking the start of the section. - section_end (str): String marking the end of the section. + section_end (Optional[str]): String marking the end of the section. + If ``None``, reads to the end of the file. Returns: Optional[str]: Extracted section as string, or None if not found. @@ -672,35 +674,281 @@ def _extract_section(file_path: str, section_start: str, section_end: str) -> Op start_idx = text.find(section_start) if start_idx == -1: return None + if section_end is None: + return text[start_idx:] end_idx = text.find(section_end, start_idx + len(section_start)) if end_idx == -1: return None return text[start_idx:end_idx + len(section_end)] -def _section_contains_key(file_path: str, section_start: str, section_end: str, target: str) -> bool: + +def _get_qm_corrections_files() -> List[str]: + """ + Return quantum corrections data.py paths from the RMG database. """ - Check if the target string appears in a section with flexible attribute handling. + candidates = [ + os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py'), + os.path.join(RMG_DB_PATH, 'quantum_corrections', 'data.py'), + ] + existing = [path for path in candidates if os.path.isfile(path)] + if not existing: + raise InputError( + "Could not locate Arkane quantum corrections data.py. " + f"Checked: {', '.join(candidates)}. " + "Please set RMG_DB_PATH to a valid RMG database." + ) + return existing - Args: - file_path (str): Path to the file. - section_start (str): Section start marker. - section_end (str): Section end marker. - target (str): String to search for. - Returns: - bool: True if target found, False otherwise. +def _normalize_name(name: Optional[str]) -> Optional[str]: + """ + Normalize a method or basis name for comparison: + - lowercase + - remove hyphens and spaces + + Examples: + "DLPNO-CCSD(T)-F12" -> "dlpnoccsd(t)f12" + "cc-pVTZ-F12" -> "ccpvtzf12" + """ + if name is None: + return None + return name.replace('-', '').replace(' ', '').lower() + + +def _split_method_year(method_norm: str) -> tuple: + """ + Split a normalized method into (base_method, year). + + Examples: + "dlpnoccsd(t)f122023" -> ("dlpnoccsd(t)f12", 2023) + "dlpnoccsd(t)f12" -> ("dlpnoccsd(t)f12", None) + """ + m = re.match(r"^(.*?)(\d{4})$", method_norm) + if not m: + return method_norm, None + base, year_str = m.groups() + return base, int(year_str) + + +def _parse_lot_params(lot_str: str) -> dict: + """ + Parse method, basis, and software from a LevelOfTheory(...) string. + + Example lot_str: + "LevelOfTheory(method='dlpnoccsd(t)f122023',basis='ccpvtzf12',software='orca')" + """ + params = {'method': None, 'basis': None, 'software': None} + for key in params.keys(): + m = re.search(rf"{key}='([^']+)'", lot_str) + if m: + params[key] = m.group(1) + return params + + +def _iter_level_keys_from_section(file_path: str, + section_start: str, + section_end: Optional[str] = None) -> List[str]: + """ + Return all LevelOfTheory(...) key strings that appear as dictionary keys + in a given section of data.py. + + These look like: + "LevelOfTheory(method='...',basis='...',software='...')" : { ... } """ section = _extract_section(file_path, section_start, section_end) if section is None: - return False - if target in section: # Check for exact match first - return True - if 'software=' in target: # Check for partial match without software - no_software = re.sub(r",\s*software='[^']*'", '', target) - if no_software in section: - return True - return False + return [] + + # Match things like: "LevelOfTheory(...)" : { ... } + pattern = r'"(LevelOfTheory\([^"]*\))"\s*:' + return re.findall(pattern, section, flags=re.DOTALL) + + +def _available_years_for_level(level: "Level", + file_path: str, + section_start: str, + section_end: Optional[str] = None) -> List[Optional[int]]: + """ + Return a sorted list of available year suffixes for a given Level in a section. + """ + if level is None or level.method is None: + return [] + + target_method_norm = _normalize_name(level.method) + target_base, _ = _split_method_year(target_method_norm) + target_basis_norm = _normalize_name(level.basis) + target_software = level.software.lower() if level.software else None + + years = set() + for lot_str in _iter_level_keys_from_section(file_path, section_start, section_end): + params = _parse_lot_params(lot_str) + cand_method = params.get('method') + cand_basis = params.get('basis') + cand_sw = params.get('software') + + if cand_method is None: + continue + + cand_method_norm = _normalize_name(cand_method) + cand_base, cand_year = _split_method_year(cand_method_norm) + + if cand_base != target_base: + continue + if target_basis_norm is not None: + cand_basis_norm = _normalize_name(cand_basis) + if cand_basis_norm != target_basis_norm: + continue + if target_software is not None and cand_sw is not None: + if cand_sw.lower() != target_software: + continue + + years.add(cand_year) + + # Sort with None first to represent "no year suffix" + return sorted(years, key=lambda y: (-1 if y is None else y)) + + +def _format_years(years: List[Optional[int]]) -> str: + """ + Format a list of years for logging. + """ + if not years: + return "none" + return ", ".join("none" if y is None else str(y) for y in years) + + +def _find_best_across_files(level: "Level", + qm_corr_files: List[str], + section_start: str, + section_end: Optional[str], + ) -> Optional[str]: + """ + Search all quantum-corrections files for the best matching LevelOfTheory key. + + Returns the first match found, preserving file priority order. + """ + for qm_corr_file in qm_corr_files: + result = _find_best_level_key_for_sp_level(level, qm_corr_file, section_start, section_end) + if result is not None: + return result + return None + + +def _all_available_years(level: "Level", + qm_corr_files: List[str], + section_start: str, + section_end: Optional[str], + ) -> List[Optional[int]]: + """ + Aggregate available year suffixes for a Level across all quantum-corrections files. + """ + years = set() + for qm_corr_file in qm_corr_files: + years.update(_available_years_for_level(level, qm_corr_file, section_start, section_end)) + return sorted(years, key=lambda y: (-1 if y is None else y)) + + +def _warn_no_match(level: "Level", + qm_corr_files: List[str], + section_start: str, + section_end: Optional[str], + label: str = "AEC", + ) -> None: + """ + Log a warning when no matching LevelOfTheory key was found, listing available years. + """ + years = _all_available_years(level, qm_corr_files, section_start, section_end) + if level.year is not None: + logger.warning( + f"No Arkane {label} entry found for year {level.year} at {level.simple()}; " + f"available years: {_format_years(years)}" + ) + elif years: + logger.warning( + f"No Arkane {label} entry found for {level.simple()} without a year; " + f"available years: {_format_years(years)}. " + f"Specify a year to select a matching entry." + ) + + +def _find_best_level_key_for_sp_level(level: "Level", + file_path: str, + section_start: str, + section_end: Optional[str] = None) -> Optional[str]: + """ + Given an ARC Level and a data.py section, find the LevelOfTheory(...) key string + that best matches the level's method/basis, allowing: + - hyphen-insensitive comparison + - an optional 4-digit year suffix in Arkane's method + and choose the *no-year* entry when no year is specified. + """ + if level is None or level.method is None: + return None + + target_method_norm = _normalize_name(level.method) + target_base, method_year = _split_method_year(target_method_norm) + explicit_year = level.year + if explicit_year is not None and method_year is not None and explicit_year != method_year: + raise InputError( + f"Conflicting year specifications for level '{level}': " + f"explicit year={explicit_year}, method suffix year={method_year}. " + "Please remove the year suffix from the method name or update the 'year' attribute to match." + ) + target_year = explicit_year if explicit_year is not None else method_year + target_basis_norm = _normalize_name(level.basis) + target_software = level.software.lower() if level.software else None + + best_key = None + best_year = None + + for lot_str in _iter_level_keys_from_section(file_path, section_start, section_end): + params = _parse_lot_params(lot_str) + cand_method = params.get('method') + cand_basis = params.get('basis') + cand_sw = params.get('software') + + if cand_method is None: + continue + + cand_method_norm = _normalize_name(cand_method) + cand_base, cand_year = _split_method_year(cand_method_norm) + + # method base must match + if cand_base != target_base: + continue + + # basis must match (normalized), if we have one + if target_basis_norm is not None: + cand_basis_norm = _normalize_name(cand_basis) + if cand_basis_norm != target_basis_norm: + continue + + # Software matching: if BOTH user and DB specify software, they must agree. + # If either side omits software the entry still passes, so a user without a + # software preference matches the first qualifying entry in file order. + if target_software is not None and cand_sw is not None: + if cand_sw.lower() != target_software: + continue + + # Exact year requested: take the first candidate with that year. + # Earlier filters already ensured method/basis/software match. + if target_year is not None: + if cand_year != target_year: + continue + best_key = lot_str + break + + # No target year specified: prefer no-year entry; if absent, pick latest year. + if cand_year is None: + best_key = lot_str + break + + if best_year is None or cand_year > best_year: + best_year = cand_year + best_key = lot_str + + return best_key def _level_to_str(level: 'Level') -> str: @@ -713,12 +961,16 @@ def _level_to_str(level: 'Level') -> str: Returns: str: LevelOfTheory string representation. """ - parts = [f"method='{level.method}'"] + method = _normalize_name(level.method) + if level.year is not None and not method.endswith(str(level.year)): + method = f"{method}{level.year}" + + parts = [f"method='{method}'"] if level.basis: - parts.append(f"basis='{level.basis}'") + parts.append(f"basis='{_normalize_name(level.basis)}'") if level.software: - parts.append(f"software='{level.software}'") - return f"LevelOfTheory({','.join(parts)})".replace('-','') + parts.append(f"software='{level.software.lower()}'") + return f"LevelOfTheory({','.join(parts)})".replace('-', '') def get_arkane_model_chemistry(sp_level: 'Level', @@ -728,6 +980,17 @@ def get_arkane_model_chemistry(sp_level: 'Level', """ Get Arkane model chemistry string with database validation. + Reads quantum_corrections/data.py as plain text, searches for + LevelOfTheory(...) keys, and matches: + - method: ignoring hyphens and optional 4-digit year suffix + - basis: ignoring hyphens and spaces + + When a year is explicitly specified in the Level, only entries with that exact + year are matched. If no year is specified and an entry without a year exists, + that entry is used. Only when no year is specified and no no-year entry exists, + if multiple entries differ only by year, the one with the *latest* year is + chosen. + Args: sp_level (Level): Level of theory for energy. freq_level (Optional[Level]): Level of theory for frequencies. @@ -736,52 +999,41 @@ def get_arkane_model_chemistry(sp_level: 'Level', Returns: Optional[str]: Arkane-compatible model chemistry string. """ - if sp_level.method_type == 'composite': - return f"LevelOfTheory(method='{sp_level.method}',software='gaussian')" - - qm_corr_file = os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py') - if not os.path.isfile(qm_corr_file): - qm_corr_file = os.path.join(RMG_DB_PATH, 'quantum_corrections', 'data.py') - - atom_energies_start = "atom_energies = {" - atom_energies_end = "pbac = {" - freq_dict_start = "freq_dict = {" - freq_dict_end = "}" - - sp_repr = _level_to_str(sp_level) - quoted_sp_repr = f'"{sp_repr}"' - - if freq_scale_factor is not None: - found = _section_contains_key(file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr) - if not found: + qm_corr_files = _get_qm_corrections_files() + + aec_start = "atom_energies = {" + aec_end = "pbac = {" + freq_start = "freq_dict = {" + freq_end = None # freq_dict is the last section in data.py; read to EOF + + # Composite methods and user-supplied freq_scale_factor both only need an AEC entry. + if sp_level.method_type == 'composite' or freq_scale_factor is not None: + best_energy = _find_best_across_files(sp_level, qm_corr_files, aec_start, aec_end) + if best_energy is None: + _warn_no_match(sp_level, qm_corr_files, aec_start, aec_end, label="AEC") return None - return sp_repr + return best_energy + # CompositeLevelOfTheory: need both energy (AEC) and frequency entries. if freq_level is None: raise ValueError("freq_level required when freq_scale_factor isn't provided") - freq_repr = _level_to_str(freq_level) - quoted_freq_repr = f'"{freq_repr}"' + best_energy = _find_best_across_files(sp_level, qm_corr_files, aec_start, aec_end) + best_freq = _find_best_across_files(freq_level, qm_corr_files, freq_start, freq_end) - found_sp = _section_contains_key(file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr) - found_freq = _section_contains_key(file_path=qm_corr_file, - section_start=freq_dict_start, - section_end=freq_dict_end, - target=quoted_freq_repr) - - if not found_sp or not found_freq: + if best_energy is None or best_freq is None: + if best_energy is None: + _warn_no_match(sp_level, qm_corr_files, aec_start, aec_end, label="AEC") + if best_freq is None: + _warn_no_match(freq_level, qm_corr_files, freq_start, freq_end, label="frequency correction") return None - return (f"CompositeLevelOfTheory(\n" - f" freq={freq_repr},\n" - f" energy={sp_repr}\n" - f")") + return ( + "CompositeLevelOfTheory(\n" + f" freq={best_freq},\n" + f" energy={best_energy}\n" + ")" + ) def check_arkane_bacs(sp_level: 'Level', @@ -791,45 +1043,56 @@ def check_arkane_bacs(sp_level: 'Level', """ Check that Arkane has AECs and BACs for the given sp level of theory. + Uses plain-text parsing of quantum_corrections/data.py, matching LevelOfTheory + keys by method base (ignore hyphens + optional year) and basis (normalized), + picking the latest year where multiple exist. + Args: sp_level (Level): Level of theory for energy. - bac_type (str): Type of bond additivity correction ('p' for Petersson, 'm' for Melius) + bac_type (str): Type of bond additivity correction ('p' for Petersson, 'm' for Melius). raise_error (bool): Whether to raise an error if AECs or BACs are missing. Returns: bool: True if both AECs and BACs are available, False otherwise. """ - qm_corr_file = os.path.join(RMG_DB_PATH, 'input', 'quantum_corrections', 'data.py') - if not os.path.isfile(qm_corr_file): - qm_corr_file = os.path.join(RMG_DB_PATH, 'quantum_corrections', 'data.py') + qm_corr_files = _get_qm_corrections_files() - atom_energies_start = "atom_energies = {" - atom_energies_end = "pbac = {" + aec_start = "atom_energies = {" + aec_end = "pbac = {" if bac_type.lower() == 'm': - bac_section_start = "mbac = {" - bac_section_end = "freq_dict =" + bac_start = "mbac = {" + bac_end = "freq_dict =" else: - bac_section_start = "pbac = {" - bac_section_end = "mbac = {" + bac_start = "pbac = {" + bac_end = "mbac = {" - sp_repr = _level_to_str(sp_level) - quoted_sp_repr = f'"{sp_repr}"' + best_aec_key = _find_best_across_files(sp_level, qm_corr_files, aec_start, aec_end) + best_bac_key = _find_best_across_files(sp_level, qm_corr_files, bac_start, bac_end) - has_aec = _section_contains_key( - file_path=qm_corr_file, - section_start=atom_energies_start, - section_end=atom_energies_end, - target=quoted_sp_repr, - ) - has_bac = _section_contains_key( - file_path=qm_corr_file, - section_start=bac_section_start, - section_end=bac_section_end, - target=quoted_sp_repr, - ) + has_aec = best_aec_key is not None + has_bac = best_bac_key is not None has_encorr = bool(has_aec and has_bac) + if not has_encorr: - mssg = f"Arkane does not have the required energy corrections for {sp_repr} (AEC: {has_aec}, BAC: {has_bac})" + repr_level = best_aec_key if best_aec_key is not None else _level_to_str(sp_level) + year_note = "" + aec_years = _all_available_years(sp_level, qm_corr_files, aec_start, aec_end) + bac_years = _all_available_years(sp_level, qm_corr_files, bac_start, bac_end) + if sp_level.year is not None: + year_note = ( + f" Available AEC years: {_format_years(aec_years)}; " + f"available BAC years: {_format_years(bac_years)}." + ) + elif aec_years or bac_years: + year_note = ( + f" Available AEC years: {_format_years(aec_years)}; " + f"available BAC years: {_format_years(bac_years)}. " + f"Specify a year to select a matching entry." + ) + mssg = ( + f"Arkane does not have the required energy corrections for {repr_level} " + f"(AEC: {has_aec}, BAC: {has_bac}).{year_note}" + ) if raise_error: raise ValueError(mssg) else: diff --git a/arc/statmech/arkane_test.py b/arc/statmech/arkane_test.py index 83d357f00a..e5d84d193f 100644 --- a/arc/statmech/arkane_test.py +++ b/arc/statmech/arkane_test.py @@ -7,16 +7,31 @@ import os import shutil +import tempfile import unittest from arc.common import ARC_PATH, ARC_TESTING_PATH +from arc.exceptions import InputError from arc.level import Level from arc.reaction import ARCReaction from arc.species import ARCSpecies from arc.statmech.adapter import StatmechEnum from arc.statmech.arkane import ArkaneAdapter -from arc.statmech.arkane import _level_to_str, _section_contains_key, get_arkane_model_chemistry -from arc.imports import settings +from arc.statmech.arkane import ( + _all_available_years, + _available_years_for_level, + _extract_section, + _find_best_across_files, + _find_best_level_key_for_sp_level, + _get_qm_corrections_files, + _level_to_str, + _normalize_name, + _parse_lot_params, + _split_method_year, + _warn_no_match, + get_arkane_model_chemistry, +) +from unittest.mock import patch class TestEnumerationClasses(unittest.TestCase): @@ -134,26 +149,8 @@ def test_level_to_str(self): "LevelOfTheory(method='b3lyp',basis='631g(d)',software='gaussian')") self.assertEqual(_level_to_str(Level(method='CCSD(T)-F12', basis='cc-pVTZ-F12')), "LevelOfTheory(method='ccsd(t)f12',basis='ccpvtzf12',software='molpro')") - - def test_section_contains_key(self): - """Test the _section_contains_key function""" - rmg_db_path = settings.get('RMG_DB_PATH') - file_path = os.path.join(rmg_db_path, 'input', 'quantum_corrections', 'data.py') - if not os.path.isfile(file_path): - file_path = os.path.join(rmg_db_path, 'quantum_corrections', 'data.py') - self.assertTrue(os.path.isfile(file_path), f'RMG quantum corrections file not found at {file_path}') - self.assertTrue(_section_contains_key(file_path=file_path, - section_start="atom_energies = {", - section_end="pbac = {", - target="LevelOfTheory(method='b97d32023',basis='def2tzvp',software='gaussian')")) - self.assertTrue(_section_contains_key(file_path=file_path, - section_start="atom_energies = {", - section_end="pbac = {", - target="LevelOfTheory(method='ccsd(t)f12',basis='ccpvtzf12',software='molpro')")) - self.assertFalse(_section_contains_key(file_path=file_path, - section_start="atom_energies = {", - section_end="pbac = {", - target="LevelOfTheory(method=imaginary',basis='basis',software='ess')")) + self.assertEqual(_level_to_str(Level(method='b97d3', basis='def2tzvp', software='gaussian', year=2023)), + "LevelOfTheory(method='b97d32023',basis='def2tzvp',software='gaussian')") def test_get_arkane_model_chemistry(self): """Test the get_arkane_model_chemistry function""" @@ -162,7 +159,222 @@ def test_get_arkane_model_chemistry(self): "LevelOfTheory(method='ccsd(t)f12',basis='ccpvtzf12',software='molpro')") self.assertEqual(get_arkane_model_chemistry(sp_level=Level(method='CBS-QB3'), freq_scale_factor=1.0), - "LevelOfTheory(method='cbs-qb3',software='gaussian')") + "LevelOfTheory(method='cbsqb3',software='gaussian')") + + def test_get_arkane_model_chemistry_year_not_found(self): + """Test warnings when a requested year is not found in the Arkane database.""" + level = Level(method='b97d3', basis='def2tzvp', software='gaussian', year=2099) + with self.assertLogs('arc', level='WARNING') as cm: + model_chemistry = get_arkane_model_chemistry(sp_level=level, freq_scale_factor=1.0) + self.assertIsNone(model_chemistry) + self.assertTrue(any('available years' in msg for msg in cm.output)) + + def test_get_arkane_model_chemistry_latest_year(self): + """Test selecting the latest available year when no year is specified.""" + model_chemistry = get_arkane_model_chemistry(sp_level=Level(method='CBS-QB3'), + freq_scale_factor=1.0) + self.assertEqual(model_chemistry, "LevelOfTheory(method='cbsqb3',software='gaussian')") + + def test_level_helpers(self): + """Test helper functions for method/basis/year parsing.""" + self.assertEqual(_normalize_name("DLPNO-CCSD(T)-F12"), "dlpnoccsd(t)f12") + self.assertEqual(_normalize_name("dlpnoccsd(t)f122023"), "dlpnoccsd(t)f122023") + + base, year = _split_method_year("dlpnoccsd(t)f122023") + self.assertEqual(base, "dlpnoccsd(t)f12") + self.assertEqual(year, 2023) + base, year = _split_method_year("dlpnoccsd(t)f12") + self.assertEqual(base, "dlpnoccsd(t)f12") + self.assertIsNone(year) + + self.assertEqual(_normalize_name("cc-pVTZ-F12"), "ccpvtzf12") + self.assertEqual(_normalize_name("ccpvtz f12"), "ccpvtzf12") + self.assertIsNone(_normalize_name(None)) + + params = _parse_lot_params( + "LevelOfTheory(method='dlpnoccsd(t)f122023',basis='ccpvtzf12',software='orca')" + ) + self.assertEqual(params["method"], "dlpnoccsd(t)f122023") + self.assertEqual(params["basis"], "ccpvtzf12") + self.assertEqual(params["software"], "orca") + + def test_level_key_selection(self): + """Test matching of LevelOfTheory keys by year and no-year preference.""" + section = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='cbsqb3',software='gaussian')\": {},", + " \"LevelOfTheory(method='cbsqb32023',software='gaussian')\": {},", + "}", + "pbac = {", + ]) + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: + f.write(section) + path = f.name + try: + level = Level(method="CBS-QB3", software="gaussian") + best = _find_best_level_key_for_sp_level(level, path, "atom_energies = {", "pbac = {") + self.assertEqual(best, "LevelOfTheory(method='cbsqb3',software='gaussian')") + + level_year = Level(method="CBS-QB3", software="gaussian", year=2023) + best_year = _find_best_level_key_for_sp_level(level_year, path, "atom_energies = {", "pbac = {") + self.assertEqual(best_year, "LevelOfTheory(method='cbsqb32023',software='gaussian')") + + years = _available_years_for_level(level, path, "atom_energies = {", "pbac = {") + self.assertEqual(years, [None, 2023]) + finally: + os.remove(path) + + def test_conflicting_year_spec(self): + """Test conflicting year in method suffix vs explicit year.""" + section = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='b97d32023',software='gaussian')\": {},", + "}", + "pbac = {", + ]) + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: + f.write(section) + path = f.name + try: + level = Level(method="b97d32023", software="gaussian", year=2022) + with self.assertRaises(InputError): + _find_best_level_key_for_sp_level(level, path, "atom_energies = {", "pbac = {") + finally: + os.remove(path) + + def test_qm_corrections_file_path(self): + """Test quantum corrections files are read from the RMG database path.""" + with tempfile.TemporaryDirectory() as rmg_root: + rmg_qc = os.path.join(rmg_root, 'input', 'quantum_corrections', 'data.py') + os.makedirs(os.path.dirname(rmg_qc), exist_ok=True) + with open(rmg_qc, 'w') as f: + f.write('# rmg qc\n') + + with patch('arc.statmech.arkane.RMG_DB_PATH', rmg_root): + paths = _get_qm_corrections_files() + self.assertTrue(paths) + self.assertEqual(paths[0], rmg_qc) + + def test_get_arkane_model_chemistry_from_qm_file(self): + """Test reading LevelOfTheory keys from a quantum corrections file.""" + section = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='cbsqb3',software='gaussian')\": {},", + "}", + "pbac = {", + ]) + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: + f.write(section) + path = f.name + try: + with patch('arc.statmech.arkane._get_qm_corrections_files', return_value=[path]): + model_chemistry = get_arkane_model_chemistry( + sp_level=Level(method='CBS-QB3'), + freq_scale_factor=1.0, + ) + self.assertEqual(model_chemistry, "LevelOfTheory(method='cbsqb3',software='gaussian')") + finally: + os.remove(path) + + def test_extract_section_eof(self): + """Test _extract_section with section_end=None reads to EOF.""" + content = "header\nfreq_dict = {\n key: val,\n}\ntrailer\n" + with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".py") as f: + f.write(content) + path = f.name + try: + section = _extract_section(path, "freq_dict = {", None) + self.assertIn("key: val", section) + self.assertIn("trailer", section) + # With an explicit end marker, trailer is excluded + section_bounded = _extract_section(path, "freq_dict = {", "}") + self.assertNotIn("trailer", section_bounded) + finally: + os.remove(path) + + def test_find_best_across_files(self): + """Test multi-file search returns first match without overwriting.""" + file1_content = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='b3lyp',basis='631g(d)',software='gaussian')\": {},", + '}', + 'pbac = {', + ]) + file2_content = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='wb97xd',basis='def2tzvp',software='gaussian')\": {},", + '}', + 'pbac = {', + ]) + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f1, \ + tempfile.NamedTemporaryFile(mode="w+", delete=False) as f2: + f1.write(file1_content) + f2.write(file2_content) + path1, path2 = f1.name, f2.name + try: + # b3lyp is only in file1 — should be found + level_b3 = Level(method='B3LYP', basis='6-31G(d)', software='gaussian') + result = _find_best_across_files(level_b3, [path1, path2], "atom_energies = {", "pbac = {") + self.assertIn("b3lyp", result) + # wb97xd is only in file2 — should still be found + level_wb = Level(method='wB97X-D', basis='def2-TZVP', software='gaussian') + result = _find_best_across_files(level_wb, [path1, path2], "atom_energies = {", "pbac = {") + self.assertIn("wb97xd", result) + # imaginary method — not in either file + level_fake = Level(method='fake', basis='fake') + result = _find_best_across_files(level_fake, [path1, path2], "atom_energies = {", "pbac = {") + self.assertIsNone(result) + finally: + os.remove(path1) + os.remove(path2) + + def test_all_available_years_aggregates(self): + """Test _all_available_years aggregates across files.""" + file1 = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='b97d3',basis='def2tzvp',software='gaussian')\": {},", + '}', + 'pbac = {', + ]) + file2 = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='b97d32023',basis='def2tzvp',software='gaussian')\": {},", + '}', + 'pbac = {', + ]) + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f1, \ + tempfile.NamedTemporaryFile(mode="w+", delete=False) as f2: + f1.write(file1) + f2.write(file2) + path1, path2 = f1.name, f2.name + try: + level = Level(method='b97d3', basis='def2tzvp', software='gaussian') + years = _all_available_years(level, [path1, path2], "atom_energies = {", "pbac = {") + self.assertIn(None, years) + self.assertIn(2023, years) + finally: + os.remove(path1) + os.remove(path2) + + def test_warn_no_match_logs(self): + """Test _warn_no_match emits a warning with available years.""" + file_content = '\n'.join([ + 'atom_energies = {', + " \"LevelOfTheory(method='b97d32023',basis='def2tzvp',software='gaussian')\": {},", + '}', + 'pbac = {', + ]) + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as f: + f.write(file_content) + path = f.name + try: + level = Level(method='b97d3', basis='def2tzvp', software='gaussian', year=2099) + with self.assertLogs('arc', level='WARNING') as cm: + _warn_no_match(level, [path], "atom_energies = {", "pbac = {", label="AEC") + self.assertTrue(any('year 2099' in msg for msg in cm.output)) + self.assertTrue(any('2023' in msg for msg in cm.output)) + finally: + os.remove(path) def test_generate_arkane_input(self): """Test generating Arkane input""" diff --git a/docs/source/advanced.rst b/docs/source/advanced.rst index 74987b97e0..94e871cec9 100644 --- a/docs/source/advanced.rst +++ b/docs/source/advanced.rst @@ -95,9 +95,21 @@ Another example:: specifies ``DLPNO-CCSD(T)-F12/cc-pVTZ-F12`` model chemistry along with two auxiliary basis sets, ``aug-cc-pVTZ/C`` and ``cc-pVTZ-F12-CABS``, with ``TightOpt`` for a single point energy calculation. +You can also provide a 4-digit ``year`` on ``arkane_level_of_theory`` to distinguish method variants +in the Arkane database (e.g., ``b97d3`` vs ``b97d32023``):: + arkane_level_of_theory: + method: b97d3 + basis: def2tzvp + year: 2023 -THe following are examples for **equivalent** definitions:: +If ``year`` is omitted, ARC will prefer the no-year Arkane entry for that method/basis. If no entry +without a year exists, ARC will use the latest available year in the Arkane database. If no entry +exists at all (neither with nor without a year), ARC will warn the user and proceed without +atom energy or bond additivity corrections. + + +The following are examples for **equivalent** definitions:: opt_level = 'apfd/def2tzvp' opt_level = {'method': 'apfd', 'basis': 'def2tzvp'} @@ -138,6 +150,12 @@ is equivalent to:: scan_level = {'method': 'wb97xd', 'basis': 'def2svp'} sp_level = {'method': 'wb97xd', 'basis': 'def2svp'} +Note: Year suffixes in the method (e.g., ``wb97xd32023``) are meant for Arkane database matching +and are not valid QC methods. Do not include year suffixes in ``level_of_theory``; instead, specify a +``year`` key on ``arkane_level_of_theory`` if you need a specific atom or bond energy correction year. +See `here `_ +for all of Arkane's corrections. + Note: If ``level_of_theory`` does not contain any deliminator (neither ``//`` nor ``\/``), it is interpreted as a composite method. diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 7c9b245cf4..e65addb3ea 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -84,6 +84,13 @@ To specify a composite method, simply define something like:: level_of_theory: CBS-QB3 +Note: Do not include year suffixes in ``level_of_theory`` (e.g., ``wb97xd32023``). Year suffixes are +for Arkane database matching only and are not valid QC methods. If you need a specific correction year, +specify a ``year`` key on ``arkane_level_of_theory``. + +If ``year`` is omitted, ARC will prefer the no-year Arkane entry for that method/basis; if none exists, +ARC will fall back to the latest available year in the Arkane database. + Note that for composite methods the ``freq_level`` and ``scan_level`` may have different default values than for non-composite methods (defined in settings.py). Note: an independent frequencies calculation job is automatically executed after a composite job just so that the Hamiltonian will diff --git a/functional/restart_test.py b/functional/restart_test.py index d49c2e945c..35594910b8 100644 --- a/functional/restart_test.py +++ b/functional/restart_test.py @@ -16,6 +16,14 @@ from arc.main import ARC +def _project_name(base: str) -> str: + """Return a per-xdist-worker project name to avoid parallel cleanup collisions.""" + worker_id = os.environ.get('PYTEST_XDIST_WORKER') + if worker_id: + return f'{base}_{worker_id}' + return base + + class TestRestart(unittest.TestCase): """ Contains unit tests for restarting ARC. @@ -36,7 +44,7 @@ def test_restart_thermo(self): """ restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '1_restart_thermo') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'arc_project_for_testing_delete_after_usage_restart_thermo' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_thermo') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs', 'Species'), dirs_exist_ok=True) @@ -55,7 +63,7 @@ def test_restart_thermo(self): break self.assertTrue(thermo_dft_ccsdtf12_bac) - with open(os.path.join(project_directory, 'arc_project_for_testing_delete_after_usage_restart_thermo.info'), 'r') as f: + with open(os.path.join(project_directory, f'{project}.info'), 'r') as f: sts, n2h3, oet, lot, ap = False, False, False, False, False for line in f.readlines(): if 'Considered the following species and TSs:' in line: @@ -66,7 +74,7 @@ def test_restart_thermo(self): oet = True elif 'Levels of theory used:' in line: lot = True - elif 'ARC project arc_project_for_testing_delete_after_usage_restart_thermo' in line: + elif f'ARC project {project}' in line: ap = True self.assertTrue(sts) self.assertTrue(n2h3) @@ -133,7 +141,7 @@ def test_restart_rate_1(self): """Test restarting ARC and attaining a reaction rate coefficient""" restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '2_restart_rate') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'arc_project_for_testing_delete_after_usage_restart_rate_1' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_rate_1') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -154,7 +162,7 @@ def test_restart_rate_1(self): def test_restart_rate_2(self): """Test restarting ARC and attaining a reaction rate coefficient""" - project = 'arc_project_for_testing_delete_after_usage_restart_rate_2' + project = _project_name('arc_project_for_testing_delete_after_usage_restart_rate_2') project_directory = os.path.join(ARC_PATH, 'Projects', project) base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '5_TS1') restart_path = os.path.join(base_path, 'restart.yml') @@ -183,7 +191,7 @@ def test_restart_bde (self): """Test restarting ARC and attaining a BDE for anilino_radical.""" restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '3_restart_bde') restart_path = os.path.join(restart_dir, 'restart.yml') - project = 'test_restart_bde' + project = _project_name('test_restart_bde') project_directory = os.path.join(ARC_PATH, 'Projects', project) os.makedirs(os.path.dirname(project_directory), exist_ok=True) shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True) @@ -192,7 +200,7 @@ def test_restart_bde (self): arc1 = ARC(**input_dict) arc1.execute() - report_path = os.path.join(ARC_PATH, 'Projects', 'test_restart_bde', 'output', 'BDE_report.txt') + report_path = os.path.join(ARC_PATH, 'Projects', project, 'output', 'BDE_report.txt') with open(report_path, 'r') as f: lines = f.readlines() self.assertIn(' BDE report for anilino_radical:\n', lines) @@ -218,10 +226,10 @@ def tearDownClass(cls): A function that is run ONCE after all unit tests in this class. Delete all project directories created during these unit tests """ - projects = ['arc_project_for_testing_delete_after_usage_restart_thermo', - 'arc_project_for_testing_delete_after_usage_restart_rate_1', - 'arc_project_for_testing_delete_after_usage_restart_rate_2', - 'test_restart_bde', + projects = [_project_name('arc_project_for_testing_delete_after_usage_restart_thermo'), + _project_name('arc_project_for_testing_delete_after_usage_restart_rate_1'), + _project_name('arc_project_for_testing_delete_after_usage_restart_rate_2'), + _project_name('test_restart_bde'), ] for project in projects: project_directory = os.path.join(ARC_PATH, 'Projects', project) @@ -244,3 +252,4 @@ def tearDownClass(cls): if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..35219b8842 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,8 @@ +[build-system] +requires = [ + "setuptools>=64", + "wheel", + "Cython", + "numpy", +] +build-backend = "setuptools.build_meta"