|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +############################################################################### |
| 4 | +# # |
| 5 | +# RMG - Reaction Mechanism Generator # |
| 6 | +# # |
| 7 | +# Copyright (c) 2002-2023 Prof. William H. Green (whgreen@mit.edu), # |
| 8 | +# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) # |
| 9 | +# # |
| 10 | +# Permission is hereby granted, free of charge, to any person obtaining a # |
| 11 | +# copy of this software and associated documentation files (the 'Software'), # |
| 12 | +# to deal in the Software without restriction, including without limitation # |
| 13 | +# the rights to use, copy, modify, merge, publish, distribute, sublicense, # |
| 14 | +# and/or sell copies of the Software, and to permit persons to whom the # |
| 15 | +# Software is furnished to do so, subject to the following conditions: # |
| 16 | +# # |
| 17 | +# The above copyright notice and this permission notice shall be included in # |
| 18 | +# all copies or substantial portions of the Software. # |
| 19 | +# # |
| 20 | +# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # |
| 21 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # |
| 22 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # |
| 23 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # |
| 24 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # |
| 25 | +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # |
| 26 | +# DEALINGS IN THE SOFTWARE. # |
| 27 | +# # |
| 28 | +############################################################################### |
| 29 | + |
| 30 | +""" |
| 31 | +Arkane ESS adapter for xTB (extended Tight Binding) output files. |
| 32 | +Supports geometry (V2000 and $coord/Turbomol formats), energy, frequencies, |
| 33 | +ZPE, and conformer loading. |
| 34 | +""" |
| 35 | + |
| 36 | +import logging |
| 37 | +import re |
| 38 | + |
| 39 | +import numpy as np |
| 40 | + |
| 41 | +import rmgpy.constants as constants |
| 42 | +from rmgpy.statmech import ( |
| 43 | + Conformer, |
| 44 | + HarmonicOscillator, |
| 45 | + IdealGasTranslation, |
| 46 | + LinearRotor, |
| 47 | + NonlinearRotor, |
| 48 | +) |
| 49 | + |
| 50 | +from arkane.common import ( |
| 51 | + get_element_mass, |
| 52 | + get_principal_moments_of_inertia, |
| 53 | + symbol_by_number, |
| 54 | +) |
| 55 | +from arkane.exceptions import LogError |
| 56 | +from arkane.ess.adapter import ESSAdapter |
| 57 | +from arkane.ess.factory import register_ess_adapter |
| 58 | + |
| 59 | +logger = logging.getLogger(__name__) |
| 60 | + |
| 61 | +BOHR_TO_ANGSTROM = constants.a0 * 1e10 # ~0.52918 Angstrom |
| 62 | +INERTIA_ZERO_TOL = 1e-4 # amu*angstrom^2 |
| 63 | + |
| 64 | + |
| 65 | +def _normalize_symbol(raw: str) -> str: |
| 66 | + """Normalize an element symbol from xTB output (e.g. 'c' -> 'C', 'cl' -> 'Cl').""" |
| 67 | + if len(raw) == 1: |
| 68 | + return raw.upper() |
| 69 | + return raw[0].upper() + raw[1:].lower() |
| 70 | + |
| 71 | + |
| 72 | +class XTBLog(ESSAdapter): |
| 73 | + """ |
| 74 | + Arkane ESS adapter for xTB output files. |
| 75 | +
|
| 76 | + Parses output from the xtb program (https://github.com/grimme-lab/xtb), |
| 77 | + including geometry, electronic energy, vibrational frequencies, and |
| 78 | + thermochemical data. |
| 79 | +
|
| 80 | + Supported geometry formats: |
| 81 | + - V2000 (SDF/Molfile) block from ``--opt`` outputs |
| 82 | + - Turbomol ``$coord`` block (Bohr) from ``--opt`` outputs |
| 83 | + Frequency-only (``--hess``) outputs do not embed a geometry. |
| 84 | + """ |
| 85 | + |
| 86 | + def check_for_errors(self): |
| 87 | + """Check for common xTB errors in the output file.""" |
| 88 | + with open(self.path, 'r') as f: |
| 89 | + for line in f: |
| 90 | + if 'ERROR' in line and 'SETUP' not in line: |
| 91 | + raise LogError(f'xTB error found in {self.path}: {line.strip()}') |
| 92 | + if 'abnormal termination' in line.lower(): |
| 93 | + raise LogError(f'xTB job terminated abnormally in {self.path}') |
| 94 | + |
| 95 | + def get_number_of_atoms(self) -> int: |
| 96 | + """ |
| 97 | + Return the number of atoms from the xTB output. |
| 98 | +
|
| 99 | + Tries the explicit 'number of atoms' line first, then falls |
| 100 | + back to counting atoms in the geometry block. |
| 101 | + """ |
| 102 | + with open(self.path, 'r') as f: |
| 103 | + for line in f: |
| 104 | + if 'number of atoms' in line: |
| 105 | + return int(line.split()[-1]) |
| 106 | + try: |
| 107 | + coord, _, _ = self.load_geometry() |
| 108 | + return len(coord) |
| 109 | + except LogError: |
| 110 | + pass |
| 111 | + raise LogError(f'Could not determine the number of atoms in {self.path}') |
| 112 | + |
| 113 | + def load_geometry(self): |
| 114 | + """ |
| 115 | + Load the molecular geometry from the xTB output file. |
| 116 | +
|
| 117 | + Supports two formats found in xTB ``--opt`` outputs: |
| 118 | + - **V2000 (SDF/Molfile)**: Cartesian coordinates in Angstroms. |
| 119 | + - **Turbomol ``$coord``**: Cartesian coordinates in Bohr (converted to Angstroms). |
| 120 | +
|
| 121 | + Frequency-only (``--hess``) outputs do not embed a geometry and will raise. |
| 122 | +
|
| 123 | + Returns: |
| 124 | + Tuple of (coord, number, mass): |
| 125 | + coord: np.ndarray (n, 3) in Angstroms |
| 126 | + number: np.ndarray (n,) atomic numbers |
| 127 | + mass: np.ndarray (n,) atomic masses in amu |
| 128 | + """ |
| 129 | + with open(self.path, 'r') as f: |
| 130 | + lines = f.readlines() |
| 131 | + coord, number, mass = self._parse_v2000(lines) |
| 132 | + if not coord: |
| 133 | + coord, number, mass = self._parse_turbomol_coord(lines) |
| 134 | + if not coord: |
| 135 | + raise LogError(f'Could not find geometry in {self.path}') |
| 136 | + return (np.array(coord, dtype=np.float64), |
| 137 | + np.array(number, dtype=int), |
| 138 | + np.array(mass, dtype=np.float64)) |
| 139 | + |
| 140 | + @staticmethod |
| 141 | + def _parse_v2000(lines): |
| 142 | + """Parse V2000 (SDF/Molfile) geometry block. Returns (coord, number, mass) or empty lists.""" |
| 143 | + coord, number, mass = [], [], [] |
| 144 | + for i, line in enumerate(lines): |
| 145 | + if 'V2000' in line: |
| 146 | + n_atoms = int(line.split()[0]) |
| 147 | + coord, number, mass = [], [], [] |
| 148 | + for j in range(i + 1, i + 1 + n_atoms): |
| 149 | + tokens = lines[j].split() |
| 150 | + x, y, z = float(tokens[0]), float(tokens[1]), float(tokens[2]) |
| 151 | + symbol = tokens[3] |
| 152 | + coord.append([x, y, z]) |
| 153 | + mass_i, num_i = get_element_mass(symbol) |
| 154 | + number.append(num_i) |
| 155 | + mass.append(mass_i) |
| 156 | + return coord, number, mass |
| 157 | + |
| 158 | + @staticmethod |
| 159 | + def _parse_turbomol_coord(lines): |
| 160 | + """Parse Turbomol $coord block (Bohr). Converts to Angstroms.""" |
| 161 | + coord, number, mass = [], [], [] |
| 162 | + in_coord = False |
| 163 | + for line in lines: |
| 164 | + stripped = line.strip() |
| 165 | + if stripped == '$coord': |
| 166 | + in_coord = True |
| 167 | + coord, number, mass = [], [], [] |
| 168 | + continue |
| 169 | + if in_coord: |
| 170 | + if stripped.startswith('$'): |
| 171 | + break |
| 172 | + tokens = stripped.split() |
| 173 | + if len(tokens) >= 4: |
| 174 | + try: |
| 175 | + x = float(tokens[0]) * BOHR_TO_ANGSTROM |
| 176 | + y = float(tokens[1]) * BOHR_TO_ANGSTROM |
| 177 | + z = float(tokens[2]) * BOHR_TO_ANGSTROM |
| 178 | + symbol = _normalize_symbol(tokens[3]) |
| 179 | + coord.append([x, y, z]) |
| 180 | + mass_i, num_i = get_element_mass(symbol) |
| 181 | + number.append(num_i) |
| 182 | + mass.append(mass_i) |
| 183 | + except (ValueError, KeyError): |
| 184 | + break |
| 185 | + return coord, number, mass |
| 186 | + |
| 187 | + def load_energy(self, zpe_scale_factor=1.): |
| 188 | + """ |
| 189 | + Load the electronic energy in J/mol from the xTB output. |
| 190 | +
|
| 191 | + Returns the last ``total energy`` value found. The zero-point energy |
| 192 | + is NOT included. |
| 193 | + """ |
| 194 | + e_elect = None |
| 195 | + with open(self.path, 'r') as f: |
| 196 | + for line in f: |
| 197 | + if ':: total energy' in line: |
| 198 | + match = re.search(r'(-?\d+\.\d+)\s+Eh', line) |
| 199 | + if match: |
| 200 | + e_elect = float(match.group(1)) |
| 201 | + elif 'TOTAL ENERGY' in line and 'Eh' in line: |
| 202 | + match = re.search(r'(-?\d+\.\d+)\s+Eh', line) |
| 203 | + if match: |
| 204 | + e_elect = float(match.group(1)) |
| 205 | + if e_elect is None: |
| 206 | + raise LogError(f'Unable to find energy in xTB output file {self.path}') |
| 207 | + return e_elect * constants.E_h * constants.Na |
| 208 | + |
| 209 | + def load_zero_point_energy(self): |
| 210 | + """ |
| 211 | + Load the zero-point energy in J/mol from the xTB output. |
| 212 | + Only available in frequency (``--hess``) calculations. |
| 213 | + """ |
| 214 | + zpe = None |
| 215 | + with open(self.path, 'r') as f: |
| 216 | + for line in f: |
| 217 | + if ':: zero point energy' in line or 'zero-point vibrational energy' in line.lower(): |
| 218 | + match = re.search(r'(\d+\.\d+)\s+Eh', line) |
| 219 | + if match: |
| 220 | + zpe = float(match.group(1)) |
| 221 | + if zpe is None: |
| 222 | + raise LogError(f'Unable to find zero-point energy in xTB output file {self.path}') |
| 223 | + return zpe * constants.E_h * constants.Na |
| 224 | + |
| 225 | + def _load_all_frequencies(self): |
| 226 | + """ |
| 227 | + Load ALL vibrational frequencies from the last eigval block in the xTB output, |
| 228 | + including near-zero (translational/rotational) and negative (imaginary) modes. |
| 229 | +
|
| 230 | + xTB prints frequencies twice (after Hessian and in Frequency Printout). |
| 231 | + Returns the last complete set. |
| 232 | + """ |
| 233 | + all_blocks, current_block = [], [] |
| 234 | + with open(self.path, 'r') as f: |
| 235 | + for line in f: |
| 236 | + if line.strip().startswith('eigval :'): |
| 237 | + values = line.split(':')[1].split() |
| 238 | + current_block.extend(float(v) for v in values) |
| 239 | + elif current_block and not line.strip().startswith('eigval'): |
| 240 | + all_blocks.append(current_block) |
| 241 | + current_block = [] |
| 242 | + if current_block: |
| 243 | + all_blocks.append(current_block) |
| 244 | + return all_blocks[-1] if all_blocks else [] |
| 245 | + |
| 246 | + def _load_frequencies(self): |
| 247 | + """ |
| 248 | + Load positive (real) vibrational frequencies in cm^-1. |
| 249 | + Filters out translational/rotational modes (near-zero) and imaginary (negative). |
| 250 | + """ |
| 251 | + return [f for f in self._load_all_frequencies() if f > 0.1] |
| 252 | + |
| 253 | + def _load_spin_multiplicity(self): |
| 254 | + """ |
| 255 | + Determine spin multiplicity from the xTB output. |
| 256 | +
|
| 257 | + xTB reports ``spin`` as S (not 2S+1), so multiplicity = 2*S + 1. |
| 258 | + Some xTB versions use ``--uhf N`` where N = number of unpaired electrons. |
| 259 | + Default to singlet. |
| 260 | + """ |
| 261 | + with open(self.path, 'r') as f: |
| 262 | + for line in f: |
| 263 | + # Format: " spin : 0.5" |
| 264 | + if 'spin' in line: |
| 265 | + parts = line.split() |
| 266 | + if parts and parts[0] == 'spin': |
| 267 | + s = float(parts[-1]) |
| 268 | + return int(2 * s + 1) |
| 269 | + # Also check for unpaired electrons in setup block |
| 270 | + if 'unpaired' in line and ':' in line: |
| 271 | + parts = line.split(':') |
| 272 | + try: |
| 273 | + n_unpaired = int(parts[-1].strip()) |
| 274 | + return n_unpaired + 1 |
| 275 | + except ValueError: |
| 276 | + pass |
| 277 | + return 1 |
| 278 | + |
| 279 | + def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=None, label=''): |
| 280 | + """ |
| 281 | + Load the molecular degree of freedom data from an xTB output file. |
| 282 | +
|
| 283 | + Requires geometry to be available in the log file (optimization outputs). |
| 284 | + For frequency-only outputs, geometry must be loaded from a separate file. |
| 285 | +
|
| 286 | + Returns: |
| 287 | + Tuple of (Conformer, unscaled_frequencies). |
| 288 | + """ |
| 289 | + if optical_isomers is None or symmetry is None: |
| 290 | + _optical_isomers, _symmetry, _ = self.get_symmetry_properties() |
| 291 | + if optical_isomers is None: |
| 292 | + optical_isomers = _optical_isomers |
| 293 | + if symmetry is None: |
| 294 | + symmetry = _symmetry |
| 295 | + |
| 296 | + if spin_multiplicity == 0: |
| 297 | + spin_multiplicity = self._load_spin_multiplicity() |
| 298 | + |
| 299 | + unscaled_frequencies = self._load_frequencies() |
| 300 | + modes = [] |
| 301 | + |
| 302 | + # Translation |
| 303 | + coord, number, mass = self.load_geometry() |
| 304 | + modes.append(IdealGasTranslation(mass=(sum(mass), "amu"))) |
| 305 | + |
| 306 | + # Rotation — use tolerance for linear molecule detection |
| 307 | + symbols = [symbol_by_number[i] for i in number] |
| 308 | + inertia = list(get_principal_moments_of_inertia(coord, numbers=number, symbols=symbols)[0]) |
| 309 | + if inertia and not all(abs(i) < INERTIA_ZERO_TOL for i in inertia): |
| 310 | + nonzero = [i for i in inertia if abs(i) >= INERTIA_ZERO_TOL] |
| 311 | + if len(nonzero) < len(inertia): |
| 312 | + # Linear molecule: one or more moments are ~zero |
| 313 | + modes.append(LinearRotor(inertia=(max(nonzero), "amu*angstrom^2"), symmetry=symmetry)) |
| 314 | + else: |
| 315 | + modes.append(NonlinearRotor(inertia=(nonzero, "amu*angstrom^2"), symmetry=symmetry)) |
| 316 | + |
| 317 | + # Vibration |
| 318 | + if unscaled_frequencies: |
| 319 | + modes.append(HarmonicOscillator(frequencies=(unscaled_frequencies, "cm^-1"))) |
| 320 | + |
| 321 | + return Conformer(E0=(0.0, "kJ/mol"), |
| 322 | + modes=modes, |
| 323 | + spin_multiplicity=spin_multiplicity, |
| 324 | + optical_isomers=optical_isomers), unscaled_frequencies |
| 325 | + |
| 326 | + def load_negative_frequency(self): |
| 327 | + """Load the first imaginary (negative) frequency in cm^-1 for a transition state.""" |
| 328 | + neg_freqs = [f for f in self._load_all_frequencies() if f < -0.1] |
| 329 | + if not neg_freqs: |
| 330 | + raise LogError(f'No imaginary frequencies found in {self.path}') |
| 331 | + return neg_freqs[0] |
| 332 | + |
| 333 | + def load_force_constant_matrix(self): |
| 334 | + """xTB writes a separate hessian file; not parsed from the main output.""" |
| 335 | + return None |
| 336 | + |
| 337 | + def load_scan_energies(self): |
| 338 | + raise NotImplementedError('Rotor scans are not supported by the xTB adapter.') |
| 339 | + |
| 340 | + def load_scan_pivot_atoms(self): |
| 341 | + raise NotImplementedError('Rotor scans are not supported by the xTB adapter.') |
| 342 | + |
| 343 | + def load_scan_frozen_atoms(self): |
| 344 | + raise NotImplementedError('Rotor scans are not supported by the xTB adapter.') |
| 345 | + |
| 346 | + |
| 347 | +register_ess_adapter('XTBLog', XTBLog) |
0 commit comments