Skip to content

Commit 35a641d

Browse files
committed
Added the Arkane xTB ESS adapter
1 parent 7e86c4c commit 35a641d

File tree

3 files changed

+352
-1
lines changed

3 files changed

+352
-1
lines changed

arkane/ess/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,4 @@
3939
from arkane.ess.psi4_parser import Psi4Log
4040
from arkane.ess.qchem import QChemLog
4141
from arkane.ess.terachem import TeraChemLog
42+
from arkane.ess.xtb import XTBLog

arkane/ess/factory.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,12 @@ def ess_factory(fullpath: str,
102102
elif 'terachem' in line:
103103
ess_name = 'TeraChemLog'
104104
break
105+
elif 'x t b' in line or 'xtb version' in line:
106+
ess_name = 'XTBLog'
107+
break
105108
line = f.readline().lower()
106109
if ess_name is None:
107110
raise InputError(f'The file at {fullpath} could not be identified as a '
108-
f'Gaussian, Molpro, Orca, Psi4, QChem, or TeraChem log file.')
111+
f'Gaussian, Molpro, Orca, Psi4, QChem, TeraChem, or xTB log file.')
109112

110113
return _registered_ess_adapters[ess_name](path=fullpath, check_for_errors=check_for_errors, scratch_directory=scratch_directory)

arkane/ess/xtb.py

Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
#!/usr/bin/env python3
2+
3+
###############################################################################
4+
# #
5+
# RMG - Reaction Mechanism Generator #
6+
# #
7+
# Copyright (c) 2002-2023 Prof. William H. Green (whgreen@mit.edu), #
8+
# Prof. Richard H. West (r.west@neu.edu) and the RMG Team (rmg_dev@mit.edu) #
9+
# #
10+
# Permission is hereby granted, free of charge, to any person obtaining a #
11+
# copy of this software and associated documentation files (the 'Software'), #
12+
# to deal in the Software without restriction, including without limitation #
13+
# the rights to use, copy, modify, merge, publish, distribute, sublicense, #
14+
# and/or sell copies of the Software, and to permit persons to whom the #
15+
# Software is furnished to do so, subject to the following conditions: #
16+
# #
17+
# The above copyright notice and this permission notice shall be included in #
18+
# all copies or substantial portions of the Software. #
19+
# #
20+
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR #
21+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, #
22+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE #
23+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER #
24+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING #
25+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER #
26+
# DEALINGS IN THE SOFTWARE. #
27+
# #
28+
###############################################################################
29+
30+
"""
31+
Arkane ESS adapter for xTB (extended Tight Binding) output files.
32+
Supports geometry (V2000 and $coord/Turbomol formats), energy, frequencies,
33+
ZPE, and conformer loading.
34+
"""
35+
36+
import logging
37+
import re
38+
39+
import numpy as np
40+
41+
import rmgpy.constants as constants
42+
from rmgpy.statmech import (
43+
Conformer,
44+
HarmonicOscillator,
45+
IdealGasTranslation,
46+
LinearRotor,
47+
NonlinearRotor,
48+
)
49+
50+
from arkane.common import (
51+
get_element_mass,
52+
get_principal_moments_of_inertia,
53+
symbol_by_number,
54+
)
55+
from arkane.exceptions import LogError
56+
from arkane.ess.adapter import ESSAdapter
57+
from arkane.ess.factory import register_ess_adapter
58+
59+
logger = logging.getLogger(__name__)
60+
61+
BOHR_TO_ANGSTROM = constants.a0 * 1e10 # ~0.52918 Angstrom
62+
INERTIA_ZERO_TOL = 1e-4 # amu*angstrom^2
63+
64+
65+
def _normalize_symbol(raw: str) -> str:
66+
"""Normalize an element symbol from xTB output (e.g. 'c' -> 'C', 'cl' -> 'Cl')."""
67+
if len(raw) == 1:
68+
return raw.upper()
69+
return raw[0].upper() + raw[1:].lower()
70+
71+
72+
class XTBLog(ESSAdapter):
73+
"""
74+
Arkane ESS adapter for xTB output files.
75+
76+
Parses output from the xtb program (https://github.com/grimme-lab/xtb),
77+
including geometry, electronic energy, vibrational frequencies, and
78+
thermochemical data.
79+
80+
Supported geometry formats:
81+
- V2000 (SDF/Molfile) block from ``--opt`` outputs
82+
- Turbomol ``$coord`` block (Bohr) from ``--opt`` outputs
83+
Frequency-only (``--hess``) outputs do not embed a geometry.
84+
"""
85+
86+
def check_for_errors(self):
87+
"""Check for common xTB errors in the output file."""
88+
with open(self.path, 'r') as f:
89+
for line in f:
90+
if 'ERROR' in line and 'SETUP' not in line:
91+
raise LogError(f'xTB error found in {self.path}: {line.strip()}')
92+
if 'abnormal termination' in line.lower():
93+
raise LogError(f'xTB job terminated abnormally in {self.path}')
94+
95+
def get_number_of_atoms(self) -> int:
96+
"""
97+
Return the number of atoms from the xTB output.
98+
99+
Tries the explicit 'number of atoms' line first, then falls
100+
back to counting atoms in the geometry block.
101+
"""
102+
with open(self.path, 'r') as f:
103+
for line in f:
104+
if 'number of atoms' in line:
105+
return int(line.split()[-1])
106+
try:
107+
coord, _, _ = self.load_geometry()
108+
return len(coord)
109+
except LogError:
110+
pass
111+
raise LogError(f'Could not determine the number of atoms in {self.path}')
112+
113+
def load_geometry(self):
114+
"""
115+
Load the molecular geometry from the xTB output file.
116+
117+
Supports two formats found in xTB ``--opt`` outputs:
118+
- **V2000 (SDF/Molfile)**: Cartesian coordinates in Angstroms.
119+
- **Turbomol ``$coord``**: Cartesian coordinates in Bohr (converted to Angstroms).
120+
121+
Frequency-only (``--hess``) outputs do not embed a geometry and will raise.
122+
123+
Returns:
124+
Tuple of (coord, number, mass):
125+
coord: np.ndarray (n, 3) in Angstroms
126+
number: np.ndarray (n,) atomic numbers
127+
mass: np.ndarray (n,) atomic masses in amu
128+
"""
129+
with open(self.path, 'r') as f:
130+
lines = f.readlines()
131+
coord, number, mass = self._parse_v2000(lines)
132+
if not coord:
133+
coord, number, mass = self._parse_turbomol_coord(lines)
134+
if not coord:
135+
raise LogError(f'Could not find geometry in {self.path}')
136+
return (np.array(coord, dtype=np.float64),
137+
np.array(number, dtype=int),
138+
np.array(mass, dtype=np.float64))
139+
140+
@staticmethod
141+
def _parse_v2000(lines):
142+
"""Parse V2000 (SDF/Molfile) geometry block. Returns (coord, number, mass) or empty lists."""
143+
coord, number, mass = [], [], []
144+
for i, line in enumerate(lines):
145+
if 'V2000' in line:
146+
n_atoms = int(line.split()[0])
147+
coord, number, mass = [], [], []
148+
for j in range(i + 1, i + 1 + n_atoms):
149+
tokens = lines[j].split()
150+
x, y, z = float(tokens[0]), float(tokens[1]), float(tokens[2])
151+
symbol = tokens[3]
152+
coord.append([x, y, z])
153+
mass_i, num_i = get_element_mass(symbol)
154+
number.append(num_i)
155+
mass.append(mass_i)
156+
return coord, number, mass
157+
158+
@staticmethod
159+
def _parse_turbomol_coord(lines):
160+
"""Parse Turbomol $coord block (Bohr). Converts to Angstroms."""
161+
coord, number, mass = [], [], []
162+
in_coord = False
163+
for line in lines:
164+
stripped = line.strip()
165+
if stripped == '$coord':
166+
in_coord = True
167+
coord, number, mass = [], [], []
168+
continue
169+
if in_coord:
170+
if stripped.startswith('$'):
171+
break
172+
tokens = stripped.split()
173+
if len(tokens) >= 4:
174+
try:
175+
x = float(tokens[0]) * BOHR_TO_ANGSTROM
176+
y = float(tokens[1]) * BOHR_TO_ANGSTROM
177+
z = float(tokens[2]) * BOHR_TO_ANGSTROM
178+
symbol = _normalize_symbol(tokens[3])
179+
coord.append([x, y, z])
180+
mass_i, num_i = get_element_mass(symbol)
181+
number.append(num_i)
182+
mass.append(mass_i)
183+
except (ValueError, KeyError):
184+
break
185+
return coord, number, mass
186+
187+
def load_energy(self, zpe_scale_factor=1.):
188+
"""
189+
Load the electronic energy in J/mol from the xTB output.
190+
191+
Returns the last ``total energy`` value found. The zero-point energy
192+
is NOT included.
193+
"""
194+
e_elect = None
195+
with open(self.path, 'r') as f:
196+
for line in f:
197+
if ':: total energy' in line:
198+
match = re.search(r'(-?\d+\.\d+)\s+Eh', line)
199+
if match:
200+
e_elect = float(match.group(1))
201+
elif 'TOTAL ENERGY' in line and 'Eh' in line:
202+
match = re.search(r'(-?\d+\.\d+)\s+Eh', line)
203+
if match:
204+
e_elect = float(match.group(1))
205+
if e_elect is None:
206+
raise LogError(f'Unable to find energy in xTB output file {self.path}')
207+
return e_elect * constants.E_h * constants.Na
208+
209+
def load_zero_point_energy(self):
210+
"""
211+
Load the zero-point energy in J/mol from the xTB output.
212+
Only available in frequency (``--hess``) calculations.
213+
"""
214+
zpe = None
215+
with open(self.path, 'r') as f:
216+
for line in f:
217+
if ':: zero point energy' in line or 'zero-point vibrational energy' in line.lower():
218+
match = re.search(r'(\d+\.\d+)\s+Eh', line)
219+
if match:
220+
zpe = float(match.group(1))
221+
if zpe is None:
222+
raise LogError(f'Unable to find zero-point energy in xTB output file {self.path}')
223+
return zpe * constants.E_h * constants.Na
224+
225+
def _load_all_frequencies(self):
226+
"""
227+
Load ALL vibrational frequencies from the last eigval block in the xTB output,
228+
including near-zero (translational/rotational) and negative (imaginary) modes.
229+
230+
xTB prints frequencies twice (after Hessian and in Frequency Printout).
231+
Returns the last complete set.
232+
"""
233+
all_blocks, current_block = [], []
234+
with open(self.path, 'r') as f:
235+
for line in f:
236+
if line.strip().startswith('eigval :'):
237+
values = line.split(':')[1].split()
238+
current_block.extend(float(v) for v in values)
239+
elif current_block and not line.strip().startswith('eigval'):
240+
all_blocks.append(current_block)
241+
current_block = []
242+
if current_block:
243+
all_blocks.append(current_block)
244+
return all_blocks[-1] if all_blocks else []
245+
246+
def _load_frequencies(self):
247+
"""
248+
Load positive (real) vibrational frequencies in cm^-1.
249+
Filters out translational/rotational modes (near-zero) and imaginary (negative).
250+
"""
251+
return [f for f in self._load_all_frequencies() if f > 0.1]
252+
253+
def _load_spin_multiplicity(self):
254+
"""
255+
Determine spin multiplicity from the xTB output.
256+
257+
xTB reports ``spin`` as S (not 2S+1), so multiplicity = 2*S + 1.
258+
Some xTB versions use ``--uhf N`` where N = number of unpaired electrons.
259+
Default to singlet.
260+
"""
261+
with open(self.path, 'r') as f:
262+
for line in f:
263+
# Format: " spin : 0.5"
264+
if 'spin' in line:
265+
parts = line.split()
266+
if parts and parts[0] == 'spin':
267+
s = float(parts[-1])
268+
return int(2 * s + 1)
269+
# Also check for unpaired electrons in setup block
270+
if 'unpaired' in line and ':' in line:
271+
parts = line.split(':')
272+
try:
273+
n_unpaired = int(parts[-1].strip())
274+
return n_unpaired + 1
275+
except ValueError:
276+
pass
277+
return 1
278+
279+
def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=None, label=''):
280+
"""
281+
Load the molecular degree of freedom data from an xTB output file.
282+
283+
Requires geometry to be available in the log file (optimization outputs).
284+
For frequency-only outputs, geometry must be loaded from a separate file.
285+
286+
Returns:
287+
Tuple of (Conformer, unscaled_frequencies).
288+
"""
289+
if optical_isomers is None or symmetry is None:
290+
_optical_isomers, _symmetry, _ = self.get_symmetry_properties()
291+
if optical_isomers is None:
292+
optical_isomers = _optical_isomers
293+
if symmetry is None:
294+
symmetry = _symmetry
295+
296+
if spin_multiplicity == 0:
297+
spin_multiplicity = self._load_spin_multiplicity()
298+
299+
unscaled_frequencies = self._load_frequencies()
300+
modes = []
301+
302+
# Translation
303+
coord, number, mass = self.load_geometry()
304+
modes.append(IdealGasTranslation(mass=(sum(mass), "amu")))
305+
306+
# Rotation — use tolerance for linear molecule detection
307+
symbols = [symbol_by_number[i] for i in number]
308+
inertia = list(get_principal_moments_of_inertia(coord, numbers=number, symbols=symbols)[0])
309+
if inertia and not all(abs(i) < INERTIA_ZERO_TOL for i in inertia):
310+
nonzero = [i for i in inertia if abs(i) >= INERTIA_ZERO_TOL]
311+
if len(nonzero) < len(inertia):
312+
# Linear molecule: one or more moments are ~zero
313+
modes.append(LinearRotor(inertia=(max(nonzero), "amu*angstrom^2"), symmetry=symmetry))
314+
else:
315+
modes.append(NonlinearRotor(inertia=(nonzero, "amu*angstrom^2"), symmetry=symmetry))
316+
317+
# Vibration
318+
if unscaled_frequencies:
319+
modes.append(HarmonicOscillator(frequencies=(unscaled_frequencies, "cm^-1")))
320+
321+
return Conformer(E0=(0.0, "kJ/mol"),
322+
modes=modes,
323+
spin_multiplicity=spin_multiplicity,
324+
optical_isomers=optical_isomers), unscaled_frequencies
325+
326+
def load_negative_frequency(self):
327+
"""Load the first imaginary (negative) frequency in cm^-1 for a transition state."""
328+
neg_freqs = [f for f in self._load_all_frequencies() if f < -0.1]
329+
if not neg_freqs:
330+
raise LogError(f'No imaginary frequencies found in {self.path}')
331+
return neg_freqs[0]
332+
333+
def load_force_constant_matrix(self):
334+
"""xTB writes a separate hessian file; not parsed from the main output."""
335+
return None
336+
337+
def load_scan_energies(self):
338+
raise NotImplementedError('Rotor scans are not supported by the xTB adapter.')
339+
340+
def load_scan_pivot_atoms(self):
341+
raise NotImplementedError('Rotor scans are not supported by the xTB adapter.')
342+
343+
def load_scan_frozen_atoms(self):
344+
raise NotImplementedError('Rotor scans are not supported by the xTB adapter.')
345+
346+
347+
register_ess_adapter('XTBLog', XTBLog)

0 commit comments

Comments
 (0)