Skip to content

Commit 9a553b2

Browse files
committed
f xtb
1 parent 27bdea2 commit 9a553b2

File tree

1 file changed

+129
-91
lines changed

1 file changed

+129
-91
lines changed

arkane/ess/xtb.py

Lines changed: 129 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929

3030
"""
3131
Arkane ESS adapter for xTB (extended Tight Binding) output files.
32-
Supports geometry, energy, frequencies, and conformer loading.
32+
Supports geometry (V2000 and $coord/Turbomol formats), energy, frequencies,
33+
ZPE, and conformer loading.
3334
"""
3435

3536
import logging
@@ -57,6 +58,19 @@
5758

5859
logger = logging.getLogger(__name__)
5960

61+
# Bohr to Angstrom conversion
62+
BOHR_TO_ANGSTROM = constants.a0 * 1e10 # ~0.52918 Angstrom
63+
64+
# Tolerance for considering a principal moment of inertia as zero (linear molecule)
65+
INERTIA_ZERO_TOL = 1e-4 # amu*angstrom^2
66+
67+
68+
def _normalize_symbol(raw: str) -> str:
69+
"""Normalize an element symbol from xTB output (e.g. 'c' -> 'C', 'cl' -> 'Cl')."""
70+
if len(raw) == 1:
71+
return raw.upper()
72+
return raw[0].upper() + raw[1:].lower()
73+
6074

6175
class XTBLog(ESSAdapter):
6276
"""
@@ -65,6 +79,11 @@ class XTBLog(ESSAdapter):
6579
Parses output from the xtb program (https://github.com/grimme-lab/xtb),
6680
including geometry, electronic energy, vibrational frequencies, and
6781
thermochemical data.
82+
83+
Supported geometry formats:
84+
- V2000 (SDF/Molfile) block from ``--opt`` outputs
85+
- Turbomol ``$coord`` block (Bohr) from ``--opt`` outputs
86+
Frequency-only (``--hess``) outputs do not embed a geometry.
6887
"""
6988

7089
def check_for_errors(self):
@@ -77,37 +96,61 @@ def check_for_errors(self):
7796
raise LogError(f'xTB job terminated abnormally in {self.path}')
7897

7998
def get_number_of_atoms(self) -> int:
80-
"""Return the number of atoms from the xTB output."""
99+
"""
100+
Return the number of atoms from the xTB output.
101+
102+
Tries the explicit 'number of atoms' line first, then falls
103+
back to counting atoms in the geometry block.
104+
"""
81105
with open(self.path, 'r') as f:
82106
for line in f:
83107
if 'number of atoms' in line:
84108
return int(line.split()[-1])
85-
raise LogError(f'Could not find the number of atoms in {self.path}')
109+
# Fallback: count atoms from geometry
110+
try:
111+
coord, _, _ = self.load_geometry()
112+
return len(coord)
113+
except LogError:
114+
pass
115+
raise LogError(f'Could not determine the number of atoms in {self.path}')
86116

87117
def load_geometry(self):
88118
"""
89119
Load the molecular geometry from the xTB output file.
90120
91-
For optimization outputs, reads the final V2000 structure block.
92-
For frequency outputs, reads the initial coordinate input section.
121+
Supports two formats found in xTB ``--opt`` outputs:
122+
- **V2000 (SDF/Molfile)**: Cartesian coordinates in Angstroms.
123+
- **Turbomol ``$coord``**: Cartesian coordinates in Bohr (converted to Angstroms).
124+
125+
Frequency-only (``--hess``) outputs do not embed a geometry and will raise.
93126
94127
Returns:
95128
Tuple of (coord, number, mass):
96129
coord: np.ndarray (n, 3) in Angstroms
97130
number: np.ndarray (n,) atomic numbers
98131
mass: np.ndarray (n,) atomic masses in amu
99132
"""
100-
coord, number, mass = [], [], []
101-
102133
with open(self.path, 'r') as f:
103134
lines = f.readlines()
104135

105-
# Try V2000 format (from geometry optimization output)
136+
coord, number, mass = self._parse_v2000(lines)
137+
if not coord:
138+
coord, number, mass = self._parse_turbomol_coord(lines)
139+
140+
if not coord:
141+
raise LogError(f'Could not find geometry in {self.path}')
142+
143+
return (np.array(coord, dtype=np.float64),
144+
np.array(number, dtype=int),
145+
np.array(mass, dtype=np.float64))
146+
147+
@staticmethod
148+
def _parse_v2000(lines):
149+
"""Parse V2000 (SDF/Molfile) geometry block. Returns (coord, number, mass) or empty lists."""
150+
coord, number, mass = [], [], []
106151
for i, line in enumerate(lines):
107152
if 'V2000' in line:
108-
# Parse the counts line: "n_atoms n_bonds 0 ..."
109-
parts = line.split()
110-
n_atoms = int(parts[0])
153+
n_atoms = int(line.split()[0])
111154
coord, number, mass = [], [], []
112155
for j in range(i + 1, i + 1 + n_atoms):
113156
tokens = lines[j].split()
@@ -117,51 +160,52 @@ def load_geometry(self):
117160
mass_i, num_i = get_element_mass(symbol)
118161
number.append(num_i)
119162
mass.append(mass_i)
163+
return coord, number, mass
120164

121-
if not coord:
122-
# Fall back: parse from the "Calculation Setup" section
123-
# xTB prints "ID Z sym. atoms" followed by element list,
124-
# and coordinates may come from the input SDF parsed at the start.
125-
# Try parsing the Cartesian coordinate block if present
126-
for i, line in enumerate(lines):
127-
if 'final xyz coordinates' in line.lower() or 'cartesian coordinates' in line.lower():
128-
coord, number, mass = [], [], []
129-
for j in range(i + 1, len(lines)):
130-
tokens = lines[j].split()
131-
if len(tokens) < 4:
132-
break
133-
try:
134-
symbol = tokens[0]
135-
x, y, z = float(tokens[1]), float(tokens[2]), float(tokens[3])
136-
coord.append([x, y, z])
137-
mass_i, num_i = get_element_mass(symbol)
138-
number.append(num_i)
139-
mass.append(mass_i)
140-
except (ValueError, KeyError):
141-
break
142-
143-
if not coord:
144-
raise LogError(f'Could not find geometry in {self.path}')
145-
146-
return np.array(coord, dtype=np.float64), np.array(number, dtype=int), np.array(mass, dtype=np.float64)
165+
@staticmethod
166+
def _parse_turbomol_coord(lines):
167+
"""Parse Turbomol $coord block (Bohr). Converts to Angstroms."""
168+
coord, number, mass = [], [], []
169+
in_coord = False
170+
for line in lines:
171+
stripped = line.strip()
172+
if stripped == '$coord':
173+
in_coord = True
174+
coord, number, mass = [], [], []
175+
continue
176+
if in_coord:
177+
if stripped.startswith('$'):
178+
break
179+
tokens = stripped.split()
180+
if len(tokens) >= 4:
181+
try:
182+
x = float(tokens[0]) * BOHR_TO_ANGSTROM
183+
y = float(tokens[1]) * BOHR_TO_ANGSTROM
184+
z = float(tokens[2]) * BOHR_TO_ANGSTROM
185+
symbol = _normalize_symbol(tokens[3])
186+
coord.append([x, y, z])
187+
mass_i, num_i = get_element_mass(symbol)
188+
number.append(num_i)
189+
mass.append(mass_i)
190+
except (ValueError, KeyError):
191+
break
192+
return coord, number, mass
147193

148194
def load_energy(self, zpe_scale_factor=1.):
149195
"""
150196
Load the electronic energy in J/mol from the xTB output.
151197
152-
Returns the last "total energy" value found in the SUMMARY block.
153-
The zero-point energy is NOT included.
198+
Returns the last ``total energy`` value found. The zero-point energy
199+
is NOT included.
154200
"""
155201
e_elect = None
156202
with open(self.path, 'r') as f:
157203
for line in f:
158204
if ':: total energy' in line:
159-
# Format: ":: total energy -9.907945789900 Eh ::"
160205
match = re.search(r'(-?\d+\.\d+)\s+Eh', line)
161206
if match:
162207
e_elect = float(match.group(1))
163208
elif 'TOTAL ENERGY' in line and 'Eh' in line:
164-
# Format: "| TOTAL ENERGY -9.907945789911 Eh |"
165209
match = re.search(r'(-?\d+\.\d+)\s+Eh', line)
166210
if match:
167211
e_elect = float(match.group(1))
@@ -172,28 +216,26 @@ def load_energy(self, zpe_scale_factor=1.):
172216
def load_zero_point_energy(self):
173217
"""
174218
Load the zero-point energy in J/mol from the xTB output.
175-
Only available in frequency (--hess) calculations.
219+
Only available in frequency (``--hess``) calculations.
176220
"""
177221
zpe = None
178222
with open(self.path, 'r') as f:
179223
for line in f:
180-
if ':: zero point energy' in line:
181-
match = re.search(r'(-?\d+\.\d+)\s+Eh', line)
224+
if ':: zero point energy' in line or 'zero-point vibrational energy' in line.lower():
225+
match = re.search(r'(\d+\.\d+)\s+Eh', line)
182226
if match:
183227
zpe = float(match.group(1))
184228
if zpe is None:
185229
raise LogError(f'Unable to find zero-point energy in xTB output file {self.path}')
186230
return zpe * constants.E_h * constants.Na
187231

188-
def _load_frequencies(self):
232+
def _load_all_frequencies(self):
189233
"""
190-
Load vibrational frequencies from the xTB output.
234+
Load ALL vibrational frequencies from the last eigval block in the xTB output,
235+
including near-zero (translational/rotational) and negative (imaginary) modes.
191236
192237
xTB prints frequencies twice (after Hessian and in Frequency Printout).
193-
We take only the last complete set.
194-
195-
Returns:
196-
List of positive (real) frequencies in cm^-1.
238+
Returns the last complete set.
197239
"""
198240
all_blocks = []
199241
current_block = []
@@ -207,33 +249,47 @@ def _load_frequencies(self):
207249
current_block = []
208250
if current_block:
209251
all_blocks.append(current_block)
252+
return all_blocks[-1] if all_blocks else []
210253

211-
if not all_blocks:
212-
return []
213-
214-
# Use the last frequency block
215-
frequencies = all_blocks[-1]
216-
# Filter out translational/rotational modes (near-zero) and imaginary (negative)
217-
return [f for f in frequencies if f > 0.1]
254+
def _load_frequencies(self):
255+
"""
256+
Load positive (real) vibrational frequencies in cm^-1.
257+
Filters out translational/rotational modes (near-zero) and imaginary (negative).
258+
"""
259+
return [f for f in self._load_all_frequencies() if f > 0.1]
218260

219261
def _load_spin_multiplicity(self):
220262
"""
221263
Determine spin multiplicity from the xTB output.
222264
223-
xTB reports 'spin' as S (not 2S+1), so multiplicity = 2*S + 1.
265+
xTB reports ``spin`` as S (not 2S+1), so multiplicity = 2*S + 1.
266+
Some xTB versions use ``--uhf N`` where N = number of unpaired electrons.
224267
"""
225268
with open(self.path, 'r') as f:
226269
for line in f:
227-
if 'spin' in line and 'spin' == line.split()[0].strip():
228-
# Format: "spin : 0.5"
229-
s = float(line.split()[-1])
230-
return int(2 * s + 1)
270+
# Format: " spin : 0.5"
271+
if 'spin' in line:
272+
parts = line.split()
273+
if parts and parts[0] == 'spin':
274+
s = float(parts[-1])
275+
return int(2 * s + 1)
276+
# Also check for unpaired electrons in setup block
277+
if 'unpaired' in line and ':' in line:
278+
parts = line.split(':')
279+
try:
280+
n_unpaired = int(parts[-1].strip())
281+
return n_unpaired + 1
282+
except ValueError:
283+
pass
231284
return 1 # default singlet
232285

233286
def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=None, label=''):
234287
"""
235288
Load the molecular degree of freedom data from an xTB output file.
236289
290+
Requires geometry to be available in the log file (optimization outputs).
291+
For frequency-only outputs, geometry must be loaded from a separate file.
292+
237293
Returns:
238294
Tuple of (Conformer, unscaled_frequencies).
239295
"""
@@ -247,25 +303,23 @@ def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=Non
247303
if spin_multiplicity == 0:
248304
spin_multiplicity = self._load_spin_multiplicity()
249305

250-
# Load frequencies
251306
unscaled_frequencies = self._load_frequencies()
252307
modes = []
253308

254309
# Translation
255310
coord, number, mass = self.load_geometry()
256-
translation = IdealGasTranslation(mass=(sum(mass), "amu"))
257-
modes.append(translation)
311+
modes.append(IdealGasTranslation(mass=(sum(mass), "amu")))
258312

259-
# Rotation
313+
# Rotation — use tolerance for linear molecule detection
260314
symbols = [symbol_by_number[i] for i in number]
261-
inertia = get_principal_moments_of_inertia(coord, numbers=number, symbols=symbols)
262-
inertia = list(inertia[0])
263-
if inertia and not all(i == 0.0 for i in inertia):
264-
if any(i == 0.0 for i in inertia):
265-
inertia.remove(0.0)
266-
modes.append(LinearRotor(inertia=(inertia[0], "amu*angstrom^2"), symmetry=symmetry))
315+
inertia = list(get_principal_moments_of_inertia(coord, numbers=number, symbols=symbols)[0])
316+
if inertia and not all(abs(i) < INERTIA_ZERO_TOL for i in inertia):
317+
nonzero = [i for i in inertia if abs(i) >= INERTIA_ZERO_TOL]
318+
if len(nonzero) < len(inertia):
319+
# Linear molecule: one or more moments are ~zero
320+
modes.append(LinearRotor(inertia=(max(nonzero), "amu*angstrom^2"), symmetry=symmetry))
267321
else:
268-
modes.append(NonlinearRotor(inertia=(inertia, "amu*angstrom^2"), symmetry=symmetry))
322+
modes.append(NonlinearRotor(inertia=(nonzero, "amu*angstrom^2"), symmetry=symmetry))
269323

270324
# Vibration
271325
if unscaled_frequencies:
@@ -277,24 +331,8 @@ def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=Non
277331
optical_isomers=optical_isomers), unscaled_frequencies
278332

279333
def load_negative_frequency(self):
280-
"""
281-
Load the imaginary (negative) frequency in cm^-1 for a transition state.
282-
"""
283-
# Reuse _load_frequencies logic to get the last block, but include negatives
284-
all_blocks = []
285-
current_block = []
286-
with open(self.path, 'r') as f:
287-
for line in f:
288-
if line.strip().startswith('eigval :'):
289-
values = line.split(':')[1].split()
290-
current_block.extend(float(v) for v in values)
291-
elif current_block and not line.strip().startswith('eigval'):
292-
all_blocks.append(current_block)
293-
current_block = []
294-
if current_block:
295-
all_blocks.append(current_block)
296-
frequencies = all_blocks[-1] if all_blocks else []
297-
neg_freqs = [f for f in frequencies if f < -0.1]
334+
"""Load the first imaginary (negative) frequency in cm^-1 for a transition state."""
335+
neg_freqs = [f for f in self._load_all_frequencies() if f < -0.1]
298336
if not neg_freqs:
299337
raise LogError(f'No imaginary frequencies found in {self.path}')
300338
return neg_freqs[0]

0 commit comments

Comments
 (0)