Skip to content

Commit fea15c8

Browse files
committed
Improved the xTB parser adapter
1 parent 1e71721 commit fea15c8

1 file changed

Lines changed: 66 additions & 41 deletions

File tree

arc/parser/adapters/xtb.py

Lines changed: 66 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pandas as pd
1212

1313
from arc.common import is_str_float
14-
from arc.constants import E_h_kJmol
14+
from arc.constants import E_h_kJmol, bohr_to_angstrom
1515
from arc.species.converter import str_to_xyz, xyz_from_data, logger
1616
from arc.parser.adapter import ESSAdapter
1717
from arc.parser.factory import register_ess_adapter
@@ -35,13 +35,22 @@ def logfile_contains_errors(self) -> Optional[str]:
3535
Returns: Optional[str]
3636
``None`` if the log file is free of errors, otherwise the error is returned as a string.
3737
"""
38-
# Not implemented for xTB.
38+
with open(self.log_file_path, 'r') as f:
39+
for line in f:
40+
if 'abnormal termination' in line.lower():
41+
return line.strip()
42+
if '[ERROR]' in line or '#ERROR' in line:
43+
return line.strip()
3944
return None
4045

4146
def parse_geometry(self) -> Optional[Dict[str, tuple]]:
4247
"""
4348
Parse the xyz geometry from an ESS log file.
4449
50+
Supports both Turbomole ``$coord`` (Bohr) and Molfile V2000 (Angstrom) formats.
51+
If the file contains multiple geometry blocks (e.g., from multiple optimization
52+
cycles), only the last one is returned.
53+
4554
Returns: Optional[Dict[str, tuple]]
4655
The cartesian geometry.
4756
"""
@@ -58,6 +67,8 @@ def parse_geometry(self) -> Optional[Dict[str, tuple]]:
5867
final_structure = True
5968
continue
6069
if final_structure and '$coord' in line:
70+
# Reset on each new $coord block so we keep only the last one
71+
coords, symbols = list(), list()
6172
in_coord_block = True
6273
continue
6374
if final_structure and 'V2000' in line and not in_coord_block:
@@ -66,16 +77,18 @@ def parse_geometry(self) -> Optional[Dict[str, tuple]]:
6677
if parts and parts[0].isdigit():
6778
atom_count = int(parts[0])
6879
molfile_line_counter = 0
80+
# Reset on each new V2000 block
81+
coords, symbols = list(), list()
6982
continue
7083

71-
# Parse $coord format
84+
# Parse $coord format (Turbomole $coord coordinates are in Bohr, convert to Angstrom)
7285
if in_coord_block:
7386
if '$' in line or 'end' in line.lower() or len(line.split()) < 4:
7487
in_coord_block = False
7588
continue
7689
parts = line.split()
7790
try:
78-
x, y, z = map(float, parts[:3])
91+
x, y, z = (float(v) * bohr_to_angstrom for v in parts[:3])
7992
symbol = parts[3].capitalize() if len(parts[3]) == 1 else parts[3][0].upper() + parts[3][1:].lower()
8093
coords.append([x, y, z])
8194
symbols.append(symbol)
@@ -105,30 +118,34 @@ def parse_frequencies(self) -> Optional[np.ndarray]:
105118
"""
106119
Parse the frequencies from a freq job output file.
107120
121+
xTB prints frequencies twice (once after the Hessian and once in the
122+
Frequency Printout section). This method reads ALL eigval blocks and
123+
returns only the last complete one to ensure we get the final values.
124+
108125
Returns: Optional[np.ndarray]
109126
The parsed frequencies (in cm^-1).
110127
"""
111-
freqs = list()
128+
# Collect all eigval blocks; use the last one
129+
all_blocks = list()
130+
current_block = list()
112131
lines = _get_lines_from_file(self.log_file_path)
113-
read_output = False
114132

115133
for line in lines:
116-
if read_output:
117-
if 'eigval :' in line:
118-
splits = line.split()
119-
for split in splits[2:]:
120-
try:
121-
freq = float(split)
122-
if freq != 0.0:
123-
freqs.append(freq)
124-
except ValueError:
125-
continue
126-
elif line.strip() == "" or "projected vibrational frequencies" in line.lower():
127-
continue
128-
else:
129-
break
130-
if 'vibrational frequencies' in line.lower():
131-
read_output = True
134+
if 'eigval :' in line:
135+
splits = line.split()
136+
for split in splits[2:]:
137+
try:
138+
current_block.append(float(split))
139+
except ValueError:
140+
continue
141+
elif current_block:
142+
# End of an eigval run
143+
all_blocks.append(current_block)
144+
current_block = list()
145+
if current_block:
146+
all_blocks.append(current_block)
147+
148+
freqs = [f for f in all_blocks[-1] if f != 0.0] if all_blocks else list()
132149

133150
# Fallback: try vibspectrum file if no frequencies found in output
134151
if not freqs:
@@ -232,28 +249,32 @@ def parse_e_elect(self) -> Optional[float]:
232249
"""
233250
Parse the electronic energy from an sp job output file.
234251
252+
Looks for ``:: total energy ... Eh`` (SUMMARY block) or
253+
``| TOTAL ENERGY ... Eh |`` (TOTAL section).
254+
Avoids false matches against ``total energy gain`` (optimization deltas).
255+
235256
Returns: Optional[float]
236257
The electronic energy in kJ/mol.
237258
"""
259+
import re
238260
lines = _get_lines_from_file(self.log_file_path)
239261
energy = None
240-
for line in reversed(lines):
241-
if 'total energy' in line.lower():
242-
try:
243-
energy = float(line.split()[3].strip())
244-
break
245-
except (ValueError, IndexError):
262+
# Iterate forward and keep the LAST hit (final result)
263+
for line in lines:
264+
stripped = line.strip()
265+
if stripped.startswith(':: total energy') or 'TOTAL ENERGY' in line:
266+
m = re.search(r'(-?\d+\.\d+)\s+Eh', line)
267+
if m:
268+
energy = float(m.group(1))
269+
if energy is None:
270+
# Fallback: 'final energy' lines (rare)
271+
for line in reversed(lines):
272+
if 'final energy' in line.lower():
246273
try:
247-
energy = float(line.split()[-1].strip())
274+
energy = float(line.split()[-1])
248275
break
249276
except (ValueError, IndexError):
250277
continue
251-
if 'final energy' in line.lower():
252-
try:
253-
energy = float(line.split()[-1])
254-
break
255-
except (ValueError, IndexError):
256-
continue
257278
if energy is not None:
258279
return energy * E_h_kJmol
259280
return None
@@ -265,16 +286,16 @@ def parse_zpe_correction(self) -> Optional[float]:
265286
Returns: Optional[float]
266287
The calculated zero point energy in kJ/mol.
267288
"""
289+
import re
268290
zpe = None
269291
with open(self.log_file_path, 'r') as f:
270292
for line in f:
271293
if 'zero-point vibrational energy' in line.lower() or 'zero point energy' in line.lower():
272294
# :: zero point energy 0.056690417480 Eh ::
273-
try:
274-
zpe = float(line.split()[-3])
295+
m = re.search(r'(\d+\.\d+(?:[eE][+-]?\d+)?)\s+Eh', line)
296+
if m:
297+
zpe = float(m.group(1))
275298
break
276-
except (ValueError, IndexError):
277-
continue
278299
if zpe is not None:
279300
return zpe * E_h_kJmol
280301
return None
@@ -323,8 +344,12 @@ def parse_1d_scan_energies(self) -> Tuple[Optional[List[float]], Optional[List[f
323344
logger.warning(f'No valid scan points found in xTB scan log file {scan_path}.')
324345
return None, None
325346

326-
# Angles: evenly spaced 0 to 360 inclusive
327-
angles = [i * 360.0 / (n_points + 1) for i in range(n_points + 1)]
347+
# Angles: evenly spaced from 0 deg with one angle per energy point.
348+
# For 44 energies, the dihedral was scanned in steps of 360/45 = 8 deg.
349+
# We return n_points angles (matching the energies length): [0, 8, 16, ..., 344].
350+
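# Note: callers expecting (n_points+1) angles for n_points energies should
351+
# append the final angle themselves (the next evenly spaced value is n_points * 360 / (n_points + 1) deg, i.e. 352 deg for 44 energies, not 360 deg).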
352+
angles = [i * 360.0 / (n_points + 1) for i in range(n_points)]
328353

329354
return rel_energies, angles
330355

0 commit comments

Comments
 (0)