Skip to content

Commit 305dd18

Browse files
committed
Improved the xTB parser adapter
1 parent e8ce91a commit 305dd18

1 file changed

Lines changed: 66 additions & 38 deletions

File tree

arc/parser/adapters/xtb.py

Lines changed: 66 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,25 @@ def logfile_contains_errors(self) -> Optional[str]:
3535
Returns: Optional[str]
3636
``None`` if the log file is free of errors, otherwise the error is returned as a string.
3737
"""
38-
# Not implemented for xTB.
38+
try:
39+
with open(self.log_file_path, 'r') as f:
40+
for line in f:
41+
if 'abnormal termination' in line.lower():
42+
return line.strip()
43+
if '[ERROR]' in line or '#ERROR' in line:
44+
return line.strip()
45+
except OSError:
46+
return None
3947
return None
4048

4149
def parse_geometry(self) -> Optional[Dict[str, tuple]]:
4250
"""
4351
Parse the xyz geometry from an ESS log file.
4452
53+
Supports both Turbomole ``$coord`` (Bohr) and Molfile V2000 (Angstrom) formats.
54+
If the file contains multiple geometry blocks (e.g., from multiple optimization
55+
cycles), only the last one is returned.
56+
4557
Returns: Optional[Dict[str, tuple]]
4658
The cartesian geometry.
4759
"""
@@ -58,6 +70,8 @@ def parse_geometry(self) -> Optional[Dict[str, tuple]]:
5870
final_structure = True
5971
continue
6072
if final_structure and '$coord' in line:
73+
# Reset on each new $coord block so we keep only the last one
74+
coords, symbols = list(), list()
6175
in_coord_block = True
6276
continue
6377
if final_structure and 'V2000' in line and not in_coord_block:
@@ -66,6 +80,8 @@ def parse_geometry(self) -> Optional[Dict[str, tuple]]:
6680
if parts and parts[0].isdigit():
6781
atom_count = int(parts[0])
6882
molfile_line_counter = 0
83+
# Reset on each new V2000 block
84+
coords, symbols = list(), list()
6985
continue
7086

7187
# Parse $coord format
@@ -105,30 +121,34 @@ def parse_frequencies(self) -> Optional[np.ndarray]:
105121
"""
106122
Parse the frequencies from a freq job output file.
107123
124+
xTB prints frequencies twice (once after the Hessian and once in the
125+
Frequency Printout section). This method reads ALL eigval blocks and
126+
returns only the last one (no completeness check is made), so the final printed values are used.
127+
108128
Returns: Optional[np.ndarray]
109129
The parsed frequencies (in cm^-1).
110130
"""
111-
freqs = list()
131+
# Collect all eigval blocks; use the last one
132+
all_blocks = list()
133+
current_block = list()
112134
lines = _get_lines_from_file(self.log_file_path)
113-
read_output = False
114135

115136
for line in lines:
116-
if read_output:
117-
if 'eigval :' in line:
118-
splits = line.split()
119-
for split in splits[2:]:
120-
try:
121-
freq = float(split)
122-
if freq != 0.0:
123-
freqs.append(freq)
124-
except ValueError:
125-
continue
126-
elif line.strip() == "" or "projected vibrational frequencies" in line.lower():
127-
continue
128-
else:
129-
break
130-
if 'vibrational frequencies' in line.lower():
131-
read_output = True
137+
if 'eigval :' in line:
138+
splits = line.split()
139+
for split in splits[2:]:
140+
try:
141+
current_block.append(float(split))
142+
except ValueError:
143+
continue
144+
elif current_block:
145+
# End of an eigval run
146+
all_blocks.append(current_block)
147+
current_block = list()
148+
if current_block:
149+
all_blocks.append(current_block)
150+
151+
freqs = [f for f in all_blocks[-1] if f != 0.0] if all_blocks else list()
132152

133153
# Fallback: try vibspectrum file if no frequencies found in output
134154
if not freqs:
@@ -232,28 +252,32 @@ def parse_e_elect(self) -> Optional[float]:
232252
"""
233253
Parse the electronic energy from an sp job output file.
234254
255+
Looks for ``:: total energy ... Eh`` (SUMMARY block) or
256+
``| TOTAL ENERGY ... Eh |`` (TOTAL section).
257+
Avoids false matches against ``total energy gain`` (optimization deltas).
258+
235259
Returns: Optional[float]
236260
The electronic energy in kJ/mol.
237261
"""
262+
import re
238263
lines = _get_lines_from_file(self.log_file_path)
239264
energy = None
240-
for line in reversed(lines):
241-
if 'total energy' in line.lower():
242-
try:
243-
energy = float(line.split()[3].strip())
244-
break
245-
except (ValueError, IndexError):
265+
# Iterate forward and keep the LAST hit (final result)
266+
for line in lines:
267+
stripped = line.strip()
268+
if stripped.startswith(':: total energy') or 'TOTAL ENERGY' in line:
269+
m = re.search(r'(-?\d+\.\d+)\s+Eh', line)
270+
if m:
271+
energy = float(m.group(1))
272+
if energy is None:
273+
# Fallback: 'final energy' lines (rare)
274+
for line in reversed(lines):
275+
if 'final energy' in line.lower():
246276
try:
247-
energy = float(line.split()[-1].strip())
277+
energy = float(line.split()[-1])
248278
break
249279
except (ValueError, IndexError):
250280
continue
251-
if 'final energy' in line.lower():
252-
try:
253-
energy = float(line.split()[-1])
254-
break
255-
except (ValueError, IndexError):
256-
continue
257281
if energy is not None:
258282
return energy * E_h_kJmol
259283
return None
@@ -265,16 +289,16 @@ def parse_zpe_correction(self) -> Optional[float]:
265289
Returns: Optional[float]
266290
The calculated zero point energy in kJ/mol.
267291
"""
292+
import re
268293
zpe = None
269294
with open(self.log_file_path, 'r') as f:
270295
for line in f:
271296
if 'zero-point vibrational energy' in line.lower() or 'zero point energy' in line.lower():
272297
# :: zero point energy 0.056690417480 Eh ::
273-
try:
274-
zpe = float(line.split()[-3])
298+
m = re.search(r'(\d+\.\d+(?:[eE][+-]?\d+)?)\s+Eh', line)
299+
if m:
300+
zpe = float(m.group(1))
275301
break
276-
except (ValueError, IndexError):
277-
continue
278302
if zpe is not None:
279303
return zpe * E_h_kJmol
280304
return None
@@ -323,8 +347,12 @@ def parse_1d_scan_energies(self) -> Tuple[Optional[List[float]], Optional[List[f
323347
logger.warning(f'No valid scan points found in xTB scan log file {scan_path}.')
324348
return None, None
325349

326-
# Angles: evenly spaced 0 to 360 inclusive
327-
angles = [i * 360.0 / (n_points + 1) for i in range(n_points + 1)]
350+
# Angles: evenly spaced from 0 deg with one angle per energy point.
351+
# For 44 energies, the dihedral was scanned in steps of 360/45 = 8 deg.
352+
# We return n_points angles (matching the energies length): [0, 8, 16, ..., 344].
353+
# Note: callers expecting (n_points+1) angles for n_points energies should
354+
# add the closing 360 deg themselves.
355+
angles = [i * 360.0 / (n_points + 1) for i in range(n_points)]
328356

329357
return rel_energies, angles
330358

0 commit comments

Comments
 (0)