2929
3030"""
3131Arkane ESS adapter for xTB (extended Tight Binding) output files.
32- Supports geometry, energy, frequencies, and conformer loading.
32+ Supports geometry (V2000 and $coord/Turbomol formats), energy, frequencies,
33+ ZPE, and conformer loading.
3334"""
3435
3536import logging
5758
5859logger = logging .getLogger (__name__ )
5960
61+ # Bohr to Angstrom conversion
62+ BOHR_TO_ANGSTROM = constants .a0 * 1e10 # ~0.52918 Angstrom
63+
64+ # Tolerance for considering a principal moment of inertia as zero (linear molecule)
65+ INERTIA_ZERO_TOL = 1e-4 # amu*angstrom^2
66+
67+
68+ def _normalize_symbol (raw : str ) -> str :
69+ """Normalize an element symbol from xTB output (e.g. 'c' -> 'C', 'cl' -> 'Cl')."""
70+ if len (raw ) == 1 :
71+ return raw .upper ()
72+ return raw [0 ].upper () + raw [1 :].lower ()
73+
6074
6175class XTBLog (ESSAdapter ):
6276 """
@@ -65,6 +79,11 @@ class XTBLog(ESSAdapter):
6579 Parses output from the xtb program (https://github.com/grimme-lab/xtb),
6680 including geometry, electronic energy, vibrational frequencies, and
6781 thermochemical data.
82+
83+ Supported geometry formats:
84+ - V2000 (SDF/Molfile) block from ``--opt`` outputs
85+ - Turbomol ``$coord`` block (Bohr) from ``--opt`` outputs
86+ Frequency-only (``--hess``) outputs do not embed a geometry.
6887 """
6988
7089 def check_for_errors (self ):
@@ -77,37 +96,61 @@ def check_for_errors(self):
7796 raise LogError (f'xTB job terminated abnormally in { self .path } ' )
7897
7998 def get_number_of_atoms (self ) -> int :
80- """Return the number of atoms from the xTB output."""
99+ """
100+ Return the number of atoms from the xTB output.
101+
102+ Tries the explicit 'number of atoms' line first, then falls
103+ back to counting atoms in the geometry block.
104+ """
81105 with open (self .path , 'r' ) as f :
82106 for line in f :
83107 if 'number of atoms' in line :
84108 return int (line .split ()[- 1 ])
85- raise LogError (f'Could not find the number of atoms in { self .path } ' )
109+ # Fallback: count atoms from geometry
110+ try :
111+ coord , _ , _ = self .load_geometry ()
112+ return len (coord )
113+ except LogError :
114+ pass
115+ raise LogError (f'Could not determine the number of atoms in { self .path } ' )
86116
87117 def load_geometry (self ):
88118 """
89119 Load the molecular geometry from the xTB output file.
90120
91- For optimization outputs, reads the final V2000 structure block.
92- For frequency outputs, reads the initial coordinate input section.
121+ Supports two formats found in xTB ``--opt`` outputs:
122+ - **V2000 (SDF/Molfile)**: Cartesian coordinates in Angstroms.
123+ - **Turbomol ``$coord``**: Cartesian coordinates in Bohr (converted to Angstroms).
124+
125+ Frequency-only (``--hess``) outputs do not embed a geometry and will raise.
93126
94127 Returns:
95128 Tuple of (coord, number, mass):
96129 coord: np.ndarray (n, 3) in Angstroms
97130 number: np.ndarray (n,) atomic numbers
98131 mass: np.ndarray (n,) atomic masses in amu
99132 """
100- coord , number , mass = [], [], []
101-
102133 with open (self .path , 'r' ) as f :
103134 lines = f .readlines ()
104135
105- # Try V2000 format (from geometry optimization output)
136+ coord , number , mass = self ._parse_v2000 (lines )
137+ if not coord :
138+ coord , number , mass = self ._parse_turbomol_coord (lines )
139+
140+ if not coord :
141+ raise LogError (f'Could not find geometry in { self .path } ' )
142+
143+ return (np .array (coord , dtype = np .float64 ),
144+ np .array (number , dtype = int ),
145+ np .array (mass , dtype = np .float64 ))
146+
147+ @staticmethod
148+ def _parse_v2000 (lines ):
149+ """Parse V2000 (SDF/Molfile) geometry block. Returns (coord, number, mass) or empty lists."""
150+ coord , number , mass = [], [], []
106151 for i , line in enumerate (lines ):
107152 if 'V2000' in line :
108- # Parse the counts line: "n_atoms n_bonds 0 ..."
109- parts = line .split ()
110- n_atoms = int (parts [0 ])
153+ n_atoms = int (line .split ()[0 ])
111154 coord , number , mass = [], [], []
112155 for j in range (i + 1 , i + 1 + n_atoms ):
113156 tokens = lines [j ].split ()
@@ -117,51 +160,52 @@ def load_geometry(self):
117160 mass_i , num_i = get_element_mass (symbol )
118161 number .append (num_i )
119162 mass .append (mass_i )
163+ return coord , number , mass
120164
121- if not coord :
122- # Fall back: parse from the "Calculation Setup" section
123- # xTB prints "ID Z sym. atoms" followed by element list,
124- # and coordinates may come from the input SDF parsed at the start.
125- # Try parsing the Cartesian coordinate block if present
126- for i , line in enumerate (lines ):
127- if 'final xyz coordinates' in line .lower () or 'cartesian coordinates' in line .lower ():
128- coord , number , mass = [], [], []
129- for j in range (i + 1 , len (lines )):
130- tokens = lines [j ].split ()
131- if len (tokens ) < 4 :
132- break
133- try :
134- symbol = tokens [0 ]
135- x , y , z = float (tokens [1 ]), float (tokens [2 ]), float (tokens [3 ])
136- coord .append ([x , y , z ])
137- mass_i , num_i = get_element_mass (symbol )
138- number .append (num_i )
139- mass .append (mass_i )
140- except (ValueError , KeyError ):
141- break
142-
143- if not coord :
144- raise LogError (f'Could not find geometry in { self .path } ' )
145-
146- return np .array (coord , dtype = np .float64 ), np .array (number , dtype = int ), np .array (mass , dtype = np .float64 )
165+ @staticmethod
166+ def _parse_turbomol_coord (lines ):
167+ """Parse Turbomol $coord block (Bohr). Converts to Angstroms."""
168+ coord , number , mass = [], [], []
169+ in_coord = False
170+ for line in lines :
171+ stripped = line .strip ()
172+ if stripped == '$coord' :
173+ in_coord = True
174+ coord , number , mass = [], [], []
175+ continue
176+ if in_coord :
177+ if stripped .startswith ('$' ):
178+ break
179+ tokens = stripped .split ()
180+ if len (tokens ) >= 4 :
181+ try :
182+ x = float (tokens [0 ]) * BOHR_TO_ANGSTROM
183+ y = float (tokens [1 ]) * BOHR_TO_ANGSTROM
184+ z = float (tokens [2 ]) * BOHR_TO_ANGSTROM
185+ symbol = _normalize_symbol (tokens [3 ])
186+ coord .append ([x , y , z ])
187+ mass_i , num_i = get_element_mass (symbol )
188+ number .append (num_i )
189+ mass .append (mass_i )
190+ except (ValueError , KeyError ):
191+ break
192+ return coord , number , mass
147193
148194 def load_energy (self , zpe_scale_factor = 1. ):
149195 """
150196 Load the electronic energy in J/mol from the xTB output.
151197
152- Returns the last " total energy" value found in the SUMMARY block.
153- The zero-point energy is NOT included.
198+ Returns the last `` total energy`` value found. The zero-point energy
199+ is NOT included.
154200 """
155201 e_elect = None
156202 with open (self .path , 'r' ) as f :
157203 for line in f :
158204 if ':: total energy' in line :
159- # Format: ":: total energy -9.907945789900 Eh ::"
160205 match = re .search (r'(-?\d+\.\d+)\s+Eh' , line )
161206 if match :
162207 e_elect = float (match .group (1 ))
163208 elif 'TOTAL ENERGY' in line and 'Eh' in line :
164- # Format: "| TOTAL ENERGY -9.907945789911 Eh |"
165209 match = re .search (r'(-?\d+\.\d+)\s+Eh' , line )
166210 if match :
167211 e_elect = float (match .group (1 ))
@@ -172,28 +216,26 @@ def load_energy(self, zpe_scale_factor=1.):
172216 def load_zero_point_energy (self ):
173217 """
174218 Load the zero-point energy in J/mol from the xTB output.
175- Only available in frequency (--hess) calculations.
219+ Only available in frequency (`` --hess`` ) calculations.
176220 """
177221 zpe = None
178222 with open (self .path , 'r' ) as f :
179223 for line in f :
180- if ':: zero point energy' in line :
181- match = re .search (r'(-? \d+\.\d+)\s+Eh' , line )
224+ if ':: zero point energy' in line or 'zero-point vibrational energy' in line . lower () :
225+ match = re .search (r'(\d+\.\d+)\s+Eh' , line )
182226 if match :
183227 zpe = float (match .group (1 ))
184228 if zpe is None :
185229 raise LogError (f'Unable to find zero-point energy in xTB output file { self .path } ' )
186230 return zpe * constants .E_h * constants .Na
187231
188- def _load_frequencies (self ):
232+ def _load_all_frequencies (self ):
189233 """
190- Load vibrational frequencies from the xTB output.
234+ Load ALL vibrational frequencies from the last eigval block in the xTB output,
235+ including near-zero (translational/rotational) and negative (imaginary) modes.
191236
192237 xTB prints frequencies twice (after Hessian and in Frequency Printout).
193- We take only the last complete set.
194-
195- Returns:
196- List of positive (real) frequencies in cm^-1.
238+ Returns the last complete set.
197239 """
198240 all_blocks = []
199241 current_block = []
@@ -207,33 +249,47 @@ def _load_frequencies(self):
207249 current_block = []
208250 if current_block :
209251 all_blocks .append (current_block )
252+ return all_blocks [- 1 ] if all_blocks else []
210253
211- if not all_blocks :
212- return []
213-
214- # Use the last frequency block
215- frequencies = all_blocks [- 1 ]
216- # Filter out translational/rotational modes (near-zero) and imaginary (negative)
217- return [f for f in frequencies if f > 0.1 ]
254+ def _load_frequencies (self ):
255+ """
256+ Load positive (real) vibrational frequencies in cm^-1.
257+ Filters out translational/rotational modes (near-zero) and imaginary (negative).
258+ """
259+ return [f for f in self ._load_all_frequencies () if f > 0.1 ]
218260
219261 def _load_spin_multiplicity (self ):
220262 """
221263 Determine spin multiplicity from the xTB output.
222264
223- xTB reports 'spin' as S (not 2S+1), so multiplicity = 2*S + 1.
265+ xTB reports ``spin`` as S (not 2S+1), so multiplicity = 2*S + 1.
266+ Some xTB versions use ``--uhf N`` where N = number of unpaired electrons.
224267 """
225268 with open (self .path , 'r' ) as f :
226269 for line in f :
227- if 'spin' in line and 'spin' == line .split ()[0 ].strip ():
228- # Format: "spin : 0.5"
229- s = float (line .split ()[- 1 ])
230- return int (2 * s + 1 )
270+ # Format: " spin : 0.5"
271+ if 'spin' in line :
272+ parts = line .split ()
273+ if parts and parts [0 ] == 'spin' :
274+ s = float (parts [- 1 ])
275+ return int (2 * s + 1 )
276+ # Also check for unpaired electrons in setup block
277+ if 'unpaired' in line and ':' in line :
278+ parts = line .split (':' )
279+ try :
280+ n_unpaired = int (parts [- 1 ].strip ())
281+ return n_unpaired + 1
282+ except ValueError :
283+ pass
231284 return 1 # default singlet
232285
233286 def load_conformer (self , symmetry = None , spin_multiplicity = 0 , optical_isomers = None , label = '' ):
234287 """
235288 Load the molecular degree of freedom data from an xTB output file.
236289
290+ Requires geometry to be available in the log file (optimization outputs).
291+ For frequency-only outputs, geometry must be loaded from a separate file.
292+
237293 Returns:
238294 Tuple of (Conformer, unscaled_frequencies).
239295 """
@@ -247,25 +303,23 @@ def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=Non
247303 if spin_multiplicity == 0 :
248304 spin_multiplicity = self ._load_spin_multiplicity ()
249305
250- # Load frequencies
251306 unscaled_frequencies = self ._load_frequencies ()
252307 modes = []
253308
254309 # Translation
255310 coord , number , mass = self .load_geometry ()
256- translation = IdealGasTranslation (mass = (sum (mass ), "amu" ))
257- modes .append (translation )
311+ modes .append (IdealGasTranslation (mass = (sum (mass ), "amu" )))
258312
259- # Rotation
313+ # Rotation — use tolerance for linear molecule detection
260314 symbols = [symbol_by_number [i ] for i in number ]
261- inertia = get_principal_moments_of_inertia (coord , numbers = number , symbols = symbols )
262- inertia = list ( inertia [ 0 ])
263- if inertia and not all ( i == 0.0 for i in inertia ):
264- if any ( i == 0.0 for i in inertia ):
265- inertia . remove ( 0.0 )
266- modes .append (LinearRotor (inertia = (inertia [ 0 ] , "amu*angstrom^2" ), symmetry = symmetry ))
315+ inertia = list ( get_principal_moments_of_inertia (coord , numbers = number , symbols = symbols )[ 0 ] )
316+ if inertia and not all ( abs ( i ) < INERTIA_ZERO_TOL for i in inertia ):
317+ nonzero = [ i for i in inertia if abs ( i ) >= INERTIA_ZERO_TOL ]
318+ if len ( nonzero ) < len ( inertia ):
319+ # Linear molecule: one or more moments are ~zero
320+ modes .append (LinearRotor (inertia = (max ( nonzero ) , "amu*angstrom^2" ), symmetry = symmetry ))
267321 else :
268- modes .append (NonlinearRotor (inertia = (inertia , "amu*angstrom^2" ), symmetry = symmetry ))
322+ modes .append (NonlinearRotor (inertia = (nonzero , "amu*angstrom^2" ), symmetry = symmetry ))
269323
270324 # Vibration
271325 if unscaled_frequencies :
@@ -277,24 +331,8 @@ def load_conformer(self, symmetry=None, spin_multiplicity=0, optical_isomers=Non
277331 optical_isomers = optical_isomers ), unscaled_frequencies
278332
279333 def load_negative_frequency (self ):
280- """
281- Load the imaginary (negative) frequency in cm^-1 for a transition state.
282- """
283- # Reuse _load_frequencies logic to get the last block, but include negatives
284- all_blocks = []
285- current_block = []
286- with open (self .path , 'r' ) as f :
287- for line in f :
288- if line .strip ().startswith ('eigval :' ):
289- values = line .split (':' )[1 ].split ()
290- current_block .extend (float (v ) for v in values )
291- elif current_block and not line .strip ().startswith ('eigval' ):
292- all_blocks .append (current_block )
293- current_block = []
294- if current_block :
295- all_blocks .append (current_block )
296- frequencies = all_blocks [- 1 ] if all_blocks else []
297- neg_freqs = [f for f in frequencies if f < - 0.1 ]
334+ """Load the first imaginary (negative) frequency in cm^-1 for a transition state."""
335+ neg_freqs = [f for f in self ._load_all_frequencies () if f < - 0.1 ]
298336 if not neg_freqs :
299337 raise LogError (f'No imaginary frequencies found in { self .path } ' )
300338 return neg_freqs [0 ]
0 commit comments