Source code for stko._internal.calculators.extractors.xtb_extractor
import re
from pathlib import Path
from stko._internal.calculators.extractors.utilities import check_line
[docs]
class XTBExtractor:
"""Extracts properties from xTB output files.
All formatting based on the 190418 version of xTB.
Parameters:
output_file:
Output file to extract properties from.
Attributes:
output_file:
Output file to extract properties from.
output_lines:
List of all lines in as string in the output file.
total_energy:
The total energy in the :attr:`output_file`. The energy is
in units of a.u..
homo_lumo_gap:
The HOMO-LUMO gap in the :attr:`output_file`. The gap is
in units of eV.
fermi_level:
The Fermi level in the :attr:`output_file` in units of eV.
qonly_dipole_moment:
Components of the Q only dipole moment in units
of Debye in List of the form ``[x, y, z]``.
full_dipole_moment:
Components of the full dipole moment in units
of Debye in List of the form
``[x, y, z, total]``.
qonly_quadrupole_moment:
Components of the Q only traceless quadrupole moment in units
of Debye in List of the form
``[xx, xy, xy, xz, yz, zz]``.
qdip_quadrupole_moment:
Components of the Q+Dip traceless quadrupole moment in units of
Debye in List of the form
``[xx, xy, xy, xz, yz, zz]``.
full_quadrupole_moment:
Components of the full traceless quadrupole moment in units of
Debye in List of the form
``[xx, xy, xy, xz, yz, zz]``.
homo_lumo_occ:
Dictionary of List containing the orbital number,
energy in eV and occupation of the HOMO and LUMO orbitals in
the :attr:`output_file`.
total_free_energy:
The total free energy in the :attr:`output_file`.
The free energy is in units of a.u. and calculated at 298.15K.
frequencies:
List of the vibrational frequencies in the :attr:`output_file`.
Vibrational frequencies are in units of wavenumber and
calculated at 298.15K.
ionisation_potential:
The vertical ionisation potential in the :attr:`output_file`.
Corresponds to the delta SCC IP.
electron_affinity:
The vertical electron affinity in the :attr:`output_file`.
Corresponds to the delta SCC EA.
total_sasa:
The solvent-accessible surface area of the molecule from xtb.
Examples:
.. code-block:: python
import stko
data = stko.XTBExtractor(output_file)
total_energy = data.total_energy
homo_lumo_gap = data.homo_lumo_gap
"""
def __init__(self, output_file: Path | str) -> None:
self.output_file = Path(output_file)
# Explictly set encoding to UTF-8 because default encoding on
# Windows will fail to read the file otherwise.
with self.output_file.open(encoding="UTF-8") as f:
self.output_lines = f.readlines()
self._extract_values()
def _extract_values(self) -> None: # noqa: C901
for i, line in enumerate(self.output_lines):
if check_line(line, "total_energy", self._properties_dict()):
self._extract_total_energy(line)
elif check_line(line, "homo_lumo_gap", self._properties_dict()):
self._extract_homo_lumo_gap(line)
elif check_line(line, "fermi_level", self._properties_dict()):
self._extract_fermi_level(line)
elif check_line(line, "dipole_moment", self._properties_dict()):
self._extract_qonly_dipole_moment(i)
self._extract_full_dipole_moment(i)
elif check_line(
line, "quadrupole_moment", self._properties_dict()
):
self._extract_qonly_quadrupole_moment(i)
self._extract_qdip_quadrupole_moment(i)
self._extract_full_quadrupole_moment(i)
elif check_line(
line, "homo_lumo_occ_HOMO", self._properties_dict()
):
self.homo_lumo_occ: dict[str, list[float]] = {}
self._extract_homo_lumo_occ(line, "HOMO")
elif check_line(
line, "homo_lumo_occ_LUMO", self._properties_dict()
):
self._extract_homo_lumo_occ(line, "LUMO")
elif check_line(
line, "total_free_energy", self._properties_dict()
):
self._extract_total_free_energy(line)
elif check_line(
line, "ionisation_potential", self._properties_dict()
):
self._extract_ionisation_potential(line)
elif check_line(
line, "electron_affinity", self._properties_dict()
):
self._extract_electron_affinity(line)
elif check_line(line, "total_sasa", self._properties_dict()):
self._extract_total_sasa(line)
# Frequency formatting requires loop through full file.
self._extract_frequencies()
def _properties_dict(self) -> dict[str, str]:
return {
"total_energy": " | TOTAL ENERGY ",
"homo_lumo_gap": " | HOMO-LUMO GAP ",
"fermi_level": " Fermi-level ",
"dipole_moment": "molecular dipole:",
"quadrupole_moment": "molecular quadrupole (traceless):",
"homo_lumo_occ_HOMO": "(HOMO)",
"homo_lumo_occ_LUMO": "(LUMO)",
"total_free_energy": " | TOTAL FREE ENERGY ",
"ionisation_potential": "delta SCC IP (eV)",
"electron_affinity": "delta SCC EA (eV)",
"total_sasa": "total SASA /",
}
def _extract_total_energy(self, line: str) -> None:
"""Updates :attr:`total_energy`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
string = nums.search(line.rstrip())
self.total_energy = float(string.group(0)) # type: ignore[union-attr]
def _extract_homo_lumo_gap(self, line: str) -> None:
"""Updates :attr:`homo_lumo_gap`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
string = nums.search(line.rstrip())
self.homo_lumo_gap = float(string.group(0)) # type: ignore[union-attr]
def _extract_fermi_level(self, line: str) -> None:
"""Updates :attr:`fermi_level`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
part2 = line.split("Eh")
string = nums.search(part2[1].rstrip())
self.fermi_level = float(string.group(0)) # type: ignore[union-attr]
def _extract_qonly_dipole_moment(self, index: int) -> None:
"""Updates :attr:`qonly_dipole_moment`.
Parameters:
index:
Index of line in :attr:`output_lines`.
"""
sample_set = self.output_lines[index + 2].rstrip()
if "q only:" in sample_set:
self.qonly_dipole_moment = [
float(i) for i in sample_set.split(":")[1].split(" ") if i
]
def _extract_full_dipole_moment(self, index: int) -> None:
"""Updates :attr:`full_dipole_moment`.
Parameters:
index:
Index of line in :attr:`output_lines`.
"""
sample_set = self.output_lines[index + 3].rstrip()
if "full:" in sample_set:
self.full_dipole_moment = [
float(i) for i in sample_set.split(":")[1].split(" ") if i
]
def _extract_qonly_quadrupole_moment(self, index: int) -> None:
"""Updates :attr:`qonly_quadrupole_moment`.
Parameters:
index:
Index of line in :attr:`output_lines`.
"""
sample_set = self.output_lines[index + 2].rstrip()
if "q only:" in sample_set:
self.qonly_quadrupole_moment = [
float(i) for i in sample_set.split(":")[1].split(" ") if i
]
def _extract_qdip_quadrupole_moment(self, index: int) -> None:
"""Updates :attr:`qdip_quadrupole_moment`.
Parameters:
index:
Index of line in :attr:`output_lines`.
"""
sample_set = self.output_lines[index + 3].rstrip()
if "q+dip:" in sample_set:
self.qdip_quadrupole_moment = [
float(i) for i in sample_set.split(":")[1].split(" ") if i
]
def _extract_full_quadrupole_moment(self, index: int) -> None:
"""Updates :attr:`full_quadrupole_moment`.
Parameters:
index:
Index of line in :attr:`output_lines`.
"""
sample_set = self.output_lines[index + 4].rstrip()
if "full:" in sample_set:
self.full_quadrupole_moment = [
float(i) for i in sample_set.split(":")[1].split(" ") if i
]
def _extract_homo_lumo_occ(self, line: str, orbital: str) -> None:
"""Updates :attr:`homo_lumo_occ`.
Parameters:
line:
Line of output file to extract property from.
orbital:
Can be 'HOMO' or 'LUMO'.
"""
if orbital == "HOMO":
split_line = [i for i in line.rstrip().split(" ") if i]
# The line is: Number, occupation, energy (Ha), energy (ev), label
# Extract: Number, occupation, energy (eV)
orbital_val = [
int(split_line[0]),
float(split_line[1]),
float(split_line[3]),
]
elif orbital == "LUMO":
split_line = [i for i in line.rstrip().split(" ") if i]
# The line is: Number, energy (Ha), energy (ev), label
# Extract: Number, occupation (zero), energy (eV)
orbital_val = [int(split_line[0]), 0, float(split_line[2])]
self.homo_lumo_occ[orbital] = orbital_val
def _extract_total_free_energy(self, line: str) -> None:
"""Updates :attr:`total_free_energy`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
string = nums.search(line.rstrip())
self.total_free_energy = float(
string.group(0) # type: ignore[union-attr]
)
def _extract_frequencies(self) -> None:
"""Updates :attr:`frequencies`."""
test = "| Frequency Printout |"
# Use a switch to make sure we are extracting values after the
# final property readout.
switch = False
frequencies = []
for _, line in enumerate(self.output_lines):
if test in line:
# Turn on reading as final frequency printout has
# begun.
switch = True
if " reduced masses (amu)" in line:
# Turn off reading as frequency section is done.
switch = False
if "eigval :" in line and switch is True:
samp = line.rstrip().split(":")[1].split(" ")
split_line = [i for i in samp if i]
frequencies.extend(split_line)
self.frequencies = [float(i) for i in frequencies]
def _extract_ionisation_potential(self, line: str) -> None:
"""Updates :attr:`ionisation_potential`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
string = nums.search(line.rstrip())
self.ionisation_potential = float(
string.group(0) # type: ignore[union-attr]
)
def _extract_electron_affinity(self, line: str) -> None:
"""Updates :attr:`electron_affinity`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
string = nums.search(line.rstrip())
self.electron_affinity = float(
string.group(0) # type: ignore[union-attr]
)
def _extract_total_sasa(self, line: str) -> None:
"""Updates :attr:`total_sasa`.
Parameters:
line:
Line of output file to extract property from.
"""
# Use regex to match to numbers.
nums = re.compile(r"[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?")
string = nums.search(line.rstrip())
self.total_sasa = float(string.group(0)) # type: ignore[union-attr]