# Source code for stko._internal.calculators.orca_calculators

import logging
import os
import shutil
import subprocess as sp
import uuid
from collections import abc
from pathlib import Path

import stk

from stko._internal.calculators.results.orca_results import OrcaResults
from stko._internal.utilities.exceptions import OptimizerError, PathError

logger = logging.getLogger(__name__)


class OrcaEnergy:
    """Uses Orca to calculate energy and other properties.

    By default, :meth:`get_results` will extract other properties of the
    :class:`stk.Molecule` passed to :meth:`calculate`, which will be
    saved in the attributes of :class:`stko.OrcaResults`.

    All intermediate and output files from Orca are deleted at the end
    of the job (i.e. the ``.gbw`` file will be deleted) because they can
    quickly build up to large sizes. The `discard_output` option allows
    you to keep output files if desired.

    Additionally, the `write_input_only` option is available for jobs
    where you would like more customization or to run outside of the
    Python environment.

    See Also:
        * Orca: https://orcaforum.kofo.mpg.de/app.php/portal

    Parameters:
        orca_path:
            The path to the Orca executable.

        topline:
            Top line designating the type of calculation. Should start
            with ``!``.

        basename:
            Base name of Orca output files.

        output_dir:
            The name of the directory into which files generated during
            the calculation are written, if ``None`` then
            :func:`uuid.uuid4` is used.

        num_cores:
            The number of cores Orca should use.

        charge:
            Formal molecular charge.

        multiplicity:
            Multiplicity of system (2S+1), where S is the spin.

        write_input_only:
            ``True`` if you only want the input file written and to not
            have the Orca job run.

        discard_output:
            ``True`` if you want to delete auxiliary Orca output files
            such as the ``.gbw`` file.

    Notes:
        When running :meth:`calculate`, this calculator changes the
        present working directory with :func:`os.chdir`. The original
        working directory will be restored even if an error is raised,
        so unless multi-threading is being used this implementation
        detail should not matter.

        If multi-threading is being used an error could occur if two
        different threads need to know about the current working
        directory as :class:`stko.OrcaEnergy` can change it from under
        them.

        Note that this does not have any impact on multi-processing,
        which should always be safe.

        If the output directory already exists it is deleted and
        recreated at the start of every calculation.

    Examples:
        .. code-block:: python

            import stk
            import stko

            bb1 = stk.BuildingBlock('NCCNCCN', [stk.PrimaryAminoFactory()])
            bb2 = stk.BuildingBlock('O=CCCC=O', [stk.AldehydeFactory()])
            polymer = stk.ConstructedMolecule(
                stk.polymer.Linear(
                    building_blocks=(bb1, bb2),
                    repeating_unit="AB",
                    orientations=[0, 0],
                    num_repeating_units=1
                )
            )

            # Optimize the constructed molecule so that it has a
            # reasonable structure.
            opt = stko.UFF()
            polymer = opt.optimize(polymer)

            # Calculate energy using Orca.
            orca = stko.OrcaEnergy(
                orca_path='/opt/orca/orca',
                topline='! SP B97-3c',
            )

            orca_results = orca.get_results(polymer)

            # Extract properties from the energy calculator for a given
            # molecule.
            total_energy = orca_results.get_total_energy()

        If you want the input file written (instead of the job run),
        you can use the `write_input_only` argument to save the input
        file in the `output_dir` as `orca_input.inp` with the input xyz
        file as `input_structure.xyz`.

        .. code-block:: python

            # Optimize the constructed molecule so that it has a
            # reasonable structure.
            optimizer = stko.ETKDG()
            polymer = optimizer.optimize(polymer)

            # Calculate energy using Orca.
            orca = stko.OrcaEnergy(
                orca_path='/opt/orca/orca',
                topline='! SP B97-3c',
                write_input_only=True,
            )

            orca.get_results(polymer)

    """

    def __init__(  # noqa: PLR0913
        self,
        orca_path: Path | str,
        topline: str,
        basename: str | None = None,
        output_dir: Path | str | None = None,
        num_cores: int = 1,
        charge: int = 0,
        multiplicity: int = 1,
        write_input_only: bool = False,
        discard_output: bool = True,
    ) -> None:
        orca_path = Path(orca_path)
        self._check_path(orca_path)
        self._orca_path = orca_path
        if basename is None:
            # A random base name avoids clashes between concurrent jobs.
            self._basename = f"_{uuid.uuid4().int!s}"
        else:
            self._basename = basename
        self._output_dir = None if output_dir is None else Path(output_dir)
        self._topline = topline
        self._num_cores = str(num_cores)
        self._charge = str(charge)
        self._multiplicity = multiplicity
        self._write_input_only = write_input_only
        self._discard_output = discard_output

    def _check_path(self, path: Path) -> None:
        """Raise :class:`PathError` unless `path` is an existing non-directory."""
        if not path.exists():
            msg = f"Orca not found at {path}"
            raise PathError(msg)
        if path.is_dir():
            msg = f"{path} is a directory"
            raise PathError(msg)

    def _write_input_file(self, path: Path, xyz_file: Path) -> None:
        """Write the Orca input file to `path`.

        Parameters:
            path:
                Where the input file is written.

            xyz_file:
                The structure file referenced by the geometry line.

        """
        # Write top line and base name.
        string = f'{self._topline}\n\n%base "{self._basename}"\n'
        # Add multiprocessing section.
        string += (
            f"%pal\n nprocs {self._num_cores}\nend\n\n"
            f"%scf\n MaxIter 2000\nend\n\n"
        )
        # Add geometry section.
        string += f"* xyzfile {self._charge} {self._multiplicity} {xyz_file}\n"
        path.write_text(string)

    def _check_outcome(self, out_file: Path) -> None:
        """Check that the Orca job finished cleanly.

        Must be called while the working directory is the output
        directory, because leftover ``tmp`` files are searched for
        relative to the current working directory.

        Parameters:
            out_file:
                The file holding the captured Orca standard output.

        Raises:
            :class:`OptimizerError`: If `out_file` is missing, if its
                second-to-last line lacks the normal-termination marker
                (including when the file is too short to have one), or
                if ``tmp`` files with this job's base name remain.

        """
        if not out_file.exists():
            msg = (
                f"ORCA: {out_file} does not exist, suggesting the job did "
                "not run."
            )
            raise OptimizerError(msg)

        with out_file.open() as f:
            lines = f.readlines()

        # The termination marker is expected on the second-to-last line.
        # A file with fewer than two lines cannot contain it, so treat
        # that as an abnormal termination instead of an IndexError.
        min_lines = 2
        if (
            len(lines) < min_lines
            or "****ORCA TERMINATED NORMALLY****" not in lines[-2]
        ):
            msg = "ORCA: Orca job did not terminate normally."
            raise OptimizerError(msg)

        tmp_files = list(Path().glob(f"{self._basename}*tmp"))
        if tmp_files:
            msg = (
                "ORCA: tmp files exist, suggesting the job did not complete "
                "or did not converge."
            )
            raise OptimizerError(msg)

    def _clean_up(self) -> None:
        """Delete files in the working directory starting with the base name."""
        for to_del in Path().glob(f"{self._basename}*"):
            to_del.unlink()

    def _run_orca(
        self,
        xyz_file: Path,
        input_file: Path,
        out_file: Path,
        init_dir: Path,
        output_dir: Path,
    ) -> None:
        """Runs Orca.

        Parameters:
            xyz_file:
                The name of the input structure ``.xyz`` file.

            input_file:
                The name of input file to be written.

            out_file:
                The name of output file with Orca results.

            init_dir:
                The name of the current working directory.

            output_dir:
                The name of the directory into which files generated
                during the calculation are written.

        Raises:
            :class:`OptimizerError`: If Orca cannot be launched or the
                job does not finish cleanly.

        """
        # An argument list (no shell) keeps paths that contain spaces or
        # shell metacharacters intact.
        cmd = [str(self._orca_path), str(input_file)]

        try:
            os.chdir(output_dir)
            self._write_input_file(input_file, xyz_file)
            if not self._write_input_only:
                with out_file.open("w") as f:
                    try:
                        # Note that sp.call will hold the program until
                        # completion of the calculation. DEVNULL is used
                        # rather than PIPE because unread pipes can fill
                        # up and block the child process.
                        sp.call(  # noqa: S603
                            cmd,
                            stdin=sp.DEVNULL,
                            stdout=f,
                            stderr=sp.DEVNULL,
                        )
                    except OSError as err:
                        msg = f"ORCA: could not launch {self._orca_path}."
                        raise OptimizerError(msg) from err
                self._check_outcome(out_file)
                if self._discard_output:
                    self._clean_up()
        finally:
            # Always restore the caller's working directory.
            os.chdir(init_dir)

    def _resolve_output_dir(self) -> Path:
        """Return the absolute output directory for one calculation.

        A new randomly named directory under the current working
        directory is chosen when no `output_dir` was configured.
        """
        if self._output_dir is None:
            return Path(str(uuid.uuid4().int)).resolve()
        return self._output_dir.resolve()

    def _calculate_in_dir(
        self,
        mol: stk.Molecule,
        output_dir: Path,
    ) -> abc.Generator:
        """Run the calculation for `mol` inside `output_dir`, then yield.

        Any existing `output_dir` is removed first so that stale files
        cannot be mistaken for results of this run.
        """
        if output_dir.exists():
            shutil.rmtree(output_dir)
        output_dir.mkdir(parents=True)

        init_dir = Path.cwd()
        xyz_file = output_dir / "input_structure.xyz"
        input_file = output_dir / "orca_input.inp"
        out_file = output_dir / "orca_energy.output"
        mol.write(xyz_file)
        self._run_orca(
            xyz_file=xyz_file,
            input_file=input_file,
            out_file=out_file,
            init_dir=init_dir,
            output_dir=output_dir,
        )
        yield

    def calculate(self, mol: stk.Molecule) -> abc.Generator:
        """Run the Orca job for `mol`.

        Nothing is executed until the returned generator is advanced.

        Parameters:
            mol:
                The :class:`stk.Molecule` to run the calculation on.

        Yields:
            ``None``, once, after the job (or, in ``write_input_only``
            mode, the input writing) has finished.

        """
        yield from self._calculate_in_dir(mol, self._resolve_output_dir())

    def get_results(self, mol: stk.Molecule) -> OrcaResults | None:
        """Calculate the Orca properties of `mol`.

        Parameters:
            mol:
                The :class:`stk.Molecule` whose energy is to be
                calculated.

        Returns:
            The properties, with units, from Orca calculations or
            ``None`` if ``write_input_only`` mode.

        """
        # Resolve the directory once so that the job runs in the same
        # directory the results are read from, even when a random
        # directory name is used.
        output_dir = self._resolve_output_dir()
        out_file = output_dir / "orca_energy.output"
        generator = self._calculate_in_dir(mol, output_dir)

        if self._write_input_only:
            next(generator)
            return None

        return OrcaResults(
            generator=generator,
            output_file=out_file,
        )

    def get_energy(self, mol: stk.Molecule) -> float | None:
        """Calculate the energy of `mol`.

        Parameters:
            mol:
                The :class:`stk.Molecule` whose energy is to be
                calculated.

        Returns:
            The energy or ``None`` if ``write_input_only`` mode.

        """
        results = self.get_results(mol)
        if results is None:
            return None
        return results.get_total_energy()[0]