Source code for stko._internal.calculators.orca_calculators

import copy
import logging
import os
import shutil
import subprocess as sp
import uuid
from collections import abc
from pathlib import Path

import stk

from stko._internal.calculators.results.orca_results import OrcaResults
from stko._internal.utilities.exceptions import OptimizerError, PathError

logger = logging.getLogger(__name__)



[docs]
class OrcaEnergy:
    """Uses Orca to calculate energy and other properties.

    By default, :meth:`get_results` will extract other properties of
    the :class:`stk.Molecule` passed to :meth:`calculate`, which
    will be saved in the attributes of :class:`stko.OrcaResults`.

    All intermediate and output files from Orca are deleted at the end
    of the job (i.e. the ``.gbw`` file will be deleted) because they can
    quickly build up to large sizes. The `discard_output` option allows
    you to keep output files if desired. Additionally, the
    `write_input_only` option is available for jobs where you would
    like more customization or to run outside of the Python
    environment.

    See Also:
        * Orca: https://orcaforum.kofo.mpg.de/app.php/portal

    Parameters:
        orca_path:
            The path to the Orca executable.

        topline:
            Top line designating the type of calculation. Should start
            with ``!``.

        basename:
            Base name of Orca output files.

        output_dir:
            The name of the directory into which files generated during
            the calculation are written, if ``None`` then
            :func:`uuid.uuid4` is used.

        num_cores:
            The number of cores Orca should use.

        charge:
            Formal molecular charge.

        multiplicity:
            Multiplicity of system (2S+1), where S is the spin.

        write_input_only:
            ``True`` if you only want the input file written and to not
            have the Orca job run.

        discard_output:
            ``True`` if you want to delete auxiliary Orca output files
            such as the ``.gbw`` file.

    Notes:
        When running :meth:`calculate`, this calculator changes the
        present working directory with :func:`os.chdir`. The original
        working directory will be restored even if an error is raised, so
        unless multi-threading is being used this implementation detail
        should not matter.

        If multi-threading is being used an error could occur if two
        different threads need to know about the current working directory
        as :class:`stko.OrcaEnergy` can change it from under them.

        Note that this does not have any impact on multi-processing,
        which should always be safe.

    Examples:
        .. code-block:: python

            import stk
            import stko

            bb1 = stk.BuildingBlock('NCCNCCN', [stk.PrimaryAminoFactory()])
            bb2 = stk.BuildingBlock('O=CCCC=O', [stk.AldehydeFactory()])
            polymer = stk.ConstructedMolecule(
                stk.polymer.Linear(
                    building_blocks=(bb1, bb2),
                    repeating_unit="AB",
                    orientations=[0, 0],
                    num_repeating_units=1
                )
            )

            # Optimize the constructed molecule so that it has a
            # reasonable structure.
            opt = stko.UFF()
            polymer = opt.optimize(polymer)

            # Calculate energy using Orca.
            orca = stko.OrcaEnergy(
                orca_path='/opt/orca/orca',
                topline='! SP B97-3c',
            )

            orca_results = orca.get_results(polymer)

            # Extract properties from the energy calculator for a given
            # molecule.
            total_energy = orca_results.get_total_energy()

        If you want the input file written (instead of the job run), you
        can use the `write_input_only` argument to save the input file
        in the `output_dir` as `orca_input.inp` with the input xyz file as
        `input_structure.xyz`.

        .. code-block:: python

            # Optimize the constructed molecule so that it has a
            # reasonable structure.
            optimizer = stko.ETKDG()
            polymer = optimizer.optimize(polymer)

            # Calculate energy using Orca.
            orca = stko.OrcaEnergy(
                orca_path='/opt/orca/orca',
                topline='! SP B97-3c',
                write_input_only=True,
            )

            orca.get_results(polymer)


    """

    def __init__(  # noqa: PLR0913
        self,
        orca_path: Path | str,
        topline: str,
        basename: str | None = None,
        output_dir: Path | str | None = None,
        num_cores: int = 1,
        charge: int = 0,
        multiplicity: int = 1,
        write_input_only: bool = False,
        discard_output: bool = True,
    ) -> None:
        orca_path = Path(orca_path)
        self._check_path(orca_path)
        self._orca_path = orca_path
        if basename is None:
            self._basename = f"_{uuid.uuid4().int!s}"
        else:
            self._basename = basename
        self._output_dir = None if output_dir is None else Path(output_dir)
        self._topline = topline
        self._num_cores = str(num_cores)
        self._charge = str(charge)
        self._multiplicity = multiplicity
        self._write_input_only = write_input_only
        self._discard_output = discard_output

    def _check_path(self, path: Path) -> None:
        if not path.exists():
            msg = f"Orca not found at {path}"
            raise PathError(msg)
        if path.is_dir():
            msg = f"{path} is a directory"
            raise PathError(msg)

    def _write_input_file(self, path: Path, xyz_file: Path) -> None:
        # Write top line and base name.
        string = f'{self._topline}\n\n%base "{self._basename}"\n'

        # Add multiprocessing section.
        string += (
            f"%pal\n   nprocs {self._num_cores}\nend\n\n"
            f"%scf\n   MaxIter 2000\nend\n\n"
        )
        # Add geometry section.
        string += f"* xyzfile {self._charge} {self._multiplicity} {xyz_file}\n"

        path.write_text(string)

    def _check_outcome(self, out_file: Path) -> None:
        if not out_file.exists():
            msg = (
                f"ORCA: {out_file} does not exist, suggesting the job did "
                "not run."
            )
            raise OptimizerError(msg)

        with out_file.open() as f:
            lines = f.readlines()
            if "****ORCA TERMINATED NORMALLY****" not in lines[-2]:
                msg = "ORCA: Orca job did not terminate normally."
                raise OptimizerError(msg)

        tmp_files = list(Path().glob(f"{self._basename}*tmp"))
        if len(tmp_files) > 0:
            msg = (
                "ORCA: tmp files exist, suggesting the job did not complete "
                "or did not converge."
            )
            raise OptimizerError(msg)

    def _clean_up(self) -> None:
        for to_del in Path().glob(f"{self._basename}*"):
            to_del.unlink()

    def _run_orca(
        self,
        xyz_file: Path,
        input_file: Path,
        out_file: Path,
        init_dir: Path,
        output_dir: Path,
    ) -> None:
        """Runs Orca.

        Parameters:
            xyz_file:
                The name of the input structure ``.xyz`` file.

            input_file:
                The name of input file to be written.

            out_file:
                The name of output file with Orca results.

            init_dir:
                The name of the current working directory.

            output_dir:
                The name of the directory into which files generated during
                the calculation are written.

        """
        cmd = f"{self._orca_path} {input_file}"

        try:
            os.chdir(output_dir)
            self._write_input_file(input_file, xyz_file)
            if not self._write_input_only:
                with out_file.open("w") as f:
                    # Note that sp.call will hold the program until
                    # completion of the calculation.
                    sp.call(  # noqa: S602
                        cmd,
                        stdin=sp.PIPE,
                        stdout=f,
                        stderr=sp.PIPE,
                        # Shell is required to run complex arguments.
                        shell=True,
                    )
                self._check_outcome(out_file)
                if self._discard_output:
                    self._clean_up()
        finally:
            os.chdir(init_dir)


[docs]
    def calculate(self, mol: stk.Molecule) -> abc.Generator:
        """Run the Orca job for `mol` in a freshly created directory.

        The output directory is the one given at initialisation or, if
        none was given, a new randomly named directory under the current
        working directory. An existing directory at that location is
        deleted first.

        Parameters:
            mol:
                The :class:`stk.Molecule` to write out and run Orca on.

        Yields:
            ``None``, once the job (or, in input-only mode, the writing
            of the input file) has finished.

        """
        configured_dir = self._output_dir
        output_dir = (
            Path(str(uuid.uuid4().int)).resolve()
            if configured_dir is None
            else configured_dir.resolve()
        )

        # Always start from an empty directory.
        if output_dir.exists():
            shutil.rmtree(output_dir)
        output_dir.mkdir(parents=True)

        # Remember where we started; running Orca changes directory.
        init_dir = Path.cwd()

        xyz_file = output_dir / "input_structure.xyz"
        mol.write(xyz_file)
        self._run_orca(
            xyz_file=xyz_file,
            input_file=output_dir / "orca_input.inp",
            out_file=output_dir / "orca_energy.output",
            init_dir=init_dir,
            output_dir=output_dir,
        )
        yield



[docs]
    def get_results(self, mol: stk.Molecule) -> OrcaResults | None:
        """Calculate the Orca properties of `mol`.

        Parameters:
            mol:
                The :class:`stk.Molecule` whose energy is to be calculated.

        Returns:
            The properties, with units, from Orca calculations or ``None``
            if ``write_input_only`` mode.

        """
        if self._write_input_only:
            # Only the input files are wanted; there is nothing to parse.
            next(self.calculate(mol))
            return None

        if self._output_dir is not None:
            calculator = self
            output_dir = self._output_dir.resolve()
        else:
            # With no configured directory, :meth:`calculate` draws a
            # new random directory on every call. Choose it here and pin
            # it on a shallow copy, so the job writes to the same
            # location the results are read from, without changing this
            # instance.
            output_dir = Path(str(uuid.uuid4().int)).resolve()
            calculator = copy.copy(self)
            calculator._output_dir = output_dir  # noqa: SLF001

        return OrcaResults(
            generator=calculator.calculate(mol),
            output_file=output_dir / "orca_energy.output",
        )



[docs]
    def get_energy(self, mol: stk.Molecule) -> float | None:
        """Calculate the energy of `mol`.

        Parameters:
            mol:
                The :class:`stk.Molecule` whose energy is to be calculated.

        Returns:
            The energy or ``None`` if ``write_input_only`` mode.

        """
        results = self.get_results(mol)
        # Only the first element of the total-energy result is returned
        # (presumably the value, the rest being units -- confirm against
        # OrcaResults).
        return None if results is None else results.get_total_energy()[0]