Source code for stko._internal.optimizers.macromodel

import gzip
import logging
import re
import subprocess as sp
import time
from pathlib import Path
from uuid import uuid4

import rdkit.Chem.AllChem as rdkit  # noqa: N813
import stk

from stko._internal.internal_types import MoleculeT
from stko._internal.optimizers.optimizers import Optimizer
from stko._internal.optimizers.utilities import (
    MAEExtractor,
    mol_from_mae_file,
    move_generated_macromodel_files,
)
from stko._internal.utilities.exceptions import (
    ConversionError,
    ForceFieldError,
    InputError,
    LewisStructureError,
    OptimizerError,
    PathError,
)

logger = logging.getLogger(__name__)


class MacroModel(Optimizer):
    """Base class for MacroModel optimzers.

    Parameters:
        macromodel_path:
            The full path of the Schrodinger suite within the user's
            machine. For example, on a Linux machine this may be
            something like ``'/opt/schrodinger2017-2'``.

        output_dir:
            The name of the directory into which files generated during
            the optimization are written, if ``None`` then
            :func:`uuid.uuid4` is used.

        timeout:
            The amount in seconds the optimization is allowed to run
            before being terminated. ``None`` means there is no
            timeout.

        force_field:
            The number of the force field to be used.
            Force field arguments can be the following::

            +-------------------------+
            |  1  | MM2               |
            +-------------------------+
            |  2  | MM3               |
            +-------------------------+
            |  3  | AMBER             |
            +-------------------------+
            |  4  | AMBER94           |
            +-------------------------+
            |  5  | OPLSA             |
            +-------------------------+
            |  10 | MMFF94 and MMFF94s|
            +-------------------------+
            |  14 | OPLS_2005         |
            +-------------------------+
            |  16 | OPLS3e            |
            +-------------------------+

        maximum_iterations:
            The maximum number of iterations done during the
            optimization. Cannot be more than ``999999``.

        minimum_gradient:
            The gradient at which optimization is stopped.
            Cannot be less than ``0.0001``.


    """

    def __init__(  # noqa: PLR0913
        self,
        macromodel_path: Path | str,
        output_dir: Path | str | None,
        timeout: float | None,
        force_field: int,
        maximum_iterations: int,
        minimum_gradient: float,
    ) -> None:
        self._macromodel_path = Path(macromodel_path)
        self._output_dir = None if output_dir is None else Path(output_dir)
        self._timeout = timeout
        self._force_field = force_field
        self._maximum_iterations = maximum_iterations
        self._minimum_gradient = minimum_gradient
        super().__init__()

    def _run_bmin(self, mol: stk.Molecule, run_name: str) -> None:
        """Run an optimization using bmin.

        Parameters:
            mol:
                The molecule being optimized.

            run_name:
                The name of the run. The files generated by this run
                will have this name.

        Raises:
            :class:`OptimizationError`:
                if the optimization failed for some unspecified.

            :class:`ForceFieldError`:
                if the force field could not be used with the molecule.

            :class:`LewisStructureError`:
                if Lewis structure of the molecule had issues.

            :class:`PathError`:
                if an invalid MacroModel path is being used.

        """
        logger.info('Running bmin on "%s".', mol)

        # To run MacroModel a command is issued to the console via
        # ``subprocess.Popen``. The command is the full path of the
        # ``bmin`` program. ``bmin`` is located in the Schrodinger
        # installation folder.
        log_file = Path(f"{run_name}.log")
        opt_app = self._macromodel_path / "bmin"
        # The first member of the list is the command, the following
        # ones are any additional arguments.

        opt_cmd = [str(opt_app), run_name, "-WAIT", "-LOCAL"]

        incomplete = True
        while incomplete:
            process = sp.Popen(  # noqa: S603
                opt_cmd,
                stdout=sp.PIPE,
                stderr=sp.STDOUT,
                universal_newlines=True,
            )
            try:
                output, _ = process.communicate(timeout=self._timeout)

            except sp.TimeoutExpired:
                logger.warning(
                    "Minimization took too long and was terminated "
                    'by force on "%s".',
                    mol,
                )
                self._kill_bmin(run_name)
                output = ""

            logger.debug('Output of bmin on "%s" was: %s.', mol, output)

            with log_file.open() as log:
                log_content = log.read()

            # Check the log for error reports.
            error1 = "termination due to error condition           21-"
            if error1 in log_content:
                msg = "Macromodel: bmin crashed. See log file."
                raise OptimizerError(msg)

            error2 = "FATAL do_nosort_typing: NO MATCH found for atom "
            if error2 in log_content:
                msg = "Macromodel: The log implies the force field failed."
                raise ForceFieldError(msg)

            error3 = (
                "FATAL gen_lewis_structure() -> None: "
                "could not find best Lewis structure"
            )
            error4 = (
                "skipping input structure  "
                "due to forcefield interaction errors"
            )
            if error3 in log_content and error4 in log_content:
                msg = "Macromodel: bmin failed due to poor Lewis structure."
                raise LewisStructureError(msg)

            if "MDYN error encountered" in log_content:
                msg = "Macromodel: MD error during optimization."
                raise OptimizerError(msg)

            # If optimization fails because a wrong Schrodinger path
            # was given, raise.
            if "The system cannot find the path specified" in output:
                msg = "Macromodel: Invalid Schrodinger path given to bmin."
                raise PathError(msg)

            # If optimization fails because the license is not found,
            # rerun the function.
            if self._license_found(run_name, output, mol):
                incomplete = False

        # Make sure the .maegz file created by the optimization is
        # present.
        maegz = Path(f"{run_name}-out.maegz")
        self._wait_for_file(maegz)
        if not log_file.exists() or not maegz.exists() or log_content == "":
            msg = (
                "Macromodel: The .log and/or .maegz files were not created"
                " correctly."
            )
            raise OptimizerError(msg)

    def _kill_bmin(self, run_name: str) -> None:
        """Kill bmin.

        Parameters:
            mol:
                The molecule being optimized.

            run_name:
                The name of the run. The files generated by this run will
                have this name.

        """
        name = re.split(r"\\|/", run_name)[-1]
        app = self._macromodel_path / "jobcontrol"
        cmd = [str(app), "-stop", name]

        incomplete = True
        while incomplete:
            out = sp.run(  # noqa: S603
                cmd,
                stdout=sp.PIPE,
                stderr=sp.STDOUT,
                text=True,
                check=False,
            )

            # Keep re-running the function until license is found.
            if self._license_found(run_name, out.stdout):
                incomplete = False

        # This loop causes the function to wait until the job has been
        # killed via job control. This means the output files will have
        # been written by the time the function exits. Essentially the
        # loop continues until the job is no longer found by
        # "./jobcontrol -list"
        cmd = [str(app), "-list"]
        output = name
        start = time.time()
        while name in output:
            output = sp.run(  # noqa: S603
                cmd,
                stdout=sp.PIPE,
                stderr=sp.STDOUT,
                text=True,
                check=False,
            ).stdout
            if time.time() - start > 600:  # noqa: PLR2004
                break

    def _license_found(
        self,
        run_name: str,
        output: str,
        mol: stk.Molecule | None = None,
    ) -> bool:
        """Check to see if minimization failed due to a missing license.

        The user can be notified of this in one of two ways. Sometimes
        the output of the submission contains the message informing
        that the license was not found and in other cases it will be
        the log file. This function checks both of these sources for
        this message.

        Parameters:
            run_name:
                The name of the run. The files generated by this run will
                have this name.

            output:
                The output from submitting the minimization of the
                structure to bmin.

            mol:
                The molecule being optimized. If the ``.log`` file is not
                to be checked, the default ``None`` should be used.

        Returns:
            ``True`` if the license was found. ``False`` if the
            minimization did not occur due to a missing license.

        """
        if "Could not check out a license for mmlibs" in output:
            return False
        if mol is None:
            return True

        # To check if the log file mentions a missing license file open
        # the log file and scan for the appropriate string.

        # Check if the file exists first. If not, this is often means
        # the calculation must be redone so return False anyway.
        with Path(f"{run_name}.log").open() as f:
            log_file = f.read()

        return "Could not check out a license for mmlibs" not in log_file

    @staticmethod
    def _get_com_line(  # noqa: PLR0913
        arg1: str,
        arg2: int,
        arg3: int,
        arg4: int,
        arg5: int,
        arg6: float,
        arg7: float,
        arg8: float,
        arg9: int,
    ) -> str:
        return (
            f" {arg1:<5}{arg2:>7}{arg3:>7}"
            f"{arg4:>7}{arg5:>7}{arg6:>11.4f}"
            f"{arg7:>11.4f}{arg8:>11.4f}{arg9:>11.4f}"
        )

    def _run_structconvert(self, input_path: Path, output_path: Path) -> None:
        """Use structconvert to change file type.

        Parameters:
            input_path:
                The path to the input file.

            output_path:
                The path to the output file.

        """
        convrt_app = self._macromodel_path / "utilities" / "structconvert"
        convrt_cmd = [str(convrt_app), str(input_path), str(output_path)]

        incomplete = True
        while incomplete:
            # Execute the file conversion.
            try:
                convrt_return = sp.run(  # noqa: S603
                    convrt_cmd,
                    stdout=sp.PIPE,
                    stderr=sp.STDOUT,
                    text=True,
                    check=False,
                )

            # If conversion fails because a wrong Schrodinger path was
            # given, raise.
            except FileNotFoundError as error:
                msg = (
                    "Macromodel: Wrong Schrodinger path supplied to "
                    "structconvert."
                )
                raise PathError(msg) from error

            if "File does not exist" in convrt_return.stdout:
                msg = (
                    f"Macromodel: structconvert input file, {input_path}, "
                    f"missing. Console output was "
                    f"{convrt_return.stdout}"
                )
                raise ConversionError(msg)

            # Keep re-running the function until license is found.
            if self._license_found(
                str(input_path.parent / input_path.stem), convrt_return.stdout
            ):
                incomplete = False

        # If force field failed, raise.
        if "number 1" in convrt_return.stdout:
            msg = f"Macromodel: {convrt_return.stdout}"
            raise ForceFieldError(msg)

        self._wait_for_file(output_path)
        if not output_path.exists():
            msg = (
                f"Macromodel: Conversion output file {output_path} was "
                "not found."
                f" Console output was {convrt_return.stdout}."
            )
            raise ConversionError(msg)

    def _wait_for_file(self, path: Path, timeout: float = 10) -> None:
        """Wait until a given file exists or `timeout` expires.

        Parameters:
            path:
                The full path of the file which should be waited for.

            timeout:
                The number of seconds before the function stops waiting and
                returns.

        """
        t_start = time.time()
        tick = 0
        while True:
            time_taken = time.time() - t_start
            if divmod(time_taken, 5)[0] == tick + 1:
                logger.warning('Waiting for "%s".', path)
                tick += 1

            if path.exists() or time_taken > timeout:
                break

    def _convert_maegz_to_mae(self, run_name: str) -> None:
        """Convert a ``.maegz`` file to a ``.mae`` file.

        Parameters:
            run_name:
                The name of the run. The files generated by this run will
                have this name.

        """
        with (
            gzip.open(f"{run_name}-out.maegz") as gz_file,
            Path(f"{run_name}.mae").open("wb") as f,
        ):
            f.write(gz_file.read())


[docs] class MacroModelForceField(MacroModel): """Uses MacroModel force fields to optimize molecules. Parameters: macromodel_path: The full path of the Schrodinger suite within the user's machine. For example, on a Linux machine this may be something like ``'/opt/schrodinger2017-2'``. output_dir: The name of the directory into which files generated during the optimization are written, if ``None`` then :func:`uuid.uuid4` is used. restricted: If ``True`` then an optimization is performed only on bonds created during the `ConstructedMolecule` creation. All building block bonds will be fixed. If ``False`` then all bonds are optimized. timeout: The amount in seconds the optimization is allowed to run before being terminated. ``None`` means there is no timeout. force_field: The number of the force field to be used. Force field arguments can be the following: +-------------------------+ | 1 | MM2 | +-------------------------+ | 2 | MM3 | +-------------------------+ | 3 | AMBER | +-------------------------+ | 4 | AMBER94 | +-------------------------+ | 5 | OPLSA | +-------------------------+ | 10 | MMFF94 and MMFF94s| +-------------------------+ | 14 | OPLS_2005 | +-------------------------+ | 16 | OPLS3e | +-------------------------+ maximum_iterations: The maximum number of iterations done during the optimization. Cannot be more than ``999999``. minimum_gradient: The gradient at which optimization is stopped. Cannot be less than ``0.0001``. Examples: Optimisation of any :class:`stk.Molecule` is possible with `restricted=False`. .. code-block:: python import stk import stko mol = stk.BuildingBlock('NCCCN') optimizer = stko.MacroModelForceField( macromodel_path='/path/to/macromodel/', ) mol = optimizer.optimize(mol) Optimisation of `long bonds` only within :class:`stk.ConstructedMolecule` is possible with `restricted=True`. Generally, this means only bonds created during the construction process will be optimized, and those belonging to building blocks will be fixed. If the molecule is not a `ConstructedMolecule`, no positions will be optimized. .. code-block:: python import stk import stko bb1 = stk.BuildingBlock('NCCNCCN', [stk.PrimaryAminoFactory()]) bb2 = stk.BuildingBlock('O=CCCC=O', [stk.AldehydeFactory()]) polymer = stk.ConstructedMolecule( stk.polymer.Linear( building_blocks=(bb1, bb2), repeating_unit="AB", orientations=[0, 0], num_repeating_units=1 ) ) optimizer = stko.MacroModelForceField( macromodel_path='/path/to/macromodel/', restricted=True, ) polymer = optimizer.optimize(polymer) """ def __init__( # noqa: PLR0913 self, macromodel_path: Path | str, output_dir: str | None = None, restricted: bool = False, timeout: float | None = None, force_field: int = 16, maximum_iterations: int = 2500, minimum_gradient: float = 0.05, ) -> None: self._check_params( minimum_gradient=minimum_gradient, maximum_iterations=maximum_iterations, ) self._restricted = restricted super().__init__( macromodel_path=macromodel_path, output_dir=output_dir, force_field=force_field, maximum_iterations=maximum_iterations, minimum_gradient=minimum_gradient, timeout=timeout, ) @staticmethod def _check_params( minimum_gradient: float, maximum_iterations: int ) -> None: """Check if the optimization parameters are valid for MacroModel. Parameters: minimum_gradient: The gradient at which optimization is stopped. Cannot be less than ``0.0001``. maximum_iterations: The maximum number of iterations done during the optimization. Cannot be more than ``999999``. Raises: :class:`.InputError`: If the parameters cannot be converted into a valid ``.com`` file entry. """ if minimum_gradient < 0.0001: # noqa: PLR2004 msg = "Macromodel: Convergence gradient (< 0.0001) is too small." raise InputError(msg) if maximum_iterations > 999999: # noqa: PLR2004 msg = "Macromodel: Number of iterations (> 999999) is too high." raise InputError(msg) def _generate_com(self, mol: stk.Molecule, run_name: str) -> None: """Create a ``.com`` file for a MacroModel optimization. The created ``.com`` file fixes all bond parameters which were not added by :meth:`~.Topology.construct`. This means all bond distances, bond angles and torsional angles are fixed, except for cases where it involves a bond added by :meth:`.Topology.construct`. This fixing is implemented by creating a ``.com`` file with various "FX" commands written within its body. Parameters: mol: The molecule which is to be optimized. run_name: The name of the run. The files generated by this run will have this name. """ logger.debug('Creating .com file for "%s".', mol) # This is the body of the ``.com`` file. The line that begins # and ends with exclamation lines is replaced with the various # commands that fix bond distances and angles. line1 = ("FFLD", self._force_field, 1, 0, 0, 1, 0, 0, 0) line2 = ("BGIN", 0, 0, 0, 0, 0, 0, 0, 0) line3 = ("READ", 0, 0, 0, 0, 0, 0, 0, 0) line4 = ("CONV", 2, 0, 0, 0, self._minimum_gradient, 0, 0, 0) line5 = ("MINI", 1, 0, self._maximum_iterations, 0, 0, 0, 0, 0) line6 = ("END", 0, 1, 0, 0, 0, 0, 0, 0) com_block = "\n".join( [ self._get_com_line(*line1), self._get_com_line(*line2), self._get_com_line(*line3), "!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!", self._get_com_line(*line4), self._get_com_line(*line5), self._get_com_line(*line6), ] ) # If `restricted` is ``False`` do not add a fix block. if not self._restricted: com_block = com_block.replace( "!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!\n", "" ) else: # This function adds all the lines which fix bond distances # and angles into com_block. com_block = self._fix_params(mol, com_block) # Writes the .com file. with Path(f"{run_name}.com").open("w") as com: # The first line holds the .mae file containing the # molecule to be optimized. com.write(f"{run_name}.mae\n") # The second line holds the name of the output file of the # optimization. com.write(f"{run_name}-out.maegz\n") # Next is the body of the .com file. com.write(com_block)
[docs] def optimize(self, mol: MoleculeT) -> MoleculeT: run_name = str(uuid4().int) output_dir = ( Path(run_name) if self._output_dir is None else self._output_dir ) mol_path = Path(f"{run_name}.mol") mae_path = Path(f"{run_name}.mae") # First write a .mol file of the molecule. mol.write(mol_path) # MacroModel requires a ``.mae`` file as input. self._run_structconvert(mol_path, mae_path) # generate the ``.com`` file for the MacroModel run. self._generate_com(mol, run_name) # Run the optimization. self._run_bmin(mol, run_name) # Get the ``.maegz`` optimization output to a ``.mae``. self._convert_maegz_to_mae(run_name) rdkit_opt_mol = mol_from_mae_file(mae_path) mol = mol.with_position_matrix( rdkit_opt_mol.GetConformer().GetPositions() ) move_generated_macromodel_files(run_name, output_dir) return mol
def _fix_params(self, mol: stk.Molecule, com: str) -> str: """Fix bond distances and angles in ``.com`` file. For each bond distance, bond angle and torsional angle that does not involve a bond created by :meth:`~.Topology.construct`, a "FX" command is added to the body of the ``.com`` file. These lines replace the filler line in the main string. Parameters: mol: The molecule which is to be optimized. com: The body of the ``.com`` file which is to have fix commands added. Returns: A string holding the body of the ``.com`` file with instructions to fix the various bond distances and angles as described in the docstring. """ fix_block = "" # Add lines that fix the bond distance. fix_block = self._fix_distances(mol, fix_block) # Add lines that fix the bond angles. fix_block = self._fix_bond_angles(mol, fix_block) # Add lines that fix the torsional angles. fix_block = self._fix_torsional_angles(mol, fix_block) return com.replace( "!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!\n", fix_block ) def _fix_distances(self, mol: stk.Molecule, fix_block: str) -> str: """Add lines fixing bond distances to ``.com`` body. Only bond distances which do not involve bonds created during construction are fixed. Parameters: mol: The molecule to be optimized. fix_block: A string holding fix commands in the ``.com`` file. Returns: A string holding fix commands in the ``.com`` file. Raises: :class:`.InputError`: if molecule is not :class:`stk.ConstructedMolecule`. """ if not isinstance(mol, stk.ConstructedMolecule): msg = ( f"{mol} needs to be a ConstructedMolecule for " f"`restricted`==`True` (it is {self._restricted})" ) raise InputError(msg) # Identify bonds created by ``stk`` by checking if the # ``stk.BuildingBlock`` associated with the bond is ``None``. bonder_ids = { atom_id for bond_info in mol.get_bond_infos() # type: ignore[attr-defined] if bond_info.get_building_block() is None for atom_id in ( bond_info.get_bond().get_atom1().get_id(), bond_info.get_bond().get_atom2().get_id(), ) } # Go through all the bonds in the ``stk.Molecule`` . If an # ``stk.BuildingBlock`` associated with the bond is ``None``, # add a fix line to the ``fix_block``. # If the bond is connected to an atom present in `bonder_ids`, # go to the next bond. This is because a bond between atoms, # whose IDs are present in `bonder_ids`, # was added during construction and should therefore not # be fixed. for bond in mol.get_bonds(): if ( bond.get_atom1().get_id() in bonder_ids and bond.get_atom2().get_id() in bonder_ids ): continue atom1_id = bond.get_atom1().get_id() atom2_id = bond.get_atom2().get_id() # Make sure that the indices are increased by 1 in the .mae # file. atom1_id += 1 atom2_id += 1 args = ("FXDI", atom1_id, atom2_id, 0, 0, 99999, 0, 0, 0) fix_block += self._get_com_line(*args) fix_block += "\n" return fix_block def _fix_bond_angles(self, mol: stk.Molecule, fix_block: str) -> str: """Add lines fixing bond angles to the ``.com`` body. All bond angles of the molecule are fixed. Parameters: mol: The molecule to be optimized. fix_block: A string holding fix commands in the ``.com`` file. Returns: A string holding fix commands in the ``.com`` file. """ paths = rdkit.FindAllPathsOfLengthN( mol=mol.to_rdkit_mol(), length=3, useBonds=False, useHs=True, ) for atom_ids in paths: file_atom_ids = (i + 1 for i in atom_ids) args = ("FXBA", *file_atom_ids, 99999, 0, 0, 0, 0) fix_block += self._get_com_line(*args) fix_block += "\n" return fix_block def _fix_torsional_angles(self, mol: stk.Molecule, fix_block: str) -> str: """Add lines fixing torsional bond angles to the ``.com`` body. All torsional angles of the molecule are fixed. Parameters: mol: The molecule to be optimized. fix_block: A string holding fix commands in the ``.com`` file. Returns: A string holding fix commands in the ``.com`` file. """ paths = rdkit.FindAllPathsOfLengthN( mol=mol.to_rdkit_mol(), length=4, useBonds=False, useHs=True, ) for atom_ids in paths: file_atom_ids = (i + 1 for i in atom_ids) args = ("FXTA", *file_atom_ids, 99999, 361, 0, 0) fix_block += self._get_com_line(*args) fix_block += "\n" return fix_block
[docs] class MacroModelMD(MacroModel): """Runs a molecular dynamics conformer search using MacroModel. Parameters: macromodel_path: The full path of the Schrodinger suite within the user's machine. For example, on a Linux machine this may be something like ``'/opt/schrodinger2017-2'``. output_dir: The name of the directory into which files generated during the optimization are written, if ``None`` then :func:`uuid.uuid4` is used. timeout: The amount in seconds the MD is allowed to run before being terminated. ``None`` means there is no timeout. force_field: The number of the force field to be used. temperature: The temperature in Kelvin at which the MD is run. Cannot be more than ``99999.99``. conformers: The number of conformers sampled and optimized from the MD. Cannot be more than ``9999``. simulation_time: The simulation time in ``ps`` of the MD. Cannot be more than ``999999.99``. time_step: The time step in ``fs`` for the MD. Cannot be more than ``99999.99``. eq_time: The equilibration time in ``ps`` before the MD is run. Cannot be more than ``999999.99``. maximum_iterations: The maximum number of iterations done during the optimization. Cannot be more than ``999999``. minimum_gradient: The gradient at which optimization is stopped. Cannot be less than ``0.0001``. restricted_bonds: A :class:`set` of the form .. code-block:: python restricted_bonds = { frozenset((0, 10)), frozenset((3, 14)), frozenset((5, 6)) } Where each :class:`frozenset` defines which bonds should have a fixed length via the atom ids of atoms in the bond. restricted_bond_angles: A :class:`set` of the form .. code-block:: python restricted_bonds = { frozenset((0, 10, 12)), frozenset((3, 14, 7)), frozenset((5, 8, 2)) } Where each :class:`frozenset` defines which bond angles should have a fixed size via the atom ids of atoms in the bond angle. restricted_torsional_angles: A :class:`set` of the form .. code-block:: python restricted_bonds = { frozenset((0, 10, 12, 3)), frozenset((3, 14, 7, 4)), frozenset((5, 8, 2, 9)) } Where each :class:`frozenset` defines which torsional angles should have a fixed size via the atom ids of atoms in the torsional angle. Examples: Molecular dynamics can be run on any :class:`stk.Molecule` using this class. Restrictions can be applied, but are not by default. This class collects a series of conformers from the trajectory, optimises them, then returns the lowest energy conformer. .. code-block:: python import stk import stko mol = stk.BuildingBlock('NCCCN') optimizer = stko.MacroModelMD( macromodel_path='/path/to/macromodel/', conformers=40, ) mol = optimizer.optimize(mol) """ def __init__( # noqa: PLR0913 self, macromodel_path: str, output_dir: str | None = None, timeout: float | None = None, force_field: int = 16, temperature: float = 750, conformers: int = 50, time_step: float = 1, eq_time: float = 10, simulation_time: float = 200, maximum_iterations: int = 2500, minimum_gradient: float = 0.05, restricted_bonds: set | None = None, restricted_bond_angles: set | None = None, restricted_torsional_angles: set | None = None, ) -> None: if restricted_bonds is None: restricted_bonds = set() if restricted_bond_angles is None: restricted_bond_angles = set() if restricted_torsional_angles is None: restricted_torsional_angles = set() self._check_params( temperature=temperature, conformers=conformers, simulation_time=simulation_time, time_step=time_step, eq_time=eq_time, minimum_gradient=minimum_gradient, maximum_iterations=maximum_iterations, ) self._temperature = temperature self._conformers = conformers self._time_step = time_step self._eq_time = eq_time self._simulation_time = simulation_time self._restricted_bonds = restricted_bonds self._restricted_bond_angles = restricted_bond_angles self._restricted_torsional_angles = restricted_torsional_angles # Negative simulation time is interpreted as times 100 ps. if simulation_time > 99999.99: # noqa: PLR2004 self._sim_time = -simulation_time / 100 else: self._sim_time = simulation_time # Negative equilibration time is interpreted as times 100 ps. if eq_time > 99999.99: # noqa: PLR2004 self._eq_time = -eq_time / 100 else: self._eq_time = eq_time super().__init__( macromodel_path=macromodel_path, output_dir=output_dir, timeout=timeout, force_field=force_field, maximum_iterations=maximum_iterations, minimum_gradient=minimum_gradient, ) @staticmethod def _check_params( # noqa: PLR0913 temperature: float, conformers: int, simulation_time: float, time_step: float, eq_time: float, minimum_gradient: float, maximum_iterations: int, ) -> None: """Check if the optimization parameters are valid for MacroModel. Parameters: temperature: The temperature in Kelvin at which the MD is run. Cannot be more than ``99999.99``. conformers': The number of conformers sampled and optimized from the MD. Cannot be more than ``9999``. simulation_time: The simulation time in ``ps`` of the MD. Cannot be more than ``999999.99``. time_step: The time step in ``fs`` for the MD. Cannot be more than ``99999.99``. eq_time: The equilibriation time in ``ps`` before the MD is run. Cannot be more than ``999999.99``. minimum_gradient: The gradient at which optimization is stopped. Cannot be less than ``0.0001``. maximum_iterations: The maximum number of iterations done during the optimization. Cannot be more than ``999999``. Raises: :class:`.MacroModelInputError`: If the parameters cannot be converted into a valid ``.com`` file entry. """ if temperature > 99999.99: # noqa: PLR2004 msg = "Macromodel: Supplied temperature (> 99999 K) is too high." raise InputError(msg) if conformers > 9999: # noqa: PLR2004 msg = ( "Macromodel: Supplied number of conformers (> 9999) is " "too high." ) raise InputError(msg) if simulation_time > 999999.99: # noqa: PLR2004 msg = ( "Macromodel: Supplied simulation time (> 999999 ps) is " "too long." ) raise InputError(msg) if time_step > 99999.99: # noqa: PLR2004 msg = "Macromodel: Supplied time step (> 99999 fs) is too high." raise InputError(msg) if eq_time > 999999.99: # noqa: PLR2004 msg = "Macromodel: Supplied eq time (> 999999 ps) is too long." raise InputError(msg) if minimum_gradient < 0.0001: # noqa: PLR2004 msg = "Macromodel: Convergence gradient (< 0.0001) is too small." raise InputError(msg) if maximum_iterations > 999999: # noqa: PLR2004 msg = "Macromodel: Number of iterations (> 999999) is too high." raise InputError(msg) def _generate_com(self, mol: stk.Molecule, run_name: str) -> None: """Create a ``.com`` file for a MacroModel optimization. The created ``.com`` file fixes all bond parameters which were not added by :meth:`~.Topology.construct`. This means all bond distances, bond angles and torsional angles are fixed, except for cases where it involves a bond added by :meth:`~.Topology.construct`. This fixing is implemented by creating a ``.com`` file with various "FX" commands written within its body. Parameters: mol: The molecule which is to be optimized. """ logger.debug('Creating .com file for "%s".', mol) # Define some short aliases to keep the following lines neat. temp = self._temperature sim_time = self._sim_time tstep = self._time_step eq_time = self._eq_time line1 = ("FFLD", self._force_field, 1, 0, 0, 1, 0, 0, 0) line2 = ("READ", 0, 0, 0, 0, 0, 0, 0, 0) line3 = ("MDIT", 0, 0, 0, 0, temp, 0, 0, 0) line4 = ("MDYN", 0, 0, 0, 0, tstep, eq_time, temp, 0) line5 = ("MDSA", self._conformers, 0, 0, 0, 0, 0, 1, 0) line6 = ("MDYN", 1, 0, 0, 0, tstep, sim_time, temp, 0) line7 = ("WRIT", 0, 0, 0, 0, 0, 0, 0, 0) line8 = ("RWND", 0, 1, 0, 0, 0, 0, 0, 0) line9 = ("BGIN", 0, 0, 0, 0, 0, 0, 0, 0) line10 = ("READ", -2, 0, 0, 0, 0, 0, 0, 0) line11 = ("CONV", 2, 0, 0, 0, self._minimum_gradient, 0, 0, 0) line12 = ( "MINI", 1, 0, self._maximum_iterations, 0, 0, 0, 0, 0, ) line13 = ("END", 0, 1, 0, 0, 0, 0, 0, 0) com_block = "\n".join( [ self._get_com_line(*line1), self._get_com_line(*line2), "!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!", self._get_com_line(*line3), self._get_com_line(*line4), self._get_com_line(*line5), self._get_com_line(*line6), self._get_com_line(*line7), self._get_com_line(*line8), self._get_com_line(*line9), self._get_com_line(*line10), "!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!", self._get_com_line(*line11), self._get_com_line(*line12), self._get_com_line(*line13), ] ) com_block = self._fix_params(mol, com_block) # Generate the com file containing the info for the run with Path(f"{run_name}.com").open("w") as com: # name of the macromodel file com.write(f"{run_name}.mae\n") # name of the output file com.write(f"{run_name}-out.maegz\n") # details of the macromodel run com.write(com_block)
[docs] def optimize(self, mol: MoleculeT) -> MoleculeT: run_name = str(uuid4().int) output_dir = ( Path(run_name) if self._output_dir is None else self._output_dir ) mol_path = Path(f"{run_name}.mol") # First write a .mol file of the molecule. mol.write(mol_path) # MacroModel requires a ``.mae`` file as input. self._run_structconvert(mol_path, Path(f"{run_name}.mae")) # Generate the ``.com`` file for the MacroModel MD run. self._generate_com(mol, run_name) # Run the optimization. self._run_bmin(mol, run_name) # Extract the lowest energy conformer into its own .mae file. conformer_mae = MAEExtractor(run_name).path rdkit_opt_mol = mol_from_mae_file(conformer_mae) mol = mol.with_position_matrix( rdkit_opt_mol.GetConformer().GetPositions() ) move_generated_macromodel_files(run_name, output_dir) return mol
def _fix_params(self, mol: stk.Molecule, com: str) -> str: """Fix bond distances and angles in ``.com`` file. For each bond distance, bond angle and torsional angle that does not involve a bond created by :meth:`~.Topology.construct`, a "FX" command is added to the body of the ``.com`` file. These lines replace the filler line in the main string. Parameters: mol: The molecule which is to be optimized. com: The body of the ``.com`` file which is to have fix commands added. Returns: A string holding the body of the ``.com`` file with instructions to fix the various bond distances and angles as described in the docstring. """ fix_block = "" # Add lines that fix the bond distance. fix_block = self._fix_distances(mol, fix_block) # Add lines that fix the bond angles. fix_block = self._fix_bond_angles(mol, fix_block) # Add lines that fix the torsional angles. fix_block = self._fix_torsional_angles(mol, fix_block) return com.replace( "!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!\n", fix_block ) def _fix_distances(self, mol: stk.Molecule, fix_block: str) -> str: """Add lines fixing bond distances to ``.com`` body. Parameters: mol: The molecule to be optimized. fix_block: A string holding fix commands in the ``.com`` file. Returns: A string holding fix commands in the ``.com`` file. """ # Go through all the bonds in the rdkit molecule. If the bond # is not between bonder atoms add a fix line to the # ``fix_block``. If the bond does invovle two bonder atoms go # to the next bond. This is because a bond between 2 bonder # atoms was added during construction and should therefore not # be fixed. for bond in mol.get_bonds(): bond_key = frozenset( (bond.get_atom1().get_id(), bond.get_atom2().get_id()) ) atom1_id = bond.get_atom1().get_id() atom2_id = bond.get_atom2().get_id() if bond_key not in self._restricted_bonds: continue # Make sure that the indices are increased by 1 in the .mae # file. atom1_id += 1 atom2_id += 1 args = ("FXDI", atom1_id, atom2_id, 0, 0, 99999, 0, 0, 0) fix_block += self._get_com_line(*args) fix_block += "\n" return fix_block def _fix_bond_angles(self, mol: stk.Molecule, fix_block: str) -> str: """Add lines fixing bond angles to the ``.com`` body. Parameters: mol: The molecule to be optimized. fix_block: A string holding fix commands in the ``.com`` file. Returns: A string holding fix commands in the ``.com`` file. """ paths = rdkit.FindAllPathsOfLengthN( mol=mol.to_rdkit_mol(), length=3, useBonds=False, useHs=True, ) for atom_ids in paths: if frozenset(atom_ids) in self._restricted_bond_angles: file_atom_ids = (i + 1 for i in atom_ids) args = ("FXBA", *file_atom_ids, 99999, 0, 0, 0, 0) fix_block += self._get_com_line(*args) fix_block += "\n" return fix_block def _fix_torsional_angles(self, mol: stk.Molecule, fix_block: str) -> str: """Add lines fixing torsional bond angles to the ``.com`` body. Parameters: mol: The molecule to be optimized. fix_block: A string holding fix commands in the ``.com`` file. Returns: A string holding fix commands in the ``.com`` file. """ paths = rdkit.FindAllPathsOfLengthN( mol=mol.to_rdkit_mol(), length=4, useBonds=False, useHs=True, ) # Apply the fix. for atom_ids in paths: if frozenset(atom_ids) in (self._restricted_torsional_angles): file_atom_ids = [i + 1 for i in atom_ids] args = ("FXTA", *file_atom_ids, 99999, 361, 0, 0) fix_block += self._get_com_line(*args) fix_block += "\n" return fix_block