import gzip
import logging
import re
import subprocess as sp
import time
from pathlib import Path
from uuid import uuid4
import rdkit.Chem.AllChem as rdkit # noqa: N813
import stk
from stko._internal.internal_types import MoleculeT
from stko._internal.optimizers.optimizers import Optimizer
from stko._internal.optimizers.utilities import (
MAEExtractor,
mol_from_mae_file,
move_generated_macromodel_files,
)
from stko._internal.utilities.exceptions import (
ConversionError,
ForceFieldError,
InputError,
LewisStructureError,
OptimizerError,
PathError,
)
logger = logging.getLogger(__name__)
class MacroModel(Optimizer):
"""Base class for MacroModel optimzers.
Parameters:
macromodel_path:
The full path of the Schrodinger suite within the user's
machine. For example, on a Linux machine this may be
something like ``'/opt/schrodinger2017-2'``.
output_dir:
The name of the directory into which files generated during
the optimization are written, if ``None`` then
:func:`uuid.uuid4` is used.
timeout:
The amount in seconds the optimization is allowed to run
before being terminated. ``None`` means there is no
timeout.
force_field:
The number of the force field to be used.
Force field arguments can be the following::
+-------------------------+
| 1 | MM2 |
+-------------------------+
| 2 | MM3 |
+-------------------------+
| 3 | AMBER |
+-------------------------+
| 4 | AMBER94 |
+-------------------------+
| 5 | OPLSA |
+-------------------------+
| 10 | MMFF94 and MMFF94s|
+-------------------------+
| 14 | OPLS_2005 |
+-------------------------+
| 16 | OPLS3e |
+-------------------------+
maximum_iterations:
The maximum number of iterations done during the
optimization. Cannot be more than ``999999``.
minimum_gradient:
The gradient at which optimization is stopped.
Cannot be less than ``0.0001``.
"""
def __init__( # noqa: PLR0913
self,
macromodel_path: Path | str,
output_dir: Path | str | None,
timeout: float | None,
force_field: int,
maximum_iterations: int,
minimum_gradient: float,
) -> None:
self._macromodel_path = Path(macromodel_path)
self._output_dir = None if output_dir is None else Path(output_dir)
self._timeout = timeout
self._force_field = force_field
self._maximum_iterations = maximum_iterations
self._minimum_gradient = minimum_gradient
super().__init__()
def _run_bmin(self, mol: stk.Molecule, run_name: str) -> None:
"""Run an optimization using bmin.
Parameters:
mol:
The molecule being optimized.
run_name:
The name of the run. The files generated by this run
will have this name.
Raises:
:class:`OptimizationError`:
if the optimization failed for some unspecified.
:class:`ForceFieldError`:
if the force field could not be used with the molecule.
:class:`LewisStructureError`:
if Lewis structure of the molecule had issues.
:class:`PathError`:
if an invalid MacroModel path is being used.
"""
logger.info('Running bmin on "%s".', mol)
# To run MacroModel a command is issued to the console via
# ``subprocess.Popen``. The command is the full path of the
# ``bmin`` program. ``bmin`` is located in the Schrodinger
# installation folder.
log_file = Path(f"{run_name}.log")
opt_app = self._macromodel_path / "bmin"
# The first member of the list is the command, the following
# ones are any additional arguments.
opt_cmd = [str(opt_app), run_name, "-WAIT", "-LOCAL"]
incomplete = True
while incomplete:
process = sp.Popen( # noqa: S603
opt_cmd,
stdout=sp.PIPE,
stderr=sp.STDOUT,
universal_newlines=True,
)
try:
output, _ = process.communicate(timeout=self._timeout)
except sp.TimeoutExpired:
logger.warning(
"Minimization took too long and was terminated "
'by force on "%s".',
mol,
)
self._kill_bmin(run_name)
output = ""
logger.debug('Output of bmin on "%s" was: %s.', mol, output)
with log_file.open() as log:
log_content = log.read()
# Check the log for error reports.
error1 = "termination due to error condition 21-"
if error1 in log_content:
msg = "Macromodel: bmin crashed. See log file."
raise OptimizerError(msg)
error2 = "FATAL do_nosort_typing: NO MATCH found for atom "
if error2 in log_content:
msg = "Macromodel: The log implies the force field failed."
raise ForceFieldError(msg)
error3 = (
"FATAL gen_lewis_structure() -> None: "
"could not find best Lewis structure"
)
error4 = (
"skipping input structure "
"due to forcefield interaction errors"
)
if error3 in log_content and error4 in log_content:
msg = "Macromodel: bmin failed due to poor Lewis structure."
raise LewisStructureError(msg)
if "MDYN error encountered" in log_content:
msg = "Macromodel: MD error during optimization."
raise OptimizerError(msg)
# If optimization fails because a wrong Schrodinger path
# was given, raise.
if "The system cannot find the path specified" in output:
msg = "Macromodel: Invalid Schrodinger path given to bmin."
raise PathError(msg)
# If optimization fails because the license is not found,
# rerun the function.
if self._license_found(run_name, output, mol):
incomplete = False
# Make sure the .maegz file created by the optimization is
# present.
maegz = Path(f"{run_name}-out.maegz")
self._wait_for_file(maegz)
if not log_file.exists() or not maegz.exists() or log_content == "":
msg = (
"Macromodel: The .log and/or .maegz files were not created"
" correctly."
)
raise OptimizerError(msg)
def _kill_bmin(self, run_name: str) -> None:
"""Kill bmin.
Parameters:
mol:
The molecule being optimized.
run_name:
The name of the run. The files generated by this run will
have this name.
"""
name = re.split(r"\\|/", run_name)[-1]
app = self._macromodel_path / "jobcontrol"
cmd = [str(app), "-stop", name]
incomplete = True
while incomplete:
out = sp.run( # noqa: S603
cmd,
stdout=sp.PIPE,
stderr=sp.STDOUT,
text=True,
check=False,
)
# Keep re-running the function until license is found.
if self._license_found(run_name, out.stdout):
incomplete = False
# This loop causes the function to wait until the job has been
# killed via job control. This means the output files will have
# been written by the time the function exits. Essentially the
# loop continues until the job is no longer found by
# "./jobcontrol -list"
cmd = [str(app), "-list"]
output = name
start = time.time()
while name in output:
output = sp.run( # noqa: S603
cmd,
stdout=sp.PIPE,
stderr=sp.STDOUT,
text=True,
check=False,
).stdout
if time.time() - start > 600: # noqa: PLR2004
break
def _license_found(
self,
run_name: str,
output: str,
mol: stk.Molecule | None = None,
) -> bool:
"""Check to see if minimization failed due to a missing license.
The user can be notified of this in one of two ways. Sometimes
the output of the submission contains the message informing
that the license was not found and in other cases it will be
the log file. This function checks both of these sources for
this message.
Parameters:
run_name:
The name of the run. The files generated by this run will
have this name.
output:
The output from submitting the minimization of the
structure to bmin.
mol:
The molecule being optimized. If the ``.log`` file is not
to be checked, the default ``None`` should be used.
Returns:
``True`` if the license was found. ``False`` if the
minimization did not occur due to a missing license.
"""
if "Could not check out a license for mmlibs" in output:
return False
if mol is None:
return True
# To check if the log file mentions a missing license file open
# the log file and scan for the appropriate string.
# Check if the file exists first. If not, this is often means
# the calculation must be redone so return False anyway.
with Path(f"{run_name}.log").open() as f:
log_file = f.read()
return "Could not check out a license for mmlibs" not in log_file
@staticmethod
def _get_com_line( # noqa: PLR0913
arg1: str,
arg2: int,
arg3: int,
arg4: int,
arg5: int,
arg6: float,
arg7: float,
arg8: float,
arg9: int,
) -> str:
return (
f" {arg1:<5}{arg2:>7}{arg3:>7}"
f"{arg4:>7}{arg5:>7}{arg6:>11.4f}"
f"{arg7:>11.4f}{arg8:>11.4f}{arg9:>11.4f}"
)
def _run_structconvert(self, input_path: Path, output_path: Path) -> None:
"""Use structconvert to change file type.
Parameters:
input_path:
The path to the input file.
output_path:
The path to the output file.
"""
convrt_app = self._macromodel_path / "utilities" / "structconvert"
convrt_cmd = [str(convrt_app), str(input_path), str(output_path)]
incomplete = True
while incomplete:
# Execute the file conversion.
try:
convrt_return = sp.run( # noqa: S603
convrt_cmd,
stdout=sp.PIPE,
stderr=sp.STDOUT,
text=True,
check=False,
)
# If conversion fails because a wrong Schrodinger path was
# given, raise.
except FileNotFoundError as error:
msg = (
"Macromodel: Wrong Schrodinger path supplied to "
"structconvert."
)
raise PathError(msg) from error
if "File does not exist" in convrt_return.stdout:
msg = (
f"Macromodel: structconvert input file, {input_path}, "
f"missing. Console output was "
f"{convrt_return.stdout}"
)
raise ConversionError(msg)
# Keep re-running the function until license is found.
if self._license_found(
str(input_path.parent / input_path.stem), convrt_return.stdout
):
incomplete = False
# If force field failed, raise.
if "number 1" in convrt_return.stdout:
msg = f"Macromodel: {convrt_return.stdout}"
raise ForceFieldError(msg)
self._wait_for_file(output_path)
if not output_path.exists():
msg = (
f"Macromodel: Conversion output file {output_path} was "
"not found."
f" Console output was {convrt_return.stdout}."
)
raise ConversionError(msg)
def _wait_for_file(self, path: Path, timeout: float = 10) -> None:
"""Wait until a given file exists or `timeout` expires.
Parameters:
path:
The full path of the file which should be waited for.
timeout:
The number of seconds before the function stops waiting and
returns.
"""
t_start = time.time()
tick = 0
while True:
time_taken = time.time() - t_start
if divmod(time_taken, 5)[0] == tick + 1:
logger.warning('Waiting for "%s".', path)
tick += 1
if path.exists() or time_taken > timeout:
break
def _convert_maegz_to_mae(self, run_name: str) -> None:
"""Convert a ``.maegz`` file to a ``.mae`` file.
Parameters:
run_name:
The name of the run. The files generated by this run will
have this name.
"""
with (
gzip.open(f"{run_name}-out.maegz") as gz_file,
Path(f"{run_name}.mae").open("wb") as f,
):
f.write(gz_file.read())
[docs]
class MacroModelForceField(MacroModel):
"""Uses MacroModel force fields to optimize molecules.
Parameters:
macromodel_path:
The full path of the Schrodinger suite within the user's
machine. For example, on a Linux machine this may be
something like ``'/opt/schrodinger2017-2'``.
output_dir:
The name of the directory into which files generated during
the optimization are written, if ``None`` then
:func:`uuid.uuid4` is used.
restricted:
If ``True`` then an optimization is performed only on bonds
created during the `ConstructedMolecule`
creation.
All building block bonds will be fixed.
If ``False`` then all bonds are optimized.
timeout:
The amount in seconds the optimization is allowed to run
before being terminated. ``None`` means there is no
timeout.
force_field:
The number of the force field to be used.
Force field arguments can be the following:
+-------------------------+
| 1 | MM2 |
+-------------------------+
| 2 | MM3 |
+-------------------------+
| 3 | AMBER |
+-------------------------+
| 4 | AMBER94 |
+-------------------------+
| 5 | OPLSA |
+-------------------------+
| 10 | MMFF94 and MMFF94s|
+-------------------------+
| 14 | OPLS_2005 |
+-------------------------+
| 16 | OPLS3e |
+-------------------------+
maximum_iterations:
The maximum number of iterations done during the
optimization. Cannot be more than ``999999``.
minimum_gradient:
The gradient at which optimization is stopped.
Cannot be less than ``0.0001``.
Examples:
Optimisation of any :class:`stk.Molecule` is possible with
`restricted=False`.
.. code-block:: python
import stk
import stko
mol = stk.BuildingBlock('NCCCN')
optimizer = stko.MacroModelForceField(
macromodel_path='/path/to/macromodel/',
)
mol = optimizer.optimize(mol)
Optimisation of `long bonds` only within
:class:`stk.ConstructedMolecule` is possible with
`restricted=True`.
Generally, this means only bonds created during the construction
process will be optimized,
and those belonging to building blocks will be fixed.
If the molecule is not a `ConstructedMolecule`, no positions will
be optimized.
.. code-block:: python
import stk
import stko
bb1 = stk.BuildingBlock('NCCNCCN', [stk.PrimaryAminoFactory()])
bb2 = stk.BuildingBlock('O=CCCC=O', [stk.AldehydeFactory()])
polymer = stk.ConstructedMolecule(
stk.polymer.Linear(
building_blocks=(bb1, bb2),
repeating_unit="AB",
orientations=[0, 0],
num_repeating_units=1
)
)
optimizer = stko.MacroModelForceField(
macromodel_path='/path/to/macromodel/',
restricted=True,
)
polymer = optimizer.optimize(polymer)
"""
def __init__( # noqa: PLR0913
self,
macromodel_path: Path | str,
output_dir: str | None = None,
restricted: bool = False,
timeout: float | None = None,
force_field: int = 16,
maximum_iterations: int = 2500,
minimum_gradient: float = 0.05,
) -> None:
self._check_params(
minimum_gradient=minimum_gradient,
maximum_iterations=maximum_iterations,
)
self._restricted = restricted
super().__init__(
macromodel_path=macromodel_path,
output_dir=output_dir,
force_field=force_field,
maximum_iterations=maximum_iterations,
minimum_gradient=minimum_gradient,
timeout=timeout,
)
@staticmethod
def _check_params(
minimum_gradient: float, maximum_iterations: int
) -> None:
"""Check if the optimization parameters are valid for MacroModel.
Parameters:
minimum_gradient:
The gradient at which optimization is stopped.
Cannot be less than ``0.0001``.
maximum_iterations:
The maximum number of iterations done during the
optimization. Cannot be more than ``999999``.
Raises:
:class:`.InputError`:
If the parameters cannot be converted into a valid ``.com``
file entry.
"""
if minimum_gradient < 0.0001: # noqa: PLR2004
msg = "Macromodel: Convergence gradient (< 0.0001) is too small."
raise InputError(msg)
if maximum_iterations > 999999: # noqa: PLR2004
msg = "Macromodel: Number of iterations (> 999999) is too high."
raise InputError(msg)
def _generate_com(self, mol: stk.Molecule, run_name: str) -> None:
"""Create a ``.com`` file for a MacroModel optimization.
The created ``.com`` file fixes all bond parameters which were
not added by :meth:`~.Topology.construct`. This means all bond
distances, bond angles and torsional angles are fixed, except
for cases where it involves a bond added by
:meth:`.Topology.construct`.
This fixing is implemented by creating a ``.com`` file with
various "FX" commands written within its body.
Parameters:
mol:
The molecule which is to be optimized.
run_name:
The name of the run. The files generated by this run will
have this name.
"""
logger.debug('Creating .com file for "%s".', mol)
# This is the body of the ``.com`` file. The line that begins
# and ends with exclamation lines is replaced with the various
# commands that fix bond distances and angles.
line1 = ("FFLD", self._force_field, 1, 0, 0, 1, 0, 0, 0)
line2 = ("BGIN", 0, 0, 0, 0, 0, 0, 0, 0)
line3 = ("READ", 0, 0, 0, 0, 0, 0, 0, 0)
line4 = ("CONV", 2, 0, 0, 0, self._minimum_gradient, 0, 0, 0)
line5 = ("MINI", 1, 0, self._maximum_iterations, 0, 0, 0, 0, 0)
line6 = ("END", 0, 1, 0, 0, 0, 0, 0, 0)
com_block = "\n".join(
[
self._get_com_line(*line1),
self._get_com_line(*line2),
self._get_com_line(*line3),
"!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!",
self._get_com_line(*line4),
self._get_com_line(*line5),
self._get_com_line(*line6),
]
)
# If `restricted` is ``False`` do not add a fix block.
if not self._restricted:
com_block = com_block.replace(
"!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!\n", ""
)
else:
# This function adds all the lines which fix bond distances
# and angles into com_block.
com_block = self._fix_params(mol, com_block)
# Writes the .com file.
with Path(f"{run_name}.com").open("w") as com:
# The first line holds the .mae file containing the
# molecule to be optimized.
com.write(f"{run_name}.mae\n")
# The second line holds the name of the output file of the
# optimization.
com.write(f"{run_name}-out.maegz\n")
# Next is the body of the .com file.
com.write(com_block)
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
run_name = str(uuid4().int)
output_dir = (
Path(run_name) if self._output_dir is None else self._output_dir
)
mol_path = Path(f"{run_name}.mol")
mae_path = Path(f"{run_name}.mae")
# First write a .mol file of the molecule.
mol.write(mol_path)
# MacroModel requires a ``.mae`` file as input.
self._run_structconvert(mol_path, mae_path)
# generate the ``.com`` file for the MacroModel run.
self._generate_com(mol, run_name)
# Run the optimization.
self._run_bmin(mol, run_name)
# Get the ``.maegz`` optimization output to a ``.mae``.
self._convert_maegz_to_mae(run_name)
rdkit_opt_mol = mol_from_mae_file(mae_path)
mol = mol.with_position_matrix(
rdkit_opt_mol.GetConformer().GetPositions()
)
move_generated_macromodel_files(run_name, output_dir)
return mol
def _fix_params(self, mol: stk.Molecule, com: str) -> str:
"""Fix bond distances and angles in ``.com`` file.
For each bond distance, bond angle and torsional angle that
does not involve a bond created by
:meth:`~.Topology.construct`, a "FX" command is added to the
body of the ``.com`` file.
These lines replace the filler line in the main string.
Parameters:
mol:
The molecule which is to be optimized.
com:
The body of the ``.com`` file which is to have fix commands
added.
Returns:
A string holding the body of the ``.com`` file with
instructions to fix the various bond distances and angles
as described in the docstring.
"""
fix_block = ""
# Add lines that fix the bond distance.
fix_block = self._fix_distances(mol, fix_block)
# Add lines that fix the bond angles.
fix_block = self._fix_bond_angles(mol, fix_block)
# Add lines that fix the torsional angles.
fix_block = self._fix_torsional_angles(mol, fix_block)
return com.replace(
"!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!\n", fix_block
)
def _fix_distances(self, mol: stk.Molecule, fix_block: str) -> str:
"""Add lines fixing bond distances to ``.com`` body.
Only bond distances which do not involve bonds created during
construction are fixed.
Parameters:
mol:
The molecule to be optimized.
fix_block:
A string holding fix commands in the ``.com`` file.
Returns:
A string holding fix commands in the ``.com`` file.
Raises:
:class:`.InputError`:
if molecule is not :class:`stk.ConstructedMolecule`.
"""
if not isinstance(mol, stk.ConstructedMolecule):
msg = (
f"{mol} needs to be a ConstructedMolecule for "
f"`restricted`==`True` (it is {self._restricted})"
)
raise InputError(msg)
# Identify bonds created by ``stk`` by checking if the
# ``stk.BuildingBlock`` associated with the bond is ``None``.
bonder_ids = {
atom_id
for bond_info in mol.get_bond_infos() # type: ignore[attr-defined]
if bond_info.get_building_block() is None
for atom_id in (
bond_info.get_bond().get_atom1().get_id(),
bond_info.get_bond().get_atom2().get_id(),
)
}
# Go through all the bonds in the ``stk.Molecule`` . If an
# ``stk.BuildingBlock`` associated with the bond is ``None``,
# add a fix line to the ``fix_block``.
# If the bond is connected to an atom present in `bonder_ids`,
# go to the next bond. This is because a bond between atoms,
# whose IDs are present in `bonder_ids`,
# was added during construction and should therefore not
# be fixed.
for bond in mol.get_bonds():
if (
bond.get_atom1().get_id() in bonder_ids
and bond.get_atom2().get_id() in bonder_ids
):
continue
atom1_id = bond.get_atom1().get_id()
atom2_id = bond.get_atom2().get_id()
# Make sure that the indices are increased by 1 in the .mae
# file.
atom1_id += 1
atom2_id += 1
args = ("FXDI", atom1_id, atom2_id, 0, 0, 99999, 0, 0, 0)
fix_block += self._get_com_line(*args)
fix_block += "\n"
return fix_block
def _fix_bond_angles(self, mol: stk.Molecule, fix_block: str) -> str:
"""Add lines fixing bond angles to the ``.com`` body.
All bond angles of the molecule are fixed.
Parameters:
mol:
The molecule to be optimized.
fix_block:
A string holding fix commands in the ``.com`` file.
Returns:
A string holding fix commands in the ``.com`` file.
"""
paths = rdkit.FindAllPathsOfLengthN(
mol=mol.to_rdkit_mol(),
length=3,
useBonds=False,
useHs=True,
)
for atom_ids in paths:
file_atom_ids = (i + 1 for i in atom_ids)
args = ("FXBA", *file_atom_ids, 99999, 0, 0, 0, 0)
fix_block += self._get_com_line(*args)
fix_block += "\n"
return fix_block
def _fix_torsional_angles(self, mol: stk.Molecule, fix_block: str) -> str:
"""Add lines fixing torsional bond angles to the ``.com`` body.
All torsional angles of the molecule are fixed.
Parameters:
mol:
The molecule to be optimized.
fix_block:
A string holding fix commands in the ``.com`` file.
Returns:
A string holding fix commands in the ``.com`` file.
"""
paths = rdkit.FindAllPathsOfLengthN(
mol=mol.to_rdkit_mol(),
length=4,
useBonds=False,
useHs=True,
)
for atom_ids in paths:
file_atom_ids = (i + 1 for i in atom_ids)
args = ("FXTA", *file_atom_ids, 99999, 361, 0, 0)
fix_block += self._get_com_line(*args)
fix_block += "\n"
return fix_block
[docs]
class MacroModelMD(MacroModel):
"""Runs a molecular dynamics conformer search using MacroModel.
Parameters:
macromodel_path:
The full path of the Schrodinger suite within the user's
machine. For example, on a Linux machine this may be
something like ``'/opt/schrodinger2017-2'``.
output_dir:
The name of the directory into which files generated during
the optimization are written, if ``None`` then
:func:`uuid.uuid4` is used.
timeout:
The amount in seconds the MD is allowed to run before
being terminated. ``None`` means there is no timeout.
force_field:
The number of the force field to be used.
temperature:
The temperature in Kelvin at which the MD is run.
Cannot be more than ``99999.99``.
conformers:
The number of conformers sampled and optimized from the MD.
Cannot be more than ``9999``.
simulation_time:
The simulation time in ``ps`` of the MD.
Cannot be more than ``999999.99``.
time_step:
The time step in ``fs`` for the MD.
Cannot be more than ``99999.99``.
eq_time:
The equilibration time in ``ps`` before the MD is run.
Cannot be more than ``999999.99``.
maximum_iterations:
The maximum number of iterations done during the
optimization. Cannot be more than ``999999``.
minimum_gradient:
The gradient at which optimization is stopped.
Cannot be less than ``0.0001``.
restricted_bonds:
A :class:`set` of the form
.. code-block:: python
restricted_bonds = {
frozenset((0, 10)),
frozenset((3, 14)),
frozenset((5, 6))
}
Where each :class:`frozenset` defines which bonds should
have a fixed length via the atom ids of atoms in the bond.
restricted_bond_angles:
A :class:`set` of the form
.. code-block:: python
restricted_bonds = {
frozenset((0, 10, 12)),
frozenset((3, 14, 7)),
frozenset((5, 8, 2))
}
Where each :class:`frozenset` defines which bond angles
should have a fixed size via the atom ids of atoms in the
bond angle.
restricted_torsional_angles:
A :class:`set` of the form
.. code-block:: python
restricted_bonds = {
frozenset((0, 10, 12, 3)),
frozenset((3, 14, 7, 4)),
frozenset((5, 8, 2, 9))
}
Where each :class:`frozenset` defines which torsional
angles should have a fixed size via the atom ids of atoms
in the torsional angle.
Examples:
Molecular dynamics can be run on any :class:`stk.Molecule` using
this class. Restrictions can be applied, but are not by default.
This class collects a series of conformers from the trajectory,
optimises them, then returns the lowest energy conformer.
.. code-block:: python
import stk
import stko
mol = stk.BuildingBlock('NCCCN')
optimizer = stko.MacroModelMD(
macromodel_path='/path/to/macromodel/',
conformers=40,
)
mol = optimizer.optimize(mol)
"""
def __init__( # noqa: PLR0913
self,
macromodel_path: str,
output_dir: str | None = None,
timeout: float | None = None,
force_field: int = 16,
temperature: float = 750,
conformers: int = 50,
time_step: float = 1,
eq_time: float = 10,
simulation_time: float = 200,
maximum_iterations: int = 2500,
minimum_gradient: float = 0.05,
restricted_bonds: set | None = None,
restricted_bond_angles: set | None = None,
restricted_torsional_angles: set | None = None,
) -> None:
if restricted_bonds is None:
restricted_bonds = set()
if restricted_bond_angles is None:
restricted_bond_angles = set()
if restricted_torsional_angles is None:
restricted_torsional_angles = set()
self._check_params(
temperature=temperature,
conformers=conformers,
simulation_time=simulation_time,
time_step=time_step,
eq_time=eq_time,
minimum_gradient=minimum_gradient,
maximum_iterations=maximum_iterations,
)
self._temperature = temperature
self._conformers = conformers
self._time_step = time_step
self._eq_time = eq_time
self._simulation_time = simulation_time
self._restricted_bonds = restricted_bonds
self._restricted_bond_angles = restricted_bond_angles
self._restricted_torsional_angles = restricted_torsional_angles
# Negative simulation time is interpreted as times 100 ps.
if simulation_time > 99999.99: # noqa: PLR2004
self._sim_time = -simulation_time / 100
else:
self._sim_time = simulation_time
# Negative equilibration time is interpreted as times 100 ps.
if eq_time > 99999.99: # noqa: PLR2004
self._eq_time = -eq_time / 100
else:
self._eq_time = eq_time
super().__init__(
macromodel_path=macromodel_path,
output_dir=output_dir,
timeout=timeout,
force_field=force_field,
maximum_iterations=maximum_iterations,
minimum_gradient=minimum_gradient,
)
@staticmethod
def _check_params( # noqa: PLR0913
temperature: float,
conformers: int,
simulation_time: float,
time_step: float,
eq_time: float,
minimum_gradient: float,
maximum_iterations: int,
) -> None:
"""Check if the optimization parameters are valid for MacroModel.
Parameters:
temperature:
The temperature in Kelvin at which the MD is run.
Cannot be more than ``99999.99``.
conformers':
The number of conformers sampled and optimized from the MD.
Cannot be more than ``9999``.
simulation_time:
The simulation time in ``ps`` of the MD.
Cannot be more than ``999999.99``.
time_step:
The time step in ``fs`` for the MD.
Cannot be more than ``99999.99``.
eq_time:
The equilibriation time in ``ps`` before the MD is run.
Cannot be more than ``999999.99``.
minimum_gradient:
The gradient at which optimization is stopped.
Cannot be less than ``0.0001``.
maximum_iterations:
The maximum number of iterations done during the
optimization. Cannot be more than ``999999``.
Raises:
:class:`.MacroModelInputError`:
If the parameters cannot be converted into a valid ``.com``
file entry.
"""
if temperature > 99999.99: # noqa: PLR2004
msg = "Macromodel: Supplied temperature (> 99999 K) is too high."
raise InputError(msg)
if conformers > 9999: # noqa: PLR2004
msg = (
"Macromodel: Supplied number of conformers (> 9999) is "
"too high."
)
raise InputError(msg)
if simulation_time > 999999.99: # noqa: PLR2004
msg = (
"Macromodel: Supplied simulation time (> 999999 ps) is "
"too long."
)
raise InputError(msg)
if time_step > 99999.99: # noqa: PLR2004
msg = "Macromodel: Supplied time step (> 99999 fs) is too high."
raise InputError(msg)
if eq_time > 999999.99: # noqa: PLR2004
msg = "Macromodel: Supplied eq time (> 999999 ps) is too long."
raise InputError(msg)
if minimum_gradient < 0.0001: # noqa: PLR2004
msg = "Macromodel: Convergence gradient (< 0.0001) is too small."
raise InputError(msg)
if maximum_iterations > 999999: # noqa: PLR2004
msg = "Macromodel: Number of iterations (> 999999) is too high."
raise InputError(msg)
def _generate_com(self, mol: stk.Molecule, run_name: str) -> None:
"""Create a ``.com`` file for a MacroModel optimization.
The created ``.com`` file fixes all bond parameters which were
not added by :meth:`~.Topology.construct`. This means all bond
distances, bond angles and torsional angles are fixed, except
for cases where it involves a bond added by
:meth:`~.Topology.construct`.
This fixing is implemented by creating a ``.com`` file with
various "FX" commands written within its body.
Parameters:
mol:
The molecule which is to be optimized.
"""
logger.debug('Creating .com file for "%s".', mol)
# Define some short aliases to keep the following lines neat.
temp = self._temperature
sim_time = self._sim_time
tstep = self._time_step
eq_time = self._eq_time
line1 = ("FFLD", self._force_field, 1, 0, 0, 1, 0, 0, 0)
line2 = ("READ", 0, 0, 0, 0, 0, 0, 0, 0)
line3 = ("MDIT", 0, 0, 0, 0, temp, 0, 0, 0)
line4 = ("MDYN", 0, 0, 0, 0, tstep, eq_time, temp, 0)
line5 = ("MDSA", self._conformers, 0, 0, 0, 0, 0, 1, 0)
line6 = ("MDYN", 1, 0, 0, 0, tstep, sim_time, temp, 0)
line7 = ("WRIT", 0, 0, 0, 0, 0, 0, 0, 0)
line8 = ("RWND", 0, 1, 0, 0, 0, 0, 0, 0)
line9 = ("BGIN", 0, 0, 0, 0, 0, 0, 0, 0)
line10 = ("READ", -2, 0, 0, 0, 0, 0, 0, 0)
line11 = ("CONV", 2, 0, 0, 0, self._minimum_gradient, 0, 0, 0)
line12 = (
"MINI",
1,
0,
self._maximum_iterations,
0,
0,
0,
0,
0,
)
line13 = ("END", 0, 1, 0, 0, 0, 0, 0, 0)
com_block = "\n".join(
[
self._get_com_line(*line1),
self._get_com_line(*line2),
"!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!",
self._get_com_line(*line3),
self._get_com_line(*line4),
self._get_com_line(*line5),
self._get_com_line(*line6),
self._get_com_line(*line7),
self._get_com_line(*line8),
self._get_com_line(*line9),
self._get_com_line(*line10),
"!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!",
self._get_com_line(*line11),
self._get_com_line(*line12),
self._get_com_line(*line13),
]
)
com_block = self._fix_params(mol, com_block)
# Generate the com file containing the info for the run
with Path(f"{run_name}.com").open("w") as com:
# name of the macromodel file
com.write(f"{run_name}.mae\n")
# name of the output file
com.write(f"{run_name}-out.maegz\n")
# details of the macromodel run
com.write(com_block)
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
run_name = str(uuid4().int)
output_dir = (
Path(run_name) if self._output_dir is None else self._output_dir
)
mol_path = Path(f"{run_name}.mol")
# First write a .mol file of the molecule.
mol.write(mol_path)
# MacroModel requires a ``.mae`` file as input.
self._run_structconvert(mol_path, Path(f"{run_name}.mae"))
# Generate the ``.com`` file for the MacroModel MD run.
self._generate_com(mol, run_name)
# Run the optimization.
self._run_bmin(mol, run_name)
# Extract the lowest energy conformer into its own .mae file.
conformer_mae = MAEExtractor(run_name).path
rdkit_opt_mol = mol_from_mae_file(conformer_mae)
mol = mol.with_position_matrix(
rdkit_opt_mol.GetConformer().GetPositions()
)
move_generated_macromodel_files(run_name, output_dir)
return mol
def _fix_params(self, mol: stk.Molecule, com: str) -> str:
"""Fix bond distances and angles in ``.com`` file.
For each bond distance, bond angle and torsional angle that
does not involve a bond created by
:meth:`~.Topology.construct`, a "FX" command is added to the
body of the ``.com`` file.
These lines replace the filler line in the main string.
Parameters:
mol:
The molecule which is to be optimized.
com:
The body of the ``.com`` file which is to have fix commands
added.
Returns:
A string holding the body of the ``.com`` file with
instructions to fix the various bond distances and angles
as described in the docstring.
"""
fix_block = ""
# Add lines that fix the bond distance.
fix_block = self._fix_distances(mol, fix_block)
# Add lines that fix the bond angles.
fix_block = self._fix_bond_angles(mol, fix_block)
# Add lines that fix the torsional angles.
fix_block = self._fix_torsional_angles(mol, fix_block)
return com.replace(
"!!!BLOCK_OF_FIXED_PARAMETERS_COMES_HERE!!!\n", fix_block
)
def _fix_distances(self, mol: stk.Molecule, fix_block: str) -> str:
"""Add lines fixing bond distances to ``.com`` body.
Parameters:
mol:
The molecule to be optimized.
fix_block:
A string holding fix commands in the ``.com`` file.
Returns:
A string holding fix commands in the ``.com`` file.
"""
# Go through all the bonds in the rdkit molecule. If the bond
# is not between bonder atoms add a fix line to the
# ``fix_block``. If the bond does invovle two bonder atoms go
# to the next bond. This is because a bond between 2 bonder
# atoms was added during construction and should therefore not
# be fixed.
for bond in mol.get_bonds():
bond_key = frozenset(
(bond.get_atom1().get_id(), bond.get_atom2().get_id())
)
atom1_id = bond.get_atom1().get_id()
atom2_id = bond.get_atom2().get_id()
if bond_key not in self._restricted_bonds:
continue
# Make sure that the indices are increased by 1 in the .mae
# file.
atom1_id += 1
atom2_id += 1
args = ("FXDI", atom1_id, atom2_id, 0, 0, 99999, 0, 0, 0)
fix_block += self._get_com_line(*args)
fix_block += "\n"
return fix_block
def _fix_bond_angles(self, mol: stk.Molecule, fix_block: str) -> str:
"""Add lines fixing bond angles to the ``.com`` body.
Parameters:
mol:
The molecule to be optimized.
fix_block:
A string holding fix commands in the ``.com`` file.
Returns:
A string holding fix commands in the ``.com`` file.
"""
paths = rdkit.FindAllPathsOfLengthN(
mol=mol.to_rdkit_mol(),
length=3,
useBonds=False,
useHs=True,
)
for atom_ids in paths:
if frozenset(atom_ids) in self._restricted_bond_angles:
file_atom_ids = (i + 1 for i in atom_ids)
args = ("FXBA", *file_atom_ids, 99999, 0, 0, 0, 0)
fix_block += self._get_com_line(*args)
fix_block += "\n"
return fix_block
def _fix_torsional_angles(self, mol: stk.Molecule, fix_block: str) -> str:
"""Add lines fixing torsional bond angles to the ``.com`` body.
Parameters:
mol:
The molecule to be optimized.
fix_block:
A string holding fix commands in the ``.com`` file.
Returns:
A string holding fix commands in the ``.com`` file.
"""
paths = rdkit.FindAllPathsOfLengthN(
mol=mol.to_rdkit_mol(),
length=4,
useBonds=False,
useHs=True,
)
# Apply the fix.
for atom_ids in paths:
if frozenset(atom_ids) in (self._restricted_torsional_angles):
file_atom_ids = [i + 1 for i in atom_ids]
args = ("FXTA", *file_atom_ids, 99999, 361, 0, 0)
fix_block += self._get_com_line(*args)
fix_block += "\n"
return fix_block