Source code for stko._internal.optimizers.rdkit
import logging
from itertools import combinations
import numpy as np
import rdkit.Chem.AllChem as rdkit # noqa: N813
import stk
from stko._internal.internal_types import MoleculeT
from stko._internal.optimizers.optimizers import Optimizer
from stko._internal.optimizers.utilities import (
get_metal_atoms,
get_metal_bonds,
to_rdkit_mol_without_metals,
)
from stko._internal.utilities.utilities import vector_angle
logger = logging.getLogger(__name__)
[docs]
class MMFF(Optimizer):
"""Use the MMFF force field in :mod:`rdkit` to optimize molecules.
See Also:
* rdkit: https://www.rdkit.org/
.. warning::
this optimizer seems to be machine dependant, producing
different energies after optimisation on Ubunut 18 vs. Ubuntu 20.
Examples:
.. testcode:: rdkit-mmff
import stk
import stko
mol = stk.BuildingBlock('NCCNCCN')
mmff = stko.MMFF()
opt_mol = mmff.optimize(mol)
.. testcode:: rdkit-mmff
:hide:
assert (
stk.Smiles().get_key(mol) == stk.Smiles().get_key(opt_mol)
)
opt_e = stko.MMFFEnergy().get_energy(opt_mol)
assert opt_e < stko.MMFFEnergy().get_energy(mol)
"""
def __init__(self, ignore_inter_interactions: bool = True) -> None:
self._ignore_inter_interactions = ignore_inter_interactions
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
rdkit_mol = mol.to_rdkit_mol()
# Needs to be sanitized to get force field params.
rdkit.SanitizeMol(rdkit_mol)
rdkit.MMFFOptimizeMolecule(
rdkit_mol,
ignoreInterfragInteractions=self._ignore_inter_interactions,
)
return mol.with_position_matrix(
position_matrix=rdkit_mol.GetConformer().GetPositions()
)
[docs]
class UFF(Optimizer):
"""Use the UFF force field in :mod:`rdkit` to optimize molecules.
.. warning::
this optimizer seems to be machine dependant, producing
different energies after optimisation on Ubunut 18 vs. Ubuntu 20.
See Also:
* rdkit: https://www.rdkit.org/
Examples:
.. testcode:: rdkit-uff
import stk
import stko
mol = stk.BuildingBlock('NCCNCCN')
uff = stko.UFF()
opt_mol = uff.optimize(mol)
.. testcode:: rdkit-uff
:hide:
assert (
stk.Smiles().get_key(mol) == stk.Smiles().get_key(opt_mol)
)
opt_e = stko.UFFEnergy().get_energy(opt_mol)
assert opt_e < stko.UFFEnergy().get_energy(mol)
"""
def __init__(self, ignore_inter_interactions: bool = True) -> None:
self._ignore_inter_interactions = ignore_inter_interactions
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
rdkit_mol = mol.to_rdkit_mol()
# Needs to be sanitized to get force field params.
rdkit.SanitizeMol(rdkit_mol)
rdkit.UFFOptimizeMolecule(
rdkit_mol,
ignoreInterfragInteractions=self._ignore_inter_interactions,
)
return mol.with_position_matrix(
position_matrix=rdkit_mol.GetConformer().GetPositions()
)
[docs]
class ETKDG(Optimizer):
"""Uses ETKDG v2 algorithm in :mod:`rdkit` to optimize a structure.
See Also:
* ETKDG: http://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5b00654
* rdkit: https://www.rdkit.org/
Parameters:
random_seed:
The random seed to use.
Examples:
.. testcode:: rdkit-etkdg
import stk
import stko
mol = stk.BuildingBlock('NCCNCCN')
etkdg = stko.ETKDG()
opt_mol = etkdg.optimize(mol)
.. testcode:: rdkit-etkdg
:hide:
assert (
stk.Smiles().get_key(mol) == stk.Smiles().get_key(opt_mol)
)
"""
def __init__(self, random_seed: int = 12) -> None:
self._random_seed = random_seed
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
params = rdkit.ETKDGv2()
params.clearConfs = True
try:
params.random_seed = self._random_seed
except AttributeError:
# Handle change in naming.
params.randomSeed = self._random_seed
rdkit_mol = mol.to_rdkit_mol()
rdkit.EmbedMolecule(rdkit_mol, params)
return mol.with_position_matrix(
position_matrix=rdkit_mol.GetConformer().GetPositions()
)
[docs]
class MetalOptimizer(Optimizer):
"""Applies forcefield optimizers in :mod:`rdkit` that can handle metals.
Parameters:
metal_binder_distance:
Distance in Angstrom.
metal_binder_forceconstant:
Force constant to use for restricted metal-ligand bonds.
max_iterations:
Number of iteractions to run.
See Also:
* rdkit: https://www.rdkit.org/
Notes:
By default, :meth:`optimize` will run a restricted optimization
using constraints and the UFF. To implement this, metal atoms are
replaced by noninteracting H atoms, and constraints are applied
to maintain the metal centre geometry.
Restrictions are applied to the ligand with respect to its input
structure. So if that is poorly optimised, then the output will be
also.
Warning: this optimizer seems to be machine dependant, producing
different energies after optimisation on Ubunut 18 vs. Ubuntu 20.
Examples:
:class:`MetalOptimizer` allows for the restricted optimization of
:class:`ConstructedMolecule` instances containing metals. Note that
this optimizer algorithm is not very robust to large bonds and may
fail.
.. testcode:: rdkit-metal-opt
import stk
import stko
# Produce a Pd+2 atom with 4 functional groups.
palladium_atom = stk.BuildingBlock(
smiles='[Pd+2]',
functional_groups=(
stk.SingleAtom(stk.Pd(0, charge=2))
for i in range(4)
),
position_matrix=[[0., 0., 0.]],
)
# Build a building block with two functional groups using
# the SmartsFunctionalGroupFactory.
bb1 = stk.BuildingBlock(
smiles=(
'C1=NC=CC(C2=CC=CC(C3=C'
'C=NC=C3)=C2)=C1'
),
functional_groups=[
stk.SmartsFunctionalGroupFactory(
smarts='[#6]~[#7X2]~[#6]',
bonders=(1, ),
deleters=(),
),
],
)
cage1 = stk.ConstructedMolecule(
stk.cage.M3L6(
building_blocks=(palladium_atom, bb1),
# Ensure that bonds between the GenericFunctionalGroups
# of the ligand and the SingleAtom functional groups
# of the metal are dative.
reaction_factory=stk.DativeReactionFactory(
stk.GenericReactionFactory(
bond_orders={
frozenset({
stk.GenericFunctionalGroup,
stk.SingleAtom
}): 9
}
)
),
optimizer=stk.MCHammer(),
)
)
# Define an optimizer.
optimizer = stko.MetalOptimizer()
# Optimize.
opt_cage1 = optimizer.optimize(mol=cage1)
.. testcode:: rdkit-metal-opt
:hide:
assert (
stk.Smiles().get_key(cage1) == stk.Smiles().get_key(opt_cage1)
)
"""
def __init__(
self,
metal_binder_distance: float = 1.6,
metal_binder_forceconstant: float = 1.0e2,
max_iterations: int = 500,
) -> None:
self._metal_binder_distance = metal_binder_distance
self._metal_binder_forceconstant = metal_binder_forceconstant
self._max_iterations = max_iterations
def _apply_metal_centre_constraints(
self,
mol: stk.Molecule,
ff: rdkit.ForceField,
metal_bonds: list[stk.Bond],
) -> None:
"""Applies UFF metal centre constraints.
Parameters:
mol:
The molecule to be optimized.
ff:
Forcefield to apply constraints to. Generally use UFF.
metal_bonds:
List of bonds including metal atoms.
"""
# Add constraints to UFF to hold metal geometry in place.
for bond in metal_bonds:
idx1 = bond.get_atom1().get_id()
idx2 = bond.get_atom2().get_id()
# Add distance constraints in place of metal bonds.
# Target distance set to a given metal_binder_distance.
ff.UFFAddDistanceConstraint(
idx1=idx1,
idx2=idx2,
relative=False,
minLen=self._metal_binder_distance,
maxLen=self._metal_binder_distance,
forceConstant=self._metal_binder_forceconstant,
)
# Also implement angular constraints to all atoms in the
# metal complex.
for bonds in combinations(metal_bonds, r=2):
bond1, bond2 = bonds
bond1_atoms = [bond1.get_atom1(), bond1.get_atom2()]
bond2_atoms = [bond2.get_atom1(), bond2.get_atom2()]
pres_atoms = list(set(bond1_atoms + bond2_atoms))
# If there are more than 3 atoms, implies two
# independant bonds.
if len(pres_atoms) > 3: # noqa: PLR2004
continue
for atom in pres_atoms:
if atom in bond1_atoms and atom in bond2_atoms:
idx2 = atom.get_id()
elif atom in bond1_atoms:
idx1 = atom.get_id()
elif atom in bond2_atoms:
idx3 = atom.get_id()
pos1 = next(mol.get_atomic_positions(atom_ids=[idx1]))
pos2 = next(mol.get_atomic_positions(atom_ids=[idx2]))
pos3 = next(mol.get_atomic_positions(atom_ids=[idx3]))
v1 = pos1 - pos2
v2 = pos3 - pos2
angle = vector_angle(v1, v2)
ff.UFFAddAngleConstraint(
idx1=idx1,
idx2=idx2,
idx3=idx3,
relative=False,
minAngleDeg=np.degrees(angle),
maxAngleDeg=np.degrees(angle),
forceConstant=1.0e5,
)
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
# Find all metal atoms and atoms they are bonded to.
metal_atoms = get_metal_atoms(mol)
metal_bonds, _ids_to_metals = get_metal_bonds(
mol=mol, metal_atoms=metal_atoms
)
# Perform a forcefield optimisation that
# only optimises non metal atoms that are not bonded to the
# metal.
# Write rdkit molecule with metal atoms and bonds deleted.
edit_mol = to_rdkit_mol_without_metals(
mol=mol, metal_atoms=metal_atoms, metal_bonds=metal_bonds
)
# Non-bonded interactions need to be explicitly turned on (if
# desired) because at the point of initialisation, the
# molecules are technically separate (not bonded) and are
# treated as fragments.
rdkit.SanitizeMol(edit_mol)
ff = rdkit.UFFGetMoleculeForceField(
edit_mol,
ignoreInterfragInteractions=False,
)
# Constrain the metal centre.
self._apply_metal_centre_constraints(
mol=mol,
ff=ff,
metal_bonds=metal_bonds,
)
# Optimisation with UFF in RDKit. This method uses constraints
# on the metal centre to attempt to enforce the metal geometry
# described by the metal topology.
ff.Minimize(maxIts=self._max_iterations)
# Update stk molecule from optimized molecule. This should
# only modify atom positions, which means metal atoms will be
# reinstated.
new_position_matrix = edit_mol.GetConformer().GetPositions()
return mol.with_position_matrix(new_position_matrix)