import logging
import math
import os
import re
import shutil
import subprocess as sp
import uuid
import warnings
from pathlib import Path
import stk
from rdkit.Chem import AllChem as rdkit # noqa: N813
from stko._internal.internal_types import MoleculeT
from stko._internal.molecular.periodic.unitcell import UnitCell
from stko._internal.optimizers.optimizers import Optimizer
from stko._internal.optimizers.utilities import (
get_metal_atoms,
get_metal_bonds,
has_h_atom,
has_metal_atom,
to_rdkit_mol_without_metals,
)
from stko._internal.utilities.exceptions import (
ExpectedMetalError,
ForceFieldSetupError,
OptimizerError,
PathError,
)
logger = logging.getLogger(__name__)
[docs]
class GulpUFFOptimizer(Optimizer):
"""Applies forcefield optimizers that can handle metal centres.
Parameters:
gulp_path:
Path to GULP executable.
maxcyc:
Set the maximum number of optimisation steps to use.
Default in Gulp is 1000.
metal_FF:
Dictionary with metal atom forcefield assignments.
Key: :class:`int` : atomic number.
Value: :class:`str` : UFF4MOF forcefield type.
metal_ligand_bond_order:
Bond order to use for metal-ligand bonds. Defaults to
`half`, but using `resonant` can increase the force
constant for stronger metal-ligand interactions.
conjugate_gradient:
``True`` to use Conjugate Graditent method.
Defaults to ``False``
output_dir:
The name of the directory into which files generated during
the calculation are written, if ``None`` then
:func:`uuid.uuid4` is used.
Notes:
By default, :meth:`optimize` will run an optimisation using the
UFF4MOF. This forcefield requires some explicit metal atom
definitions, which are determined by the user.
This code was originally written for use with Gulp 5.1 on Linux and has
not been officially tested on other versions and operating systems.
Make sure to sanity check the output.
Examples:
While metal atoms are not required, UFF4MOF is useful because it
encompasses almost all chemical environments commonly found in
metal-organic structures. Better forcefields exist for purely
organic molecules! An interface with GULP is provided, which takes
the forcefield types assigned by RDKit for non-metal atoms and
user defined forcefield types for metal atoms to perform geometry
optimisations.
.. code-block:: python
import stk
import stko
from rdkit.Chem import AllChem as rdkit
# Produce a Pd+2 atom with 4 functional groups.
atom = rdkit.MolFromSmiles('[Pd+2]')
atom.AddConformer(rdkit.Conformer(atom.GetNumAtoms()))
palladium_atom = stk.BuildingBlock.init_from_rdkit_mol(atom)
atom_0, = palladium_atom.get_atoms(0)
palladium_atom = palladium_atom.with_functional_groups(
(stk.SingleAtom(atom_0) for i in range(4))
)
# Build a building block with two functional groups using
# the SmartsFunctionalGroupFactory.
bb1 = stk.BuildingBlock(
smiles=('C1=CC(=CC(=C1)C2=CN=CC=C2)C3=CN=CC=C3'),
functional_groups=[
stk.SmartsFunctionalGroupFactory(
smarts='[#6]~[#7X2]~[#6]',
bonders=(1, ),
deleters=(),
),
],
)
# Build a metal-organic cage with dative bonds between
# GenericFunctionalGroup and SingleAtom functional groups.
cage = stk.ConstructedMolecule(
stk.cage.M2L4Lantern(
building_blocks={
palladium_atom: (0, 1),
bb1: (2, 3, 4, 5)
},
reaction_factory=stk.DativeReactionFactory(
stk.GenericReactionFactory(
bond_orders={
frozenset({
stk.GenericFunctionalGroup,
stk.SingleAtom
}): 9
}
)
)
)
)
# Perform Gulp optimisation with UFF4MOF.
# Use conjugate gradient method for a slower, but more stable
# optimisation.
gulp_opt = stko.GulpUFFOptimizer(
gulp_path='path/to/gulp',
metal_FF={46: 'Pd4+2'},
conjugate_gradient=True
)
# Assign the force field.
gulp_opt.assign_FF(cage)
# Run optimization.
cage = gulp_opt.optimize(mol=cage)
"""
def __init__( # noqa: PLR0913
self,
gulp_path: Path | str,
maxcyc: int = 1000,
metal_FF: dict | None = None, # noqa: N803
metal_ligand_bond_order: str | None = None,
conjugate_gradient: bool = False,
output_dir: Path | str | None = None,
) -> None:
gulp_path = Path(gulp_path)
self._check_path(gulp_path)
self._gulp_path = gulp_path
self._maxcyc = maxcyc
self._metal_FF = metal_FF
self._metal_ligand_bond_order = (
"half"
if metal_ligand_bond_order is None
else metal_ligand_bond_order
)
self._conjugate_gradient = conjugate_gradient
self._output_dir = None if output_dir is None else Path(output_dir)
def _check_path(self, path: Path) -> None:
if not path.exists():
msg = f"GULP not found at {path}"
raise PathError(msg)
def _add_atom_charge_flags(self, atom: rdkit.Atom, atomkey: str) -> str: # noqa: PLR0915, PLR0912, C901
"""Add atom charge flags for forcefield.
Code inspired by:
https://github.com/rdkit/rdkit
> Code/GraphMol/ForceFieldHelpers/UFF/AtomTyper.cpp
"""
total_valence = rdkit.Atom.GetTotalValence(atom)
atnum = int(atom.GetAtomicNum())
# Go through element cases.
# Mg.
if atnum == 12: # noqa: PLR2004
if total_valence == 2: # noqa: PLR2004
atomkey += "+2"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Al.
elif atnum == 13: # noqa: PLR2004
if total_valence != 3: # noqa: PLR2004
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Si.
elif atnum == 14: # noqa: PLR2004
if total_valence != 4: # noqa: PLR2004
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# P.
elif atnum == 15: # noqa: PLR2004
if total_valence == 3: # noqa: PLR2004
atomkey += "+3"
elif total_valence == 5: # noqa: PLR2004
atomkey += "+5"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# S.
elif atnum == 16: # noqa: PLR2004
hybrid = rdkit.Atom.GetHybridization(atom)
if hybrid != rdkit.HybridizationType.SP2:
if total_valence == 2: # noqa: PLR2004
atomkey += "+2"
elif total_valence == 4: # noqa: PLR2004
atomkey += "+4"
elif total_valence == 6: # noqa: PLR2004
atomkey += "+6"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for "
f"atom: {atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Zn.
elif atnum == 30: # noqa: PLR2004
if total_valence == 2: # noqa: PLR2004
atomkey += "+2"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Ga.
elif atnum in {31, 33}:
if total_valence == 3: # noqa: PLR2004
atomkey += "+3"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Se.
elif atnum in {34, 48}:
if total_valence == 2: # noqa: PLR2004
atomkey += "+2"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# In.
elif atnum in {49, 51}:
if total_valence == 3: # noqa: PLR2004
atomkey += "+3"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Te.
elif atnum in {52, 80}:
if total_valence == 2: # noqa: PLR2004
atomkey += "+2"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Tl.
elif atnum in {81, 82, 83}:
if total_valence == 3: # noqa: PLR2004
atomkey += "+3"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Po.
elif atnum == 84: # noqa: PLR2004
if total_valence == 2: # noqa: PLR2004
atomkey += "+2"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
# Lanthanides.
elif atnum >= 57 and atnum <= 71: # noqa: PLR2004
if total_valence == 6: # noqa: PLR2004
atomkey += "+3"
else:
msg = (
f"UFFTYPER: Unrecognized charge state for atom: "
f"{atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
return atomkey
def _get_atom_label(self, atom: rdkit.Atom) -> str: # noqa: PLR0912, C901
"""Get FF atom label.
Code inspired by:
https://github.com/rdkit/rdkit
> Code/GraphMol/ForceFieldHelpers/UFF/AtomTyper.cpp
"""
atnum = int(atom.GetAtomicNum())
atomkey = atom.GetSymbol()
if len(atomkey) == 1:
atomkey += "_"
table = rdkit.GetPeriodicTable()
chk1 = rdkit.PeriodicTable.GetDefaultValence(table, atnum) == -1
chk2 = rdkit.PeriodicTable.GetNOuterElecs(table, atnum) != 1
chk3 = rdkit.PeriodicTable.GetNOuterElecs(table, atnum) != 7 # noqa: PLR2004
chk4 = chk2 and chk3
if chk1 or chk4:
hybrid = rdkit.Atom.GetHybridization(atom)
if atnum == 84: # noqa: PLR2004
atomkey += "3"
if hybrid != rdkit.HybridizationType.SP3:
msg = (
f"UFFTYPER: Unrecognized hybridization for"
f" atom: {atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
elif atnum == 80: # noqa: PLR2004
atomkey += "1"
if hybrid != rdkit.HybridizationType.SP:
msg = (
f"UFFTYPER: Unrecognized hybridization for"
f" atom: {atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
elif hybrid == rdkit.HybridizationType.SP:
atomkey += "1"
elif hybrid == rdkit.HybridizationType.SP2:
chk1a = rdkit.Atom.GetIsAromatic(atom)
bonds = rdkit.Atom.GetBonds(atom)
conjugated = False
for bond in bonds:
if rdkit.Bond.GetIsConjugated(bond):
conjugated = True
break
chk2a = conjugated
chk3a = atnum in [6, 7, 8, 16]
chk4a = chk1a or chk2a
if chk4a and chk3a:
atomkey += "R"
else:
atomkey += "2"
elif hybrid == rdkit.HybridizationType.SP3:
atomkey += "3"
elif hybrid == rdkit.HybridizationType.SP3D:
atomkey += "5"
elif hybrid == rdkit.HybridizationType.SP3D2:
atomkey += "6"
else:
msg = (
f"UFFTYPER: Unrecognized hybridization for"
f" atom: {atom.GetIdx}"
)
raise ForceFieldSetupError(msg)
return self._add_atom_charge_flags(atom, atomkey)
def _type_translator(self) -> dict[str, str]:
type_translator: dict[str, str] = {}
types = sorted(
{ # type: ignore[type-var]
self.atom_labels[i][0] for i in self.atom_labels
}
)
for t in types:
symb = t[0:2] if t[1].isalpha() else t[0] # type:ignore[index]
for i in range(1, 100):
name = f"{symb}{i}"
if name not in type_translator.values():
type_translator[t] = name # type: ignore[index]
break
return type_translator
def _cell_section(self, unit_cell: UnitCell) -> str:
return (
"\ncell\n"
f"{round(unit_cell.get_a(), 6)} "
f"{round(unit_cell.get_b(), 6)} "
f"{round(unit_cell.get_c(), 6)} "
f"{round(unit_cell.get_alpha(), 6)} "
f"{round(unit_cell.get_beta(), 6)} "
f"{round(unit_cell.get_gamma(), 6)} "
# No fixes.
"0 0 0 0 0 0\n"
)
def _position_section(
self, mol: stk.Molecule, type_translator: dict
) -> str:
position_section = "\ncartesian\n"
for atom in mol.get_atoms():
atom_type = type_translator[self.atom_labels[atom.get_id()][0]]
position = mol.get_centroid(atom_ids=atom.get_id())
posi_string = (
f"{atom_type} core {round(position[0], 5)} "
f"{round(position[1], 5)} {round(position[2], 5)}\n"
)
position_section += posi_string
return position_section
def _bond_section(
self,
mol: stk.Molecule,
metal_atoms: list[stk.Atom],
) -> str:
bond_section = "\n"
for bond in mol.get_bonds():
atom_types = [
self.atom_labels[i.get_id()][0]
for i in [bond.get_atom1(), bond.get_atom2()]
]
# Set bond orders.
if has_h_atom(bond):
# H has bond order of 1.
bond_type = ""
elif has_metal_atom(bond, metal_atoms):
bond_type = self._metal_ligand_bond_order
elif (
"_R" in atom_types[0] # type:ignore[operator]
and "_R" in atom_types[1] # type:ignore[operator]
):
bond_type = "resonant"
elif bond.get_order() == 1:
bond_type = ""
elif bond.get_order() == 2: # noqa: PLR2004
bond_type = "double"
elif bond.get_order() == 3: # noqa: PLR2004
bond_type = "triple"
string = (
f"connect {bond.get_atom1().get_id() + 1} "
f"{bond.get_atom2().get_id() + 1} {bond_type}"
)
bond_section += string + "\n"
return bond_section
def _species_section(self, type_translator: dict) -> str:
species_section = "\nspecies\n"
for spec, name in type_translator.items():
species_section += f"{name} {spec}\n"
return species_section
def _write_gulp_file(
self,
mol: stk.Molecule,
metal_atoms: list[stk.Atom],
in_file: Path,
output_xyz: Path,
unit_cell: UnitCell | None = None,
) -> None:
type_translator = self._type_translator()
top_line = "opti "
if self._conjugate_gradient:
top_line += "conj unit "
if unit_cell is not None:
# Constant pressure.
top_line += "conp "
cell_section = self._cell_section(unit_cell)
# Output CIF.
output_cif = output_xyz.with_suffix(".cif")
periodic_output = f"output cif {output_cif}\n"
else:
# Constant volume.
top_line += "conv "
cell_section = ""
periodic_output = ""
top_line += "noautobond fix molmec cartesian\n"
position_section = self._position_section(mol, type_translator)
bond_section = self._bond_section(mol, metal_atoms)
species_section = self._species_section(type_translator)
library = "\nlibrary uff4mof.lib\n"
output_section = (
"\n"
f"maxcyc {self._maxcyc}\n"
"terse inout potentials\n"
"terse in cell\n"
"terse in structure\n"
"terse inout derivatives\n"
f"output xyz {output_xyz}\n"
f"{periodic_output}"
# 'output movie xyz steps_.xyz\n'
)
with in_file.open("w") as f:
f.write(top_line)
f.write(cell_section)
f.write(position_section)
f.write(bond_section)
f.write(species_section)
f.write(library)
f.write(output_section)
[docs]
def assign_FF(self, mol: stk.Molecule) -> None: # noqa: N802
"""Assign forcefield types to molecule.
Parameters:
mol:
The molecule to be optimized.
"""
warnings.warn(
FutureWarning(
"We have found some minor discrepancies in this "
"assignment algorithm, which is based off rdkit code. "
"Changes should come soon. This UFF optimisation should "
" not be your final step! Due to this, some tests in "
"test_uff_assign_ff.py have been muted."
),
stacklevel=2,
)
metal_atoms = get_metal_atoms(mol)
metal_ids = [i.get_id() for i in metal_atoms]
if len(metal_ids) > 1 and self._metal_FF is None:
msg = (
"No metal FF provivded, but metal atoms were found ("
f"{metal_atoms})"
)
raise ExpectedMetalError(msg)
metal_bonds, _ = get_metal_bonds(mol, metal_atoms)
edit_mol = to_rdkit_mol_without_metals(
mol=mol, metal_atoms=metal_atoms, metal_bonds=metal_bonds
)
# Get forcefield parameters.
rdkit.SanitizeMol(edit_mol)
self.atom_labels = {}
for i in range(edit_mol.GetNumAtoms()):
if i in metal_ids:
self.atom_labels[i] = [None, "metal", None]
else:
atom = edit_mol.GetAtomWithIdx(i)
atom_label = self._get_atom_label(atom)
self.atom_labels[i] = [atom_label, None, None]
# Write UFF4MOF specific forcefield parameters.
# Metals.
for atomid in self.atom_labels:
if self.atom_labels[atomid][1] == "metal":
(atom,) = mol.get_atoms(atomid)
atom_no = atom.get_atomic_number()
self.atom_labels[atomid][0] = self._metal_FF[atom_no] # type:ignore[index]
def _run_gulp(self, in_file: Path, out_file: Path) -> None:
cmd = f"{self._gulp_path} < {in_file}"
with out_file.open("w") as f:
# Note that sp.call will hold the program until completion
# of the calculation.
sp.call( # noqa: S602
cmd,
stdin=sp.PIPE,
stdout=f,
stderr=sp.PIPE,
# Shell is required to run complex arguments.
shell=True,
)
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
if self._output_dir is None:
output_dir = Path(str(uuid.uuid4().int)).resolve()
else:
output_dir = self._output_dir.resolve()
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir(parents=True)
init_dir = Path.cwd()
os.chdir(output_dir)
in_file = Path("gulp_opt.gin")
out_file = Path("gulp_opt.ginout")
output_xyz = Path("gulp_opt.xyz")
metal_atoms = get_metal_atoms(mol)
try:
# Write GULP file.
self._write_gulp_file(
mol=mol,
metal_atoms=metal_atoms,
in_file=in_file,
output_xyz=output_xyz,
unit_cell=None,
)
# Run.
self._run_gulp(in_file, out_file)
# Update from output.
mol = mol.with_structure_from_file(output_xyz)
finally:
os.chdir(init_dir)
return mol
[docs]
def p_optimize(
self,
mol: MoleculeT,
unit_cell: UnitCell,
) -> tuple[MoleculeT, UnitCell]:
"""Optimize `mol` and `unit_cell`.
Parameters:
mol:
The molecule to be optimized.
unit_cell:
The unit_cell to be optimized if optimization is periodic.
Returns:
The optimized molecule and the optimized cell.
"""
if self._output_dir is None:
output_dir = Path(str(uuid.uuid4().int)).resolve()
else:
output_dir = self._output_dir.resolve()
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir(parents=True)
init_dir = Path.cwd()
os.chdir(output_dir)
in_file = Path("gulp_opt.gin")
out_file = Path("gulp_opt.ginout")
output_xyz = Path("gulp_opt.xyz")
output_cif = output_xyz.with_suffix(".cif")
metal_atoms = get_metal_atoms(mol)
try:
# Write GULP file.
self._write_gulp_file(
mol=mol,
metal_atoms=metal_atoms,
in_file=in_file,
output_xyz=output_xyz,
unit_cell=unit_cell,
)
# Run.
self._run_gulp(in_file, out_file)
# Update from output.
mol = mol.with_structure_from_file(output_xyz)
unit_cell = unit_cell.with_cell_from_cif(output_cif)
finally:
os.chdir(init_dir)
return mol, unit_cell
[docs]
class GulpUFFMDOptimizer(GulpUFFOptimizer):
"""Applies forcefield MD that can handle metal centres.
Parameters:
gulp_path:
Path to GULP executable.
metal_FF:
Dictionary with metal atom forcefield assignments.
Key: :class:`int` : atomic number.
Value: :class:`str` : UFF4MOF forcefield type.
metal_ligand_bond_order:
Bond order to use for metal-ligand bonds. Defaults to
`half`, but using `resonant` can increase the force
constant for stronger metal-ligand interactions.
output_dir:
The name of the directory into which files generated during
the calculation are written, if ``None`` then
:func:`uuid.uuid4` is used.
integrator:
Integrator for GULP to use.
Defaults to 'stochastic'.
ensemble:
Ensemble for GULP to use.
Defaults to 'nvt'.
temperature:
Temperature to run simulation at in Kelvin.
Defaults to 300.
equilbration:
Time spent equilibrating system in ps.
Defaults to 1.0.
production:
Time spent running production simulation of system in ps.
Defaults to 10.0.
timestep:
Timestep of simulation in fs.
Defaults to 1.0.
N_conformers:
Number of conformers to sample.
Defaults to 10.
opt_conformers:
Whether or not to optimise each conformer using UFF4MOF.
Defaults to ``True``.
save_conformers:
Whether or not to save to file each conformer.
Defaults to ``False``.
Notes:
By default, :meth:`optimize` will run a MD run using the UFF4MOF.
This forcefield requires some explicit metal atom definitions,
which are determined by the user.
This code was originally written for use with Gulp 5.1 on Linux
and has not been officially tested on other versions and operating
systems. Make sure to sanity check the output.
Examples:
Conformer searching is often useful, so we have provided an interface
to MD simulations using GULP and UFF4MOF. A conformer search can be
run at high temperature, where N conformers are extracted at constant
intervals throughtout the simulation and optimized using UFF4MOF. The
lowest energy conformer is returned. After these MD steps, it is
crucial to reoptimize the resultant structure using a better
forcefield or a more robust method!
.. code-block:: python
gulp_MD = stko.GulpUFFMDOptimizer(
gulp_path='path/to/gulp',
metal_FF={46: 'Pd4+2'},
temperature=300,
N_conformers=10,
opt_conformers=True,
)
gulp_MD.assign_FF(cage)
cage = gulp_MD.optimize(cage)
"""
def __init__( # noqa: PLR0913
self,
gulp_path: Path | str,
metal_FF: dict[int, str] | None = None, # noqa: N803
metal_ligand_bond_order: str | None = None,
output_dir: Path | str | None = None,
integrator: str = "stochastic",
ensemble: str = "nvt",
temperature: float = 300,
equilbration: float = 1.0,
production: float = 10.0,
timestep: float = 1.0,
N_conformers: int = 10, # noqa: N803
opt_conformers: bool = True,
save_conformers: bool = False,
) -> None:
gulp_path = Path(gulp_path)
self._check_path(gulp_path)
self._gulp_path = gulp_path
self._metal_FF = metal_FF
self._metal_ligand_bond_order = (
"half"
if metal_ligand_bond_order is None
else metal_ligand_bond_order
)
self._output_dir = None if output_dir is None else Path(output_dir)
self._integrator = integrator
self._ensemble = ensemble
self._temperature = temperature
self._equilbration = float(equilbration)
self._production = float(production)
self._timestep = timestep
self._N_conformers = N_conformers
samples = float(self._production) / float(self._N_conformers)
samples = math.floor(samples * 1000) / 1000
self._sample = samples
self._write = samples
self._opt_conformers = opt_conformers
self._save_conformers = save_conformers
def _write_gulp_file(
self,
mol: stk.Molecule,
metal_atoms: list[stk.Atom],
in_file: Path,
output_traj: Path,
unit_cell: UnitCell | None = None,
) -> None:
type_translator = self._type_translator()
top_line = "md "
if unit_cell is not None:
# Constant pressure.
top_line += "conp "
cell_section = self._cell_section(unit_cell)
# Output CIF.
output_cif = output_traj.with_suffix(".cif")
periodic_output = f"output cif {output_cif}\n"
else:
# Constant volume.
top_line += "conv "
cell_section = ""
periodic_output = ""
top_line += "noautobond fix molmec cartesian\n"
position_section = self._position_section(mol, type_translator)
bond_section = self._bond_section(mol, metal_atoms)
species_section = self._species_section(type_translator)
library = "\nlibrary uff4mof.lib\n"
md_section = (
"\n"
"mdmaxtemp 100000000\n"
f"integrator {self._integrator}\n"
f"ensemble {self._ensemble}\n"
f"temperature {self._temperature}\n"
f"equilibration {self._equilbration} ps\n"
f"production {self._production} ps\n"
f"timestep {self._timestep} fs\n"
f"sample {self._sample} ps\n"
f"write {self._write} ps\n"
"\n"
)
output_section = (
"\n"
"terse inout potentials\n"
"terse inout cell\n"
"terse in structure\n"
"terse inout derivatives\n"
f"output trajectory ascii {output_traj}\n"
f"{periodic_output}"
)
with in_file.open("w") as f:
f.write(top_line)
f.write(cell_section)
f.write(position_section)
f.write(bond_section)
f.write(species_section)
f.write(library)
f.write(md_section)
f.write(output_section)
def _convert_traj_to_xyz(
self,
output_xyz: Path,
output_traj: Path,
) -> tuple[list[str], dict[int, dict], list[str]]:
# Get atom types from an existing xyz file.
atom_types = [
line.rstrip().split(" ")[0]
for line in output_xyz.open().readlines()[2:]
]
# Read in lines from trajectory file.
with output_traj.open() as f:
lines = f.readlines()
# Split file using strings.
timesteps = "".join(lines).split("# Time/KE/E/T")[1:]
trajectory_data = {}
xyz_traj_lines = []
for ts, cont in enumerate(timesteps):
ts_data = {}
time_section = cont.split("# Coordinates\n")[0]
coords_section = cont.split("# Coordinates\n")[1].split(
"# Velocities\n"
)[0]
vels_section = cont.split("# Velocities\n")[1].split(
"# Derivatives \n"
)[0]
derivs_section = cont.split("# Derivatives \n")[1].split(
"# Site energies \n"
)[0]
sites_section = cont.split("# Site energies \n")[1]
time, ke, e, t, *_ = (
float(i) for i in time_section.strip().split(" ") if i
)
ts_data["time"] = time
ts_data["KE"] = ke
ts_data["E"] = e
ts_data["T"] = t
ts_data["coords"] = [ # type:ignore[assignment]
[i for i in li.split(" ") if i]
for li in coords_section.split("\n")[:-1]
]
ts_data["vels"] = [ # type:ignore[assignment]
[i for i in li.split(" ") if i]
for li in vels_section.split("\n")[:-1]
]
ts_data["derivs"] = [ # type:ignore[assignment]
[i for i in li.split(" ") if i]
for li in derivs_section.split("\n")[:-1]
]
ts_data["sites"] = [ # type:ignore[assignment]
[i for i in li.split(" ") if i]
for li in sites_section.split("\n")[:-1]
]
trajectory_data[ts] = ts_data
# Write XYZ string for XYZ traj file.
xyz_string = (
f"{len(ts_data['coords'])}\n" # type:ignore[arg-type]
f"{ts_data['time']},{ts_data['KE']},"
f"{ts_data['E']},{ts_data['T']}\n"
)
for i, coord in enumerate( # type: ignore[var-annotated]
ts_data["coords"] # type: ignore[arg-type]
):
site_E = ts_data["sites"][i][0] # type: ignore[index] # noqa: N806
xyz_string += (
f"{atom_types[i]} {round(float(coord[0]), 5)} "
f"{round(float(coord[1]), 5)} "
f"{round(float(coord[2]), 5)} {site_E}\n"
)
xyz_traj_lines.append(xyz_string)
return atom_types, trajectory_data, xyz_traj_lines
def _write_conformer_xyz_file(
self,
ts: float,
ts_data: dict,
filename: Path,
atom_types: list[str],
) -> None:
coords = ts_data["coords"]
xyz_string = (
f"{len(ts_data['coords'])}\n"
f"{ts},{ts_data['time']},{ts_data['KE']},"
f"{ts_data['E']},{ts_data['T']}\n"
)
for i, coord in enumerate(coords):
xyz_string += (
f"{atom_types[i]} {round(float(coord[0]), 5)} "
f"{round(float(coord[1]), 5)} "
f"{round(float(coord[2]), 5)}\n"
)
with filename.open("w") as f:
f.write(xyz_string)
def _optimise_all_conformers(
self,
mol: stk.Molecule,
trajectory_data: dict,
atom_types: list[str],
low_conf_xyz: Path,
) -> None:
min_energy = 1e10
for ts, ts_data in trajectory_data.items():
if self._save_conformers:
conformer_file_name = Path(f"conf_{ts}.xyz")
else:
# This will get overwrriten each time and deleted at
# the end.
conformer_file_name = Path("temp_conf.xyz")
self._write_conformer_xyz_file(
ts=ts,
ts_data=ts_data,
filename=conformer_file_name,
atom_types=atom_types,
)
mol = mol.with_structure_from_file(conformer_file_name)
conformer_opt_dir = Path.cwd() / f"conf_{ts}_opt"
gulp_opt = GulpUFFOptimizer(
gulp_path=self._gulp_path,
metal_FF=self._metal_FF,
output_dir=conformer_opt_dir,
)
gulp_opt.assign_FF(mol)
mol = gulp_opt.optimize(mol=mol)
energy = gulp_opt.extract_final_energy(
out_file=conformer_opt_dir / "gulp_opt.ginout",
)
if energy < min_energy:
min_energy = energy
# Write out optimised conformer.
mol.write(low_conf_xyz)
if not self._save_conformers:
Path("temp_conf.xyz").unlink()
def _save_all_conformers(
self,
trajectory_data: dict,
atom_types: list[str],
) -> None:
for ts, ts_data in trajectory_data.items():
conformer_file_name = Path(f"conf_{ts}.xyz")
self._write_conformer_xyz_file(
ts=ts,
ts_data=ts_data,
filename=conformer_file_name,
atom_types=atom_types,
)
def _calculate_lowest_energy_conformer(
self,
trajectory_data: dict[int, dict],
) -> int:
energies = [trajectory_data[ts]["E"] for ts in trajectory_data]
min_energy = min(energies)
return list(trajectory_data.keys())[energies.index(min_energy)]
def _save_lowest_energy_conf(
self,
mol: stk.Molecule,
output_xyz: Path,
output_traj: Path,
xyz_traj: Path,
low_conf_xyz: Path,
) -> None:
# Convert GULP trajectory file to xyz trajectory.
(
atom_types,
trajectory_data,
xyz_traj_lines,
) = self._convert_traj_to_xyz(
output_xyz=output_xyz,
output_traj=output_traj,
)
# Write XYZ trajectory file.
with xyz_traj.open("w") as f:
for line in xyz_traj_lines:
f.write(line)
# Find lowest energy conformation and output to XYZ.
if self._opt_conformers:
self._optimise_all_conformers(
mol=mol,
trajectory_data=trajectory_data,
atom_types=atom_types,
low_conf_xyz=low_conf_xyz,
)
else:
if self._save_conformers:
self._save_all_conformers(trajectory_data, atom_types)
min_ts = self._calculate_lowest_energy_conformer(
trajectory_data=trajectory_data,
)
self._write_conformer_xyz_file(
ts=min_ts,
ts_data=trajectory_data[min_ts],
filename=low_conf_xyz,
atom_types=atom_types,
)
[docs]
def optimize(self, mol: MoleculeT) -> MoleculeT:
if self._output_dir is None:
output_dir = Path(str(uuid.uuid4().int)).resolve()
else:
output_dir = self._output_dir.resolve()
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir(parents=True)
init_dir = Path.cwd()
os.chdir(output_dir)
in_file = Path("gulp_MD.gin")
out_file = Path("gulp_MD.ginout")
output_xyz = Path("gulp_MD_template.xyz")
output_traj = Path("gulp_MD.trg")
xyz_traj = Path("gulp_MD_traj.xyz")
low_conf_xyz = Path("low_energy_conf.xyz")
metal_atoms = get_metal_atoms(mol)
try:
mol.write(output_xyz)
# Write GULP file.
self._write_gulp_file(
mol=mol,
metal_atoms=metal_atoms,
in_file=in_file,
output_traj=output_traj,
)
# Run.
self._run_gulp(in_file, out_file)
# Get lowest energy conformer from trajectory.
self._save_lowest_energy_conf(
mol=mol,
output_xyz=output_xyz,
output_traj=output_traj,
xyz_traj=xyz_traj,
low_conf_xyz=low_conf_xyz,
)
# Update from output.
mol = mol.with_structure_from_file(low_conf_xyz)
finally:
os.chdir(init_dir)
return mol
[docs]
def p_optimize(
self,
mol: MoleculeT,
unit_cell: UnitCell,
) -> tuple[MoleculeT, UnitCell]:
"""Optimize `mol` and `unit_cell`.
Parameters:
mol:
The molecule to be optimized.
unit_cell:
The unit_cell to be optimized if optimization is periodic.
Returns:
The optimized molecule and the optimized cell.
"""
if self._output_dir is None:
output_dir = Path(str(uuid.uuid4().int)).resolve()
else:
output_dir = self._output_dir.resolve()
if output_dir.exists():
shutil.rmtree(output_dir)
output_dir.mkdir(parents=True)
init_dir = Path.cwd()
os.chdir(output_dir)
in_file = Path("gulp_MD.gin")
out_file = Path("gulp_MD.ginout")
output_xyz = Path("gulp_MD_template.xyz")
output_traj = Path("gulp_MD.trg")
xyz_traj = Path("gulp_MD_traj.xyz")
low_conf_xyz = Path("low_energy_conf.xyz")
low_conf_cif = low_conf_xyz.with_suffix(".cif")
metal_atoms = get_metal_atoms(mol)
try:
mol.write(output_xyz)
# Write GULP file.
self._write_gulp_file(
mol=mol,
metal_atoms=metal_atoms,
in_file=in_file,
output_traj=output_traj,
unit_cell=unit_cell,
)
# Run.
self._run_gulp(in_file, out_file)
# Get lowest energy conformer from trajectory.
self._save_lowest_energy_conf(
mol=mol,
output_xyz=output_xyz,
output_traj=output_traj,
xyz_traj=xyz_traj,
low_conf_xyz=low_conf_xyz,
)
# Update from output.
mol = mol.with_structure_from_file(low_conf_xyz)
unit_cell = unit_cell.with_cell_from_cif(low_conf_cif)
finally:
os.chdir(init_dir)
return mol, unit_cell