#!/usr/bin/env python
# ==============================================================================
# MODULE DOCSTRING
# ==============================================================================
"""
environment.py
Lightweight module for validating SMIRKS using Open Force Field ToolkitWrappers.
AUTHORS
Caitlin Bannan <bannanc@uci.edu> (Original author), Mobley Lab, University of California Irvine,
Jeff Wagner <jeffrey.wagner@openforcefield.org> (refactored to use ToolkitWrappers),
with contributions from John Chodera, Memorial Sloan Kettering Cancer Center
and David Mobley, UC Irvine.
"""
# Names exported by ``from <this module> import *``: the two SMIRKS exception
# types, the generic ChemicalEnvironment validator, and one subclass per
# valence type (atom, bond, angle, proper torsion, improper torsion).
__all__ = [
"SMIRKSMismatchError",
"SMIRKSParsingError",
"ChemicalEnvironment",
"AtomChemicalEnvironment",
"BondChemicalEnvironment",
"AngleChemicalEnvironment",
"TorsionChemicalEnvironment",
"ImproperChemicalEnvironment",
]
# ==============================================================================
# GLOBAL IMPORTS
# ==============================================================================
from openforcefield.utils.toolkits import (
GLOBAL_TOOLKIT_REGISTRY,
MessageException,
ToolkitWrapper,
)
class SMIRKSMismatchError(MessageException):
    """Raised when a SMIRKS does not fit the environment type it is parsed into.

    ``ChemicalEnvironment.validate`` raises this when the valence type
    perceived from the tagged atoms differs from the type the environment
    class expects.
    """
class SMIRKSParsingError(MessageException):
    """Raised when a SMIRKS string cannot be parsed for any environment."""
class ChemicalEnvironment:
    """Chemical environment abstract base class used for validating SMIRKS.

    Subclasses set ``_expected_type`` to the valence type their SMIRKS must
    describe. The base class leaves it as ``None``, which disables the
    valence-type comparison in :meth:`validate`.
    """

    # Valence type a SMIRKS must resolve to for this class; None accepts any.
    _expected_type = None

    def __init__(
        self,
        smirks=None,
        label=None,
        validate_parsable=True,
        validate_valence_type=True,
        toolkit_registry=None,
    ):
        """Store a SMIRKS string and label, optionally validating the SMIRKS.

        Parameters
        ----------
        smirks : str, optional
            The SMIRKS string for this environment. It is stored on
            ``self.smirks`` unchanged.
            NOTE(review): when ``smirks`` is None and validation is enabled,
            None is forwarded to the toolkit as-is -- confirm how the
            toolkits handle that.
        label : object, optional
            Arbitrary label for this chemical environment (a string, int,
            float, or anything else). Stored on ``self.label``.
        validate_parsable : bool, optional, default=True
            If True, ensure the provided smirks is parsable.
        validate_valence_type : bool, optional, default=True
            If True, ensure the tagged atoms are appropriate to the valence
            type expected by this class. Because the valence type can only
            be perceived by parsing, this also triggers parsing even when
            ``validate_parsable`` is False.
        toolkit_registry : str or ToolkitWrapper or ToolkitRegistry, optional
            Either a ToolkitRegistry, a ToolkitWrapper, or one of the strings
            ``'openeye'`` or ``'rdkit'``, indicating the backend used to
            check the connectivity of the SMIRKS. If None, the
            GLOBAL_TOOLKIT_REGISTRY is used.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable (propagated from the toolkit call).
        SMIRKSMismatchError
            If smirks did not have the expected connectivity between tagged
            atoms and ``validate_valence_type`` is True.
        """
        # Support string input for toolkit names for legacy reasons. The
        # wrappers are imported lazily so that only the requested toolkit is
        # touched.
        if toolkit_registry == "openeye":
            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

            toolkit_registry = OpenEyeToolkitWrapper()
        elif toolkit_registry == "rdkit":
            from openforcefield.utils.toolkits import RDKitToolkitWrapper

            toolkit_registry = RDKitToolkitWrapper()

        self.smirks = smirks
        self.label = label

        if validate_parsable or validate_valence_type:
            self.validate(
                validate_valence_type=validate_valence_type,
                toolkit_registry=toolkit_registry,
            )

    def validate(self, validate_valence_type=True, toolkit_registry=None):
        """Check that the stored SMIRKS parses and has the expected valence type.

        This method returns nothing; problems are reported by raising. The
        SMIRKS is always handed to the toolkit (via :meth:`get_type`), so a
        parsing failure surfaces even when ``validate_valence_type`` is
        False. The valence-type comparison is skipped when this class's
        expected type is None.

        Parameters
        ----------
        validate_valence_type : bool, optional, default=True
            If True, ensure the tagged atoms are appropriate to the valence
            type expected by this class.
        toolkit_registry : ToolkitWrapper or ToolkitRegistry, optional
            The backend used to check the connectivity of the SMIRKS. If
            None, the GLOBAL_TOOLKIT_REGISTRY is used.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable (propagated from the toolkit call).
        SMIRKSMismatchError
            If smirks did not have the expected connectivity between tagged
            atoms and ``validate_valence_type`` is True.
        """
        # Perceiving the type is what exercises the parser, so do it before
        # deciding whether the comparison itself is wanted.
        perceived_type = self.get_type(toolkit_registry=toolkit_registry)

        if not validate_valence_type or self._expected_type is None:
            return

        if perceived_type != self._expected_type:
            raise SMIRKSMismatchError(
                f"{self.__class__} expected '{self._expected_type}' chemical environment, but "
                f"smirks was set to '{self.smirks}', which is type '{perceived_type}'"
            )

    @classmethod
    def validate_smirks(
        cls,
        smirks,
        validate_parsable=True,
        validate_valence_type=True,
        toolkit_registry=None,
    ):
        """Check that a SMIRKS string is valid for this environment class.

        Validation is performed by constructing a throwaway instance of
        ``cls`` with the given arguments; nothing is returned.

        Parameters
        ----------
        smirks : str
            The SMIRKS expression to validate.
        validate_parsable : bool, optional, default=True
            If True, ensure the provided smirks is parsable.
        validate_valence_type : bool, optional, default=True
            If True, ensure the tagged atoms are appropriate to the valence
            type expected by ``cls``.
        toolkit_registry : str or ToolkitWrapper or ToolkitRegistry, optional
            Either a ToolkitRegistry, a ToolkitWrapper, or one of the strings
            ``'openeye'`` or ``'rdkit'``, indicating the backend used to
            check the connectivity of the SMIRKS. If None, the
            GLOBAL_TOOLKIT_REGISTRY is used.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable (propagated from the toolkit call).
        SMIRKSMismatchError
            If smirks did not have the expected connectivity between tagged
            atoms and ``validate_valence_type`` is True.
        """
        cls(
            smirks,
            validate_parsable=validate_parsable,
            validate_valence_type=validate_valence_type,
            toolkit_registry=toolkit_registry,
        )

    def get_type(self, toolkit_registry=None):
        """Return the valence type implied by the tagged atoms of the SMIRKS.

        Parameters
        ----------
        toolkit_registry : ToolkitRegistry or ToolkitWrapper, optional
            The cheminformatics toolkit to use for parsing the smirks. If
            None, the GLOBAL_TOOLKIT_REGISTRY is used.

        Returns
        -------
        valence_type : str or None
            One of "Atom", "Bond", "Angle", "ProperTorsion",
            "ImproperTorsion", or None. None is returned when the tagged
            atoms are not connected in one of the known patterns.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable (propagated from the toolkit call).
        """
        if toolkit_registry is None:
            toolkit_registry = GLOBAL_TOOLKIT_REGISTRY

        # A bare ToolkitWrapper is called directly; anything else is treated
        # as a registry and asked to dispatch the method by name.
        if isinstance(toolkit_registry, ToolkitWrapper):
            unique_tags, connectivity = toolkit_registry.get_tagged_smarts_connectivity(
                self.smirks
            )
        else:
            unique_tags, connectivity = toolkit_registry.call(
                "get_tagged_smarts_connectivity", self.smirks
            )

        def bonded(*tag_pairs):
            # True when every (i, j) tag pair is listed in the connectivity.
            return all(pair in connectivity for pair in tag_pairs)

        if unique_tags == (1,) and len(connectivity) == 0:
            return "Atom"
        if unique_tags == (1, 2) and bonded((1, 2)):
            return "Bond"
        if unique_tags == (1, 2, 3) and bonded((1, 2), (2, 3)):
            return "Angle"
        if unique_tags == (1, 2, 3, 4):
            # Linear chain 1-2-3-4 is checked before the branched pattern,
            # where atom 2 is bonded to each of atoms 1, 3 and 4.
            if bonded((1, 2), (2, 3), (3, 4)):
                return "ProperTorsion"
            if bonded((1, 2), (2, 3), (2, 4)):
                return "ImproperTorsion"
        return None
class AtomChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment whose SMIRKS is expected to tag a single atom."""

    # Valence type the SMIRKS is compared against during validation.
    _expected_type = "Atom"
class BondChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment whose SMIRKS is expected to tag two bonded atoms."""

    # Valence type the SMIRKS is compared against during validation.
    _expected_type = "Bond"
class AngleChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment whose SMIRKS is expected to tag three atoms forming an angle."""

    # Valence type the SMIRKS is compared against during validation.
    _expected_type = "Angle"
class TorsionChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment whose SMIRKS is expected to tag four atoms forming a proper torsion."""

    # Valence type the SMIRKS is compared against during validation.
    _expected_type = "ProperTorsion"
class ImproperChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment whose SMIRKS is expected to tag four atoms forming an improper torsion."""

    # Valence type the SMIRKS is compared against during validation.
    _expected_type = "ImproperTorsion"