# Source code for openforcefield.typing.chemistry.environment

#!/usr/bin/env python

# ==============================================================================
# MODULE DOCSTRING
# ==============================================================================

"""
environment.py

Lightweight module for validating SMIRKS using Open Force Field ToolkitWrappers.

AUTHORS

Caitlin Bannan <bannanc@uci.edu> (Original author), Mobley Lab, University of California Irvine,
Jeff Wagner <jeffrey.wagner@openforcefield.org> (refactored to use ToolkitWrappers),
with contributions from John Chodera, Memorial Sloan Kettering Cancer Center
and David Mobley, UC Irvine.

"""

# Public API of this module: the names made available by
# ``from openforcefield.typing.chemistry.environment import *``.
# The two exception types come first, followed by the abstract base class
# and its valence-type-specific subclasses.
__all__ = [
    "SMIRKSMismatchError",
    "SMIRKSParsingError",
    "ChemicalEnvironment",
    "AtomChemicalEnvironment",
    "BondChemicalEnvironment",
    "AngleChemicalEnvironment",
    "TorsionChemicalEnvironment",
    "ImproperChemicalEnvironment",
]


# ==============================================================================
# GLOBAL IMPORTS
# ==============================================================================

from openforcefield.utils.toolkits import (
    GLOBAL_TOOLKIT_REGISTRY,
    MessageException,
    ToolkitWrapper,
)


class SMIRKSMismatchError(MessageException):
    """Raised when a SMIRKS pattern is inappropriate for the environment type
    it is being parsed into (for example, a differently connected pattern
    handed to a class that expects a bond)."""


class SMIRKSParsingError(MessageException):
    """Raised when a SMIRKS pattern cannot be parsed for any environment."""


class ChemicalEnvironment:
    """Chemical environment abstract base class used for validating SMIRKS.

    Subclasses set ``_expected_type`` to the valence type their SMIRKS must
    describe. The base class leaves it as ``None``, which disables the
    valence-type comparison performed by :meth:`validate`.
    """

    # Valence type that get_type() must report for instances of this class.
    # None means no particular connectivity is required.
    _expected_type = None

    def __init__(
        self,
        smirks=None,
        label=None,
        validate_parsable=True,
        validate_valence_type=True,
        toolkit_registry=None,
    ):
        """Initialize a chemical environment abstract base class.

        Parameters
        ----------
        smirks : str, optional
            The SMIRKS string this chemical environment is built from.
        label : anything, optional
            Intended to be used to label this chemical environment;
            could be a string, int, or float, or anything.
        validate_parsable : bool, optional, default=True
            If specified, ensure the provided smirks is parsable.
        validate_valence_type : bool, optional, default=True
            If specified, ensure the tagged atoms are appropriate to the
            specified valence type.
        toolkit_registry : str or ToolkitWrapper or ToolkitRegistry, optional, default=None
            Either a ToolkitRegistry, ToolkitWrapper, or the strings
            'openeye' or 'rdkit', indicating the backend to use for
            validating the correct connectivity of the SMIRKS during
            initialization. If None, this function will use the
            GLOBAL_TOOLKIT_REGISTRY.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable.
        SMIRKSMismatchError
            If smirks did not have expected connectivity between tagged atoms
            and validate_valence_type=True.
        """
        # Support string input for toolkit names for legacy reasons. The
        # wrappers are imported lazily so that only the requested backend
        # is instantiated.
        if toolkit_registry == "openeye":
            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

            toolkit_registry = OpenEyeToolkitWrapper()
        elif toolkit_registry == "rdkit":
            from openforcefield.utils.toolkits import RDKitToolkitWrapper

            toolkit_registry = RDKitToolkitWrapper()

        self.smirks = smirks
        self.label = label

        # Either flag triggers a toolkit query. With only validate_parsable
        # set, the SMIRKS is still handed to the toolkit but a valence-type
        # mismatch is not reported.
        if validate_parsable or validate_valence_type:
            self.validate(
                validate_valence_type=validate_valence_type,
                toolkit_registry=toolkit_registry,
            )

    def validate(self, validate_valence_type=True, toolkit_registry=None):
        """Check that the underlying smirks has the expected valence type.

        This method returns nothing; a failed check is reported by raising.
        If the expected type of the class is None, or validate_valence_type
        is False, no mismatch is ever raised (the SMIRKS is still passed to
        the toolkit by :meth:`get_type`).

        Parameters
        ----------
        validate_valence_type : bool, optional, default=True
            If specified, ensure the tagged atoms are appropriate to the
            specified valence type.
        toolkit_registry : ToolkitWrapper or ToolkitRegistry, optional, default=None
            Either a ToolkitRegistry or ToolkitWrapper, indicating the backend
            to use for validating the correct connectivity of the SMIRKS.
            If None, this function will use the GLOBAL_TOOLKIT_REGISTRY.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable.
        SMIRKSMismatchError
            If smirks did not have expected connectivity between tagged atoms
            and validate_valence_type=True.
        """
        # Always query the toolkit first so that an unparsable SMIRKS is
        # surfaced even when the valence-type comparison is skipped.
        perceived_type = self.get_type(toolkit_registry=toolkit_registry)

        if (
            validate_valence_type
            and self._expected_type is not None
            and perceived_type != self._expected_type
        ):
            raise SMIRKSMismatchError(
                f"{self.__class__} expected '{self._expected_type}' chemical environment, but "
                f"smirks was set to '{self.smirks}', which is type '{perceived_type}'"
            )

    @classmethod
    def validate_smirks(
        cls,
        smirks,
        validate_parsable=True,
        validate_valence_type=True,
        toolkit_registry=None,
    ):
        """Check the provided SMIRKS string is valid, and if requested, tags
        atoms appropriate to the specified valence type.

        The check is performed by constructing (and discarding) an instance
        of ``cls``; nothing is returned.

        Parameters
        ----------
        smirks : str
            The SMIRKS expression to validate.
        validate_parsable : bool, optional, default=True
            If specified, ensure the provided smirks is parsable.
        validate_valence_type : bool, optional, default=True
            If specified, ensure the tagged atoms are appropriate to the
            specified valence type.
        toolkit_registry : str or ToolkitWrapper or ToolkitRegistry, optional, default=None
            Either a ToolkitRegistry, ToolkitWrapper, or the strings
            'openeye' or 'rdkit', indicating the backend to use for
            validating the correct connectivity of the SMIRKS.
            If None, this function will use the GLOBAL_TOOLKIT_REGISTRY.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable.
        SMIRKSMismatchError
            If smirks did not have expected connectivity between tagged atoms
            and validate_valence_type=True.
        """
        cls(
            smirks,
            validate_parsable=validate_parsable,
            validate_valence_type=validate_valence_type,
            toolkit_registry=toolkit_registry,
        )

    def get_type(self, toolkit_registry=None):
        """Return the valence type implied by the connectivity of the bound
        atoms in this ChemicalEnvironment.

        Parameters
        ----------
        toolkit_registry : openforcefield.utils.ToolkitRegistry or openforcefield.utils.ToolkitWrapper
            The cheminformatics toolkit to use for parsing the smirks.
            If None, the GLOBAL_TOOLKIT_REGISTRY is used.

        Returns
        -------
        valence_type : str
            One of "Atom", "Bond", "Angle", "ProperTorsion", "ImproperTorsion",
            or None. If tagged atoms are not connected in a known pattern this
            method will return None.

        Raises
        ------
        SMIRKSParsingError
            If smirks was unparsable.
        """
        # Query a toolkit wrapper for substructure type
        if toolkit_registry is None:
            toolkit_registry = GLOBAL_TOOLKIT_REGISTRY

        # A single wrapper exposes the method directly; anything else is
        # treated as a registry and dispatched through its call() interface.
        if isinstance(toolkit_registry, ToolkitWrapper):
            unique_tags, connectivity = toolkit_registry.get_tagged_smarts_connectivity(
                self.smirks
            )
        else:
            unique_tags, connectivity = toolkit_registry.call(
                "get_tagged_smarts_connectivity", self.smirks
            )

        # NOTE(review): unique_tags is compared against tuples of atom-map
        # indices and connectivity is searched for (tag, tag) pairs; the
        # exact container types and pair ordering are defined by the toolkit.
        if unique_tags == (1,) and len(connectivity) == 0:
            return "Atom"
        elif unique_tags == (1, 2) and (1, 2) in connectivity:
            return "Bond"
        elif (
            unique_tags == (1, 2, 3)
            and (1, 2) in connectivity
            and (2, 3) in connectivity
        ):
            return "Angle"
        elif (
            unique_tags == (1, 2, 3, 4)
            and (1, 2) in connectivity
            and (2, 3) in connectivity
            and (3, 4) in connectivity
        ):
            # Linear chain 1-2-3-4
            return "ProperTorsion"
        elif (
            unique_tags == (1, 2, 3, 4)
            and (1, 2) in connectivity
            and (2, 3) in connectivity
            and (2, 4) in connectivity
        ):
            # Atom 2 is bonded to each of atoms 1, 3 and 4
            return "ImproperTorsion"
        else:
            return None
class AtomChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment matching one labeled atom."""

    # Valence type the SMIRKS must be perceived as for validation to pass.
    _expected_type = "Atom"


class BondChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment matching two labeled atoms (or a bond)."""

    # Valence type the SMIRKS must be perceived as for validation to pass.
    _expected_type = "Bond"


class AngleChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment matching three marked atoms (angle)."""

    # Valence type the SMIRKS must be perceived as for validation to pass.
    _expected_type = "Angle"


class TorsionChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment matching four marked atoms (torsion)."""

    # Valence type the SMIRKS must be perceived as for validation to pass.
    _expected_type = "ProperTorsion"


class ImproperChemicalEnvironment(ChemicalEnvironment):
    """Chemical environment matching four marked atoms (improper)."""

    # Valence type the SMIRKS must be perceived as for validation to pass.
    _expected_type = "ImproperTorsion"