Source code for openff.toolkit.utils.serialization

#!/usr/bin/env python
Serialization mix-in

.. todo ::

   Currently, the ``openff-toolkit`` toolkit package requires a number
   of dependencies to support all of these serialization protocols.
   Instead, should we not include these by default, and instead raise
   a helpful exception with installation instructions if one of the
   serialization schemes is called but the requisite library is not
   installed?


import abc

from openff.toolkit.utils.utils import requires_package

# =============================================================================================
# =============================================================================================

class Serializable(abc.ABC):
    """
    Mix-in to add serialization and deserialization support via JSON, YAML,
    BSON, TOML, MessagePack, and XML.

    For more information on these formats, see:
    `JSON <https://www.json.org/>`_,
    `BSON <http://bsonspec.org/>`_,
    `YAML <https://yaml.org/>`_,
    `TOML <https://toml.io/>`_,
    `MessagePack <https://msgpack.org/>`_,
    and `XML <https://www.w3.org/XML/>`_.

    To use this mix-in, the class inheriting from this class must have
    implemented ``to_dict()`` and ``from_dict()`` methods that utilize
    dictionaries containing only serializable Python objects.

    .. warning ::

       The serialization/deserialization schemes used here place some strict
       constraints on what kinds of ``dict`` objects can be serialized.
       No effort is made to add further protection to ensure serialization
       is possible. Use with caution.

    Examples
    --------

    Example class using :class:`Serializable` mix-in:

    >>> from openff.toolkit.utils.serialization import Serializable
    >>> class Thing(Serializable):
    ...     def __init__(self, description):
    ...         self.description = description
    ...
    ...     def to_dict(self):
    ...         return { 'description' : self.description }
    ...
    ...     @classmethod
    ...     def from_dict(cls, d):
    ...         return cls(d['description'])
    ...
    >>> # Create an example object
    >>> thing = Thing('blorb')

    Get `JSON <https://www.json.org/>`_ representation:

    >>> json_thing = thing.to_json()

    Reconstruct an object from its `JSON <https://www.json.org/>`_ representation:

    >>> thing_from_json = Thing.from_json(json_thing)

    Get `BSON <http://bsonspec.org/>`_ representation:

    >>> bson_thing = thing.to_bson()

    Reconstruct an object from its `BSON <http://bsonspec.org/>`_ representation:

    >>> thing_from_bson = Thing.from_bson(bson_thing)

    Get `YAML <https://yaml.org/>`_ representation:

    >>> yaml_thing = thing.to_yaml()

    Reconstruct an object from its `YAML <https://yaml.org/>`_ representation:

    >>> thing_from_yaml = Thing.from_yaml(yaml_thing)

    Get `MessagePack <https://msgpack.org/>`_ representation:

    >>> messagepack_thing = thing.to_messagepack()

    Reconstruct an object from its `MessagePack <https://msgpack.org/>`_ representation:

    >>> thing_from_messagepack = Thing.from_messagepack(messagepack_thing)

    Get `XML <https://www.w3.org/XML/>`_ representation:

    >>> xml_thing = thing.to_xml()

    """

    # Standard YAML tag for plain mappings; used so that OrderedDict instances
    # are emitted as ordinary YAML maps (without a Python-specific tag).
    _YAML_MAP_TAG = "tag:yaml.org,2002:map"

    @abc.abstractmethod
    def to_dict(self):
        """Return a ``dict`` representation of the object (implemented by subclasses)."""
        pass

    @classmethod
    @abc.abstractmethod
    def from_dict(cls, d):
        """Instantiate an object from its ``dict`` representation (implemented by subclasses)."""
        pass

    def to_json(self, indent=None):
        """
        Return a JSON serialized representation.

        Specification: https://www.json.org/

        Parameters
        ----------
        indent : int, optional, default=None
            If not None, will pretty-print with specified number of spaces for indentation

        Returns
        -------
        serialized : str
            A JSON serialized representation of the object

        """
        import json

        d = self.to_dict()
        # TODO: More generally check for bytes in dict
        # Only dicts carrying a "conformers" entry are post-processed to turn
        # bytes/array payloads into JSON-friendly lists.
        if "conformers" in d:
            d = _prep_numpy_data_for_json(d)
        return json.dumps(d, indent=indent)

    @classmethod
    def from_json(cls, serialized):
        """
        Instantiate an object from a JSON serialized representation.

        Specification: https://www.json.org/

        Parameters
        ----------
        serialized : str
            A JSON serialized representation of the object

        Returns
        -------
        instance : cls
            An instantiated object

        """
        import json

        d = json.loads(serialized)
        return cls.from_dict(d)

    @requires_package("bson")
    def to_bson(self):
        """
        Return a BSON serialized representation.

        Specification: http://bsonspec.org/

        Returns
        -------
        serialized : bytes
            A BSON serialized representation of the object

        """
        import bson

        d = self.to_dict()
        return bson.dumps(d)

    @classmethod
    @requires_package("bson")
    def from_bson(cls, serialized):
        """
        Instantiate an object from a BSON serialized representation.

        Specification: http://bsonspec.org/

        Parameters
        ----------
        serialized : bytes
            A BSON serialized representation of the object

        Returns
        -------
        instance : cls
            An instantiated object

        """
        import bson

        d = bson.loads(serialized)
        return cls.from_dict(d)

    @requires_package("toml")
    def to_toml(self):
        """
        Return a TOML serialized representation.

        Specification: https://toml.io/

        Returns
        -------
        serialized : str
            A TOML serialized representation of the object

        Raises
        ------
        NotImplementedError
            Always; TOML export is currently disabled (see TODO below).

        """
        raise NotImplementedError()
        # TODO: This implementation currently discards dict keys associated to the None value.
        # See test_utils_serialization::TestUtilsSMIRNOFFSerialization::test_toml.
        # import toml
        # d = self.to_dict()
        # return toml.dumps(d)

    @classmethod
    @requires_package("toml")
    def from_toml(cls, serialized):
        """
        Instantiate an object from a TOML serialized representation.

        Specification: https://toml.io/

        Parameters
        ----------
        serialized : str
            A TOML serialized representation of the object

        Returns
        -------
        instance : cls
            An instantiated object

        """
        import toml

        d = toml.loads(serialized)
        return cls.from_dict(d)

    @staticmethod
    def _represent_odict(dump, tag, mapping, flow_style=None):
        """Like BaseRepresenter.represent_mapping, but does not issue the sort()."""
        import yaml

        value = []
        node = yaml.MappingNode(tag, value, flow_style=flow_style)
        if dump.alias_key is not None:
            dump.represented_objects[dump.alias_key] = node
        # Stays True only while every key and value is a plain (unstyled) scalar.
        best_style = True
        if hasattr(mapping, "items"):
            mapping = mapping.items()
        for item_key, item_value in mapping:
            node_key = dump.represent_data(item_key)
            node_value = dump.represent_data(item_value)
            if not (isinstance(node_key, yaml.ScalarNode) and not node_key.style):
                best_style = False
            if not (isinstance(node_value, yaml.ScalarNode) and not node_value.style):
                best_style = False
            value.append((node_key, node_value))
        if flow_style is None:
            if dump.default_flow_style is not None:
                node.flow_style = dump.default_flow_style
            else:
                node.flow_style = best_style
        return node

    @classmethod
    def _register_odict_representer(cls):
        """Register an order-preserving ``OrderedDict`` representer on ``yaml.SafeDumper``.

        The registration is global to ``yaml.SafeDumper``. The callback is bound
        to the class rather than to an instance so that no instance is kept
        alive by the dumper and so that it is valid when called from classmethods.
        """
        from collections import OrderedDict

        import yaml

        yaml.SafeDumper.add_representer(
            OrderedDict,
            lambda dumper, value: cls._represent_odict(
                dumper, cls._YAML_MAP_TAG, value
            ),
        )

    @requires_package("yaml")
    def to_yaml(self):
        """
        Return a YAML serialized representation.

        Specification: https://yaml.org/

        Returns
        -------
        serialized : str
            A YAML serialized representation of the object

        """
        import yaml

        self._register_odict_representer()
        d = self.to_dict()
        return yaml.safe_dump(d, width=180)

    @classmethod
    @requires_package("yaml")
    def from_yaml(cls, serialized):
        """
        Instantiate from a YAML serialized representation.

        Specification: https://yaml.org/

        Parameters
        ----------
        serialized : str
            A YAML serialized representation of the object

        Returns
        -------
        instance : cls
            Instantiated object

        """
        import yaml

        # Kept for backward compatibility: loading has always (re)registered
        # the OrderedDict representer on the global SafeDumper as a side effect.
        cls._register_odict_representer()
        d = yaml.safe_load(serialized)
        return cls.from_dict(d)

    @requires_package("msgpack")
    def to_messagepack(self):
        """
        Return a MessagePack representation.

        Specification: https://msgpack.org/

        Returns
        -------
        serialized : bytes
            A MessagePack-encoded bytes serialized representation of the object

        """
        import msgpack

        d = self.to_dict()
        return msgpack.dumps(d, use_bin_type=True)

    @classmethod
    @requires_package("msgpack")
    def from_messagepack(cls, serialized):
        """
        Instantiate an object from a MessagePack serialized representation.

        Specification: https://msgpack.org/

        Parameters
        ----------
        serialized : bytes
            A MessagePack-encoded bytes serialized representation

        Returns
        -------
        instance : cls
            Instantiated object.

        """
        import msgpack

        d = msgpack.loads(serialized, raw=False)
        return cls.from_dict(d)

    def to_xml(self, indent=2):
        """
        Return an XML representation.

        Specification: https://www.w3.org/XML/

        Parameters
        ----------
        indent : int, optional, default=2
            If not None, will pretty-print with specified number of spaces for indentation

        Returns
        -------
        serialized : str
            An XML serialized representation of the object, rooted at an
            element named after the object's class.

        """
        import xmltodict

        # An XML document requires one and only one root node.
        root_name = self.__class__.__name__
        d = {root_name: self.to_dict()}

        # Configure indentation level: pretty-print only when an indent is given.
        pretty = indent is not None
        indent_string = " " * indent if pretty else None

        # Convert data from dictionary to XML format.
        return xmltodict.unparse(d, pretty=pretty, indent=indent_string)

    @classmethod
    def from_xml(cls, serialized):
        """
        Instantiate an object from an XML serialized representation.

        Specification: https://www.w3.org/XML/

        Parameters
        ----------
        serialized : str
            An XML serialized representation

        Returns
        -------
        instance : cls
            Instantiated object.

        Raises
        ------
        NotImplementedError
            Always; XML import is currently disabled (see TODO below).

        """
        raise NotImplementedError()
        # TODO: This implementation currently loads numbers as strings.
        # See test_utils_serialization::TestUtilsSerialization::test_xml.
        # import xmltodict
        # d = xmltodict.parse(serialized)
        # root_name = cls.__name__
        # return cls.from_dict(d[root_name])

    def to_pickle(self):
        """
        Return a pickle serialized representation.

        .. warning ::

           This is not recommended for safe, stable storage since the pickle
           specification may change between Python versions.

        Returns
        -------
        serialized : bytes
            A pickled representation of the object

        """
        import pickle

        d = self.to_dict()
        return pickle.dumps(d)

    @classmethod
    def from_pickle(cls, serialized):
        """
        Instantiate an object from a pickle serialized representation.

        .. warning ::

           This is not recommended for safe, stable storage since the pickle
           specification may change between Python versions.

        .. warning ::

           Unpickling can execute arbitrary code. Only call this on data
           from a trusted source.

        Parameters
        ----------
        serialized : bytes
            A pickled representation of the object

        Returns
        -------
        instance : cls
            An instantiated object

        """
        import pickle

        # SECURITY: pickle.loads is unsafe on untrusted input; callers are
        # responsible for ensuring ``serialized`` comes from a trusted source.
        d = pickle.loads(serialized)
        return cls.from_dict(d)
def _prep_numpy_data_for_json(data): """Recursively search through a dict and convert the bytes fields to lists""" # TODO: Much of this logic can probably be trimmed down import numpy as np big_endian_float = np.dtype("float").newbyteorder(">") for key, val in data.items(): if isinstance(val, np.ndarray): data[key] = val.tolist() if isinstance(val, dict): data[key] = _prep_numpy_data_for_json(val) if isinstance(val, bytes): data[key] = np.frombuffer(val, dtype=big_endian_float).tolist() if isinstance(val, list): # Fairly hard-coded for case of Molecule.conformers being a List[np.array] # A more general solution should safely recurse through lists like dicts for i, element in enumerate(val): if isinstance(element, bytes): data[key][i] = np.frombuffer( element, dtype=big_endian_float ).tolist() return data