From cc455f4b052bea63a8b567a8a239a09272029a35 Mon Sep 17 00:00:00 2001
From: Levi Naden
Date: Tue, 7 Mar 2017 14:02:16 -0500
Subject: [PATCH] Added in the storage tests

Finished migration from YANK to OpenMMTools for storage module
Added in the quantity from string utility as a special math_eval
Updated readme
Updated __init__ import statements
Bumped the version
---
 README.md                                   |  10 +
 openmmtools/__init__.py                     |   8 +-
 openmmtools/storage/iodrivers.py            |   9 +-
 openmmtools/tests/test_storage_interface.py | 181 +++++++++++++++++
 openmmtools/tests/test_storage_iodrivers.py | 209 ++++++++++++++++++++
 openmmtools/tests/test_utils.py             |  17 ++
 openmmtools/utils.py                        |  99 +++++++++-
 setup.py                                    |   4 +-
 8 files changed, 522 insertions(+), 15 deletions(-)
 create mode 100644 openmmtools/tests/test_storage_interface.py
 create mode 100644 openmmtools/tests/test_storage_iodrivers.py

diff --git a/README.md b/README.md
index b60fb2c36..d384ccc72 100644
--- a/README.md
+++ b/README.md
@@ -53,3 +53,13 @@ The module `openmmtools.cache` implements a shared LRU cache for `Context` objec
 If differences in energies in excess of `ENERGY_TOLERANCE` (default: 0.06 kcal/mol) are detected, these systems will be serialized to XML for further debugging.
 This is installed onto the command line when the repository is installed.
+
+## Storage
+
+The module `openmmtools.storage` provides a user-friendly storage IO interface for saving data to disk. Its primary
+function is to remove the need for the user to specify how Python variables and OpenMM Quantities are formatted when
+written to and read back from disk. The module is extensible to any type of data a user wants; currently, only NetCDF
+storage is supported.
+- `StorageIODriver`: Abstract, extendable class for writing format-specific IO drivers such as the `NetCDFIODriver`
+- `NetCDFIODriver`: User-configurable IO driver for NetCDF files. Handles built-in Python types and `simtk.unit.Quantity` objects
+- `StorageInterface`: A layer which runs on top of a provided `StorageIODriver` to give users a way to interface with the disk with as little effort as possible.
diff --git a/openmmtools/__init__.py b/openmmtools/__init__.py
index 2ed86278d..9e598830b 100644
--- a/openmmtools/__init__.py
+++ b/openmmtools/__init__.py
@@ -6,8 +6,12 @@
 """

 # Define global version.
-from openmmtools import version
+from . import version
 __version__ = version.version

 # Import modules.
-from openmmtools import testsystems, integrators
+from . import testsystems
+from . import integrators
+from . import storage
+from . import cache
+from . import states
diff --git a/openmmtools/storage/iodrivers.py b/openmmtools/storage/iodrivers.py
index f576a381a..f279ea4c4 100644
--- a/openmmtools/storage/iodrivers.py
+++ b/openmmtools/storage/iodrivers.py
@@ -1373,9 +1373,9 @@ def _bind_read(self):
         # Handle variable size objects
         # This line will not happen unless target is real, so output_mode will return the correct value
         if self._output_mode is 'a':
-            self._save_shape = self._bound_target.shape[1:]
+            self._save_shape = self._bound_target.shape[1:]
         else:
-            self._save_shape = self._bound_target.shape
+            self._save_shape = self._bound_target.shape
         self._unit = self._bound_target.getncattr('IODriver_Unit')
         self._set_codifiers(self._bound_target.getncattr('type'))
@@ -1455,9 +1455,6 @@ def read(self):
         self._output_mode
         data = self._decoder(self._bound_target)
         unit_name = self._bound_target.getncattr('IODriver_Unit')
-        # Do some things to handle the way quantity_from_string parses units that only have a denominator (e.g. Hz)
-        if unit_name[0] == '/':
-            unit_name = "(" + unit_name[1:] + ")**(-1)"
         cast_unit = quantity_from_string(unit_name)
         if isinstance(cast_unit, unit.Quantity):
             cast_unit = cast_unit.unit
@@ -1651,8 +1648,6 @@ def _decode_dict(self):
         # If Quantity, assign unit.
         if 'units' in output_ncvar.ncattrs():
             output_unit_name = output_ncvar.getncattr('units')
-            if output_unit_name[0] == '/':
-                output_unit_name = '(' + output_unit_name[1:] + ')**(-1)'
             output_unit = quantity_from_string(output_unit_name)
             output_value = output_value * output_unit
         # Store output.
diff --git a/openmmtools/tests/test_storage_interface.py b/openmmtools/tests/test_storage_interface.py
new file mode 100644
index 000000000..a50017486
--- /dev/null
+++ b/openmmtools/tests/test_storage_interface.py
@@ -0,0 +1,181 @@
+#!/usr/local/bin/env python
+
+"""
+Test storageinterface.py facility.
+
+The tests are written around the NetCDF storage handler (the default) for its asserts.
+Testing the storage handlers themselves should be left to the test_storage_iodrivers.py file.
+
+"""
+
+# =============================================================================================
+# GLOBAL IMPORTS
+# =============================================================================================
+
+import numpy as np
+from simtk import unit
+import contextlib
+import tempfile
+import shutil
+
+from nose import tools
+
+from openmmtools.storage import StorageInterface, NetCDFIODriver
+
+
+# =============================================================================================
+# TEST HELPER FUNCTIONS
+# =============================================================================================
+
+def spawn_driver(path):
+    """Create a driver that is used to test the StorageInterface class at path location"""
+    return NetCDFIODriver(path)
+
+
+@contextlib.contextmanager
+def temporary_directory():
+    """Context for safe creation of temporary directories."""
+    tmp_dir = tempfile.mkdtemp()
+    try:
+        yield tmp_dir
+    finally:
+        shutil.rmtree(tmp_dir)
+
+# =============================================================================================
+# STORAGE INTERFACE TESTING FUNCTIONS
+# =============================================================================================
+
+
+def test_storage_interface_creation():
+    """Test that the storage interface can create a top level file and read from it"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        si.add_metadata('name', 'data')
+        assert si.storage_system.ncfile.getncattr('name') == 'data'
+
+
+@tools.raises(Exception)
+def test_read_trap():
+    """Test that attempting to read a non-existent file fails"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        si.var1.read()
+
+
+def test_variable_write_read():
+    """Test that a variable can be created and written to file"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = 4
+        si.four.write(input_data)
+        output_data = si.four.read()
+        assert output_data == input_data
+
+
+def test_variable_append_read():
+    """Test that a variable can be created and appended to file"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = np.eye(3) * 4.0
+        si.four.append(input_data)
+        si.four.append(input_data)
+        output_data = si.four.read()
+        assert np.all(output_data[0] == input_data)
+        assert np.all(output_data[1] == input_data)
+
+
+@tools.raises(Exception)
+def test_write_protect():
+    """Test that writing twice without removing protection raises an error"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = 4
+        si.four.write(input_data)
+        si.four.write(input_data)
+
+
+def test_unbound_read():
+    """Test that a variable can read from the file without previous binding"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = 4*unit.kelvin
+        si.four.write(input_data)
+        si.storage_system.close_down()
+        del si
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        output_data = si.four.read()
+        assert input_data == output_data
+
+
+def test_directory_creation():
+    """Test that automatic directory-like objects are created on the fly"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = 'four'
+        si.dir0.dir1.dir2.var.write(input_data)
+        ncfile = si.storage_system.ncfile
+        target = ncfile
+        for i in range(3):
+            my_dir = 'dir{}'.format(i)
+            assert my_dir in target.groups
+            target = target.groups[my_dir]
+        si.storage_system.close_down()
+        del si
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        target = si
+        for i in range(3):
+            my_dir = 'dir{}'.format(i)
+            target = getattr(target, my_dir)
+        assert target.var.read() == input_data
+
+
+def test_multi_variable_creation():
+    """Test that multiple variables can be created in a single directory structure"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = [4.0, 4.0, 4.0]
+        si.dir0.var0.write(input_data)
+        si.dir0.var1.append(input_data)
+        si.dir0.var1.append(input_data)
+        si.storage_system.close_down()
+        del si, driver
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        assert si.dir0.var0.read() == input_data
+        app_data = si.dir0.var1.read()
+        assert app_data[0] == input_data
+        assert app_data[1] == input_data
+
+
+def test_metadata_creation():
+    """Test that metadata can be added to variables and directories"""
+    with temporary_directory() as tmp_dir:
+        test_store = tmp_dir + '/teststore.nc'
+        driver = spawn_driver(test_store)
+        si = StorageInterface(driver)
+        input_data = 4
+        si.dir0.var1.write(input_data)
+        si.dir0.add_metadata('AmIAGroup', 'yes')
+        si.dir0.var1.add_metadata('AmIAGroup', 'no')
+        dir0 = si.storage_system.ncfile.groups['dir0']
+        var1 = dir0.variables['var1']
+        assert dir0.getncattr('AmIAGroup') == 'yes'
+        assert var1.getncattr('AmIAGroup') == 'no'
diff --git a/openmmtools/tests/test_storage_iodrivers.py b/openmmtools/tests/test_storage_iodrivers.py
new file mode 100644
index 000000000..ac7ace074
--- /dev/null
+++ b/openmmtools/tests/test_storage_iodrivers.py
@@ -0,0 +1,209 @@
+#!/usr/local/bin/env python
+
+"""
+Test iodrivers.py facility.
+
+"""
+
+# =============================================================================================
+# GLOBAL IMPORTS
+# =============================================================================================
+
+import numpy as np
+from simtk import unit
+import contextlib
+import tempfile
+import shutil
+
+from nose import tools
+
+from openmmtools.storage import NetCDFIODriver
+
+
+# =============================================================================================
+# TEST HELPER FUNCTIONS
+# =============================================================================================
+
+@contextlib.contextmanager
+def temporary_directory():
+    """Context for safe creation of temporary directories."""
+    tmp_dir = tempfile.mkdtemp()
+    try:
+        yield tmp_dir
+    finally:
+        shutil.rmtree(tmp_dir)
+
+
+# =============================================================================================
+# NETCDFIODRIVER TESTING FUNCTIONS
+# =============================================================================================
+
+def test_netcdf_driver_group_manipulation():
+    """Test that the NetCDFIODriver can create groups, rebind to groups, and that they are on the file"""
+    with temporary_directory() as tmp_dir:
+        nc_io_driver = NetCDFIODriver(tmp_dir + '/test.nc')
+        group2 = nc_io_driver.get_directory('group1/group2')
+        group1 = nc_io_driver.get_directory('group1')
+        ncfile = nc_io_driver.ncfile
+        ncgroup1 = ncfile.groups['group1']
+        ncgroup2 = ncfile.groups['group1'].groups['group2']
+        assert group1 is ncgroup1
+        assert group2 is ncgroup2
+
+
+def test_netcdf_driver_dimension_manipulation():
+    """Test that the NetCDFIODriver can check and create dimensions"""
+    with temporary_directory() as tmp_dir:
+        nc_io_driver = NetCDFIODriver(tmp_dir + '/test.nc')
+        NetCDFIODriver.check_scalar_dimension(nc_io_driver)
+        NetCDFIODriver.check_iterable_dimension(nc_io_driver, length=4)
+        NetCDFIODriver.check_infinite_dimension(nc_io_driver)
+        ncfile = nc_io_driver.ncfile
+        dims = ncfile.dimensions
+        assert 'scalar' in dims
+        assert 'iterable4' in dims
+        assert 'iteration' in dims
+
+
+def test_netcdf_driver_metadata_creation():
+    """Test that the NetCDFIODriver can create metadata on different objects"""
+    with temporary_directory() as tmp_dir:
+        nc_io_driver = NetCDFIODriver(tmp_dir + '/test.nc')
+        group1 = nc_io_driver.get_directory('group1')
+        nc_io_driver.add_metadata('root_metadata', 'IAm(G)Root!')
+        nc_io_driver.add_metadata('group_metadata', 'group1_metadata', path='/group1')
+        ncfile = nc_io_driver.ncfile
+        nc_metadata = ncfile.getncattr('root_metadata')
+        group_metadata = group1.getncattr('group_metadata')
+        assert nc_metadata == 'IAm(G)Root!'
+        assert group_metadata == 'group1_metadata'
+
+
+# =============================================================================================
+# NETCDF TYPE HANDLER TESTING FUNCTIONS
+# =============================================================================================
+
+
+def generic_type_handler_check(input_data, with_append=True):
+    """Generic type handler test to ensure all callable functions are working"""
+    with temporary_directory() as tmp_dir:
+        file_path = tmp_dir + '/test.nc'
+        nc_io_driver = NetCDFIODriver(file_path)
+        input_type = type(input_data)
+        # Create a write and an append of the data
+        write_path = 'data_write'
+        data_write = nc_io_driver.create_storage_variable(write_path, input_type)
+        if with_append:
+            append_path = 'group1/data_append'
+            data_append = nc_io_driver.create_storage_variable(append_path, input_type)
+        # Store initial data (unbound write/append)
+        data_write.write(input_data)
+        if with_append:
+            data_append.append(input_data)
+        # Test that we can act on them again (bound write/append)
+        data_write.write(input_data)
+        if with_append:
+            data_append.append(input_data)
+        # Test bound read
+        data_write_out = data_write.read()
+        if with_append:
+            data_append_out = data_append.read()
+        try:  # Compound dictionary processing
+            for key in data_write_out.keys():
+                assert np.all(data_write_out[key] == input_data[key])
+        except AttributeError:
+            assert np.all(data_write_out == input_data)
+        if with_append:
+            try:
+                for key in data_write_out.keys():
+                    assert np.all(data_append_out[0][key] == input_data[key])
+                    assert np.all(data_append_out[1][key] == input_data[key])
+            except AttributeError:
+                assert np.all(data_append_out[0] == input_data)
+                assert np.all(data_append_out[1] == input_data)
+        # Delete the IO driver (and close the ncfile in the process)
+        nc_io_driver.close_down()
+        del data_write, data_write_out
+        if with_append:
+            del data_append, data_append_out
+        # Reopen and test reading actions
+        nc_io_driver = NetCDFIODriver(file_path, access_mode='r')
+        data_write = nc_io_driver.get_variable_handler(write_path)
+        if with_append:
+            data_append = nc_io_driver.get_variable_handler(append_path)
+        # Test unbound read
+        data_write_out = data_write.read()
+        if with_append:
+            data_append_out = data_append.read()
+        try:  # Compound dictionary processing
+            for key in data_write_out.keys():
+                assert np.all(data_write_out[key] == input_data[key])
+        except AttributeError:
+            assert np.all(data_write_out == input_data)
+        if with_append:
+            try:
+                for key in data_write_out.keys():
+                    assert np.all(data_append_out[0][key] == input_data[key])
+                    assert np.all(data_append_out[1][key] == input_data[key])
+            except AttributeError:
+                assert np.all(data_append_out[0] == input_data)
+                assert np.all(data_append_out[1] == input_data)
+
+
+def test_netcdf_int_type_handler():
+    """Test that the Int type handler can read/write/append"""
+    input_data = 4
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_float_type_handler():
+    """Test that the Float type handler can read/write/append"""
+    input_data = 4.0
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_string_type_handler():
+    """Test that the String type handler can read/write/append"""
+    input_data = 'four point oh'
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_list_type_handler():
+    """Test that the List type handler can read/write/append"""
+    input_data = [4, 4, 4]
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_tuple_type_handler():
+    """Test that the Tuple type handler can read/write/append"""
+    input_data = (4, 4, 4)
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_array_type_handler():
+    """Test that the ndarray type handler can read/write/append"""
+    input_data = np.array([4, 4.0, 4])
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_quantity_type_handler():
+    """Test that the simtk.unit.Quantity type handler can read/write/append with various unit and _value types"""
+    input_data = 4 * unit.kelvin
+    generic_type_handler_check(input_data)
+    input_data = [4, 4, 4] * unit.kilojoules_per_mole
+    generic_type_handler_check(input_data)
+    input_data = np.array([4, 4, 4]) / unit.nanosecond
+    generic_type_handler_check(input_data)
+
+
+def test_netcdf_dictionary_type_handler():
+    """Test that the Dict type handler can read/write mixed built-in and Quantity entries"""
+    input_data = {
+        'count': 4,
+        'ratio': 0.4,
+        'name': 'four',
+        'repeated': [4,4,4],
+        'temperature': 4 * unit.kelvin,
+        'box_vectors': (np.eye(3) * 4.0) * unit.nanometer
+    }
+    generic_type_handler_check(input_data, with_append=False)
diff --git a/openmmtools/tests/test_utils.py b/openmmtools/tests/test_utils.py
index 9010ac083..e39b2c3b1 100644
--- a/openmmtools/tests/test_utils.py
+++ b/openmmtools/tests/test_utils.py
@@ -83,6 +83,23 @@ def test_is_quantity_close():
         is_quantity_close(300*unit.kelvin, 1*unit.atmosphere)


+def test_quantity_from_string():
+    """Test that quantities can be derived from strings"""
+    test_strings = [
+        ('3', 3.0),  # Handle basic float
+        ('meter', unit.meter),  # Handle basic unit object
+        ('300 * kelvin', 300 * unit.kelvin),  # Handle standard Quantity
+        ('" 0.3 * kilojoules_per_mole / watt**3"', 0.3 * unit.kilojoules_per_mole / unit.watt ** 3),  # Handle division, exponent, nested string
+        ('1*meter / (4*second)', 0.25 * unit.meter / unit.second),  # Handle compound math and parenthesis
+        ('1 * watt**2 /((1* kelvin)**3 / gram)', 1 * (unit.watt ** 2) * (unit.gram) / (unit.kelvin ** 3)),  # Handle everything
+        ('/watt', unit.watt ** -1)  # Handle special "inverse unit" case
+    ]
+
+    for test_string in test_strings:
+        input_string, expected_result = test_string
+        assert quantity_from_string(input_string) == expected_result
+
+
 # =============================================================================
 # TEST SERIALIZATION UTILITIES
 # =============================================================================
diff --git a/openmmtools/utils.py b/openmmtools/utils.py
index 0a2584d9b..a7d3170d5 100644
--- a/openmmtools/utils.py
+++ b/openmmtools/utils.py
@@ -143,7 +143,7 @@ def sanitize_expression(expression, variables):
     return sanitized_expression, sanitized_variables


-def math_eval(expression, variables=None):
+def math_eval(expression, variables=None, functions=None):
     """Evaluate a mathematical expression with variables.

     All the functions in the standard module math are available together with
@@ -157,6 +157,9 @@ def math_eval(expression, variables=None):
         The mathematical expression as a string.
     variables : dict of str: float, optional
         The variables in the expression, if any (default is None).
+    functions : dict of str: callable function, optional
+        Additional functions to make available to the evaluator besides the
+        built-in functions 'step', 'step_hm', and 'sign'.

     Returns
     -------
@@ -177,9 +180,12 @@ def math_eval(expression, variables=None):
                  ast.Pow: operator.pow,
                  ast.USub: operator.neg}
     # Supported functions, not defined in math.
-    functions = {'step': lambda x: 1 * (x >= 0),
-                 'step_hm': lambda x: 0.5 * (np.sign(x) + 1),
-                 'sign': lambda x: np.sign(x)}
+    if functions is None:
+        functions = {}
+    functions.update({'step': lambda x: 1 * (x >= 0),
+                      'step_hm': lambda x: 0.5 * (np.sign(x) + 1),
+                      'sign': lambda x: np.sign(x)}
+                     )

     def _math_eval(node):
         if isinstance(node, ast.Num):
@@ -219,6 +225,15 @@ def _math_eval(node):
 # QUANTITY UTILITIES
 # =============================================================================

+# List of simtk.unit methods that are actually units and functions instead of base classes
+# Pre-computed to reduce run-time cost
+# Get the built-in units
+_VALID_UNITS = {method: getattr(unit, method) for method in dir(unit) if type(getattr(unit, method)) is unit.Unit}
+# Get the built-in unit functions and make sure they are not just types
+_VALID_UNIT_FUNCTIONS = {method: getattr(unit, method) for method in dir(unit)
+                         if callable(getattr(unit, method)) and type(getattr(unit, method)) is not type}
+
+
 def is_quantity_close(quantity1, quantity2):
     """Check if the quantities are equal up to floating-point precision errors.
@@ -253,6 +268,82 @@ def is_quantity_close(quantity1, quantity2):
     return np.isclose(value2, value1, rtol=1e-10, atol=0.0)


+def quantity_from_string(expression):
+    """Special call to the math_eval function designed to handle simtk.unit Quantity strings
+
+    All the functions in the standard module math are available together with
+    most of the methods inside the simtk.unit module.
+
+    Parameters
+    ----------
+    expression : str
+        The mathematical expression to rebuild into a Quantity, given as a string.
+
+    Returns
+    -------
+    Quantity
+        The result of the evaluated expression.
+
+    Examples
+    --------
+    >>> expr = '4 * kilojoules / mole'
+    >>> quantity_from_string(expr)
+    Quantity(value=4.0, unit=kilojoule/mole)
+
+    """
+
+    # Supported functions, not defined in math.
+    functions = _VALID_UNIT_FUNCTIONS
+
+    # Define the units from simtk.unit as the variables
+    variables = _VALID_UNITS
+
+    # Eliminate nested quotes and excess whitespace
+    expression = expression.strip('\'" ')
+
+    # Handle a special case of the unit when it is just "inverse unit", e.g. Hz == /second
+    if expression[0] == '/':
+        expression = '(' + expression[1:] + ')**(-1)'
+
+    return math_eval(expression, variables=variables, functions=functions)
+
+
+def typename(atype):
+    """Convert a type object into a fully qualified typename.
+
+    Parameters
+    ----------
+    atype : type
+        The type to convert
+
+    Returns
+    -------
+    typename : str
+        The string typename.
+
+    For example,
+
+    >>> typename(type(1))
+    'int'
+
+    >>> import numpy
+    >>> x = numpy.array([1,2,3], numpy.float32)
+    >>> typename(type(x))
+    'numpy.ndarray'
+
+    """
+    if not isinstance(atype, type):
+        raise Exception('Argument is not a type')
+
+    modulename = atype.__module__
+    typename = atype.__name__
+
+    if modulename != '__builtin__':
+        typename = modulename + '.' + typename
+
+    return typename
+
+
 # =============================================================================
 # OPENMM PLATFORM UTILITIES
 # =============================================================================
diff --git a/setup.py b/setup.py
index 1902f9116..f74d5d8be 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ DOCLINES = __doc__.split("\n")

 ########################
-VERSION = "0.9.0"
+VERSION = "0.9.1"
 ISRELEASED = False
 __version__ = VERSION
 ########################

@@ -171,7 +171,7 @@ def check_dependencies():
     packages=['openmmtools', 'openmmtools.tests', 'openmmtools.scripts'],
     package_dir={'openmmtools': 'openmmtools'},
     package_data={'openmmtools': find_package_data('openmmtools/data', 'openmmtools')},
-    install_requires=['numpy', 'scipy', 'openmm', 'parmed'],
+    install_requires=['numpy', 'scipy', 'openmm', 'parmed', 'netCDF4'],
     tests_requires=['nose', 'pymbar'],
     zip_safe=False,
     scripts=[],
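
Example usage of the new `StorageInterface` layer, distilled from the tests added in this patch; the file name and variable names below are illustrative rather than anything the library defines:

import numpy as np
from simtk import unit

from openmmtools.storage import StorageInterface, NetCDFIODriver

# Bind an interface to a NetCDF file on disk ('example.nc' is an arbitrary path)
driver = NetCDFIODriver('example.nc')
si = StorageInterface(driver)

# Variables and nested, directory-like groups are created on first access
si.temperature.write(300 * unit.kelvin)         # write-once value (a second write raises)
si.trajectory.energies.append([4.0, 4.0, 4.0])  # append grows the variable on each call
si.add_metadata('title', 'storage example')

# Values can be read back, including after reopening the file with a fresh driver
print(si.temperature.read())
print(si.trajectory.energies.read()[0])
si.storage_system.close_down()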
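The `quantity_from_string` utility added to `openmmtools.utils` can be exercised as below; each case mirrors an entry in `test_quantity_from_string`:

from simtk import unit
from openmmtools.utils import quantity_from_string

# simtk.unit names are exposed as variables to the evaluator
assert quantity_from_string('300 * kelvin') == 300 * unit.kelvin
assert quantity_from_string('1*meter / (4*second)') == 0.25 * unit.meter / unit.second

# A leading '/' is the special "inverse unit" spelling, e.g. Hz stored as '/second'
assert quantity_from_string('/watt') == unit.watt ** -1

# Plain numbers and bare units are also handled
assert quantity_from_string('3') == 3.0
assert quantity_from_string('meter') == unit.meter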
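`math_eval` now accepts a `functions` argument; the sketch below assumes a caller-supplied function named `half` (a made-up name for illustration) used alongside the built-in `step`, `step_hm`, and `sign` functions, which the patch merges in over any user entries of the same name:

from openmmtools.utils import math_eval

# 'half' is a hypothetical user-defined function; 'step' is one of the built-ins
result = math_eval('step(x) + half(y)',
                   variables={'x': 2.0, 'y': 8.0},
                   functions={'half': lambda v: 0.5 * v})
assert result == 5.0  # step(2.0) == 1, half(8.0) == 4.0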