diff --git a/patsy/eval.py b/patsy/eval.py index d4ed83f..897d6c1 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -19,8 +19,9 @@ import ast import numbers import six +from six.moves import cPickle as pickle from patsy import PatsyError -from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling +from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling, check_pickle_version from patsy.tokens import (pretty_untokenize, normalize_token_spacing, python_tokenize) from patsy.compat import call_and_wrap_exc @@ -565,7 +566,26 @@ def eval(self, memorize_state, data): memorize_state, data) - __getstate__ = no_pickling + def __getstate__(self): + return (0, self.code, self.origin) + + def __setstate__(self, state): + (version, code, origin) = state + check_pickle_version(version, 0, self.__class__.__name__) + self.code = code + self.origin = origin + +def test_EvalFactor_pickle_saves_origin(): + # The pickling tests use object equality before and after pickling + # to test that pickling worked correctly. But EvalFactor's origin field + # is not used in equality comparisons, so we need a separate test to + # test that it is being pickled. + ORIGIN = 123456 + f = EvalFactor('a', ORIGIN) + new_f = pickle.loads(pickle.dumps(f)) + + assert f.origin is not None + assert f.origin == new_f.origin def test_EvalFactor_basics(): e = EvalFactor("a+b") @@ -577,8 +597,6 @@ def test_EvalFactor_basics(): assert e.origin is None assert e2.origin == "asdf" - assert_no_pickling(e) - def test_EvalFactor_memorize_passes_needed(): from patsy.state import stateful_transform foo = stateful_transform(lambda: "FOO-OBJ") diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 9b7be37..bb4c5a3 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -5,6 +5,7 @@ # Exhaustive end-to-end tests of the top-level API. import sys +from six.moves import cPickle as pickle import __future__ import six import numpy as np @@ -758,3 +759,26 @@ def test_C_and_pandas_categorical(): [[1, 0], [1, 1], [1, 0]]) + +def test_pickle_builder_roundtrips(): + formulas = ["a + b", + "center(i) + a * b + np.log(x)"] + dataset = {"i": range(3), + "x": [1.1, 2.2, 3.3], + "a": list("abc"), + "b": list("xyx")} + + for formula in formulas: + yield check_pickle_builder_roundtrips, formula, dataset + +def check_pickle_builder_roundtrips(formula, dataset): + design_matrix = dmatrix(formula, dataset) + # TODO Make new_dataset have different values from dataset? + new_dataset = dataset + + m1 = dmatrix(design_matrix.design_info, new_dataset) + + unpickled_design_info = pickle.loads(pickle.dumps(design_matrix.design_info)) + m2 = dmatrix(unpickled_design_info, new_dataset) + + assert np.allclose(m1, m2) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py new file mode 100644 index 0000000..a195c5f --- /dev/null +++ b/patsy/test_pickling.py @@ -0,0 +1,76 @@ +from __future__ import print_function + +import six +from six.moves import cPickle as pickle + +import os +import shutil + +from patsy import EvalFactor + +PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases') + +pickling_testcases = { + "evalfactor_simple": EvalFactor("a+b"), + } + +def test_pickling_same_version_roundtrips(): + for obj in six.itervalues(pickling_testcases): + yield (check_pickling_same_version_roundtrips, obj) + +def check_pickling_same_version_roundtrips(obj): + assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + +def test_pickling_old_versions_still_work(): + for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR): + for fname in filenames: + if os.path.splitext(fname)[1] == '.pickle': + yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) + +def check_pickling_old_versions_still_work(pickle_filename): + testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] + with open(pickle_filename, 'rb') as f: + # When adding features to a class, it will happen that there is no + # way to make an instance of that version version of that class + # equal to any instance of a previous version. How do we handle + # that? + # Maybe adding a minimum version requirement to each test? + assert pickling_testcases[testcase_name] == pickle.load(f) + +def test_unpickling_future_gives_sensible_error_msg(): + # TODO How would we go about testing this? + pass + +def create_pickles(version): + # TODO Add options to overwrite pickles directory, with force=True + # during development. + # TODO Add safety check that said force=True option will still give an + # error when trying to remove pickles for a released version, by + # comparing the version argument here with patsy.__version__. + pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) + if os.path.exists(pickle_testcases_dir): + raise OSError("{} already exists. Aborting.".format(pickle_testcases_dir)) + pickle_testcases_tempdir = pickle_testcases_dir + "_inprogress" + os.mkdir(pickle_testcases_tempdir) + + try: + for name, obj in six.iteritems(pickling_testcases): + with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: + pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) + except Exception: + print("Exception during creation of pickles for {}. " \ + "Removing partially created directory.".format(version)) + shutil.rmtree(pickle_testcases_tempdir) + raise + finally: + os.rename(pickle_testcases_tempdir, pickle_testcases_dir) + print("Successfully created pickle testcases for new version {}.".format(version)) + +if __name__ == "__main__": + import argparse + + arg_parser = argparse.ArgumentParser(description="Create and save pickle testcases for a new version of patsy.") + arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.") + args = arg_parser.parse_args() + + create_pickles(args.version) diff --git a/patsy/util.py b/patsy/util.py index 6d9225e..ce9dada 100644 --- a/patsy/util.py +++ b/patsy/util.py @@ -24,6 +24,7 @@ ] import sys +from nose.tools import assert_raises import numpy as np import six from six.moves import cStringIO as StringIO @@ -698,7 +699,6 @@ def no_pickling(*args, **kwargs): def assert_no_pickling(obj): import pickle - from nose.tools import assert_raises assert_raises(NotImplementedError, pickle.dumps, obj) # Use like: @@ -720,3 +720,23 @@ def test_safe_string_eq(): assert safe_string_eq(unicode("foo"), "foo") assert not safe_string_eq(np.empty((2, 2)), "foo") + +def check_pickle_version(version, required_version, name=""): + if version > required_version: + error_msg = "This version of patsy is too old to load this pickle" + elif version < required_version: + error_msg = "This pickle is too old and not supported by this version of patsy anymore" + else: + return + + if name: + error_msg += " (for object {})".format(name) + error_msg += "." + + # TODO Use a better exception than ValueError. + raise ValueError(error_msg) + +def test_check_pickle_version(): + assert_raises(ValueError, check_pickle_version, 0, 1) + assert_raises(ValueError, check_pickle_version, 1, 0) + check_pickle_version(0, 0) diff --git a/pickle_testcases/0.5/evalfactor_simple.pickle b/pickle_testcases/0.5/evalfactor_simple.pickle new file mode 100644 index 0000000..1be6d51 Binary files /dev/null and b/pickle_testcases/0.5/evalfactor_simple.pickle differ diff --git a/release-checklist.txt b/release-checklist.txt index 33d2ac4..c25d0cf 100644 --- a/release-checklist.txt +++ b/release-checklist.txt @@ -5,6 +5,7 @@ * verify that the ">97% coverage" claim in overview.rst is still true. * cd docs; make clean html -- check that there are no warnings * check MANIFEST.in +* run python -m patsy.test_pickling NEW_VERSION_NUMBER * update version in doc/changes.rst, patsy/version.py * make sure there are no uncommitted changes * clone a clean source directory (so as to get a clean checkout