From 34bc09b48cead109dc2d8252bb830a03eed2e509 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Wed, 20 May 2015 22:46:53 -0400 Subject: [PATCH 01/13] Check that a builder roundtrips through pickle and will give the same results afterwards. --- patsy/test_highlevel.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 9b7be37..fb2f988 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -5,6 +5,7 @@ # Exhaustive end-to-end tests of the top-level API. import sys +from six.moves import cPickle as pickle import __future__ import six import numpy as np @@ -758,3 +759,17 @@ def test_C_and_pandas_categorical(): [[1, 0], [1, 1], [1, 0]]) + +def test_pickle_builder_roundtrips(): + design_matrix = dmatrix("x + a", {"x": [1, 2, 3], + "a": ["a1", "a2", "a3"]}) + builder = design_matrix.design_info.builder + + new_data = {"x": [10, 20, 30], + "a": ["a3", "a1", "a2"]} + m1 = dmatrix(builder, new_data) + + builder2 = pickle.loads(pickle.dumps(design_matrix.design_info.builder)) + m2 = dmatrix(builder2, new_data) + + assert np.allclose(m1, m2) From 3bc08d6867123f8659fba9fd3a1712bcfd69c8ad Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Tue, 18 Aug 2015 21:04:05 -0400 Subject: [PATCH 02/13] Beginning of work on pickling. --- patsy/eval.py | 17 ++++++++++++++--- patsy/test_highlevel.py | 6 ++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index d4ed83f..f333b84 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -565,7 +565,20 @@ def eval(self, memorize_state, data): memorize_state, data) - __getstate__ = no_pickling + def __getstate__(self): + return (0, self.name, self.origin) + + def __setstate__(self, state): + # TODO What do we do about self.code? + (version, code, origin) = state + assert version == 0 + # TODO Give better error message when version is too recent, etc. + # Should use a single function from somewhere + # + self.code = code + +def test_EvalFactor_pickle_saves_origin(): + assert False def test_EvalFactor_basics(): e = EvalFactor("a+b") @@ -577,8 +590,6 @@ def test_EvalFactor_basics(): assert e.origin is None assert e2.origin == "asdf" - assert_no_pickling(e) - def test_EvalFactor_memorize_passes_needed(): from patsy.state import stateful_transform foo = stateful_transform(lambda: "FOO-OBJ") diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index fb2f988..2dc45fd 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -761,9 +761,13 @@ def test_C_and_pandas_categorical(): [1, 0]]) def test_pickle_builder_roundtrips(): + import numpy as np + # TODO Add center(x) and categorical interaction, and call to np.log to patsy formula. design_matrix = dmatrix("x + a", {"x": [1, 2, 3], "a": ["a1", "a2", "a3"]}) + # TODO Remove builder, pass design_info to dmatrix() instead. builder = design_matrix.design_info.builder + del np new_data = {"x": [10, 20, 30], "a": ["a3", "a1", "a2"]} @@ -773,3 +777,5 @@ def test_pickle_builder_roundtrips(): m2 = dmatrix(builder2, new_data) assert np.allclose(m1, m2) + + From d5ed045bddd6765a835112abc3e8d7b537dc0549 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Tue, 18 Aug 2015 21:04:43 -0400 Subject: [PATCH 03/13] Beginning of igh-level tests for pickling. --- patsy/test_pickling.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 patsy/test_pickling.py diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py new file mode 100644 index 0000000..8f2cc9a --- /dev/null +++ b/patsy/test_pickling.py @@ -0,0 +1,22 @@ +from six.moves import cPickle as pickle + +from patsy import EvalFactor + +stuff = [ + EvalFactor("a+b"), + ] + +def test_pickling_roundtrips(): + for obj in stuff: + assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) + +def test_unpickling_future_gives_sensible_error_msg(): + pass + +# Entrypoint: python -m patsy.test_pickling ... + +if __name__ == "__main__": + # TODO Save pickle. Make sure it's running from the right directory, so + # the pickles are saved in the right place. + + From d43176464671db52fc0f4f489700c97252524b86 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Fri, 3 Jun 2016 16:24:14 -0700 Subject: [PATCH 04/13] Remove stray whitespace and indents. --- patsy/test_highlevel.py | 2 -- patsy/test_pickling.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 2dc45fd..0d2fe80 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -777,5 +777,3 @@ def test_pickle_builder_roundtrips(): m2 = dmatrix(builder2, new_data) assert np.allclose(m1, m2) - - diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 8f2cc9a..292f3c3 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -18,5 +18,3 @@ def test_unpickling_future_gives_sensible_error_msg(): if __name__ == "__main__": # TODO Save pickle. Make sure it's running from the right directory, so # the pickles are saved in the right place. - - From 71e4a64d2604c6bf33cccd1288db245cbd76ca58 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sat, 4 Jun 2016 16:11:25 -0700 Subject: [PATCH 05/13] In-progress work for pickling tests. --- patsy/test_pickling.py | 51 +++++++++++++++--- pickle_testcases/0.5/evalfactor_simple.pickle | Bin 0 -> 33 bytes release-checklist.txt | 1 + 3 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 pickle_testcases/0.5/evalfactor_simple.pickle diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 292f3c3..6e87e07 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -1,20 +1,55 @@ +import six from six.moves import cPickle as pickle +import os + from patsy import EvalFactor -stuff = [ - EvalFactor("a+b"), - ] +PICKE_TESTCASES_ROOTDIR = os.path.join(os.path.dirname(__file__), '..', 'pickle_testcases') + +pickling_testcases = { + "evalfactor_simple": EvalFactor("a+b"), + } + -def test_pickling_roundtrips(): - for obj in stuff: +def test_pickling_same_version_roundtrips(): + for obj in six.itervalues(pickling_testcases): + yield (check_pickling_same_version_roundtrips, obj) + +def check_pickling_same_version_roundtrips(obj): assert obj == pickle.loads(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)) +def test_pickling_old_versions_still_work(): + for (dirpath, dirnames, filenames) in os.walk(PICKE_TESTCASES_ROOTDIR): + for fname in filenames: + if os.path.splitext(fname)[1] == '.pickle': + yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) + +def check_pickling_old_versions_still_work(pickle_filename): + with open(pickle_filename, 'rb') as f: + testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] + assert pickling_testcases[testcase_name] == pickle.load(f) + def test_unpickling_future_gives_sensible_error_msg(): + # TODO How do we do this? And how do we test it then? pass -# Entrypoint: python -m patsy.test_pickling ... +def save_pickle_testcases(version): + pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) + # Fails if the directory already exists, which is what we want here + # since we don't want to overwrite testcases accidentally. + os.mkdir(pickle_testcases_dir) + + for name, obj in six.iteritems(pickling_testcases): + with open(os.path.join(pickle_testcases_dir, '{}.pickle'.format(name)), 'wb') as f: + pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) if __name__ == "__main__": - # TODO Save pickle. Make sure it's running from the right directory, so - # the pickles are saved in the right place. + import argparse + + # Should we use a "create-pickles" sub-command to make things clearer? + arg_parser = argparse.ArgumentParser() + arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.") + args = arg_parser.parse_args() + + save_pickle_testcases(args.version) diff --git a/pickle_testcases/0.5/evalfactor_simple.pickle b/pickle_testcases/0.5/evalfactor_simple.pickle new file mode 100644 index 0000000000000000000000000000000000000000..fb505999bf489d7e6698856844edec0f24ec2532 GIT binary patch literal 33 mcmZo*N-jt&DX!E@ElbSdas?7@iOD7TMO=l9nvI1_-V6Z77z)Aw literal 0 HcmV?d00001 diff --git a/release-checklist.txt b/release-checklist.txt index 33d2ac4..c25d0cf 100644 --- a/release-checklist.txt +++ b/release-checklist.txt @@ -5,6 +5,7 @@ * verify that the ">97% coverage" claim in overview.rst is still true. * cd docs; make clean html -- check that there are no warnings * check MANIFEST.in +* run python -m patsy.test_pickling NEW_VERSION_NUMBER * update version in doc/changes.rst, patsy/version.py * make sure there are no uncommitted changes * clone a clean source directory (so as to get a clean checkout From ddbf6be020f00533500454fcb9301e898c828340 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sat, 4 Jun 2016 16:17:10 -0700 Subject: [PATCH 06/13] Add TODO comment. --- patsy/test_pickling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 6e87e07..2d1741e 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -28,6 +28,11 @@ def test_pickling_old_versions_still_work(): def check_pickling_old_versions_still_work(pickle_filename): with open(pickle_filename, 'rb') as f: testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] + # When adding features to a class, it will happen that there is no + # way to make an instance of that version version of that class + # equal to any instance of a previous version. How do we handle + # that? + # Maybe adding a minimum version requirement to each test? assert pickling_testcases[testcase_name] == pickle.load(f) def test_unpickling_future_gives_sensible_error_msg(): From 3164cd9bdff1aea76378d1729af60b3d855c61fc Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 15:15:39 -0700 Subject: [PATCH 07/13] Pickling of EvalFactor, with testcase for making sure origin is pickled. --- patsy/eval.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index f333b84..438f022 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -19,6 +19,7 @@ import ast import numbers import six +from six.moves import cPickle as pickle from patsy import PatsyError from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling from patsy.tokens import (pretty_untokenize, normalize_token_spacing, @@ -566,19 +567,28 @@ def eval(self, memorize_state, data): data) def __getstate__(self): - return (0, self.name, self.origin) + return (0, self.code, self.origin) def __setstate__(self, state): - # TODO What do we do about self.code? (version, code, origin) = state assert version == 0 # TODO Give better error message when version is too recent, etc. # Should use a single function from somewhere # self.code = code + self.origin = origin def test_EvalFactor_pickle_saves_origin(): - assert False + # The pickling tests use object equality before and after pickling + # to test that pickling worked correctly. But EvalFactor's origin field + # is not used in equality comparisons, so we need a separate test to + # test that it is being pickled. + ORIGIN = 123456 + f = EvalFactor('a', ORIGIN) + new_f = pickle.loads(pickle.dumps(f)) + + assert f.origin is not None + assert f.origin == new_f.origin def test_EvalFactor_basics(): e = EvalFactor("a+b") From c5a8e7571eea1cc52a1f93205890f045ae2cd24b Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 15:18:16 -0700 Subject: [PATCH 08/13] More fully fleshed-out version of pickling test harness. --- patsy/test_pickling.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 2d1741e..c23e3bc 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -1,7 +1,10 @@ +from __future__ import print_function + import six from six.moves import cPickle as pickle import os +import shutil from patsy import EvalFactor @@ -11,7 +14,6 @@ "evalfactor_simple": EvalFactor("a+b"), } - def test_pickling_same_version_roundtrips(): for obj in six.itervalues(pickling_testcases): yield (check_pickling_same_version_roundtrips, obj) @@ -39,22 +41,30 @@ def test_unpickling_future_gives_sensible_error_msg(): # TODO How do we do this? And how do we test it then? pass -def save_pickle_testcases(version): +def create_pickles(version): pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) - # Fails if the directory already exists, which is what we want here - # since we don't want to overwrite testcases accidentally. - os.mkdir(pickle_testcases_dir) + if os.path.exists(pickle_testcases_dir): + raise OSError("{} already exists. Aborting.".format(pickle_testcases_dir)) + pickle_testcases_tempdir = pickle_testcases_dir + "_inprogress" + os.mkdir(pickle_testcases_tempdir) - for name, obj in six.iteritems(pickling_testcases): - with open(os.path.join(pickle_testcases_dir, '{}.pickle'.format(name)), 'wb') as f: - pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) + try: + for name, obj in six.iteritems(pickling_testcases): + with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: + pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) + except Exception: + print("Exception during creation of pickles for {}. Removing directory.".format(version)) + shutil.rmtree(pickle_testcases_tempdir) + raise + finally: + os.rename(pickle_testcases_tempdir, pickle_testcases_dir) + print("Successfully created pickle testcases for new version {}.".format(version)) if __name__ == "__main__": import argparse - # Should we use a "create-pickles" sub-command to make things clearer? - arg_parser = argparse.ArgumentParser() + arg_parser = argparse.ArgumentParser(description="Create and save pickle testcases for a new version of patsy.") arg_parser.add_argument("version", help="The version of patsy for which to save a new set of pickle testcases.") args = arg_parser.parse_args() - save_pickle_testcases(args.version) + create_pickles(args.version) From 62b1ce4405c416659f0883d618513e8f2041b922 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:52:12 -0700 Subject: [PATCH 09/13] Better error message when unpickling a version that is not supported. --- patsy/eval.py | 7 ++----- patsy/util.py | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/patsy/eval.py b/patsy/eval.py index 438f022..897d6c1 100644 --- a/patsy/eval.py +++ b/patsy/eval.py @@ -21,7 +21,7 @@ import six from six.moves import cPickle as pickle from patsy import PatsyError -from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling +from patsy.util import PushbackAdapter, no_pickling, assert_no_pickling, check_pickle_version from patsy.tokens import (pretty_untokenize, normalize_token_spacing, python_tokenize) from patsy.compat import call_and_wrap_exc @@ -571,10 +571,7 @@ def __getstate__(self): def __setstate__(self, state): (version, code, origin) = state - assert version == 0 - # TODO Give better error message when version is too recent, etc. - # Should use a single function from somewhere - # + check_pickle_version(version, 0, self.__class__.__name__) self.code = code self.origin = origin diff --git a/patsy/util.py b/patsy/util.py index 6d9225e..ce9dada 100644 --- a/patsy/util.py +++ b/patsy/util.py @@ -24,6 +24,7 @@ ] import sys +from nose.tools import assert_raises import numpy as np import six from six.moves import cStringIO as StringIO @@ -698,7 +699,6 @@ def no_pickling(*args, **kwargs): def assert_no_pickling(obj): import pickle - from nose.tools import assert_raises assert_raises(NotImplementedError, pickle.dumps, obj) # Use like: @@ -720,3 +720,23 @@ def test_safe_string_eq(): assert safe_string_eq(unicode("foo"), "foo") assert not safe_string_eq(np.empty((2, 2)), "foo") + +def check_pickle_version(version, required_version, name=""): + if version > required_version: + error_msg = "This version of patsy is too old to load this pickle" + elif version < required_version: + error_msg = "This pickle is too old and not supported by this version of patsy anymore" + else: + return + + if name: + error_msg += " (for object {})".format(name) + error_msg += "." + + # TODO Use a better exception than ValueError. + raise ValueError(error_msg) + +def test_check_pickle_version(): + assert_raises(ValueError, check_pickle_version, 0, 1) + assert_raises(ValueError, check_pickle_version, 1, 0) + check_pickle_version(0, 0) From c870d4dbe14ed716336ac85e3c10b46192a07f9a Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:54:05 -0700 Subject: [PATCH 10/13] More flesh-out version of high-level tests for pickling design_info objects. --- patsy/test_highlevel.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/patsy/test_highlevel.py b/patsy/test_highlevel.py index 0d2fe80..bb4c5a3 100644 --- a/patsy/test_highlevel.py +++ b/patsy/test_highlevel.py @@ -761,19 +761,24 @@ def test_C_and_pandas_categorical(): [1, 0]]) def test_pickle_builder_roundtrips(): - import numpy as np - # TODO Add center(x) and categorical interaction, and call to np.log to patsy formula. - design_matrix = dmatrix("x + a", {"x": [1, 2, 3], - "a": ["a1", "a2", "a3"]}) - # TODO Remove builder, pass design_info to dmatrix() instead. - builder = design_matrix.design_info.builder - del np - - new_data = {"x": [10, 20, 30], - "a": ["a3", "a1", "a2"]} - m1 = dmatrix(builder, new_data) - - builder2 = pickle.loads(pickle.dumps(design_matrix.design_info.builder)) - m2 = dmatrix(builder2, new_data) + formulas = ["a + b", + "center(i) + a * b + np.log(x)"] + dataset = {"i": range(3), + "x": [1.1, 2.2, 3.3], + "a": list("abc"), + "b": list("xyx")} + + for formula in formulas: + yield check_pickle_builder_roundtrips, formula, dataset + +def check_pickle_builder_roundtrips(formula, dataset): + design_matrix = dmatrix(formula, dataset) + # TODO Make new_dataset have different values from dataset? + new_dataset = dataset + + m1 = dmatrix(design_matrix.design_info, new_dataset) + + unpickled_design_info = pickle.loads(pickle.dumps(design_matrix.design_info)) + m2 = dmatrix(unpickled_design_info, new_dataset) assert np.allclose(m1, m2) From 858a623177e4e6f4a16a1bec18a6a0678254706b Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:54:58 -0700 Subject: [PATCH 11/13] Small code tweaks. --- patsy/test_pickling.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index c23e3bc..737446c 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -28,8 +28,8 @@ def test_pickling_old_versions_still_work(): yield check_pickling_old_versions_still_work, os.path.join(dirpath, fname) def check_pickling_old_versions_still_work(pickle_filename): + testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] with open(pickle_filename, 'rb') as f: - testcase_name = os.path.splitext(os.path.basename(pickle_filename))[0] # When adding features to a class, it will happen that there is no # way to make an instance of that version version of that class # equal to any instance of a previous version. How do we handle @@ -38,7 +38,7 @@ def check_pickling_old_versions_still_work(pickle_filename): assert pickling_testcases[testcase_name] == pickle.load(f) def test_unpickling_future_gives_sensible_error_msg(): - # TODO How do we do this? And how do we test it then? + # TODO How would we go about testing this? pass def create_pickles(version): @@ -53,7 +53,8 @@ def create_pickles(version): with open(os.path.join(pickle_testcases_tempdir, "{}.pickle".format(name)), "wb") as f: pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) except Exception: - print("Exception during creation of pickles for {}. Removing directory.".format(version)) + print("Exception during creation of pickles for {}. " \ + "Removing partially created directory.".format(version)) shutil.rmtree(pickle_testcases_tempdir) raise finally: From 4b74a0500938eb061497dd2dd4833fe236c188b3 Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 16:55:40 -0700 Subject: [PATCH 12/13] Sample test pickle. --- pickle_testcases/0.5/evalfactor_simple.pickle | Bin 33 -> 46 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/pickle_testcases/0.5/evalfactor_simple.pickle b/pickle_testcases/0.5/evalfactor_simple.pickle index fb505999bf489d7e6698856844edec0f24ec2532..1be6d51e6bb45f987d98eed29513af2cb1062a4b 100644 GIT binary patch delta 18 ZcmY$?o1n-W%9^O4t&mj6?AM;82LL9t1aANU delta 4 LcmdN>oS+B*0$>3F From 12c74716c2fc9da1cace7edd5dec48303198cc7e Mon Sep 17 00:00:00 2001 From: Christian Hudon Date: Sun, 5 Jun 2016 17:07:15 -0700 Subject: [PATCH 13/13] Add TODO note for force=True future option for creating test pickles. --- patsy/test_pickling.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/patsy/test_pickling.py b/patsy/test_pickling.py index 737446c..a195c5f 100644 --- a/patsy/test_pickling.py +++ b/patsy/test_pickling.py @@ -42,6 +42,11 @@ def test_unpickling_future_gives_sensible_error_msg(): pass def create_pickles(version): + # TODO Add options to overwrite pickles directory, with force=True + # during development. + # TODO Add safety check that said force=True option will still give an + # error when trying to remove pickles for a released version, by + # comparing the version argument here with patsy.__version__. pickle_testcases_dir = os.path.join(PICKE_TESTCASES_ROOTDIR, version) if os.path.exists(pickle_testcases_dir): raise OSError("{} already exists. Aborting.".format(pickle_testcases_dir))