From ecd07bc78c089092b244c0b390890da6b03bc1a5 Mon Sep 17 00:00:00 2001 From: "Martin K. Scherer" Date: Tue, 7 Jun 2016 14:24:31 +0200 Subject: [PATCH] [coor/datasource] in case input files have different dimensions, give a useful msg. (#822) --- pyemma/coordinates/data/_base/datasource.py | 15 +++++++++++++-- pyemma/coordinates/tests/test_numpyfilereader.py | 13 ++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/pyemma/coordinates/data/_base/datasource.py b/pyemma/coordinates/data/_base/datasource.py index 2bc340fab..91feb1948 100644 --- a/pyemma/coordinates/data/_base/datasource.py +++ b/pyemma/coordinates/data/_base/datasource.py @@ -123,8 +123,19 @@ def filenames(self, filename_list): # ensure all trajs have same dim if not np.unique(ndims).size == 1: - raise ValueError("input data has different dimensions!" - " Dimensions are = %s" % zip(filename_list, ndims)) + # group files by their dimensions to give user indicator + ndims = np.array(ndims) + filename_list = np.asarray(filename_list) + sort_inds = np.argsort(ndims) + import itertools, operator + res = {} + for dim, files in itertools.groupby(zip(ndims[sort_inds], filename_list[sort_inds]), + operator.itemgetter(0)): + res[dim] = list(f[1] for f in files) + + raise ValueError("Input data has different dimensions ({dims})!" + " Files grouped by dimensions: {groups}".format(dims=res.keys(), + groups=res)) self._ndim = ndims[0] self._lengths = lengths diff --git a/pyemma/coordinates/tests/test_numpyfilereader.py b/pyemma/coordinates/tests/test_numpyfilereader.py index eb1c473cf..0d98ce382 100644 --- a/pyemma/coordinates/tests/test_numpyfilereader.py +++ b/pyemma/coordinates/tests/test_numpyfilereader.py @@ -197,7 +197,18 @@ def test_usecols(self): with it: for x in it: np.testing.assert_equal(x, self.d2[:, cols]) - + + def test_different_shapes_value_error(self): + with tempfile.NamedTemporaryFile(delete=False, suffix='.npy') as f: + x=np.zeros((3, 42)) + np.save(f.name, x) + myfiles = self.files2d[:] + myfiles.insert(1, f.name) + + with self.assertRaises(ValueError) as cm: + NumPyFileReader(myfiles) + self.assertIn("different dimensions", cm.exception.args[0]) + print (cm.exception.args) if __name__ == "__main__":