Merge pull request #110 from akleeman/pre-0.1-release-cleanup
Pre 0.1 release cleanup
shoyer committed May 2, 2014
2 parents 8cd667d + 03cfc05 commit 9d09b43
Showing 19 changed files with 623 additions and 434 deletions.
10 changes: 10 additions & 0 deletions test/__init__.py
@@ -113,3 +113,13 @@ def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
class ReturnItem(object):
def __getitem__(self, key):
return key


def source_ndarray(array):
"""Given an ndarray, return the base object which holds its memory, or the
object itself.
"""
base = array.base
if base is None:
base = array
return base
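
As a usage note (not part of the diff), this helper lets tests assert that two arrays share memory even when one of them is a view; a minimal illustration with plain numpy:

import numpy as np

x = np.arange(10)
view = x[2:5]                        # slicing creates a view; view.base is x
assert source_ndarray(view) is x     # a view resolves to its underlying buffer
assert source_ndarray(x) is x        # a non-view resolves to itself
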
7 changes: 5 additions & 2 deletions test/test_data_array.py
@@ -3,7 +3,7 @@
from textwrap import dedent

from xray import Dataset, DataArray, Variable, align
from . import TestCase, ReturnItem
from . import TestCase, ReturnItem, source_ndarray


class TestDataArray(TestCase):
@@ -178,6 +178,8 @@ def test_math(self):
a + b
with self.assertRaisesRegexp(ValueError, 'not aligned'):
b + a
with self.assertRaisesRegexp(TypeError, 'datasets do not support'):
a + a.dataset

def test_dataset_math(self):
# verify that mathematical operators keep around the expected variables
@@ -238,7 +240,8 @@ def test_inplace_math(self):
b += 1
self.assertIs(b, a)
self.assertIs(b.variable, v)
self.assertIs(b.values, x)
self.assertArrayEqual(b.values, x)
self.assertIs(source_ndarray(b.values), x)
self.assertIs(b.dataset, self.ds)

def test_transpose(self):
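
The change above from assertIs(b.values, x) to comparing source_ndarray(b.values) acknowledges that in-place arithmetic may hand back a view rather than the identical ndarray object; a hedged numpy-only sketch of the distinction (illustrative, not part of the diff):

import numpy as np

x = np.arange(5.0)
v = x[:]             # a view onto x
v += 1               # in-place math mutates the shared buffer
assert v is not x    # the array objects differ...
assert v.base is x   # ...but the memory is shared, which is what the test now checks
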
74 changes: 43 additions & 31 deletions test/test_dataset.py
@@ -1,13 +1,13 @@
from collections import OrderedDict
from copy import deepcopy
from copy import copy, deepcopy
from textwrap import dedent
import cPickle as pickle
import unittest

import numpy as np
import pandas as pd

from xray import Dataset, DataArray, Variable, backends, utils, align
from xray import Dataset, DataArray, Variable, backends, utils, align, indexing

from . import TestCase

@@ -56,13 +56,13 @@ def set_variable(self, name, variable):
self._variables[name] = variable
return self._variables[name]

def open_store_variable(self, var):
data = indexing.LazilyIndexedArray(InaccessibleArray(var.values))
return Variable(var.dimensions, data, var.attrs)

@property
def variables(self):
return utils.FrozenOrderedDict(
(k, Variable(v.dimensions,
InaccessibleArray(v.values),
v.attrs))
for k, v in self._variables.iteritems())
def store_variables(self):
return self._variables


class TestDataset(TestCase):
@@ -161,10 +161,18 @@ def test_equals_and_identical(self):
data2 = create_test_data(seed=42)
data2.attrs['foobar'] = 'baz'
self.assertTrue(data.equals(data2))
self.assertTrue(data == data2)
self.assertFalse(data.identical(data2))

del data2['time']
self.assertFalse(data.equals(data2))
self.assertTrue(data != data2)

def test_attrs(self):
data = create_test_data(seed=42)
data.attrs = {'foobar': 'baz'}
self.assertEqual(data.attrs['foobar'], 'baz')
self.assertIsInstance(data.attrs, OrderedDict)

def test_indexed(self):
data = create_test_data()
@@ -334,21 +342,21 @@ def test_unselect(self):
def test_copy(self):
data = create_test_data()

copied = data.copy(deep=False)
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIs(v0, v1)
copied['foo'] = ('z', np.arange(5))
self.assertNotIn('foo', data)

copied = data.copy(deep=True)
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIsNot(v0, v1)
for copied in [data.copy(deep=False), copy(data)]:
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIs(v0, v1)
copied['foo'] = ('z', np.arange(5))
self.assertNotIn('foo', data)

for copied in [data.copy(deep=True), deepcopy(data)]:
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIsNot(v0, v1)

def test_rename(self):
data = create_test_data()
@@ -432,7 +440,6 @@ def test_virtual_variables(self):
expected = data.indexed(time=slice(10))
self.assertDatasetIdentical(expected, actual)

@unittest.expectedFailure
def test_slice_virtual_variable(self):
data = create_test_data()
self.assertVariableEqual(data['time.dayofyear'][:10],
@@ -511,6 +518,8 @@ def test_groupby(self):
# TODO: test the other edge cases
with self.assertRaisesRegexp(ValueError, 'must be 1 dimensional'):
data.groupby('var1')
with self.assertRaisesRegexp(ValueError, 'length does not match'):
data.groupby(data['dim1'][:3])

def test_concat(self):
data = create_test_data()
@@ -631,10 +640,13 @@ def test_pickle(self):

def test_lazy_load(self):
store = InaccessibleVariableDataStore()
store.set_variable('dim', Variable(('dim'), np.arange(10)))
store.set_variable('var', Variable(('dim'), np.random.uniform(size=10)))
ds = Dataset()
ds = ds.load_store(store, decode_cf=False)
self.assertRaises(UnexpectedDataAccess, lambda: ds['var'].values)
ds = ds.load_store(store, decode_cf=True)
self.assertRaises(UnexpectedDataAccess, lambda: ds['var'].values)
create_test_data().dump_to_store(store)

for decode_cf in [False, True]:
ds = Dataset.load_store(store, decode_cf=decode_cf)
with self.assertRaises(UnexpectedDataAccess):
ds['var1'].values

# these should not raise UnexpectedDataAccess:
ds.indexed(time=10)
ds.indexed(time=slice(10), dim1=[0]).indexed(dim1=0, dim2=-1)
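
For context, UnexpectedDataAccess and InaccessibleArray are helpers defined earlier in this test module, outside the hunks shown; a minimal sketch of the pattern, assuming roughly this shape for the wrapper (illustrative, not the module's actual code):

class UnexpectedDataAccess(Exception):
    pass


class InaccessibleArray(object):
    """Duck-typed array that raises as soon as its data is actually read."""
    def __init__(self, array):
        self.array = array

    @property
    def dtype(self):
        return self.array.dtype

    @property
    def shape(self):
        return self.array.shape

    def __getitem__(self, key):
        raise UnexpectedDataAccess('tried to read data from a lazy store')
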
119 changes: 119 additions & 0 deletions test/test_indexing.py
@@ -0,0 +1,119 @@
import numpy as np

from xray import indexing, variable, Dataset, Variable, Coordinate
from . import TestCase, ReturnItem


class TestIndexers(TestCase):
def set_to_zero(self, x, i):
x = x.copy()
x[i] = 0
return x

def test_expanded_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
for i in [I[:], I[...], I[0, :, 10], I[..., 10], I[:5, ..., 0],
I[..., 0, ...], I[y], I[y, y], I[..., y, y],
I[..., 0, 1, 2, 3, 4]]:
j = indexing.expanded_indexer(i, x.ndim)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))
with self.assertRaisesRegexp(IndexError, 'too many indices'):
indexing.expanded_indexer(I[1, 2, 3], 2)
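
The loop above exercises the expansion of partial keys to the array's full dimensionality; the behaviour being tested can be summarised by this illustrative re-implementation (a sketch, not xray's actual code):

def expanded_indexer_sketch(key, ndim):
    # Normalize `key` into a tuple of exactly `ndim` entries, expanding a
    # single Ellipsis into as many full slices as needed and padding the
    # tail with full slices.
    if not isinstance(key, tuple):
        key = (key,)
    new_key = []
    found_ellipsis = False
    for k in key:
        if k is Ellipsis and not found_ellipsis:
            new_key.extend([slice(None)] * (ndim + 1 - len(key)))
            found_ellipsis = True
        else:
            new_key.append(slice(None) if k is Ellipsis else k)
    if len(new_key) > ndim:
        raise IndexError('too many indices')
    new_key.extend([slice(None)] * (ndim - len(new_key)))
    return tuple(new_key)
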

def test_orthogonal_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
# orthogonal and numpy indexing should be equivalent, because we only
# use at most one array and it is never between two slice objects
# (i.e., we try to avoid numpy's mind-boggling "partial indexing",
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html)
for i in [I[:], I[0], I[0, 0], I[:5], I[2:5], I[2:5:-1], I[:3, :4],
I[:3, 0, :4], I[:3, 0, :4, 0], I[y], I[:, y], I[0, y],
I[:2, :3, y], I[0, y, :, :4, 0]]:
j = indexing.orthogonal_indexer(i, x.shape)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))
# for more complicated cases, check orthogonal indexing is still
# equivalent to slicing
z = np.arange(2, 8, 2)
for i, j, shape in [
(I[y, y], I[:5, :5], (5, 5, 12, 13, 14)),
(I[y, z], I[:5, 2:8:2], (5, 3, 12, 13, 14)),
(I[0, y, y], I[0, :5, :5], (5, 5, 13, 14)),
(I[y, 0, z], I[:5, 0, 2:8:2], (5, 3, 13, 14)),
(I[y, :, z], I[:5, :, 2:8:2], (5, 11, 3, 13, 14)),
(I[0, :2, y, y, 0], I[0, :2, :5, :5, 0], (2, 5, 5)),
(I[0, :, y, :, 0], I[0, :, :5, :, 0], (11, 5, 13)),
(I[:, :, y, :, 0], I[:, :, :5, :, 0], (10, 11, 5, 13)),
(I[:, :, y, z, :], I[:, :, :5, 2:8:2], (10, 11, 5, 3, 14))]:
k = indexing.orthogonal_indexer(i, x.shape)
self.assertEqual(shape, x[k].shape)
self.assertArrayEqual(x[j], x[k])
self.assertArrayEqual(self.set_to_zero(x, j),
self.set_to_zero(x, k))
# standard numpy (non-orthogonal) indexing doesn't work anymore
with self.assertRaisesRegexp(ValueError, 'only supports 1d'):
indexing.orthogonal_indexer(x > 0, x.shape)
with self.assertRaisesRegexp(ValueError, 'invalid subkey'):
print indexing.orthogonal_indexer((1.5 * y, 1.5 * y), x.shape)
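
The comment at the top of this test alludes to the equivalence between orthogonal indexing and numpy's own broadcasting rules; a minimal sketch of the idea using np.ix_ (illustrative only: it handles slices and 1-d arrays, ignores scalar indexers, and is not xray's implementation):

import numpy as np

def orthogonal_indexer_sketch(key, shape):
    # Convert each slice to the positions it selects, then let np.ix_
    # broadcast the per-axis indexers so each one acts independently.
    axes = [np.arange(*k.indices(size)) if isinstance(k, slice)
            else np.asarray(k)
            for k, size in zip(key, shape)]
    return np.ix_(*axes)

x = np.random.randn(10, 11, 12)
y, z = np.arange(5), np.arange(2, 8, 2)
assert x[orthogonal_indexer_sketch((y, slice(None), z), x.shape)].shape == (5, 11, 3)
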

def test_convert_label_indexer(self):
# TODO: add tests that aren't just for edge cases
coord = Coordinate('x', [1, 2, 3])
with self.assertRaisesRegexp(ValueError, 'not all values found'):
indexing.convert_label_indexer(coord, [0])
with self.assertRaises(KeyError):
indexing.convert_label_indexer(coord, 0)

def test_remap_label_indexers(self):
# TODO: fill in more tests!
data = Dataset({'x': ('x', [1, 2, 3])})
test_indexer = lambda x: indexing.remap_label_indexers(data, {'x': x})
self.assertEqual({'x': 0}, test_indexer(1))
self.assertEqual({'x': 0}, test_indexer(np.int32(1)))
self.assertEqual({'x': 0}, test_indexer(Variable([], 1)))


class TestLazyArray(TestCase):
def test_slice_slice(self):
I = ReturnItem()
x = np.arange(100)
slices = [I[:3], I[:4], I[2:4], I[:1], I[:-1], I[5:-1], I[-5:-1],
I[::-1], I[5::-1], I[:3:-1], I[:30:-1], I[10:4:], I[::4],
I[4:4:4], I[:4:-4]]
for i in slices:
for j in slices:
expected = x[i][j]
new_slice = indexing.slice_slice(i, j, size=100)
actual = x[new_slice]
self.assertArrayEqual(expected, actual)
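
slice_slice collapses two successive slices into a single equivalent one; an illustrative way to see why this is always possible (a sketch, not xray's implementation) is that composing slices always selects an arithmetic progression of positions, which can be read back off as one slice:

import numpy as np

def compose_slices_sketch(first, second, size):
    # Apply `first` to the positions 0..size-1, then `second` to the
    # result, and rebuild an equivalent slice from the selected positions.
    positions = np.arange(size)[first][second]
    if positions.size == 0:
        return slice(0, 0)
    start = int(positions[0])
    step = int(positions[1] - positions[0]) if positions.size > 1 else 1
    stop = int(positions[-1]) + (1 if step > 0 else -1)
    return slice(start, None if stop < 0 else stop, step)
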

def test_lazily_indexed_array(self):
x = variable.NumpyArrayAdapter(np.random.rand(10, 20, 30))
lazy = indexing.LazilyIndexedArray(x)
I = ReturnItem()
# test orthogonally applied indexers
indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], np.arange(10) < 5]
for i in indexers:
for j in indexers:
for k in indexers:
expected = np.asarray(x[i, j, k])
for actual in [lazy[i, j, k],
lazy[:, j, k][i],
lazy[:, :, k][:, j][i]]:
self.assertEqual(expected.shape, actual.shape)
self.assertArrayEqual(expected, actual)
# test sequentially applied indexers
indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0),
([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])]
for i, j in indexers:
expected = np.asarray(x[i][j])
actual = lazy[i][j]
self.assertEqual(expected.shape, actual.shape)
self.assertArrayEqual(expected, actual)
78 changes: 2 additions & 76 deletions test/test_utils.py
@@ -2,82 +2,8 @@
import numpy as np
import pandas as pd

from xray import utils, Dataset, Variable, Coordinate
from . import TestCase, ReturnItem, requires_netCDF4


class TestIndexers(TestCase):
def set_to_zero(self, x, i):
x = x.copy()
x[i] = 0
return x

def test_expanded_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
for i in [I[:], I[...], I[0, :, 10], I[..., 10], I[:5, ..., 0],
I[..., 0, ...], I[y], I[y, y], I[..., y, y],
I[..., 0, 1, 2, 3, 4]]:
j = utils.expanded_indexer(i, x.ndim)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))

def test_orthogonal_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
# orthogonal and numpy indexing should be equivalent, because we only
# use at most one array and it is never between two slice objects
# (i.e., we try to avoid numpy's mind-boggling "partial indexing",
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html)
for i in [I[:], I[0], I[0, 0], I[:5], I[2:5], I[2:5:-1], I[:3, :4],
I[:3, 0, :4], I[:3, 0, :4, 0], I[y], I[:, y], I[0, y],
I[:2, :3, y], I[0, y, :, :4, 0]]:
j = utils.orthogonal_indexer(i, x.shape)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))
# for more complicated cases, check orthogonal indexing is still
# equivalent to slicing
z = np.arange(2, 8, 2)
for i, j, shape in [
(I[y, y], I[:5, :5], (5, 5, 12, 13, 14)),
(I[y, z], I[:5, 2:8:2], (5, 3, 12, 13, 14)),
(I[0, y, y], I[0, :5, :5], (5, 5, 13, 14)),
(I[y, 0, z], I[:5, 0, 2:8:2], (5, 3, 13, 14)),
(I[y, :, z], I[:5, :, 2:8:2], (5, 11, 3, 13, 14)),
(I[0, :2, y, y, 0], I[0, :2, :5, :5, 0], (2, 5, 5)),
(I[0, :, y, :, 0], I[0, :, :5, :, 0], (11, 5, 13)),
(I[:, :, y, :, 0], I[:, :, :5, :, 0], (10, 11, 5, 13)),
(I[:, :, y, z, :], I[:, :, :5, 2:8:2], (10, 11, 5, 3, 14))]:
k = utils.orthogonal_indexer(i, x.shape)
self.assertEqual(shape, x[k].shape)
self.assertArrayEqual(x[j], x[k])
self.assertArrayEqual(self.set_to_zero(x, j),
self.set_to_zero(x, k))
# standard numpy (non-orthogonal) indexing doesn't work anymore
with self.assertRaisesRegexp(ValueError, 'only supports 1d'):
utils.orthogonal_indexer(x > 0, x.shape)
with self.assertRaisesRegexp(ValueError, 'invalid subkey'):
print utils.orthogonal_indexer((1.5 * y, 1.5 * y), x.shape)

def test_convert_label_indexer(self):
# TODO: add tests that aren't just for edge cases
coord = Coordinate('x', [1, 2, 3])
with self.assertRaisesRegexp(ValueError, 'not all values found'):
utils.convert_label_indexer(coord, [0])
with self.assertRaises(KeyError):
utils.convert_label_indexer(coord, 0)

def test_remap_label_indexers(self):
# TODO: fill in more tests!
data = Dataset({'x': ('x', [1, 2, 3])})
test_indexer = lambda x: utils.remap_label_indexers(data, {'x': x})
self.assertEqual({'x': 0}, test_indexer(1))
self.assertEqual({'x': 0}, test_indexer(np.int32(1)))
self.assertEqual({'x': 0}, test_indexer(Variable([], 1)))
from xray import utils
from . import TestCase


class TestSafeCastToIndex(TestCase):
(diffs for the remaining 14 changed files are not shown)
