Merge pull request #110 from akleeman/pre-0.1-release-cleanup
Pre 0.1 release cleanup
shoyer committed May 2, 2014
2 parents 8cd667d + 03cfc05 commit 9d09b43
Showing 19 changed files with 623 additions and 434 deletions.
10 changes: 10 additions & 0 deletions test/__init__.py
@@ -113,3 +113,13 @@ def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
class ReturnItem(object):
def __getitem__(self, key):
return key


def source_ndarray(array):
"""Given an ndarray, return the base object which holds its memory, or the
object itself.
"""
base = array.base
if base is None:
base = array
return base
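
As a usage note (not part of the diff), this helper lets tests assert that two arrays share memory even when one of them is a view; a minimal illustration with plain numpy:

import numpy as np

x = np.arange(10)
view = x[2:5]                        # slicing creates a view; view.base is x
assert source_ndarray(view) is x     # a view resolves to its underlying buffer
assert source_ndarray(x) is x        # a non-view resolves to itself
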
7 changes: 5 additions & 2 deletions test/test_data_array.py
@@ -3,7 +3,7 @@
from textwrap import dedent

from xray import Dataset, DataArray, Variable, align
from . import TestCase, ReturnItem
from . import TestCase, ReturnItem, source_ndarray


class TestDataArray(TestCase):
@@ -178,6 +178,8 @@ def test_math(self):
a + b
with self.assertRaisesRegexp(ValueError, 'not aligned'):
b + a
with self.assertRaisesRegexp(TypeError, 'datasets do not support'):
a + a.dataset

def test_dataset_math(self):
# verify that mathematical operators keep around the expected variables
@@ -238,7 +240,8 @@ def test_inplace_math(self):
b += 1
self.assertIs(b, a)
self.assertIs(b.variable, v)
self.assertIs(b.values, x)
self.assertArrayEqual(b.values, x)
self.assertIs(source_ndarray(b.values), x)
self.assertIs(b.dataset, self.ds)

def test_transpose(self):
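
The change above from assertIs(b.values, x) to comparing source_ndarray(b.values) acknowledges that in-place arithmetic may hand back a view rather than the identical ndarray object; a hedged numpy-only sketch of the distinction (illustrative, not part of the diff):

import numpy as np

x = np.arange(5.0)
v = x[:]             # a view onto x
v += 1               # in-place math mutates the shared buffer
assert v is not x    # the array objects differ...
assert v.base is x   # ...but the memory is shared, which is what the test now checks
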
74 changes: 43 additions & 31 deletions test/test_dataset.py
@@ -1,13 +1,13 @@
from collections import OrderedDict
from copy import deepcopy
from copy import copy, deepcopy
from textwrap import dedent
import cPickle as pickle
import unittest

import numpy as np
import pandas as pd

from xray import Dataset, DataArray, Variable, backends, utils, align
from xray import Dataset, DataArray, Variable, backends, utils, align, indexing

from . import TestCase

@@ -56,13 +56,13 @@ def set_variable(self, name, variable):
self._variables[name] = variable
return self._variables[name]

def open_store_variable(self, var):
data = indexing.LazilyIndexedArray(InaccessibleArray(var.values))
return Variable(var.dimensions, data, var.attrs)

@property
def variables(self):
return utils.FrozenOrderedDict(
(k, Variable(v.dimensions,
InaccessibleArray(v.values),
v.attrs))
for k, v in self._variables.iteritems())
def store_variables(self):
return self._variables


class TestDataset(TestCase):
@@ -161,10 +161,18 @@ def test_equals_and_identical(self):
data2 = create_test_data(seed=42)
data2.attrs['foobar'] = 'baz'
self.assertTrue(data.equals(data2))
self.assertTrue(data == data2)
self.assertFalse(data.identical(data2))

del data2['time']
self.assertFalse(data.equals(data2))
self.assertTrue(data != data2)

def test_attrs(self):
data = create_test_data(seed=42)
data.attrs = {'foobar': 'baz'}
self.assertEqual(data.attrs['foobar'], 'baz')
self.assertIsInstance(data.attrs, OrderedDict)

def test_indexed(self):
data = create_test_data()
@@ -334,21 +342,21 @@ def test_unselect(self):
def test_copy(self):
data = create_test_data()

copied = data.copy(deep=False)
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIs(v0, v1)
copied['foo'] = ('z', np.arange(5))
self.assertNotIn('foo', data)

copied = data.copy(deep=True)
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIsNot(v0, v1)
for copied in [data.copy(deep=False), copy(data)]:
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIs(v0, v1)
copied['foo'] = ('z', np.arange(5))
self.assertNotIn('foo', data)

for copied in [data.copy(deep=True), deepcopy(data)]:
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
v1 = copied.variables[k]
self.assertIsNot(v0, v1)

def test_rename(self):
data = create_test_data()
@@ -432,7 +440,6 @@ def test_virtual_variables(self):
expected = data.indexed(time=slice(10))
self.assertDatasetIdentical(expected, actual)

@unittest.expectedFailure
def test_slice_virtual_variable(self):
data = create_test_data()
self.assertVariableEqual(data['time.dayofyear'][:10],
@@ -511,6 +518,8 @@ def test_groupby(self):
# TODO: test the other edge cases
with self.assertRaisesRegexp(ValueError, 'must be 1 dimensional'):
data.groupby('var1')
with self.assertRaisesRegexp(ValueError, 'length does not match'):
data.groupby(data['dim1'][:3])

def test_concat(self):
data = create_test_data()
@@ -631,10 +640,13 @@ def test_pickle(self):

def test_lazy_load(self):
store = InaccessibleVariableDataStore()
store.set_variable('dim', Variable(('dim'), np.arange(10)))
store.set_variable('var', Variable(('dim'), np.random.uniform(size=10)))
ds = Dataset()
ds = ds.load_store(store, decode_cf=False)
self.assertRaises(UnexpectedDataAccess, lambda: ds['var'].values)
ds = ds.load_store(store, decode_cf=True)
self.assertRaises(UnexpectedDataAccess, lambda: ds['var'].values)
create_test_data().dump_to_store(store)

for decode_cf in [False, True]:
ds = Dataset.load_store(store, decode_cf=decode_cf)
with self.assertRaises(UnexpectedDataAccess):
ds['var1'].values

# these should not raise UnexpectedDataAccess:
ds.indexed(time=10)
ds.indexed(time=slice(10), dim1=[0]).indexed(dim1=0, dim2=-1)
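
For context, UnexpectedDataAccess and InaccessibleArray are helpers defined earlier in this test module, outside the hunks shown; a minimal sketch of the pattern, assuming roughly this shape for the wrapper (illustrative, not the module's actual code):

class UnexpectedDataAccess(Exception):
    pass


class InaccessibleArray(object):
    """Duck-typed array that raises as soon as its data is actually read."""
    def __init__(self, array):
        self.array = array

    @property
    def dtype(self):
        return self.array.dtype

    @property
    def shape(self):
        return self.array.shape

    def __getitem__(self, key):
        raise UnexpectedDataAccess('tried to read data from a lazy store')
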
119 changes: 119 additions & 0 deletions test/test_indexing.py
@@ -0,0 +1,119 @@
import numpy as np

from xray import indexing, variable, Dataset, Variable, Coordinate
from . import TestCase, ReturnItem


class TestIndexers(TestCase):
def set_to_zero(self, x, i):
x = x.copy()
x[i] = 0
return x

def test_expanded_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
for i in [I[:], I[...], I[0, :, 10], I[..., 10], I[:5, ..., 0],
I[..., 0, ...], I[y], I[y, y], I[..., y, y],
I[..., 0, 1, 2, 3, 4]]:
j = indexing.expanded_indexer(i, x.ndim)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))
with self.assertRaisesRegexp(IndexError, 'too many indices'):
indexing.expanded_indexer(I[1, 2, 3], 2)
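
The loop above exercises the expansion of partial keys to the array's full dimensionality; the behaviour being tested can be summarised by this illustrative re-implementation (a sketch, not xray's actual code):

def expanded_indexer_sketch(key, ndim):
    # Normalize `key` into a tuple of exactly `ndim` entries, expanding a
    # single Ellipsis into as many full slices as needed and padding the
    # tail with full slices.
    if not isinstance(key, tuple):
        key = (key,)
    new_key = []
    found_ellipsis = False
    for k in key:
        if k is Ellipsis and not found_ellipsis:
            new_key.extend([slice(None)] * (ndim + 1 - len(key)))
            found_ellipsis = True
        else:
            new_key.append(slice(None) if k is Ellipsis else k)
    if len(new_key) > ndim:
        raise IndexError('too many indices')
    new_key.extend([slice(None)] * (ndim - len(new_key)))
    return tuple(new_key)
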

def test_orthogonal_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
# orthogonal and numpy indexing should be equivalent, because we only
# use at most one array and it is never between two slice objects
# (i.e., we try to avoid numpy's mind-boggling "partial indexing",
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html)
for i in [I[:], I[0], I[0, 0], I[:5], I[2:5], I[2:5:-1], I[:3, :4],
I[:3, 0, :4], I[:3, 0, :4, 0], I[y], I[:, y], I[0, y],
I[:2, :3, y], I[0, y, :, :4, 0]]:
j = indexing.orthogonal_indexer(i, x.shape)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))
# for more complicated cases, check orthogonal indexing is still
# equivalent to slicing
z = np.arange(2, 8, 2)
for i, j, shape in [
(I[y, y], I[:5, :5], (5, 5, 12, 13, 14)),
(I[y, z], I[:5, 2:8:2], (5, 3, 12, 13, 14)),
(I[0, y, y], I[0, :5, :5], (5, 5, 13, 14)),
(I[y, 0, z], I[:5, 0, 2:8:2], (5, 3, 13, 14)),
(I[y, :, z], I[:5, :, 2:8:2], (5, 11, 3, 13, 14)),
(I[0, :2, y, y, 0], I[0, :2, :5, :5, 0], (2, 5, 5)),
(I[0, :, y, :, 0], I[0, :, :5, :, 0], (11, 5, 13)),
(I[:, :, y, :, 0], I[:, :, :5, :, 0], (10, 11, 5, 13)),
(I[:, :, y, z, :], I[:, :, :5, 2:8:2], (10, 11, 5, 3, 14))]:
k = indexing.orthogonal_indexer(i, x.shape)
self.assertEqual(shape, x[k].shape)
self.assertArrayEqual(x[j], x[k])
self.assertArrayEqual(self.set_to_zero(x, j),
self.set_to_zero(x, k))
# standard numpy (non-orthogonal) indexing doesn't work anymore
with self.assertRaisesRegexp(ValueError, 'only supports 1d'):
indexing.orthogonal_indexer(x > 0, x.shape)
with self.assertRaisesRegexp(ValueError, 'invalid subkey'):
print indexing.orthogonal_indexer((1.5 * y, 1.5 * y), x.shape)
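
The comment at the top of this test alludes to the equivalence between orthogonal indexing and numpy's own broadcasting rules; a minimal sketch of the idea using np.ix_ (illustrative only: it handles slices and 1-d arrays, ignores scalar indexers, and is not xray's implementation):

import numpy as np

def orthogonal_indexer_sketch(key, shape):
    # Convert each slice to the positions it selects, then let np.ix_
    # broadcast the per-axis indexers so each one acts independently.
    axes = [np.arange(*k.indices(size)) if isinstance(k, slice)
            else np.asarray(k)
            for k, size in zip(key, shape)]
    return np.ix_(*axes)

x = np.random.randn(10, 11, 12)
y, z = np.arange(5), np.arange(2, 8, 2)
assert x[orthogonal_indexer_sketch((y, slice(None), z), x.shape)].shape == (5, 11, 3)
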

def test_convert_label_indexer(self):
# TODO: add tests that aren't just for edge cases
coord = Coordinate('x', [1, 2, 3])
with self.assertRaisesRegexp(ValueError, 'not all values found'):
indexing.convert_label_indexer(coord, [0])
with self.assertRaises(KeyError):
indexing.convert_label_indexer(coord, 0)

def test_remap_label_indexers(self):
# TODO: fill in more tests!
data = Dataset({'x': ('x', [1, 2, 3])})
test_indexer = lambda x: indexing.remap_label_indexers(data, {'x': x})
self.assertEqual({'x': 0}, test_indexer(1))
self.assertEqual({'x': 0}, test_indexer(np.int32(1)))
self.assertEqual({'x': 0}, test_indexer(Variable([], 1)))


class TestLazyArray(TestCase):
def test_slice_slice(self):
I = ReturnItem()
x = np.arange(100)
slices = [I[:3], I[:4], I[2:4], I[:1], I[:-1], I[5:-1], I[-5:-1],
I[::-1], I[5::-1], I[:3:-1], I[:30:-1], I[10:4:], I[::4],
I[4:4:4], I[:4:-4]]
for i in slices:
for j in slices:
expected = x[i][j]
new_slice = indexing.slice_slice(i, j, size=100)
actual = x[new_slice]
self.assertArrayEqual(expected, actual)
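
slice_slice collapses two successive slices into a single equivalent one; an illustrative way to see why this is always possible (a sketch, not xray's implementation) is that composing slices always selects an arithmetic progression of positions, which can be read back off as one slice:

import numpy as np

def compose_slices_sketch(first, second, size):
    # Apply `first` to the positions 0..size-1, then `second` to the
    # result, and rebuild an equivalent slice from the selected positions.
    positions = np.arange(size)[first][second]
    if positions.size == 0:
        return slice(0, 0)
    start = int(positions[0])
    step = int(positions[1] - positions[0]) if positions.size > 1 else 1
    stop = int(positions[-1]) + (1 if step > 0 else -1)
    return slice(start, None if stop < 0 else stop, step)
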

def test_lazily_indexed_array(self):
x = variable.NumpyArrayAdapter(np.random.rand(10, 20, 30))
lazy = indexing.LazilyIndexedArray(x)
I = ReturnItem()
# test orthogonally applied indexers
indexers = [I[:], 0, -2, I[:3], [0, 1, 2, 3], np.arange(10) < 5]
for i in indexers:
for j in indexers:
for k in indexers:
expected = np.asarray(x[i, j, k])
for actual in [lazy[i, j, k],
lazy[:, j, k][i],
lazy[:, :, k][:, j][i]]:
self.assertEqual(expected.shape, actual.shape)
self.assertArrayEqual(expected, actual)
# test sequentially applied indexers
indexers = [(3, 2), (I[:], 0), (I[:2], -1), (I[:4], [0]), ([4, 5], 0),
([0, 1, 2], [0, 1]), ([0, 3, 5], I[:2])]
for i, j in indexers:
expected = np.asarray(x[i][j])
actual = lazy[i][j]
self.assertEqual(expected.shape, actual.shape)
self.assertArrayEqual(expected, actual)
78 changes: 2 additions & 76 deletions test/test_utils.py
@@ -2,82 +2,8 @@
import numpy as np
import pandas as pd

from xray import utils, Dataset, Variable, Coordinate
from . import TestCase, ReturnItem, requires_netCDF4


class TestIndexers(TestCase):
def set_to_zero(self, x, i):
x = x.copy()
x[i] = 0
return x

def test_expanded_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
for i in [I[:], I[...], I[0, :, 10], I[..., 10], I[:5, ..., 0],
I[..., 0, ...], I[y], I[y, y], I[..., y, y],
I[..., 0, 1, 2, 3, 4]]:
j = utils.expanded_indexer(i, x.ndim)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))

def test_orthogonal_indexer(self):
x = np.random.randn(10, 11, 12, 13, 14)
y = np.arange(5)
I = ReturnItem()
# orthogonal and numpy indexing should be equivalent, because we only
# use at most one array and it is never between two slice objects
# (i.e., we try to avoid numpy's mind-boggling "partial indexing",
# http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html)
for i in [I[:], I[0], I[0, 0], I[:5], I[2:5], I[2:5:-1], I[:3, :4],
I[:3, 0, :4], I[:3, 0, :4, 0], I[y], I[:, y], I[0, y],
I[:2, :3, y], I[0, y, :, :4, 0]]:
j = utils.orthogonal_indexer(i, x.shape)
self.assertArrayEqual(x[i], x[j])
self.assertArrayEqual(self.set_to_zero(x, i),
self.set_to_zero(x, j))
# for more complicated cases, check orthogonal indexing is still
# equivalent to slicing
z = np.arange(2, 8, 2)
for i, j, shape in [
(I[y, y], I[:5, :5], (5, 5, 12, 13, 14)),
(I[y, z], I[:5, 2:8:2], (5, 3, 12, 13, 14)),
(I[0, y, y], I[0, :5, :5], (5, 5, 13, 14)),
(I[y, 0, z], I[:5, 0, 2:8:2], (5, 3, 13, 14)),
(I[y, :, z], I[:5, :, 2:8:2], (5, 11, 3, 13, 14)),
(I[0, :2, y, y, 0], I[0, :2, :5, :5, 0], (2, 5, 5)),
(I[0, :, y, :, 0], I[0, :, :5, :, 0], (11, 5, 13)),
(I[:, :, y, :, 0], I[:, :, :5, :, 0], (10, 11, 5, 13)),
(I[:, :, y, z, :], I[:, :, :5, 2:8:2], (10, 11, 5, 3, 14))]:
k = utils.orthogonal_indexer(i, x.shape)
self.assertEqual(shape, x[k].shape)
self.assertArrayEqual(x[j], x[k])
self.assertArrayEqual(self.set_to_zero(x, j),
self.set_to_zero(x, k))
# standard numpy (non-orthogonal) indexing doesn't work anymore
with self.assertRaisesRegexp(ValueError, 'only supports 1d'):
utils.orthogonal_indexer(x > 0, x.shape)
with self.assertRaisesRegexp(ValueError, 'invalid subkey'):
print utils.orthogonal_indexer((1.5 * y, 1.5 * y), x.shape)

def test_convert_label_indexer(self):
# TODO: add tests that aren't just for edge cases
coord = Coordinate('x', [1, 2, 3])
with self.assertRaisesRegexp(ValueError, 'not all values found'):
utils.convert_label_indexer(coord, [0])
with self.assertRaises(KeyError):
utils.convert_label_indexer(coord, 0)

def test_remap_label_indexers(self):
# TODO: fill in more tests!
data = Dataset({'x': ('x', [1, 2, 3])})
test_indexer = lambda x: utils.remap_label_indexers(data, {'x': x})
self.assertEqual({'x': 0}, test_indexer(1))
self.assertEqual({'x': 0}, test_indexer(np.int32(1)))
self.assertEqual({'x': 0}, test_indexer(Variable([], 1)))
from xray import utils
from . import TestCase


class TestSafeCastToIndex(TestCase):
(diffs for the remaining 14 changed files are not shown)
