diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py
index bad1aaf9..25592726 100644
--- a/awkward/array/jagged.py
+++ b/awkward/array/jagged.py
@@ -30,6 +30,7 @@
 
 import math
 import numbers
+import os
 
 import awkward.array.base
 import awkward.persist
@@ -469,7 +470,7 @@ def __getitem__(self, where):
                     thyself._stops.shape = self._stops.shape
 
                 head = head._tojagged(thyself._starts, thyself._stops, copy=False)
-                inthead = head.copy(content=head._content.view(awkward.util.numpy.uint8))
+                inthead = head.copy(content=head._content.astype(awkward.util.INDEXTYPE))
                 intheadsum = inthead.sum()
 
                 offsets = counts2offsets(intheadsum)
@@ -798,6 +799,14 @@ def aligned(*jaggedarrays):
 
         return True
 
+    def argdistincts(self):
+        """Shorthand for ``argpairs(same=False)``."""
+        return self.argpairs(same=False)
+
+    def distincts(self):
+        """Shorthand for ``pairs(same=False)``: pairs that never match an element with itself."""
+        return self.pairs(same=False)
+
     def argpairs(self, same=True):
         import awkward.array.table
         self._valid()
@@ -900,6 +909,8 @@ def any(self):
 
         out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
         nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+        if os.name == "nt":  # Windows Numpy reduceat requires 32-bit indexes
+            nonterminal = nonterminal.astype(awkward.util.numpy.int32)
         out[:len(nonterminal)] = awkward.util.numpy.logical_or.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
         out[self.offsets[1:] == self.offsets[:-1]] = False
         return out
@@ -916,6 +927,8 @@ def all(self):
 
         out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
         nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+        if os.name == "nt":  # Windows Numpy reduceat requires 32-bit indexes
+            nonterminal = nonterminal.astype(awkward.util.numpy.int32)
         out[:len(nonterminal)] = awkward.util.numpy.logical_and.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
         out[self.offsets[1:] == self.offsets[:-1]] = True
         return out
@@ -938,6 +951,8 @@ def sum(self):
         if self._canuseoffset():
             out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
             nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+            if os.name == "nt":  # Windows Numpy reduceat requires 32-bit indexes
+                nonterminal = nonterminal.astype(awkward.util.numpy.int32)
             out[:len(nonterminal)] = awkward.util.numpy.add.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
             out[self.offsets[1:] == self.offsets[:-1]] = 0
             return out
@@ -962,6 +977,8 @@ def prod(self):
         if self._canuseoffset():
             out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
             nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+            if os.name == "nt":  # Windows Numpy reduceat requires 32-bit indexes
+                nonterminal = nonterminal.astype(awkward.util.numpy.int32)
             out[:len(nonterminal)] = awkward.util.numpy.multiply.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
             out[self.offsets[1:] == self.offsets[:-1]] = 1
             return out
@@ -1021,6 +1038,8 @@ def argmax(self):
 
     def _minmax_offset(self, ismin):
         out = awkward.util.numpy.empty(self._starts.shape + self._content.shape[1:], dtype=self._content.dtype)
         nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+        if os.name == "nt":  # Windows Numpy reduceat requires 32-bit indexes
+            nonterminal = nonterminal.astype(awkward.util.numpy.int32)
 
         if ismin:
             out[:len(nonterminal)] = awkward.util.numpy.minimum.reduceat(self._content[self._starts[0]:self._stops[-1]], nonterminal)
diff --git a/awkward/version.py b/awkward/version.py
index 2f476909..0685257e 100644
--- a/awkward/version.py
+++ b/awkward/version.py
@@ -30,7 +30,7 @@
 
 import re
 
-__version__ = "0.4.3"
+__version__ = "0.4.4"
 version = __version__
 version_info = tuple(re.split(r"[-\.]", __version__))
 
diff --git a/tests/test_jagged.py b/tests/test_jagged.py
index b73987d4..d443b0d9 100644
--- a/tests/test_jagged.py
+++ b/tests/test_jagged.py
@@ -224,7 +224,47 @@ def test_jagged_regular(self):
         assert a.regular().tolist() == [[[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]]], [[[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]]]
 
     def test_jagged_cross(self):
-        pass
+        # cross() pairs every element of a's subarray with every element of b's: i * j combinations
+        for i in range(10):
+            for j in range(5):
+                a = JaggedArray.fromiter([[], [123], list(range(i)), []])
+                b = JaggedArray.fromiter([[], [456], list(range(j)), [999]])
+                c = a.cross(b)
+                assert len(c) == 4
+                assert len(c[0]) == 0
+                assert len(c[1]) == 1
+                assert len(c[2]) == i * j
+                assert len(c[3]) == 0
+                assert c[2]["0"].tolist() == numpy.repeat(range(i), j).tolist()
+                assert c[2]["1"].tolist() == numpy.tile(range(j), i).tolist()
+
+    def test_jagged_pairs(self):
+        # pairs() keeps self-pairs, so a subarray of length i yields i * (i + 1) // 2 combinations
+        for i in range(50):
+            a = JaggedArray.fromiter([[], [123], list(range(i)), []])
+            c = a.pairs()
+            assert len(c) == 4
+            assert len(c[0]) == 0
+            assert len(c[1]) == 1
+            assert len(c[2]) == i * (i + 1) // 2
+            assert len(c[3]) == 0
+            assert c[2]["0"].tolist() == sum([[x] * (i - x) for x in range(i)], [])
+            assert c[2]["1"].tolist() == sum([list(range(x, i)) for x in range(i)], [])
+
+    def test_jagged_distincts(self):
+        # distincts() drops the self-pairs that pairs() keeps, leaving i * (i - 1) // 2 combinations
+        for i in range(50):
+            a = JaggedArray.fromiter([[], [123], list(range(i)), []])
+            c = a.distincts()
+            assert len(c) == 4
+            assert len(c[0]) == 0
+            assert len(c[1]) == 0
+            assert len(c[2]) == i * (i - 1) // 2
+            assert len(c[3]) == 0
+            left = sum([[x] * (i - x) for x in range(i)], [])
+            right = sum([list(range(x, i)) for x in range(i)], [])
+            assert c[2]["0"].tolist() == [x for x, y in zip(left, right) if x != y]
+            assert c[2]["1"].tolist() == [y for x, y in zip(left, right) if x != y]
 
     def test_jagged_sum(self):
         a = JaggedArray([0, 3, 3, 5], [3, 3, 5, 10], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])