Merge pull request #35 from scikit-hep/issue-33

Issue 33
scikit-hep · Nov 19, 2018 · 5ee75c2 · 5ee75c2
2 parents cc67b4d + 99c5065
commit 5ee75c2
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 3 deletions.
diff --git a/awkward/array/jagged.py b/awkward/array/jagged.py
@@ -30,6 +30,7 @@
 
 import math
 import numbers
+import os
 
 import awkward.array.base
 import awkward.persist
@@ -469,7 +470,7 @@ def __getitem__(self, where):
                     thyself._stops.shape = self._stops.shape
 
                 head = head._tojagged(thyself._starts, thyself._stops, copy=False)
-                inthead = head.copy(content=head._content.view(awkward.util.numpy.uint8))
+                inthead = head.copy(content=head._content.astype(awkward.util.INDEXTYPE))
                 intheadsum = inthead.sum()
 
                 offsets = counts2offsets(intheadsum)
@@ -798,6 +799,12 @@ def aligned(*jaggedarrays):
 
         return True
 
+    def argdistincts(self):   # convenience wrapper around argpairs()
+        return self.argpairs(same=False)   # same=False presumably drops each element's pairing with itself -- confirm in argpairs
+
+    def distincts(self):   # convenience wrapper around pairs()
+        return self.pairs(same=False)   # with same=False no (x, x) self-pairs are produced (asserted by test_jagged_distincts)
+
     def argpairs(self, same=True):
         import awkward.array.table
         self._valid()
@@ -900,6 +907,8 @@ def any(self):
 
             out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
             nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+            if os.name == "nt":   # Windows Numpy reduceat requires 32-bit indexes
+                nonterminal = nonterminal.astype(awkward.util.numpy.int32)
             out[:len(nonterminal)] = awkward.util.numpy.logical_or.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
             out[self.offsets[1:] == self.offsets[:-1]] = False
             return out
@@ -916,6 +925,8 @@ def all(self):
 
             out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
             nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+            if os.name == "nt":   # Windows Numpy reduceat requires 32-bit indexes
+                nonterminal = nonterminal.astype(awkward.util.numpy.int32)
             out[:len(nonterminal)] = awkward.util.numpy.logical_and.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
             out[self.offsets[1:] == self.offsets[:-1]] = True
             return out
@@ -938,6 +949,8 @@ def sum(self):
         if self._canuseoffset():
             out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
             nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+            if os.name == "nt":   # Windows Numpy reduceat requires 32-bit indexes
+                nonterminal = nonterminal.astype(awkward.util.numpy.int32)
             out[:len(nonterminal)] = awkward.util.numpy.add.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
             out[self.offsets[1:] == self.offsets[:-1]] = 0
             return out
@@ -962,6 +975,8 @@ def prod(self):
         if self._canuseoffset():
             out = awkward.util.numpy.empty(self._starts.shape + content.shape[1:], dtype=content.dtype)
             nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+            if os.name == "nt":   # Windows Numpy reduceat requires 32-bit indexes
+                nonterminal = nonterminal.astype(awkward.util.numpy.int32)
             out[:len(nonterminal)] = awkward.util.numpy.multiply.reduceat(content[self._starts[0]:self._stops[-1]], nonterminal)
             out[self.offsets[1:] == self.offsets[:-1]] = 1
             return out
@@ -1021,6 +1036,8 @@ def argmax(self):
     def _minmax_offset(self, ismin):
         out = awkward.util.numpy.empty(self._starts.shape + self._content.shape[1:], dtype=self._content.dtype)
         nonterminal = self.offsets[self.offsets != self.offsets[-1]]
+        if os.name == "nt":   # Windows Numpy reduceat requires 32-bit indexes
+            nonterminal = nonterminal.astype(awkward.util.numpy.int32)
 
         if ismin:
             out[:len(nonterminal)] = awkward.util.numpy.minimum.reduceat(self._content[self._starts[0]:self._stops[-1]], nonterminal)

diff --git a/awkward/version.py b/awkward/version.py
@@ -30,7 +30,7 @@
 
 import re
 
-__version__ = "0.4.3"
+__version__ = "0.4.4"
 version = __version__
 version_info = tuple(re.split(r"[-\.]", __version__))
 

diff --git a/tests/test_jagged.py b/tests/test_jagged.py
@@ -224,7 +224,45 @@ def test_jagged_regular(self):
         assert a.regular().tolist() == [[[[0.0], [1.1], [2.2]], [[3.3], [4.4], [5.5]]], [[[6.6], [7.7], [8.8]], [[9.9], [10.0], [11.0]]]]
 
     def test_jagged_cross(self):
-        pass
+        for i in range(10):   # i, j: lengths of the third sublist of a and of b
+            for j in range(5):
+                a = JaggedArray.fromiter([[], [123], list(range(i)), []])
+                b = JaggedArray.fromiter([[], [456], list(range(j)), [999]])
+                c = a.cross(b)
+                assert len(c) == 4
+                assert len(c[0]) == 0   # empty x empty
+                assert len(c[1]) == 1   # one element x one element
+                assert len(c[2]) == i * j   # every element of a[2] combined with every element of b[2]
+                assert len(c[3]) == 0   # empty x one element
+                assert c[2]["0"].tolist() == numpy.repeat(range(i), j).tolist()   # left values: each repeated j times
+                assert c[2]["1"].tolist() == numpy.tile(range(j), i).tolist()   # right values: 0..j-1 cycled i times
+
+    def test_jagged_pairs(self):
+        for i in range(50):   # i: length of the third sublist
+            a = JaggedArray.fromiter([[], [123], list(range(i)), []])
+            c = a.pairs()
+            assert len(c) == 4
+            assert len(c[0]) == 0
+            assert len(c[1]) == 1   # the single element paired with itself
+            assert len(c[2]) == i * (i + 1) // 2   # unordered pairs, self-pairs included
+            assert len(c[3]) == 0
+            assert c[2]["0"].tolist() == sum([[x] * (i - x) for x in range(i)], [])   # left: x repeated (i - x) times
+            assert c[2]["1"].tolist() == sum([list(range(x, i)) for x in range(i)], [])   # right: x, x+1, ..., i-1 for each x
+
+    def test_jagged_distincts(self):
+        # distincts() must match pairs() with every self-pair (x, x) removed
+        for i in range(50):
+            a = JaggedArray.fromiter([[], [123], list(range(i)), []])
+            c = a.distincts()
+            assert len(c) == 4
+            assert len(c[0]) == 0
+            assert len(c[1]) == 0   # a lone element has no distinct partner
+            assert len(c[2]) == i * (i - 1) // 2   # unordered pairs without self-pairs
+            assert len(c[3]) == 0
+            left = sum([[x] * (i - x) for x in range(i)], [])   # left/right: the pairs expected from pairs()
+            right = sum([list(range(x, i)) for x in range(i)], [])
+            assert c[2]["0"].tolist() == [x for x, y in zip(left, right) if x != y]
+            assert c[2]["1"].tolist() == [y for x, y in zip(left, right) if x != y]
 
     def test_jagged_sum(self):
         a = JaggedArray([0, 3, 3, 5], [3, 3, 5, 10], [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9])