Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #50 from scikit-hep/issue-49
Browse files Browse the repository at this point in the history
Issue 49
  • Loading branch information
jpivarski authored Nov 30, 2018
2 parents 2b5a069 + ae97261 commit f0c3f7d
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 53 deletions.
22 changes: 17 additions & 5 deletions awkward/array/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,15 @@ class AwkwardArray(awkward.util.NDArrayOperatorsMixin):
def at(self):
return At(self)

allow_tonumpy = True
allow_iter = True

def _checktonumpy(self):
if not self.allow_tonumpy:
raise RuntimeError("awkward.array.base.AwkwardArray.allow_tonumpy is False; refusing to convert to Numpy")

def __array__(self, dtype=None):
# hitting this function is usually undesirable; uncomment to search for performance bugs
# raise Exception
self._checktonumpy()

if dtype is None:
dtype = self.dtype
Expand All @@ -78,15 +84,21 @@ def __setstate__(self, state):
self.__dict__.update(out.__dict__)
self.__class__ = out.__class__

def __iter__(self):
def _checkiter(self):
if not self.allow_iter:
raise RuntimeError("awkward.array.base.AwkwardArray.allow_iter is False; refusing to iterate")

def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
for i in range(len(self)):
yield self[i]

def __str__(self):
if len(self) <= 6:
return "[{0}]".format(" ".join(awkward.util.array_str(x) for x in self))
return "[{0}]".format(" ".join(awkward.util.array_str(x) for x in self.__iter__(checkiter=False)))
else:
return "[{0} ... {1}]".format(" ".join(awkward.util.array_str(x) for x in self[:3]), " ".join(awkward.util.array_str(x) for x in self[-3:]))
return "[{0} ... {1}]".format(" ".join(awkward.util.array_str(x) for x in self[:3].__iter__(checkiter=False)), " ".join(awkward.util.array_str(x) for x in self[-3:].__iter__(checkiter=False)))

def __repr__(self):
return "<{0} {1} at {2:012x}>".format(self.__class__.__name__, str(self), id(self))
Expand Down
8 changes: 6 additions & 2 deletions awkward/array/chunked.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,20 +279,24 @@ def __str__(self):
if self.countsknown:
return super(ChunkedArray, self).__str__()
else:
strs = [awkward.util.array_str(x) for x in self[:7]]
strs = [awkward.util.array_str(x) for x in self[:7].__iter__(checkiter=False)]
if len(strs) < 7:
return super(ChunkedArray, self).__str__()
else:
return "[{0} ...]".format(" ".join(strs))

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
for i, chunk in enumerate(self._chunks):
if i >= len(self._counts):
self._counts.append(len(chunk))
for x in chunk[:self._counts[i]]:
yield x

def __array__(self, *args, **kwargs):
self._checktonumpy()

if isinstance(self.type.to, awkward.util.numpy.dtype):
if len(self) == 0:
return awkward.util.numpy.empty(0, dtype=awkward.util.DEFAULTTYPE)
Expand Down
12 changes: 9 additions & 3 deletions awkward/array/indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()
for i in self._index:
yield self._content[i]
Expand Down Expand Up @@ -303,7 +305,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()
itemsize = self._dtype.itemsize
for i in self._index:
Expand Down Expand Up @@ -530,7 +534,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()

length = self._length
Expand Down
57 changes: 31 additions & 26 deletions awkward/array/jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def offsets2parents(offsets):
out = awkward.util.numpy.zeros(offsets[-1], dtype=awkward.util.INDEXTYPE)
awkward.util.numpy.add.at(out, offsets[offsets != offsets[-1]][1:], 1)
awkward.util.numpy.cumsum(out, out=out)
if offsets[0] > 0:
out[:offsets[0]] = -1
return out

def startsstops2parents(starts, stops):
Expand Down Expand Up @@ -416,10 +418,12 @@ def _validstartsstops(starts, stops):
if starts.shape[1:] != stops.shape[1:]:
raise ValueError("starts and stops must have the same dimensionality (shape[1:])")

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()
if len(self._starts.shape) != 1:
for x in super(JaggedArray, self).__iter__():
for x in super(JaggedArray, self).__iter__(checkiter=checkiter):
yield x
else:
stops = self._stops
Expand Down Expand Up @@ -481,14 +485,13 @@ def __getitem__(self, where):

offsets = counts2offsets(intheadsum)

return self.copy(starts=offsets[:-1].reshape(intheadsum.shape), stops=offsets[1:].reshape(intheadsum.shape), content=thyself._content[head._content])
headcontent = awkward.util.numpy.array(head._content, dtype=awkward.util.BOOLTYPE)
headcontent[head.parents < 0] = False

elif len(head.shape) == 1:
raise TypeError("jagged index must be boolean (mask) or integer (fancy indexing)")
return self.copy(starts=offsets[:-1].reshape(intheadsum.shape), stops=offsets[1:].reshape(intheadsum.shape), content=thyself._content[headcontent])

else:
# the other cases are possible, but complicated; the first sets the form
raise NotImplementedError("jagged index content type: {0}".format(head._content.dtype))
raise TypeError("jagged index must be boolean (mask) or integer (fancy indexing)")

else:
starts = self._starts[head]
Expand Down Expand Up @@ -666,24 +669,24 @@ def _tojagged(self, starts=None, stops=None, copy=True):
return self.copy(starts=starts, stops=stops, content=(awkward.util.deepcopy(self._content) if copy else self._content))

else:
out = self.copy(starts=starts, stops=stops, content=awkward.util.numpy.empty(stops.max(), dtype=self._content.dtype))

if offsetsaliased(self._starts, self._stops) or awkward.util.numpy.array_equal(self._starts[1:], self._stops[:-1]):
content = self._content[self._starts[0]:self._stops[-1]]
elif (self._starts[:-1] < self._starts[1:]).all():
content = self._content[awkward.util.numpy.arange(len(self.parents), dtype=awkward.util.INDEXTYPE)[self.parents >= 0]]
else:
order = awkward.util.numpy.argsort(self.parents, kind="mergesort")
content = self._content[order[self.parents[order] >= 0]]

if offsetsaliased(starts, stops) or awkward.util.numpy.array_equal(starts[1:], stops[:-1]):
out._content[starts[0]:stops[-1]] = content
elif (starts[:-1] < starts[1:]).all():
out._content[awkward.util.numpy.arange(len(out.parents), dtype=awkward.util.INDEXTYPE)[out.parents >= 0]] = content
if offsetsaliased(starts, stops):
parents = offsets2parents(starts.base)
elif len(starts.shape) == 1 and awkward.util.numpy.array_equal(starts[1:], stops[:-1]):
if len(self._stops) == 0:
offsets = awkward.util.numpy.array([0], dtype=awkward.util.INDEXTYPE)
else:
offsets = awkward.util.numpy.append(starts, stops[-1])
parents = offsets2parents(offsets)
else:
order = awkward.util.numpy.argsort(out.parents, kind="mergesort")
out._content[order[out.parents[order] >= 0]] = content

parents = startsstops2parents(starts, stops)

good = (parents >= 0)
increase = awkward.util.numpy.arange(len(parents), dtype=awkward.util.INDEXTYPE)
increase[good] -= increase[starts[parents[good]]]
index = self._starts[parents]
index += increase
out = self.copy(starts=starts, stops=stops, content=self._content[index])
out._parents = parents
return out

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
Expand Down Expand Up @@ -1352,10 +1355,12 @@ def subdtype(self, value):
def _gettype(self, seen):
return awkward.type.ArrayType(awkward.util.numpy.inf, self._subdtype)

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()
if len(self._starts.shape) != 1:
for x in super(JaggedArray, self).__iter__():
for x in super(JaggedArray, self).__iter__(checkiter=checkiter):
yield x.view(self._subdtype)
else:
stops = self._stops
Expand Down
12 changes: 9 additions & 3 deletions awkward/array/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()

mask = self._mask
Expand Down Expand Up @@ -394,7 +396,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()

one = awkward.util.numpy.uint8(1)
Expand Down Expand Up @@ -594,7 +598,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()

mask = self._mask
Expand Down
7 changes: 3 additions & 4 deletions awkward/array/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,14 @@ class Methods(object):

@staticmethod
def mixin(methods, awkwardtype):
assert issubclass(methods, Methods)
assert not issubclass(methods, awkward.array.base.AwkwardArray)
assert issubclass(awkwardtype, awkward.array.base.AwkwardArray)
assert not issubclass(awkwardtype, Methods)
return type(awkwardtype.__name__ + "Methods", (methods, awkwardtype), {})

@staticmethod
def maybemixin(sample, awkwardtype):
if issubclass(sample, Methods):
assert issubclass(sample, awkward.array.base.AwkwardArray)
assert issubclass(awkwardtype, awkward.array.base.AwkwardArray)
allbases = tuple(x for x in sample.__bases__ if not issubclass(x, awkward.array.base.AwkwardArray)) + (awkwardtype,)
return type(awkwardtype.__name__ + "Methods", allbases, {})
else:
Expand Down Expand Up @@ -179,7 +176,9 @@ def _getshape(self):
def _valid(self):
pass

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
for x in self._content:
yield self.generator(x, *self._args, **self._kwargs)

Expand Down
9 changes: 7 additions & 2 deletions awkward/array/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ def __getitem__(self, where):
def __dir__(self):
return ["_" + x for x in self._table._content if x.isnumeric() or x.isidentifier()] + ["tolist"]

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
i = 0
while str(i) in self._table._content:
yield self._table._content[str(i)]
Expand Down Expand Up @@ -423,7 +425,10 @@ def _newslice(self, head):
def _valid(self):
pass

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()

if self._view is None:
length = self._length()
i = 0
Expand Down
4 changes: 3 additions & 1 deletion awkward/array/union.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,9 @@ def _valid(self):

self._isvalid = True

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
self._valid()

tags = self._tags
Expand Down
5 changes: 4 additions & 1 deletion awkward/array/virtual.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,13 @@ def __del__(self):
except:
pass

def __iter__(self):
def __iter__(self, checkiter=True):
if checkiter:
self._checkiter()
return iter(self.array)

def __array__(self, *args, **kwargs):
self._checktonumpy()
return awkward.util.numpy.array(self.array, *args, **kwargs)

def __getitem__(self, where):
Expand Down
2 changes: 1 addition & 1 deletion awkward/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

import re

__version__ = "0.5.1"
__version__ = "0.5.2"
version = __version__
version_info = tuple(re.split(r"[-\.]", __version__))

Expand Down
12 changes: 7 additions & 5 deletions specification.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@ Necessary features of the underlying array library are described in this section

The underlying array library must have the following features or some translation thereof.

* It must be possible to represent ordered sequences of primitive values, known as *arrays*. An array is homogeneous: all values in an array have the same type. Primitive types include integers, floating point numbers, and boolean (true/false) values. An awkward-array library inherits the primitives of the underlying library -- if a base array library doesn't support complex numbers (for instance), then neither does its awkward-array extension. Arrays need not be fixed-size, contiguous in memory, with only fixed-bytesize primitives, but these are the simplest cases.
* Arrays may be _N_-dimensional for any positive integer _N_, though only _N = 1_ is required. We will refer to an array's size in all dimensions as its *shape*, a tuple of positive integers whose product is the total number of elements in the array. The *length* of an array is the array's size in its first dimension.
* It must be possible to *extract* an array element by integer index or a tuple of _N_ integer indexes for an _N_-dimensional array. In this document, we will use 0-indexing: `0` extracts the first element, `1` extracts the second, etc. If an index is greater than or equal to _N_, it is out of bounds. An array library with 1-indexing (`1` extracts the first element) would correspondingly have 1-indexing in its awkward-array extensions. We will use a number between square brackets after the array, as in `myarray[5]`, or a comma-separated tuple like `myarray[3, 1, 4]`, to represent extraction.
* The array library may provide a mechanism for counting from the end of an array dimension. In Python, `-1` refers to the last element, `-2` to the second-to-last element, etc. We will use that convention. Given a negative index `i` and a dimension size `d`, the standard index is `i + d`. If the standard index is still negative, the index is out of bounds.
* It must be possible to *slice* an array to get a subsequence of that array along any dimension. A slice is defined by a *start* index (lower bound element number), a *stop* index (upper bound element number), and an optional *step* (stride length, for skipping a regular number of elements between each return element). In this document, we will use inclusive start values (the lower bound is included in the resulting subsequence) and exclusive stop values (the upper bound is not included). A step of `1` is equivalent to no step, and the step must not be `0`, though negative values may be allowed (to reverse order). If either start or stop is not provided, they may be assumed to be `0` and the size of the dimension (respectively) if step is positive, or one less than the size of the dimension and one less than `0` (respectively) if step is negative. If the difference between step and start is not an integer multiple of step, we take that the subsequence to be truncated at the last element before stop. If either start or step are beyond the bounds of the array, we take them to be truncated to the nearest legal values, which may result in an empty subsequence, but not an error. If negative indexes are allowed for element extraction, they may be allowed for slicing as well.
* It must be possible to represent ordered sequences of primitive values, known as *arrays*. An array is homogeneous: all values in an array have the same *dtype*, or primitive data type. Primitive types include integers, floating point numbers, and boolean (true/false) values. An awkward-array library inherits the primitives of the underlying library -- if a base array library doesn't support complex numbers (for instance), then neither does its awkward-array extension. Arrays need not be fixed-size, contiguous in memory, with only fixed-bytesize primitives, but these are the simplest cases.
* It must be possible to construct _N_-dimensional arrays for any positive integer _N_. We will refer to an array's size in all dimensions as its *shape*, a tuple of positive integers whose product is the total number of elements in the array. The *length* of an array is the array's size in its first dimension.
* It must be possible to *extract* an array element by integer index or a tuple of _N_ integer indexes for an _N_-dimensional array. In this document, we will use 0-indexing: `0` extracts the first element, `1` extracts the second, etc. If an index is greater than or equal to the size of the dimension it indexes, it is out of bounds. An array library with 1-indexing (`1` extracts the first element) would correspondingly have 1-indexing in its awkward-array extensions. We will use a number between square brackets after the array, as in `myarray[5]`, or a comma-separated tuple like `myarray[3, 1, 4]`, to represent extraction.
* The array library may provide a mechanism for counting from the end of an array dimension. In Python, `-1` refers to the last element, `-2` to the second-to-last element, etc. We will use that convention. Given a negative index `i` and a dimension size `d`, the standard index is `i + d`. If the standard index is still negative, the index is out of bounds.
* It must be possible to *slice* an array dimension to get a subsequence of that array along any dimension. A slice is defined by a *start* index (lower bound element number), a *stop* index (upper bound element number), and an optional *step* (stride length, for skipping a regular number of input elements between each returned element). In this document, we will use inclusive start values (the lower bound is included in the resulting subsequence) and exclusive stop values (the upper bound is not included). A step of `1` is equivalent to no step, and the step must not be `0`, though negative values may be allowed (to reverse order). If either start or stop is not provided, they may be assumed to be `0` and the size of the dimension (respectively) if step is positive, or one less than the size of the dimension and one less than `0` (respectively) if step is negative. If the difference between stop and start is not an integer multiple of step, we take the subsequence to be truncated at the last element before stop. If either start or stop are beyond the bounds of the array, we take them to be truncated to the nearest legal values, which may result in an empty subsequence, but not an error. If negative indexes are allowed for element extraction, they may be allowed for slicing as well. The Python syntax for this operation is `myarray[start:stop:step]` (in which any `start`, `stop`, or `step` may be missing or `None`, and the second colon may be omitted if there is no `step`). We will use this syntax in this document.
* It must be possible to *mask* elements in an array dimension by a 1-dimensional boolean array of the same size as that array dimension. The result of such an operation is a sequence in the same order containing only the elements for which the matching boolean value is true. The Numpy syntax for this operation is to put the boolean `mymask` array in square brackets after the array: `myarray[mymask]`, but it may be a named function call.
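* It must be possible to *gather* elements in an array dimension by a 1-dimensional integer array, mapping each integer in that array to the element at that index. The result of such an operation is a sequence with the same length and order as the integer array, containing the elements found at those indexes.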



Expand Down
Loading

0 comments on commit f0c3f7d

Please sign in to comment.