Skip to content
This repository has been archived by the owner on Jun 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #18 from scikit-hep/feature-arrow
Browse files: browse the repository at this point in the history
Feature arrow
  • Loading branch information
jpivarski authored Oct 26, 2018
2 parents d23a327 + 28e4862 commit 1a7bea1
Show file tree
Hide file tree
Showing 19 changed files with 1,523 additions and 244 deletions.
6 changes: 4 additions & 2 deletions awkward/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@
from awkward.array.union import UnionArray
from awkward.array.virtual import VirtualArray

from awkward.derived.strings import StringArray

from awkward.generate import fromiter

from awkward.persist import serialize, deserialize, save, load, tohdf5, fromhdf5
from awkward.persist import serialize, deserialize, save, load, hdf5

# convenient access to the version number
from awkward.version import __version__

__all__ = ["ChunkedArray", "AppendableArray", "IndexedArray", "ByteIndexedArray", "SparseArray", "JaggedArray", "ByteJaggedArray", "MaskedArray", "BitMaskedArray", "IndexedMaskedArray", "Methods", "ObjectArray", "Table", "UnionArray", "VirtualArray", "fromiter", "serialize", "deserialize", "save", "load", "tohdf5", "fromhdf5", "__version__"]
__all__ = ["ChunkedArray", "AppendableArray", "IndexedArray", "ByteIndexedArray", "SparseArray", "JaggedArray", "ByteJaggedArray", "MaskedArray", "BitMaskedArray", "IndexedMaskedArray", "Methods", "ObjectArray", "Table", "UnionArray", "VirtualArray", "StringArray", "fromiter", "serialize", "deserialize", "save", "load", "hdf5", "__version__"]
8 changes: 8 additions & 0 deletions awkward/array/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
import awkward.util

class AwkwardArray(awkward.util.NDArrayOperatorsMixin):
"""
AwkwardArray: abstract base class
"""

def __array__(self, *args, **kwargs):
# hitting this function is usually undesirable; uncomment to search for performance bugs
# raise Exception("{0} {1}".format(args, kwargs))
Expand Down Expand Up @@ -187,6 +191,10 @@ def minby(self, function):
return self[function(*args, **kwargs).argmin()]

class AwkwardArrayWithContent(AwkwardArray):
"""
AwkwardArrayWithContent: abstract base class
"""

def __setitem__(self, where, what):
if isinstance(where, awkward.util.string):
self._content[where] = what
Expand Down
30 changes: 18 additions & 12 deletions awkward/array/chunked.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
import awkward.util

class ChunkedArray(awkward.array.base.AwkwardArray):
"""
ChunkedArray
"""

def __init__(self, chunks, counts=[]):
self.chunks = chunks
self.counts = counts
Expand Down Expand Up @@ -77,14 +81,13 @@ def ones_like(self, **overrides):
mine = self._mine(overrides)
return self.copy([awkward.util.numpy.ones_like(x) if isinstance(x, awkward.util.numpy.ndarray) else x.ones_like(**overrides) for x in self._chunks], counts=list(self._counts), **mine)

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self.knowcounts()
self._valid()
n = self.__class__.__name__
return {"id": ident,
"call": ["awkward", n],
"args": [{"list": [fill(x, n + ".chunk", **kwargs) for c, x in zip(self._counts, self._chunks) if c > 0]},
fill(awkward.util.numpy.array([c for c in self._counts if c > 0]), n + ".counts", **kwargs)]}
"call": ["awkward", self.__class__.__name__],
"args": [{"list": [fill(x, self.__class__.__name__ + ".chunk", prefix, suffix, schemasuffix, storage, compression, **kwargs) for c, x in zip(self._counts, self._chunks) if c > 0]},
{"json": [int(c) for c in self._counts if c > 0]}]}

@property
def chunks(self):
Expand Down Expand Up @@ -611,6 +614,10 @@ def pandas(self):
raise NotImplementedError

class AppendableArray(ChunkedArray):
"""
AppendableArray
"""

def __init__(self, chunkshape, dtype, chunks=[]):
self.chunkshape = chunkshape
self.dtype = dtype
Expand All @@ -633,10 +640,9 @@ def _mine(self, overrides):
mine["dtype"] = overrides.pop("dtype", self._dtype)
return mine

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self._valid()
n = self.__class__.__name__


chunks = []
for c, x in zip(self._counts, self._chunks):
if 0 < c < len(x):
Expand All @@ -645,10 +651,10 @@ def __awkward_persist__(self, ident, fill, **kwargs):
chunks.append(x)

return {"id": ident,
"call": ["awkward", n],
"args": [{"tuple": list(self._chunkshape)},
{"call": ["awkward.persist", "json2dtype"], "args": [awkward.persist.dtype2json(self._dtype)]},
{"list": [fill(x, n + ".chunk", **kwargs) for x in chunks]}]}
"call": ["awkward", self.__class__.__name__],
"args": [{"tuple": [{"json": int(x)} for x in self._chunkshape]},
{"dtype": awkward.persist.dtype2json(self._dtype)},
{"list": [fill(x, self.__class__.__name__ + ".chunk", prefix, suffix, schemasuffix, storage, compression, **kwargs) for x in chunks]}]}

@property
def chunkshape(self):
Expand Down
55 changes: 33 additions & 22 deletions awkward/array/indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ def invert(permutation):
return out

class IndexedArray(awkward.array.base.AwkwardArrayWithContent):
"""
IndexedArray
"""

def __init__(self, index, content):
self.index = index
self.content = content
Expand Down Expand Up @@ -87,13 +91,12 @@ def ones_like(self, **overrides):
else:
return self.copy(content=self._content.ones_like(**overrides))

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self._valid()
n = self.__class__.__name__
return {"id": ident,
"call": ["awkward", n],
"args": [fill(self._index, n + ".index", **kwargs),
fill(self._content, n + ".content", **kwargs)]}
"call": ["awkward", self.__class__.__name__],
"args": [fill(self._index, self.__class__.__name__ + ".index", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs)]}

@property
def index(self):
Expand Down Expand Up @@ -205,6 +208,10 @@ def pandas(self):
return self._content[self._index].pandas()

class ByteIndexedArray(IndexedArray):
"""
ByteIndexedArray
"""

def __init__(self, index, content, dtype):
super(ByteIndexedArray, self).__init__(index, content)
self.dtype = dtype
Expand Down Expand Up @@ -258,14 +265,13 @@ def ones_like(self, **overrides):
else:
return self.copy(content=self._content.ones_like(**overrides), **mine)

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self._valid()
n = self.__class__.__name__
return {"id": ident,
"call": ["awkward", n],
"args": [fill(self._index, n + ".index", **kwargs),
fill(self._content, n + ".content", **kwargs),
{"call": ["awkward.persist", "json2dtype"], "args": [awkward.persist.dtype2json(self._dtype)]}]}
"call": ["awkward", self.__class__.__name__],
"args": [fill(self._index, self.__class__.__name__ + ".index", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs),
{"dtype": awkward.persist.dtype2json(self._dtype)}]}

@property
def content(self):
Expand Down Expand Up @@ -365,6 +371,10 @@ def pandas(self):
raise NotImplementedError

class SparseArray(awkward.array.base.AwkwardArrayWithContent):
"""
SparseArray
"""

def __init__(self, length, index, content, default=None):
self.length = length
self.index = index
Expand Down Expand Up @@ -423,22 +433,23 @@ def ones_like(self, **overrides):
else:
return self.copy(content=self._content.ones_like(**overrides), **mine)

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self._valid()
n = self.__class__.__name__

if self._default is None or isinstance(self._default, (numbers.Real, awkward.util.numpy.integer, awkward.util.numpy.floating)):
default = self._default
elif isinstance(self._default, awkward.util.numpy.ndarray):
default = fill(self._default, n + ".default")
if self._default is None:
default = {"json": self._default}
elif isinstance(self._default, (numbers.Integral, awkward.util.numpy.integer)):
default = {"json": int(self._default)}
elif isinstance(self._default, (numbers.Real, awkward.util.numpy.floating)) and awkward.util.numpy.isfinite(self._default):
default = {"json": float(self._default)}
else:
default = {"call": ["pickle", "loads"], "args": pickle.dumps(self._default)}
default = fill(self._default, self.__class__.__name__ + ".default", prefix, suffix, schemasuffix, storage, compression, **kwargs)

return {"id": ident,
"call": ["awkward", n],
"args": [self._length,
fill(self._index, n + ".index", **kwargs),
fill(self._content, n + ".content", **kwargs),
"call": ["awkward", self.__class__.__name__],
"args": [{"json": int(self._length)},
fill(self._index, self.__class__.__name__ + ".index", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs),
default]}

@property
Expand Down
58 changes: 30 additions & 28 deletions awkward/array/jagged.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def uniques2offsetsparents(uniques):
return offsets, parents

class JaggedArray(awkward.array.base.AwkwardArrayWithContent):
"""
JaggedArray
"""

def __init__(self, starts, stops, content):
self.starts = starts
self.stops = stops
Expand Down Expand Up @@ -234,21 +238,19 @@ def ones_like(self, **overrides):
else:
return self.copy(content=self._content.ones_like(**overrides))

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self._valid()
n = self.__class__.__name__
if offsetsaliased(self._starts, self._stops) and len(self._starts) > 0 and self._starts[0] == 0:
return {"id": ident,
"call": ["awkward", n, "fromcounts"],
"args": [fill(self.counts, n + ".counts", **kwargs),
fill(self._content, n + ".content", **kwargs)]}

"call": ["awkward", self.__class__.__name__, "fromcounts"],
"args": [fill(self.counts, self.__class__.__name__ + ".counts", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs)]}
else:
return {"id": ident,
"call": ["awkward", n],
"args": [fill(self._starts, n + ".starts", **kwargs),
fill(self._stops, n + ".stops", **kwargs),
fill(self._content, n + ".content", **kwargs)]}
"call": ["awkward", self.__class__.__name__],
"args": [fill(self._starts, self.__class__.__name__ + ".starts", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._stops, self.__class__.__name__ + ".stops", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs)]}

@property
def starts(self):
Expand Down Expand Up @@ -663,19 +665,17 @@ def recurse(x):

for i in range(len(inputs)):
if isinstance(inputs[i], JaggedArray):
if good is None:
inputs[i] = inputs[i].content
else:
inputs[i] = inputs[i].content[good]
inputs[i] = inputs[i].flatten()

result = getattr(ufunc, method)(*inputs, **kwargs)

counts = stops - starts
if isinstance(result, tuple):
return tuple(awkward.array.objects.Methods.maybemixin(type(x), JaggedArray)(starts, stops, x) if isinstance(x, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardBase)) else x for x in result)
return tuple(awkward.array.objects.Methods.maybemixin(type(x), JaggedArray).fromcounts(counts, x) if isinstance(x, (awkward.util.numpy.ndarray, awkward.array.base.AwkwardBase)) else x for x in result)
elif method == "at":
return None
else:
return awkward.array.objects.Methods.maybemixin(type(result), JaggedArray)(starts, stops, result)
return awkward.array.objects.Methods.maybemixin(type(result), JaggedArray).fromcounts(counts, result)

@staticmethod
def aligned(*jaggedarrays):
Expand Down Expand Up @@ -1093,6 +1093,10 @@ def pandas(self):
return out

class ByteJaggedArray(JaggedArray):
"""
ByteJaggedArray
"""

def __init__(self, starts, stops, content, subdtype):
super(ByteJaggedArray, self).__init__(starts, stops, content)
self.subdtype = subdtype
Expand Down Expand Up @@ -1145,23 +1149,21 @@ def deepcopy(self, starts=None, stops=None, content=None, subdtype=None):
out.subdtype = subdtype
return out

def __awkward_persist__(self, ident, fill, **kwargs):
def __awkward_persist__(self, ident, fill, prefix, suffix, schemasuffix, storage, compression, **kwargs):
self._valid()
n = self.__class__.__name__
if offsetsaliased(self._starts, self._stops) and len(self._starts) > 0 and self._starts[0] == 0:
return {"id": ident,
"call": ["awkward", n, "fromcounts"],
"args": [fill(self.counts, n + ".counts", **kwargs),
fill(self._content, n + ".content", **kwargs),
{"call": ["awkward.persist", "json2dtype"], "args": [awkward.persist.dtype2json(self._subdtype)]}]}

"call": ["awkward", self.__class__.__name__, "fromcounts"],
"args": [fill(self.counts, self.__class__.__name__ + ".counts", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs),
{"dtype": awkward.persist.dtype2json(self._subdtype)}]}
else:
return {"id": ident,
"call": ["awkward", n],
"args": [fill(self._starts, n + ".starts", **kwargs),
fill(self._stops, n + ".stops", **kwargs),
fill(self._content, n + ".content", **kwargs),
{"call": ["awkward.persist", "json2dtype"], "args": [awkward.persist.dtype2json(self._subdtype)]}]}
"call": ["awkward", self.__class__.__name__],
"args": [fill(self._starts, self.__class__.__name__ + ".starts", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._stops, self.__class__.__name__ + ".stops", prefix, suffix, schemasuffix, storage, compression, **kwargs),
fill(self._content, self.__class__.__name__ + ".content", prefix, suffix, schemasuffix, storage, compression, **kwargs),
{"dtype": awkward.persist.dtype2json(self._subdtype)}]}

@property
def content(self):
Expand Down
Loading

0 comments on commit 1a7bea1

Please sign in to comment.