Skip to content

Commit

Permalink
support delta_spec and paging_spec
Browse files Browse the repository at this point in the history
  • Loading branch information
slevang committed Nov 14, 2024
1 parent 29995e3 commit b2c40a8
Show file tree
Hide file tree
Showing 36 changed files with 95 additions and 21 deletions.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.06/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 2,
"delta_spec": null,
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.06/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.07/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "try_lookback",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.07/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.08/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "none",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.08/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.09.dat
Binary file not shown.
69 changes: 52 additions & 17 deletions numcodecs/pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numcodecs.compat import ensure_contiguous_ndarray

try:
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
except ImportError: # pragma: no cover
standalone = None

Expand All @@ -27,14 +27,17 @@ class PCodec(Codec):
level : int
A compression level from 0-12, where 12 takes the longest and compresses
the most.
delta_encoding_order : init or None
Either a delta encoding level from 0-7 or None. If set to None, pcodec
will try to infer the optimal delta encoding order.
mode_spec : {'auto', 'classic'}
mode_spec : {"auto", "classic"}
Configures whether Pcodec should try to infer the best "mode" or
structure of the data (e.g. approximate multiples of 0.1) to improve
compression ratio, or skip this step and just use the numbers as-is
(Classic mode).
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"} or None
Configures the delta encoding strategy. By default, uses "auto", which
will try to infer the best encoding order.
delta_encoding_order : int or None
Explicit delta encoding level from 0-7. Only valid if delta_spec is
"try_consecutive" or None.
equal_pages_up_to : int
Divide the chunk into equal pages of up to this many numbers.
"""
Expand All @@ -44,39 +47,71 @@ class PCodec(Codec):
def __init__(
self,
level: int = 8,
mode_spec: Literal["auto", "classic"] = "auto",
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] | None = None,
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
delta_encoding_order: Optional[int] = None,
equal_pages_up_to: int = 262144,
# TODO one day, add support for the Try* mode specs
mode_spec: Literal['auto', 'classic'] = 'auto',
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
):
if standalone is None: # pragma: no cover
raise ImportError("pcodec must be installed to use the PCodec codec.")

# note that we use `level` instead of `compression_level` to
# match other codecs
self.level = level
self.mode_spec = mode_spec
self.delta_spec = delta_spec
self.paging_spec = paging_spec
self.delta_encoding_order = delta_encoding_order
self.equal_pages_up_to = equal_pages_up_to
self.mode_spec = mode_spec

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)

def _get_chunk_config(self):
match self.mode_spec:
case 'auto':
case "auto" | None:
mode_spec = ModeSpec.auto()
case 'classic':
case "classic":
mode_spec = ModeSpec.classic()
case _:
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
raise ValueError(f"mode_spec {self.mode_spec} is not supported")

if self.delta_encoding_order is not None:
# backwards compat for before delta_spec was introduced
if self.delta_spec in (None, "try_consecutive"):
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
else:
raise ValueError(
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
)
else:
match self.delta_spec:
case "auto" | None:
delta_spec = DeltaSpec.auto()
case "none":
delta_spec = DeltaSpec.none()
case "try_consecutive":
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)

Check warning on line 92 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L92

Added line #L92 was not covered by tests
case "try_lookback":
delta_spec = DeltaSpec.try_lookback()
case _:
raise ValueError(f"delta_spec {self.delta_spec} is not supported")

match self.paging_spec:
case "equal_pages_up_to" | None:
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
case _:
raise ValueError(f"paging_spec {self.paging_spec} is not supported")

config = ChunkConfig(
compression_level=self.level,
delta_encoding_order=self.delta_encoding_order,
delta_spec=delta_spec,
mode_spec=mode_spec,
paging_spec=paging_spec,
)
return config

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)
config = self._get_chunk_config()
return standalone.simple_compress(buf, config)

def decode(self, buf, out=None):
Expand Down
18 changes: 15 additions & 3 deletions numcodecs/tests/test_pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
PCodec(level=1),
PCodec(level=5),
PCodec(level=9),
PCodec(mode_spec='classic'),
PCodec(mode_spec="classic"),
PCodec(equal_pages_up_to=300),
PCodec(delta_encoding_order=2),
PCodec(delta_spec="try_lookback"),
PCodec(delta_spec="none"),
]


Expand Down Expand Up @@ -57,14 +60,23 @@ def test_config():


def test_invalid_config_error():
codec = PCodec(mode_spec='bogus')
for param in ["mode_spec", "delta_spec", "paging_spec"]:
codec = PCodec(**{param: "bogus"})
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_invalid_delta_encoding_combo():
codec = PCodec(delta_encoding_order=2, delta_spec="none")
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_repr():
check_repr(
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
"PCodec(delta_encoding_order=None, delta_spec='auto',"
" equal_pages_up_to=262144, level=3, mode_spec='auto',"
" paging_spec='equal_pages_up_to')"
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ zfpy = [
"numpy<2.0.0",
]
pcodec = [
"pcodec>=0.2,<0.3",
"pcodec>=0.3",
]
crc32c = [
"crc32c>=2.7",
Expand Down

0 comments on commit b2c40a8

Please sign in to comment.