Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support pcodec v0.3 #639

Merged
merged 15 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@ Release notes

.. _unreleased:

Unreleased
----------

Breaking changes
~~~~~~~~~~~~~~~~
* All arguments to the ``PCodec`` constructor except for ``level``
are now keyword-only, to support the updated API.
By :user:`Sam Levang <slevang>`, :issue:`623`

Enhancements
~~~~~~~~~~~~
* Add support for ``pcodec`` 0.3. This exposes the new ``delta_spec``
and ``paging_spec`` arguments, but maintains full backwards
compatibility for data written with older package versions.
By :user:`Sam Levang <slevang>`, :issue:`623`

0.14.1
------

Expand Down
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.06/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 2,
"delta_spec": "auto",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.06/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.07/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "try_lookback",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.07/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.08/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "none",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.08/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.09/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 1,
"delta_spec": "try_consecutive",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.09/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.09/encoded.09.dat
Binary file not shown.
75 changes: 57 additions & 18 deletions numcodecs/pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numcodecs.compat import ensure_contiguous_ndarray

try:
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
except ImportError: # pragma: no cover
standalone = None

Expand All @@ -27,14 +27,21 @@
level : int
A compression level from 0-12, where 12 takes the longest and compresses
the most.
delta_encoding_order : init or None
Either a delta encoding level from 0-7 or None. If set to None, pcodec
will try to infer the optimal delta encoding order.
mode_spec : {'auto', 'classic'}
mode_spec : {"auto", "classic"}
Configures whether Pcodec should try to infer the best "mode" or
structure of the data (e.g. approximate multiples of 0.1) to improve
compression ratio, or skip this step and just use the numbers as-is
(Classic mode).
(Classic mode). Note that the "try*" specs are not currently supported.
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
Configures the delta encoding strategy. By default, uses "auto" which
will try to infer the best encoding order.
paging_spec : {"equal_pages_up_to"}
Configures the paging strategy. Only "equal_pages_up_to" is currently
supported.
delta_encoding_order : int or None
Explicit delta encoding level from 0-7. Only valid if delta_spec is
"try_consecutive" or "auto" (to support backwards compatibility with
older versions of this codec).
equal_pages_up_to : int
Divide the chunk into equal pages of up to this many numbers.
"""
Expand All @@ -44,39 +51,71 @@
def __init__(
self,
level: int = 8,
*,
mode_spec: Literal["auto", "classic"] = "auto",
slevang marked this conversation as resolved.
Show resolved Hide resolved
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
delta_encoding_order: Optional[int] = None,
equal_pages_up_to: int = 262144,
# TODO one day, add support for the Try* mode specs
mode_spec: Literal['auto', 'classic'] = 'auto',
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
):
if standalone is None: # pragma: no cover
raise ImportError("pcodec must be installed to use the PCodec codec.")

# note that we use `level` instead of `compression_level` to
# match other codecs
self.level = level
self.mode_spec = mode_spec
self.delta_spec = delta_spec
self.paging_spec = paging_spec

Check warning on line 69 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L67-L69

Added lines #L67 - L69 were not covered by tests
self.delta_encoding_order = delta_encoding_order
self.equal_pages_up_to = equal_pages_up_to
self.mode_spec = mode_spec

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)

def _get_chunk_config(self):
match self.mode_spec:
case 'auto':
case "auto":

Check warning on line 75 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L75

Added line #L75 was not covered by tests
mode_spec = ModeSpec.auto()
case 'classic':
case "classic":

Check warning on line 77 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L77

Added line #L77 was not covered by tests
mode_spec = ModeSpec.classic()
case _:
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
raise ValueError(f"mode_spec {self.mode_spec} is not supported")

Check warning on line 80 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L80

Added line #L80 was not covered by tests

if self.delta_encoding_order is not None and self.delta_spec == "auto":

Check warning on line 82 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L82

Added line #L82 was not covered by tests
# backwards compat for before delta_spec was introduced
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
raise ValueError(

Check warning on line 86 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L84-L86

Added lines #L84 - L86 were not covered by tests
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
)
else:
match self.delta_spec:
case "auto":
delta_spec = DeltaSpec.auto()
case "none":
delta_spec = DeltaSpec.none()
case "try_consecutive":
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
case "try_lookback":
delta_spec = DeltaSpec.try_lookback()
case _:
raise ValueError(f"delta_spec {self.delta_spec} is not supported")

Check warning on line 100 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L90-L100

Added lines #L90 - L100 were not covered by tests

match self.paging_spec:
case "equal_pages_up_to":
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
case _:
raise ValueError(f"paging_spec {self.paging_spec} is not supported")

Check warning on line 106 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L102-L106

Added lines #L102 - L106 were not covered by tests

config = ChunkConfig(
compression_level=self.level,
delta_encoding_order=self.delta_encoding_order,
delta_spec=delta_spec,
mode_spec=mode_spec,
paging_spec=paging_spec,
)
return config

Check warning on line 114 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L114

Added line #L114 was not covered by tests

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)
config = self._get_chunk_config()

Check warning on line 118 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L117-L118

Added lines #L117 - L118 were not covered by tests
return standalone.simple_compress(buf, config)

def decode(self, buf, out=None):
Expand Down
21 changes: 17 additions & 4 deletions numcodecs/tests/test_pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@
PCodec(level=1),
PCodec(level=5),
PCodec(level=9),
PCodec(mode_spec='classic'),
PCodec(mode_spec="classic"),
PCodec(equal_pages_up_to=300),
PCodec(delta_encoding_order=2),
PCodec(delta_spec="try_lookback"),
PCodec(delta_spec="none"),
PCodec(delta_spec="try_consecutive", delta_encoding_order=1),
]


Expand Down Expand Up @@ -56,15 +60,24 @@
check_config(codec)


def test_invalid_config_error():
codec = PCodec(mode_spec='bogus')
@pytest.mark.parametrize("param", ["mode_spec", "delta_spec", "paging_spec"])
def test_invalid_config_error(param):
codec = PCodec(**{param: "bogus"})
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)

Check warning on line 67 in numcodecs/tests/test_pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/tests/test_pcodec.py#L63-L67

Added lines #L63 - L67 were not covered by tests


def test_invalid_delta_encoding_combo():
codec = PCodec(delta_encoding_order=2, delta_spec="none")

Check warning on line 71 in numcodecs/tests/test_pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/tests/test_pcodec.py#L70-L71

Added lines #L70 - L71 were not covered by tests
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_repr():
check_repr(
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
"PCodec(delta_encoding_order=None, delta_spec='auto',"
" equal_pages_up_to=262144, level=3, mode_spec='auto',"
" paging_spec='equal_pages_up_to')"
)


Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ zfpy = [
"numpy<2.0.0",
]
pcodec = [
"pcodec>=0.2,<0.3",
"pcodec>=0.3,<0.4",
]
crc32c = [
"crc32c>=2.7",
Expand Down
Loading