Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support pcodec v0.3 #639

Merged
merged 15 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.06/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": 2,
"delta_spec": null,
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.06/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.06/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.07/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "try_lookback",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.07/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.07/encoded.09.dat
Binary file not shown.
9 changes: 9 additions & 0 deletions fixture/pcodec/codec.08/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"delta_encoding_order": null,
"delta_spec": "none",
"equal_pages_up_to": 262144,
"id": "pcodec",
"level": 8,
"mode_spec": "auto",
"paging_spec": "equal_pages_up_to"
}
Binary file added fixture/pcodec/codec.08/encoded.00.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.01.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.02.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.03.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.04.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.05.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.06.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.07.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.08.dat
Binary file not shown.
Binary file added fixture/pcodec/codec.08/encoded.09.dat
Binary file not shown.
67 changes: 50 additions & 17 deletions numcodecs/pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numcodecs.compat import ensure_contiguous_ndarray

try:
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
except ImportError: # pragma: no cover
standalone = None

Expand All @@ -27,14 +27,17 @@
level : int
A compression level from 0-12, where 12 takes the longest and compresses
the most.
delta_encoding_order : init or None
Either a delta encoding level from 0-7 or None. If set to None, pcodec
will try to infer the optimal delta encoding order.
mode_spec : {'auto', 'classic'}
mode_spec : {"auto", "classic"}
Configures whether Pcodec should try to infer the best "mode" or
structure of the data (e.g. approximate multiples of 0.1) to improve
compression ratio, or skip this step and just use the numbers as-is
(Classic mode).
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"} or None
Configures the delta encoding strategy. By default, uses "auto" which
will try to infer the best encoding order.
delta_encoding_order : int or None
Explicit delta encoding level from 0-7. Only valid if delta_spec is
"try_consecutive" or None.
equal_pages_up_to : int
Divide the chunk into equal pages of up to this many numbers.
"""
Expand All @@ -44,39 +47,69 @@
def __init__(
self,
level: int = 8,
mode_spec: Literal["auto", "classic"] = "auto",
slevang marked this conversation as resolved.
Show resolved Hide resolved
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] | None = None,
slevang marked this conversation as resolved.
Show resolved Hide resolved
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
delta_encoding_order: Optional[int] = None,
equal_pages_up_to: int = 262144,
# TODO one day, add support for the Try* mode specs
mode_spec: Literal['auto', 'classic'] = 'auto',
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
):
if standalone is None: # pragma: no cover
raise ImportError("pcodec must be installed to use the PCodec codec.")

# note that we use `level` instead of `compression_level` to
# match other codecs
self.level = level
self.mode_spec = mode_spec
self.delta_spec = delta_spec
self.paging_spec = paging_spec

Check warning on line 64 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L62-L64

Added lines #L62 - L64 were not covered by tests
self.delta_encoding_order = delta_encoding_order
self.equal_pages_up_to = equal_pages_up_to
self.mode_spec = mode_spec

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)

def _get_chunk_config(self):
match self.mode_spec:
case 'auto':
case "auto" | None:

Check warning on line 70 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L70

Added line #L70 was not covered by tests
slevang marked this conversation as resolved.
Show resolved Hide resolved
mode_spec = ModeSpec.auto()
case 'classic':
case "classic":

Check warning on line 72 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L72

Added line #L72 was not covered by tests
mode_spec = ModeSpec.classic()
case _:
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
raise ValueError(f"mode_spec {self.mode_spec} is not supported")

Check warning on line 75 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L75

Added line #L75 was not covered by tests

if self.delta_encoding_order is not None:

Check warning on line 77 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L77

Added line #L77 was not covered by tests
slevang marked this conversation as resolved.
Show resolved Hide resolved
# backwards compat for before delta_spec was introduced
if self.delta_spec in (None, "try_consecutive"):
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)

Check warning on line 80 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L79-L80

Added lines #L79 - L80 were not covered by tests
else:
raise ValueError(

Check warning on line 82 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L82

Added line #L82 was not covered by tests
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
)
else:
match self.delta_spec:
case "auto" | None:
delta_spec = DeltaSpec.auto()
case "none":
delta_spec = DeltaSpec.none()
case "try_lookback":
delta_spec = DeltaSpec.try_lookback()
case _:
raise ValueError(f"delta_spec {self.delta_spec} is not supported")

Check warning on line 94 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L86-L94

Added lines #L86 - L94 were not covered by tests

match self.paging_spec:
case "equal_pages_up_to" | None:
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
case _:
raise ValueError(f"paging_spec {self.paging_spec} is not supported")

Check warning on line 100 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L96-L100

Added lines #L96 - L100 were not covered by tests

config = ChunkConfig(
compression_level=self.level,
delta_encoding_order=self.delta_encoding_order,
delta_spec=delta_spec,
mode_spec=mode_spec,
paging_spec=paging_spec,
)
return config

Check warning on line 108 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L108

Added line #L108 was not covered by tests

def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)
config = self._get_chunk_config()

Check warning on line 112 in numcodecs/pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/pcodec.py#L111-L112

Added lines #L111 - L112 were not covered by tests
return standalone.simple_compress(buf, config)

def decode(self, buf, out=None):
Expand Down
18 changes: 15 additions & 3 deletions numcodecs/tests/test_pcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
PCodec(level=1),
PCodec(level=5),
PCodec(level=9),
PCodec(mode_spec='classic'),
PCodec(mode_spec="classic"),
PCodec(equal_pages_up_to=300),
PCodec(delta_encoding_order=2),
PCodec(delta_spec="try_lookback"),
PCodec(delta_spec="none"),
]


Expand Down Expand Up @@ -57,14 +60,23 @@


def test_invalid_config_error():
codec = PCodec(mode_spec='bogus')
for param in ["mode_spec", "delta_spec", "paging_spec"]:
slevang marked this conversation as resolved.
Show resolved Hide resolved
codec = PCodec(**{param: "bogus"})
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)

Check warning on line 66 in numcodecs/tests/test_pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/tests/test_pcodec.py#L63-L66

Added lines #L63 - L66 were not covered by tests


def test_invalid_delta_encoding_combo():
codec = PCodec(delta_encoding_order=2, delta_spec="none")

Check warning on line 70 in numcodecs/tests/test_pcodec.py

View check run for this annotation

Codecov / codecov/patch

numcodecs/tests/test_pcodec.py#L69-L70

Added lines #L69 - L70 were not covered by tests
with pytest.raises(ValueError):
check_encode_decode_array_to_bytes(arrays[0], codec)


def test_repr():
check_repr(
"PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')"
"PCodec(delta_encoding_order=None, delta_spec='auto',"
" equal_pages_up_to=262144, level=3, mode_spec='auto',"
" paging_spec='equal_pages_up_to')"
)


Expand Down
2 changes: 1 addition & 1 deletion numcodecs/tests/test_zarr3.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

@pytest.fixture
def store() -> StorePath:
return StorePath(MemoryStore(mode="w"))
return StorePath(MemoryStore())


ALL_CODECS = [getattr(numcodecs.zarr3, cls_name) for cls_name in numcodecs.zarr3.__all__]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ zfpy = [
"numpy<2.0.0",
]
pcodec = [
"pcodec>=0.2,<0.3",
"pcodec>=0.3",
slevang marked this conversation as resolved.
Show resolved Hide resolved
]
crc32c = [
"crc32c>=2.7",
Expand Down
Loading