diff --git a/docs/release.rst b/docs/release.rst index 93843513..68a751d3 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -16,6 +16,13 @@ Release notes Unreleased ---------- +Breaking changes +~~~~~~~~~~~~~~~~ +* All arguments to the ``PCodec`` constructor except for ``level`` + are now keyword only, to support the updated API. + By :user:`Sam Levang <slevang>`, :issue:`623` + + Fixes ~~~~~ * Fixes issue with ``Delta`` Zarr 3 codec not working with ``astype``. @@ -24,6 +31,10 @@ Fixes Improvements ~~~~~~~~~~~~ +* Add support for ``pcodec`` 0.3. This exposes the new ``delta_spec`` + and ``paging_spec`` arguments, but maintains full backwards + compatibility for data written with older package versions. + By :user:`Sam Levang <slevang>`, :issue:`623` * If an import error is raised when trying to define a codec that is *not* an optional dependency, it is no longer silently caught. Instead it will be propagated to the user, as this indicates an issue with the installed diff --git a/fixture/pcodec/codec.06/config.json b/fixture/pcodec/codec.06/config.json new file mode 100644 index 00000000..982d0cc2 --- /dev/null +++ b/fixture/pcodec/codec.06/config.json @@ -0,0 +1,9 @@ +{ + "delta_encoding_order": 2, + "delta_spec": "auto", + "equal_pages_up_to": 262144, + "id": "pcodec", + "level": 8, + "mode_spec": "auto", + "paging_spec": "equal_pages_up_to" +} \ No newline at end of file diff --git a/fixture/pcodec/codec.06/encoded.00.dat b/fixture/pcodec/codec.06/encoded.00.dat new file mode 100644 index 00000000..9332e78a Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.00.dat differ diff --git a/fixture/pcodec/codec.06/encoded.01.dat b/fixture/pcodec/codec.06/encoded.01.dat new file mode 100644 index 00000000..e2729048 Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.01.dat differ diff --git a/fixture/pcodec/codec.06/encoded.02.dat b/fixture/pcodec/codec.06/encoded.02.dat new file mode 100644 index 00000000..d74ba3c2 Binary files /dev/null and 
b/fixture/pcodec/codec.06/encoded.02.dat differ diff --git a/fixture/pcodec/codec.06/encoded.03.dat b/fixture/pcodec/codec.06/encoded.03.dat new file mode 100644 index 00000000..50e2c45f Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.03.dat differ diff --git a/fixture/pcodec/codec.06/encoded.04.dat b/fixture/pcodec/codec.06/encoded.04.dat new file mode 100644 index 00000000..81b86387 Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.04.dat differ diff --git a/fixture/pcodec/codec.06/encoded.05.dat b/fixture/pcodec/codec.06/encoded.05.dat new file mode 100644 index 00000000..6913bc0f Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.05.dat differ diff --git a/fixture/pcodec/codec.06/encoded.06.dat b/fixture/pcodec/codec.06/encoded.06.dat new file mode 100644 index 00000000..a3fdce34 Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.06.dat differ diff --git a/fixture/pcodec/codec.06/encoded.07.dat b/fixture/pcodec/codec.06/encoded.07.dat new file mode 100644 index 00000000..3498aaad Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.07.dat differ diff --git a/fixture/pcodec/codec.06/encoded.08.dat b/fixture/pcodec/codec.06/encoded.08.dat new file mode 100644 index 00000000..c5ab73c5 Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.08.dat differ diff --git a/fixture/pcodec/codec.06/encoded.09.dat b/fixture/pcodec/codec.06/encoded.09.dat new file mode 100644 index 00000000..b8fe13fd Binary files /dev/null and b/fixture/pcodec/codec.06/encoded.09.dat differ diff --git a/fixture/pcodec/codec.07/config.json b/fixture/pcodec/codec.07/config.json new file mode 100644 index 00000000..0dfb6337 --- /dev/null +++ b/fixture/pcodec/codec.07/config.json @@ -0,0 +1,9 @@ +{ + "delta_encoding_order": null, + "delta_spec": "try_lookback", + "equal_pages_up_to": 262144, + "id": "pcodec", + "level": 8, + "mode_spec": "auto", + "paging_spec": "equal_pages_up_to" +} \ No newline at end of file diff --git 
a/fixture/pcodec/codec.07/encoded.00.dat b/fixture/pcodec/codec.07/encoded.00.dat new file mode 100644 index 00000000..d7f0d767 Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.00.dat differ diff --git a/fixture/pcodec/codec.07/encoded.01.dat b/fixture/pcodec/codec.07/encoded.01.dat new file mode 100644 index 00000000..e41f009d Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.01.dat differ diff --git a/fixture/pcodec/codec.07/encoded.02.dat b/fixture/pcodec/codec.07/encoded.02.dat new file mode 100644 index 00000000..8128ca9f Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.02.dat differ diff --git a/fixture/pcodec/codec.07/encoded.03.dat b/fixture/pcodec/codec.07/encoded.03.dat new file mode 100644 index 00000000..a6134d2f Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.03.dat differ diff --git a/fixture/pcodec/codec.07/encoded.04.dat b/fixture/pcodec/codec.07/encoded.04.dat new file mode 100644 index 00000000..6894c4ae Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.04.dat differ diff --git a/fixture/pcodec/codec.07/encoded.05.dat b/fixture/pcodec/codec.07/encoded.05.dat new file mode 100644 index 00000000..af7a0aea Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.05.dat differ diff --git a/fixture/pcodec/codec.07/encoded.06.dat b/fixture/pcodec/codec.07/encoded.06.dat new file mode 100644 index 00000000..9879e7b7 Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.06.dat differ diff --git a/fixture/pcodec/codec.07/encoded.07.dat b/fixture/pcodec/codec.07/encoded.07.dat new file mode 100644 index 00000000..8c939467 Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.07.dat differ diff --git a/fixture/pcodec/codec.07/encoded.08.dat b/fixture/pcodec/codec.07/encoded.08.dat new file mode 100644 index 00000000..e7057235 Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.08.dat differ diff --git a/fixture/pcodec/codec.07/encoded.09.dat 
b/fixture/pcodec/codec.07/encoded.09.dat new file mode 100644 index 00000000..e0220dbf Binary files /dev/null and b/fixture/pcodec/codec.07/encoded.09.dat differ diff --git a/fixture/pcodec/codec.08/config.json b/fixture/pcodec/codec.08/config.json new file mode 100644 index 00000000..b89edcab --- /dev/null +++ b/fixture/pcodec/codec.08/config.json @@ -0,0 +1,9 @@ +{ + "delta_encoding_order": null, + "delta_spec": "none", + "equal_pages_up_to": 262144, + "id": "pcodec", + "level": 8, + "mode_spec": "auto", + "paging_spec": "equal_pages_up_to" +} \ No newline at end of file diff --git a/fixture/pcodec/codec.08/encoded.00.dat b/fixture/pcodec/codec.08/encoded.00.dat new file mode 100644 index 00000000..084e59b6 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.00.dat differ diff --git a/fixture/pcodec/codec.08/encoded.01.dat b/fixture/pcodec/codec.08/encoded.01.dat new file mode 100644 index 00000000..6d8057e6 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.01.dat differ diff --git a/fixture/pcodec/codec.08/encoded.02.dat b/fixture/pcodec/codec.08/encoded.02.dat new file mode 100644 index 00000000..531d801c Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.02.dat differ diff --git a/fixture/pcodec/codec.08/encoded.03.dat b/fixture/pcodec/codec.08/encoded.03.dat new file mode 100644 index 00000000..270e94c3 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.03.dat differ diff --git a/fixture/pcodec/codec.08/encoded.04.dat b/fixture/pcodec/codec.08/encoded.04.dat new file mode 100644 index 00000000..38157b56 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.04.dat differ diff --git a/fixture/pcodec/codec.08/encoded.05.dat b/fixture/pcodec/codec.08/encoded.05.dat new file mode 100644 index 00000000..748ef1cf Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.05.dat differ diff --git a/fixture/pcodec/codec.08/encoded.06.dat b/fixture/pcodec/codec.08/encoded.06.dat new file mode 100644 index 
00000000..0f027398 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.06.dat differ diff --git a/fixture/pcodec/codec.08/encoded.07.dat b/fixture/pcodec/codec.08/encoded.07.dat new file mode 100644 index 00000000..bfcfc521 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.07.dat differ diff --git a/fixture/pcodec/codec.08/encoded.08.dat b/fixture/pcodec/codec.08/encoded.08.dat new file mode 100644 index 00000000..0900a9d6 Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.08.dat differ diff --git a/fixture/pcodec/codec.08/encoded.09.dat b/fixture/pcodec/codec.08/encoded.09.dat new file mode 100644 index 00000000..076ba5ad Binary files /dev/null and b/fixture/pcodec/codec.08/encoded.09.dat differ diff --git a/fixture/pcodec/codec.09/config.json b/fixture/pcodec/codec.09/config.json new file mode 100644 index 00000000..39528178 --- /dev/null +++ b/fixture/pcodec/codec.09/config.json @@ -0,0 +1,9 @@ +{ + "delta_encoding_order": 1, + "delta_spec": "try_consecutive", + "equal_pages_up_to": 262144, + "id": "pcodec", + "level": 8, + "mode_spec": "auto", + "paging_spec": "equal_pages_up_to" +} \ No newline at end of file diff --git a/fixture/pcodec/codec.09/encoded.00.dat b/fixture/pcodec/codec.09/encoded.00.dat new file mode 100644 index 00000000..bf36ef64 Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.00.dat differ diff --git a/fixture/pcodec/codec.09/encoded.01.dat b/fixture/pcodec/codec.09/encoded.01.dat new file mode 100644 index 00000000..6787f61b Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.01.dat differ diff --git a/fixture/pcodec/codec.09/encoded.02.dat b/fixture/pcodec/codec.09/encoded.02.dat new file mode 100644 index 00000000..2871fe28 Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.02.dat differ diff --git a/fixture/pcodec/codec.09/encoded.03.dat b/fixture/pcodec/codec.09/encoded.03.dat new file mode 100644 index 00000000..96d8d596 Binary files /dev/null and 
b/fixture/pcodec/codec.09/encoded.03.dat differ diff --git a/fixture/pcodec/codec.09/encoded.04.dat b/fixture/pcodec/codec.09/encoded.04.dat new file mode 100644 index 00000000..f32dd110 Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.04.dat differ diff --git a/fixture/pcodec/codec.09/encoded.05.dat b/fixture/pcodec/codec.09/encoded.05.dat new file mode 100644 index 00000000..548f11ef Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.05.dat differ diff --git a/fixture/pcodec/codec.09/encoded.06.dat b/fixture/pcodec/codec.09/encoded.06.dat new file mode 100644 index 00000000..2bda9c38 Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.06.dat differ diff --git a/fixture/pcodec/codec.09/encoded.07.dat b/fixture/pcodec/codec.09/encoded.07.dat new file mode 100644 index 00000000..b8f0469e Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.07.dat differ diff --git a/fixture/pcodec/codec.09/encoded.08.dat b/fixture/pcodec/codec.09/encoded.08.dat new file mode 100644 index 00000000..fd00ab9d Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.08.dat differ diff --git a/fixture/pcodec/codec.09/encoded.09.dat b/fixture/pcodec/codec.09/encoded.09.dat new file mode 100644 index 00000000..90d76bec Binary files /dev/null and b/fixture/pcodec/codec.09/encoded.09.dat differ diff --git a/numcodecs/pcodec.py b/numcodecs/pcodec.py index ceb012f0..1739ff0b 100644 --- a/numcodecs/pcodec.py +++ b/numcodecs/pcodec.py @@ -4,7 +4,7 @@ from numcodecs.compat import ensure_contiguous_ndarray try: - from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone + from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone except ImportError: # pragma: no cover standalone = None @@ -27,14 +27,21 @@ class PCodec(Codec): level : int A compression level from 0-12, where 12 take the longest and compresses the most. - delta_encoding_order : init or None - Either a delta encoding level from 0-7 or None. 
If set to None, pcodec - will try to infer the optimal delta encoding order. - mode_spec : {'auto', 'classic'} + mode_spec : {"auto", "classic"} Configures whether Pcodec should try to infer the best "mode" or structure of the data (e.g. approximate multiples of 0.1) to improve compression ratio, or skip this step and just use the numbers as-is - (Classic mode). + (Classic mode). Note that the "try*" specs are not currently supported. + delta_spec : {"auto", "none", "try_consecutive", "try_lookback"} + Configures the delta encoding strategy. By default, uses "auto" which + will try to infer the best encoding order. + paging_spec : {"equal_pages_up_to"} + Configures the paging strategy. Only "equal_pages_up_to" is currently + supported. + delta_encoding_order : int or None + Explicit delta encoding level from 0-7. Only valid if delta_spec is + "try_consecutive" or "auto" (to support backwards compatibility with + older versions of this codec). equal_pages_up_to : int Divide the chunk into equal pages of up to this many numbers. 
""" @@ -44,10 +51,12 @@ class PCodec(Codec): def __init__( self, level: int = 8, + *, + mode_spec: Literal["auto", "classic"] = "auto", + delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto", + paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to", delta_encoding_order: Optional[int] = None, - equal_pages_up_to: int = 262144, - # TODO one day, add support for the Try* mode specs - mode_spec: Literal['auto', 'classic'] = 'auto', + equal_pages_up_to: int = DEFAULT_MAX_PAGE_N, ): if standalone is None: # pragma: no cover raise ImportError("pcodec must be installed to use the PCodec codec.") @@ -55,28 +64,58 @@ def __init__( # note that we use `level` instead of `compression_level` to # match other codecs self.level = level + self.mode_spec = mode_spec + self.delta_spec = delta_spec + self.paging_spec = paging_spec self.delta_encoding_order = delta_encoding_order self.equal_pages_up_to = equal_pages_up_to - self.mode_spec = mode_spec - - def encode(self, buf): - buf = ensure_contiguous_ndarray(buf) + def _get_chunk_config(self): match self.mode_spec: - case 'auto': + case "auto": mode_spec = ModeSpec.auto() - case 'classic': + case "classic": mode_spec = ModeSpec.classic() case _: - raise ValueError(f"unknown value for mode_spec: {self.mode_spec}") - paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to) + raise ValueError(f"mode_spec {self.mode_spec} is not supported") + + if self.delta_encoding_order is not None and self.delta_spec == "auto": + # backwards compat for before delta_spec was introduced + delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order) + elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive": + raise ValueError( + "delta_encoding_order can only be set for delta_spec='try_consecutive'" + ) + else: + match self.delta_spec: + case "auto": + delta_spec = DeltaSpec.auto() + case "none": + delta_spec = DeltaSpec.none() + case "try_consecutive": + delta_spec = 
DeltaSpec.try_consecutive(self.delta_encoding_order) + case "try_lookback": + delta_spec = DeltaSpec.try_lookback() + case _: + raise ValueError(f"delta_spec {self.delta_spec} is not supported") + + match self.paging_spec: + case "equal_pages_up_to": + paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to) + case _: + raise ValueError(f"paging_spec {self.paging_spec} is not supported") config = ChunkConfig( compression_level=self.level, - delta_encoding_order=self.delta_encoding_order, + delta_spec=delta_spec, mode_spec=mode_spec, paging_spec=paging_spec, ) + return config + + def encode(self, buf): + buf = ensure_contiguous_ndarray(buf) + config = self._get_chunk_config() return standalone.simple_compress(buf, config) def decode(self, buf, out=None): diff --git a/numcodecs/tests/test_pcodec.py b/numcodecs/tests/test_pcodec.py index c10549bd..63a2fb51 100644 --- a/numcodecs/tests/test_pcodec.py +++ b/numcodecs/tests/test_pcodec.py @@ -23,8 +23,12 @@ PCodec(level=1), PCodec(level=5), PCodec(level=9), - PCodec(mode_spec='classic'), + PCodec(mode_spec="classic"), PCodec(equal_pages_up_to=300), + PCodec(delta_encoding_order=2), + PCodec(delta_spec="try_lookback"), + PCodec(delta_spec="none"), + PCodec(delta_spec="try_consecutive", delta_encoding_order=1), ] @@ -56,15 +60,24 @@ def test_config(): check_config(codec) -def test_invalid_config_error(): - codec = PCodec(mode_spec='bogus') +@pytest.mark.parametrize("param", ["mode_spec", "delta_spec", "paging_spec"]) +def test_invalid_config_error(param): + codec = PCodec(**{param: "bogus"}) + with pytest.raises(ValueError): + check_encode_decode_array_to_bytes(arrays[0], codec) + + +def test_invalid_delta_encoding_combo(): + codec = PCodec(delta_encoding_order=2, delta_spec="none") with pytest.raises(ValueError): check_encode_decode_array_to_bytes(arrays[0], codec) def test_repr(): check_repr( - "PCodec(delta_encoding_order=None, equal_pages_up_to=262144, level=3, mode_spec='auto')" + 
"PCodec(delta_encoding_order=None, delta_spec='auto'," + " equal_pages_up_to=262144, level=3, mode_spec='auto'," + " paging_spec='equal_pages_up_to')" ) diff --git a/pyproject.toml b/pyproject.toml index 928d901b..52ab34c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ zfpy = [ "numpy<2.0.0", ] pcodec = [ - "pcodec>=0.2,<0.3", + "pcodec>=0.3,<0.4", ] crc32c = [ "crc32c>=2.7",