def read_json(
    path_or_buf: "FilePathOrBuffer",
    encoding: str = "utf-8",
    filter: Optional[str] = None,
    preview_offset: int = 0,
    preview_nrows: Optional[int] = None,
    *args: Any,
    **kwargs: Any,
) -> pd.DataFrame:
    """Read a JSON source with pandas, optionally jq-filtered and windowed.

    Args:
        path_or_buf: Path or buffer handed to ``pd.read_json``. When
            ``filter`` is given it is opened with ``open()`` and must
            therefore be a filesystem path.
        encoding: Text encoding used both to read the file for the jq
            filter and by ``pd.read_json``.
        filter: Optional jq program; the file content is passed through
            ``transform_with_jq`` before being parsed.
        preview_offset: Number of leading rows to drop from the result.
        preview_nrows: Maximum number of rows to keep after the offset.
            Ignored when a native ``nrows`` is supplied in ``kwargs``
            (the native option wins).
        *args: Extra positional arguments forwarded to ``pd.read_json``.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_json``.

    Returns:
        The parsed data restricted to the preview window. Row labels are
        kept as produced by pandas (they are not renumbered). If pandas
        returns something other than a DataFrame/Series (e.g. a chunked
        reader when ``chunksize`` is set) it is returned untouched.

    Raises:
        ValueError: If ``preview_offset`` is negative.
    """
    if preview_offset < 0:
        raise ValueError("preview_offset must be >= 0")

    if filter is not None:
        with open(path_or_buf, encoding=encoding) as f:
            path_or_buf = transform_with_jq(f.read(), filter)

    # A native ``nrows`` given by the caller takes precedence over the
    # preview window size.
    native_nrows = kwargs.get("nrows")
    window_end: Optional[int] = None
    if preview_nrows is not None and native_nrows is None:
        window_end = preview_offset + preview_nrows
        # pandas only accepts ``nrows`` together with ``lines=True``; in that
        # mode we can stop reading early. The skipped rows still have to be
        # read, hence offset + nrows. For regular JSON documents the window
        # is applied after parsing instead.
        if kwargs.get("lines"):
            kwargs["nrows"] = window_end

    result = pd.read_json(path_or_buf, *args, encoding=encoding, **kwargs)

    if not isinstance(result, (pd.DataFrame, pd.Series)):
        # e.g. a JsonReader when ``chunksize`` is set: nothing to slice.
        return result

    if preview_offset == 0 and window_end is None:
        return result
    return result.iloc[preview_offset:window_end]