diff --git a/dlt/sources/helpers/rest_client/client.py b/dlt/sources/helpers/rest_client/client.py
index 73ae064299..c05dabc30c 100644
--- a/dlt/sources/helpers/rest_client/client.py
+++ b/dlt/sources/helpers/rest_client/client.py
@@ -225,7 +225,7 @@ def raise_for_status(response: Response, *args: Any, **kwargs: Any) -> None:
if paginator is None:
paginator = self.detect_paginator(response, data)
- paginator.update_state(response)
+ paginator.update_state(response, data)
paginator.update_request(request)
# yield data with context
diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py
index 4c8ce70bb2..632c93d0c7 100644
--- a/dlt/sources/helpers/rest_client/paginators.py
+++ b/dlt/sources/helpers/rest_client/paginators.py
@@ -1,6 +1,6 @@
import warnings
from abc import ABC, abstractmethod
-from typing import Optional, Dict, Any
+from typing import Any, Dict, List, Optional
from urllib.parse import urlparse, urljoin
from requests import Response, Request
@@ -39,7 +39,7 @@ def init_request(self, request: Request) -> None: # noqa: B027, optional overri
pass
@abstractmethod
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
"""Updates the paginator's state based on the response from the API.
This method should extract necessary pagination details (like next page
@@ -73,7 +73,7 @@ def __str__(self) -> str:
class SinglePagePaginator(BasePaginator):
"""A paginator for single-page API responses."""
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
self._has_next_page = False
def update_request(self, request: Request) -> None:
@@ -96,6 +96,7 @@ def __init__(
maximum_value: Optional[int] = None,
total_path: Optional[jsonpath.TJsonPath] = None,
error_message_items: str = "items",
+ stop_after_empty_page: Optional[bool] = True,
):
"""
Args:
@@ -116,10 +117,14 @@ def __init__(
If not provided, `maximum_value` must be specified.
error_message_items (str): The name of the items in the error message.
Defaults to 'items'.
+ stop_after_empty_page (bool): Whether pagination should stop when
+ a page contains no result items. Defaults to `True`.
"""
super().__init__()
- if total_path is None and maximum_value is None:
- raise ValueError("Either `total_path` or `maximum_value` must be provided.")
+ if total_path is None and maximum_value is None and not stop_after_empty_page:
+ raise ValueError(
+ "Either `total_path` or `maximum_value` or `stop_after_empty_page` must be provided."
+ )
self.param_name = param_name
self.current_value = initial_value
self.value_step = value_step
@@ -127,6 +132,7 @@ def __init__(
self.maximum_value = maximum_value
self.total_path = jsonpath.compile_path(total_path) if total_path else None
self.error_message_items = error_message_items
+ self.stop_after_empty_page = stop_after_empty_page
def init_request(self, request: Request) -> None:
if request.params is None:
@@ -134,26 +140,32 @@ def init_request(self, request: Request) -> None:
request.params[self.param_name] = self.current_value
- def update_state(self, response: Response) -> None:
- total = None
- if self.total_path:
- response_json = response.json()
- values = jsonpath.find_values(self.total_path, response_json)
- total = values[0] if values else None
- if total is None:
- self._handle_missing_total(response_json)
-
- try:
- total = int(total)
- except ValueError:
- self._handle_invalid_total(total)
-
- self.current_value += self.value_step
-
- if (total is not None and self.current_value >= total + self.base_index) or (
- self.maximum_value is not None and self.current_value >= self.maximum_value
- ):
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
+ if self._stop_after_this_page(data):
self._has_next_page = False
+ else:
+ total = None
+ if self.total_path:
+ response_json = response.json()
+ values = jsonpath.find_values(self.total_path, response_json)
+ total = values[0] if values else None
+ if total is None:
+ self._handle_missing_total(response_json)
+
+ try:
+ total = int(total)
+ except ValueError:
+ self._handle_invalid_total(total)
+
+ self.current_value += self.value_step
+
+ if (total is not None and self.current_value >= total + self.base_index) or (
+ self.maximum_value is not None and self.current_value >= self.maximum_value
+ ):
+ self._has_next_page = False
+
+ def _stop_after_this_page(self, data: Optional[List[Any]]=None) -> bool:
+ return self.stop_after_empty_page and not data
def _handle_missing_total(self, response_json: Dict[str, Any]) -> None:
raise ValueError(
@@ -229,6 +241,7 @@ def __init__(
page_param: str = "page",
total_path: jsonpath.TJsonPath = "total",
maximum_page: Optional[int] = None,
+ stop_after_empty_page: Optional[bool] = True,
):
"""
Args:
@@ -246,9 +259,13 @@ def __init__(
will stop once this page is reached or exceeded, even if more
data is available. This allows you to limit the maximum number
of pages for pagination. Defaults to None.
+ stop_after_empty_page (bool): Whether pagination should stop when
+ a page contains no result items. Defaults to `True`.
"""
- if total_path is None and maximum_page is None:
- raise ValueError("Either `total_path` or `maximum_page` must be provided.")
+ if total_path is None and maximum_page is None and not stop_after_empty_page:
+ raise ValueError(
+ "Either `total_path` or `maximum_page` or `stop_after_empty_page` must be provided."
+ )
page = page if page is not None else base_page
@@ -260,6 +277,7 @@ def __init__(
value_step=1,
maximum_value=maximum_page,
error_message_items="pages",
+ stop_after_empty_page=stop_after_empty_page,
)
def __str__(self) -> str:
@@ -330,6 +348,7 @@ def __init__(
limit_param: str = "limit",
total_path: jsonpath.TJsonPath = "total",
maximum_offset: Optional[int] = None,
+ stop_after_empty_page: Optional[bool] = True,
) -> None:
"""
Args:
@@ -347,15 +366,20 @@ def __init__(
pagination will stop once this offset is reached or exceeded,
even if more data is available. This allows you to limit the
maximum range for pagination. Defaults to None.
+ stop_after_empty_page (bool): Whether pagination should stop when
+ a page contains no result items. Defaults to `True`.
"""
- if total_path is None and maximum_offset is None:
- raise ValueError("Either `total_path` or `maximum_offset` must be provided.")
+ if total_path is None and maximum_offset is None and not stop_after_empty_page:
+ raise ValueError(
+ "Either `total_path` or `maximum_offset` or `stop_after_empty_page` must be provided."
+ )
super().__init__(
param_name=offset_param,
initial_value=offset,
total_path=total_path,
value_step=limit,
maximum_value=maximum_offset,
+ stop_after_empty_page=stop_after_empty_page,
)
self.limit_param = limit_param
self.limit = limit
@@ -484,7 +508,7 @@ def __init__(self, links_next_key: str = "next") -> None:
super().__init__()
self.links_next_key = links_next_key
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
"""Extracts the next page URL from the 'Link' header in the response."""
self._next_reference = response.links.get(self.links_next_key, {}).get("url")
@@ -539,7 +563,7 @@ def __init__(
super().__init__()
self.next_url_path = jsonpath.compile_path(next_url_path)
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
"""Extracts the next page URL from the JSON response."""
values = jsonpath.find_values(self.next_url_path, response.json())
self._next_reference = values[0] if values else None
@@ -618,7 +642,7 @@ def __init__(
self.cursor_path = jsonpath.compile_path(cursor_path)
self.cursor_param = cursor_param
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
"""Extracts the cursor value from the JSON response."""
values = jsonpath.find_values(self.cursor_path, response.json())
self._next_reference = values[0] if values else None
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api.md
index 4b72b3276e..b4d2d08daa 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api.md
@@ -371,7 +371,7 @@ You can configure the pagination for the `posts` resource like this:
{
"path": "posts",
"paginator": {
- "type": "json_response",
+ "type": "json_link",
"next_url_path": "pagination.next",
}
}
@@ -380,7 +380,7 @@ You can configure the pagination for the `posts` resource like this:
Alternatively, you can use the paginator instance directly:
```py
-from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator
+from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator
# ...
@@ -402,8 +402,8 @@ These are the available paginators:
| ------------ | -------------- | ----------- |
| `json_link` | [JSONLinkPaginator](../../general-usage/http/rest-client.md#jsonresponsepaginator) | The link to the next page is in the body (JSON) of the response.
*Parameters:*
- `next_url_path` (str) - the JSONPath to the next page URL
|
| `header_link` | [HeaderLinkPaginator](../../general-usage/http/rest-client.md#headerlinkpaginator) | The links to the next page are in the response headers.
*Parameters:*- `link_header` (str) - the name of the header containing the links. Default is "next".
|
-| `offset` | [OffsetPaginator](../../general-usage/http/rest-client.md#offsetpaginator) | The pagination is based on an offset parameter. With total items count either in the response body or explicitly provided.
*Parameters:*- `limit` (int) - the maximum number of items to retrieve in each request
- `offset` (int) - the initial offset for the first request. Defaults to `0`
- `offset_param` (str) - the name of the query parameter used to specify the offset. Defaults to "offset"
- `limit_param` (str) - the name of the query parameter used to specify the limit. Defaults to "limit"
- `total_path` (str) - a JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset`
- `maximum_offset` (int) - optional maximum offset value. Limits pagination even without total count
|
-| `page_number` | [PageNumberPaginator](../../general-usage/http/rest-client.md#pagenumberpaginator) | The pagination is based on a page number parameter. With total pages count either in the response body or explicitly provided.
*Parameters:*- `base_page` (int) - the starting page number. Defaults to `0`
- `page_param` (str) - the query parameter name for the page number. Defaults to "page"
- `total_path` (str) - a JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page`
- `maximum_page` (int) - optional maximum page number. Stops pagination once this page is reached
|
+| `offset` | [OffsetPaginator](../../general-usage/http/rest-client.md#offsetpaginator) | The pagination is based on an offset parameter. With total items count either in the response body or explicitly provided.
*Parameters:*- `limit` (int) - the maximum number of items to retrieve in each request
- `offset` (int) - the initial offset for the first request. Defaults to `0`
- `offset_param` (str) - the name of the query parameter used to specify the offset. Defaults to "offset"
- `limit_param` (str) - the name of the query parameter used to specify the limit. Defaults to "limit"
- `total_path` (str) - a JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset` and `stop_after_empty_page`
- `maximum_offset` (int) - optional maximum offset value. Limits pagination even without total count
- `stop_after_empty_page` (bool) - Whether pagination should stop when a page contains no result items. Defaults to `True`
|
+| `page_number` | [PageNumberPaginator](../../general-usage/http/rest-client.md#pagenumberpaginator) | The pagination is based on a page number parameter. With total pages count either in the response body or explicitly provided.
*Parameters:*- `base_page` (int) - the starting page number. Defaults to `0`
- `page_param` (str) - the query parameter name for the page number. Defaults to "page"
- `total_path` (str) - a JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page` and `stop_after_empty_page`
- `maximum_page` (int) - optional maximum page number. Stops pagination once this page is reached
- `stop_after_empty_page` (bool) - Whether pagination should stop when a page contains no result items. Defaults to `True`
|
| `cursor` | [JSONResponseCursorPaginator](../../general-usage/http/rest-client.md#jsonresponsecursorpaginator) | The pagination is based on a cursor parameter. The value of the cursor is in the response body (JSON).
*Parameters:*- `cursor_path` (str) - the JSONPath to the cursor value. Defaults to "cursors.next"
- `cursor_param` (str) - the query parameter name for the cursor. Defaults to "after"
|
| `single_page` | SinglePagePaginator | The response will be interpreted as a single-page response, ignoring possible pagination metadata. |
| `auto` | `None` | Explicitly specify that the source should automatically detect the pagination method. |
diff --git a/docs/website/docs/general-usage/http/rest-client.md b/docs/website/docs/general-usage/http/rest-client.md
index ddd66a233b..40c83f8c5b 100644
--- a/docs/website/docs/general-usage/http/rest-client.md
+++ b/docs/website/docs/general-usage/http/rest-client.md
@@ -183,8 +183,9 @@ need to specify the paginator when the API uses a different relation type.
- `offset`: The initial offset for the first request. Defaults to `0`.
- `offset_param`: The name of the query parameter used to specify the offset. Defaults to `"offset"`.
- `limit_param`: The name of the query parameter used to specify the limit. Defaults to `"limit"`.
-- `total_path`: A JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset`.
+- `total_path`: A JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset` and `stop_after_empty_page`.
- `maximum_offset`: Optional maximum offset value. Limits pagination even without total count.
+- `stop_after_empty_page`: Whether pagination should stop when a page contains no result items. Defaults to `True`.
**Example:**
@@ -198,7 +199,7 @@ E.g. `https://api.example.com/items?offset=0&limit=100`, `https://api.example.co
}
```
-You can paginate through responses from this API using `OffsetPaginator`:
+You can paginate through responses from this API using the `OffsetPaginator`:
```py
client = RESTClient(
@@ -210,20 +211,34 @@ client = RESTClient(
)
```
-In a different scenario where the API does not provide the total count, you can use `maximum_offset` to limit the pagination:
+Pagination stops by default when a page contains no records. This is especially useful when the API does not provide the total item count.
+Here, the `total_path` parameter is set to `None` because the API does not provide the total count.
```py
client = RESTClient(
base_url="https://api.example.com",
paginator=OffsetPaginator(
limit=100,
- maximum_offset=1000,
- total_path=None
+ total_path=None,
)
)
```
-Note, that in this case, the `total_path` parameter is set explicitly to `None` to indicate that the API does not provide the total count.
+Additionally, you can limit pagination with `maximum_offset`, for example during development. If `maximum_offset` is reached before the first empty page then pagination stops:
+
+```py
+client = RESTClient(
+ base_url="https://api.example.com",
+ paginator=OffsetPaginator(
+ limit=10,
+ maximum_offset=20, # limits response to 20 records
+ total_path=None,
+ )
+)
+```
+
+You can disable automatic stoppage of pagination by setting `stop_after_stop_after_empty_page = False`. In this case, you must provide either `total_path` or `maximum_offset` to guarantee that the paginator terminates.
+
#### PageNumberPaginator
@@ -234,8 +249,9 @@ Note, that in this case, the `total_path` parameter is set explicitly to `None`
- `base_page`: The index of the initial page from the API perspective. Normally, it's 0-based or 1-based (e.g., 1, 2, 3, ...) indexing for the pages. Defaults to 0.
- `page`: The page number for the first request. If not provided, the initial value will be set to `base_page`.
- `page_param`: The query parameter name for the page number. Defaults to `"page"`.
-- `total_path`: A JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page`.
+- `total_path`: A JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page` and `stop_after_empty_page`.
- `maximum_page`: Optional maximum page number. Stops pagination once this page is reached.
+- `stop_after_empty_page`: Whether pagination should stop when a page contains no result items. Defaults to `True`.
**Example:**
@@ -248,7 +264,7 @@ Assuming an API endpoint `https://api.example.com/items` paginates by page numbe
}
```
-You can paginate through responses from this API using `PageNumberPaginator`:
+You can paginate through responses from this API using the `PageNumberPaginator`:
```py
client = RESTClient(
@@ -259,19 +275,32 @@ client = RESTClient(
)
```
-If the API does not provide the total number of pages:
+Pagination stops by default when a page contains no records. This is especially useful when the API does not provide the total item count.
+Here, the `total_path` parameter is set to `None` because the API does not provide the total count.
```py
client = RESTClient(
base_url="https://api.example.com",
paginator=PageNumberPaginator(
- maximum_page=5, # Stops after fetching 5 pages
total_path=None
)
)
```
-Note, that in the case above, the `total_path` parameter is set explicitly to `None` to indicate that the API does not provide the total count.
+Additionally, you can limit pagination with `maximum_offset`, for example during development. If `maximum_page` is reached before the first empty page then pagination stops:
+
+```py
+client = RESTClient(
+ base_url="https://api.example.com",
+ paginator=OffsetPaginator(
+ maximum_page=2, # limits response to 2 pages
+ total_path=None,
+ )
+)
+```
+
+You can disable automatic stoppage of pagination by setting `stop_after_stop_after_empty_page = False`. In this case, you must provide either `total_path` or `maximum_page` to guarantee that the paginator terminates.
+
#### JSONResponseCursorPaginator
@@ -310,7 +339,7 @@ When working with APIs that use non-standard pagination schemes, or when you nee
- `init_request(request: Request) -> None`: This method is called before making the first API call in the `RESTClient.paginate` method. You can use this method to set up the initial request query parameters, headers, etc. For example, you can set the initial page number or cursor value.
-- `update_state(response: Response) -> None`: This method updates the paginator's state based on the response of the API call. Typically, you extract pagination details (like the next page reference) from the response and store them in the paginator instance.
+- `update_state(response: Response, data: Optional[List[Any]]) -> None`: This method updates the paginator's state based on the response of the API call. Typically, you extract pagination details (like the next page reference) from the response and store them in the paginator instance.
- `update_request(request: Request) -> None`: Before making the next API call in `RESTClient.paginate` method, `update_request` is used to modify the request with the necessary parameters to fetch the next page (based on the current state of the paginator). For example, you can add query parameters to the request, or modify the URL.
@@ -319,6 +348,7 @@ When working with APIs that use non-standard pagination schemes, or when you nee
Suppose an API uses query parameters for pagination, incrementing an page parameter for each subsequent page, without providing direct links to next pages in its responses. E.g. `https://api.example.com/posts?page=1`, `https://api.example.com/posts?page=2`, etc. Here's how you could implement a paginator for this scheme:
```py
+from typing import Any, List, Optional
from dlt.sources.helpers.rest_client.paginators import BasePaginator
from dlt.sources.helpers.requests import Response, Request
@@ -332,7 +362,7 @@ class QueryParamPaginator(BasePaginator):
# This will set the initial page number (e.g. page=1)
self.update_request(request)
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
# Assuming the API returns an empty list when no more data is available
if not response.json():
self._has_next_page = False
@@ -370,6 +400,7 @@ def get_data():
Some APIs use POST requests for pagination, where the next page is fetched by sending a POST request with a cursor or other parameters in the request body. This is frequently used in "search" API endpoints or other endpoints with big payloads. Here's how you could implement a paginator for a case like this:
```py
+from typing import Any, List, Optional
from dlt.sources.helpers.rest_client.paginators import BasePaginator
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.requests import Response, Request
@@ -379,7 +410,7 @@ class PostBodyPaginator(BasePaginator):
super().__init__()
self.cursor = None
- def update_state(self, response: Response) -> None:
+ def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
# Assuming the API returns an empty list when no more data is available
if not response.json():
self._has_next_page = False
diff --git a/tests/sources/helpers/rest_client/test_client.py b/tests/sources/helpers/rest_client/test_client.py
index f5de1ec5da..af914bf89d 100644
--- a/tests/sources/helpers/rest_client/test_client.py
+++ b/tests/sources/helpers/rest_client/test_client.py
@@ -400,7 +400,7 @@ def test_paginate_json_body_without_params(self, rest_client) -> None:
posts_skip = (DEFAULT_TOTAL_PAGES - 3) * DEFAULT_PAGE_SIZE
class JSONBodyPageCursorPaginator(BaseReferencePaginator):
- def update_state(self, response):
+ def update_state(self, response, data):
self._next_reference = response.json().get("next_page")
def update_request(self, request):
diff --git a/tests/sources/helpers/rest_client/test_paginators.py b/tests/sources/helpers/rest_client/test_paginators.py
index 8a3c136e09..5c9f484bbc 100644
--- a/tests/sources/helpers/rest_client/test_paginators.py
+++ b/tests/sources/helpers/rest_client/test_paginators.py
@@ -1,3 +1,4 @@
+from typing import Any, List
from unittest.mock import Mock
import pytest
@@ -16,6 +17,8 @@
from .conftest import assert_pagination
+NON_EMPTY_PAGE = [{"some": "data"}]
+
@pytest.mark.usefixtures("mock_api_server")
class TestHeaderLinkPaginator:
@@ -241,7 +244,7 @@ class TestOffsetPaginator:
def test_update_state(self):
paginator = OffsetPaginator(offset=0, limit=10)
response = Mock(Response, json=lambda: {"total": 20})
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
assert paginator.current_value == 10
assert paginator.has_next_page is True
@@ -252,7 +255,7 @@ def test_update_state(self):
def test_update_state_with_string_total(self):
paginator = OffsetPaginator(0, 10)
response = Mock(Response, json=lambda: {"total": "20"})
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
assert paginator.current_value == 10
assert paginator.has_next_page is True
@@ -260,13 +263,13 @@ def test_update_state_with_invalid_total(self):
paginator = OffsetPaginator(0, 10)
response = Mock(Response, json=lambda: {"total": "invalid"})
with pytest.raises(ValueError):
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
def test_update_state_without_total(self):
paginator = OffsetPaginator(0, 10)
response = Mock(Response, json=lambda: {})
with pytest.raises(ValueError):
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
def test_init_request(self):
paginator = OffsetPaginator(offset=123, limit=42)
@@ -280,7 +283,7 @@ def test_init_request(self):
response = Mock(Response, json=lambda: {"total": 200})
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
# Test for the next request
next_request = Mock(spec=Request)
@@ -294,11 +297,11 @@ def test_init_request(self):
def test_maximum_offset(self):
paginator = OffsetPaginator(offset=0, limit=50, maximum_offset=100, total_path=None)
response = Mock(Response, json=lambda: {"items": []})
- paginator.update_state(response) # Offset 0 to 50
+ paginator.update_state(response, data=NON_EMPTY_PAGE) # Offset 0 to 50
assert paginator.current_value == 50
assert paginator.has_next_page is True
- paginator.update_state(response) # Offset 50 to 100
+ paginator.update_state(response, data=NON_EMPTY_PAGE) # Offset 50 to 100
assert paginator.current_value == 100
assert paginator.has_next_page is False
@@ -312,28 +315,71 @@ def test_client_pagination(self, rest_client):
assert_pagination(pages)
+ def test_stop_after_empty_page(self):
+ paginator = OffsetPaginator(
+ offset=0,
+ limit=50,
+ maximum_offset=100,
+ total_path=None,
+ stop_after_empty_page=True,
+ )
+ response = Mock(Response, json=lambda: {"items": []})
+ no_data_found: List[Any] = []
+ paginator.update_state(response, no_data_found) # Page 1
+ assert paginator.has_next_page is False
+
+ def test_guarantee_termination(self):
+ OffsetPaginator(
+ limit=10,
+ total_path=None,
+ )
+
+ OffsetPaginator(
+ limit=10,
+ total_path=None,
+ maximum_offset=1,
+ stop_after_empty_page=False,
+ )
+
+ with pytest.raises(ValueError) as e:
+ OffsetPaginator(
+ limit=10,
+ total_path=None,
+ stop_after_empty_page=False,
+ )
+ assert e.match("`total_path` or `maximum_offset` or `stop_after_empty_page` must be provided")
+
+ with pytest.raises(ValueError) as e:
+ OffsetPaginator(
+ limit=10,
+ total_path=None,
+ stop_after_empty_page=False,
+ maximum_offset=None,
+ )
+ assert e.match("`total_path` or `maximum_offset` or `stop_after_empty_page` must be provided")
+
@pytest.mark.usefixtures("mock_api_server")
class TestPageNumberPaginator:
def test_update_state(self):
paginator = PageNumberPaginator(base_page=1, page=1, total_path="total_pages")
response = Mock(Response, json=lambda: {"total_pages": 3})
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
assert paginator.current_value == 2
assert paginator.has_next_page is True
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
assert paginator.current_value == 3
assert paginator.has_next_page is True
# Test for reaching the end
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
assert paginator.has_next_page is False
def test_update_state_with_string_total_pages(self):
paginator = PageNumberPaginator(base_page=1, page=1)
response = Mock(Response, json=lambda: {"total": "3"})
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
assert paginator.current_value == 2
assert paginator.has_next_page is True
@@ -341,37 +387,52 @@ def test_update_state_with_invalid_total_pages(self):
paginator = PageNumberPaginator(base_page=1, page=1)
response = Mock(Response, json=lambda: {"total_pages": "invalid"})
with pytest.raises(ValueError):
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
def test_update_state_without_total_pages(self):
paginator = PageNumberPaginator(base_page=1, page=1)
response = Mock(Response, json=lambda: {})
with pytest.raises(ValueError):
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
def test_update_request(self):
paginator = PageNumberPaginator(base_page=1, page=1, page_param="page")
request = Mock(Request)
response = Mock(Response, json=lambda: {"total": 3})
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
request.params = {}
paginator.update_request(request)
assert request.params["page"] == 2
- paginator.update_state(response)
+ paginator.update_state(response, data=NON_EMPTY_PAGE)
paginator.update_request(request)
assert request.params["page"] == 3
def test_maximum_page(self):
paginator = PageNumberPaginator(base_page=1, page=1, maximum_page=3, total_path=None)
response = Mock(Response, json=lambda: {"items": []})
- paginator.update_state(response) # Page 1
+ paginator.update_state(response, data=NON_EMPTY_PAGE) # Page 1
assert paginator.current_value == 2
assert paginator.has_next_page is True
- paginator.update_state(response) # Page 2
+ paginator.update_state(response, data=NON_EMPTY_PAGE) # Page 2
assert paginator.current_value == 3
assert paginator.has_next_page is False
+ def test_stop_after_empty_page(self):
+ paginator = PageNumberPaginator(
+ base_page=1,
+ page=1,
+ maximum_page=5,
+ stop_after_empty_page=True,
+ total_path=None,
+ )
+ response = Mock(Response, json=lambda: {"items": []})
+ no_data_found: List[Any] = []
+ assert paginator.has_next_page is True
+ paginator.update_state(response, no_data_found)
+ assert paginator.current_value == 1
+ assert paginator.has_next_page is False
+
def test_client_pagination_one_based(self, rest_client):
pages_iter = rest_client.paginate(
"/posts",
@@ -402,6 +463,32 @@ def test_client_pagination_zero_based(self, rest_client):
assert_pagination(pages)
+ def test_guarantee_termination(self):
+ PageNumberPaginator(
+ total_path=None,
+ )
+
+ PageNumberPaginator(
+ total_path=None,
+ maximum_page=1,
+ stop_after_empty_page=False,
+ )
+
+ with pytest.raises(ValueError) as e:
+ PageNumberPaginator(
+ total_path=None,
+ stop_after_empty_page=False,
+ )
+ assert e.match("`total_path` or `maximum_page` or `stop_after_empty_page` must be provided")
+
+ with pytest.raises(ValueError) as e:
+ PageNumberPaginator(
+ total_path=None,
+ stop_after_empty_page=False,
+ maximum_page=None,
+ )
+ assert e.match("`total_path` or `maximum_page` or `stop_after_empty_page` must be provided")
+
@pytest.mark.usefixtures("mock_api_server")
class TestJSONResponseCursorPaginator: