Skip to content

Commit

Permalink
feat: add pagination configs for HTTP API Connector (TCTC-9227) (#1794)
Browse files Browse the repository at this point in the history
* feat: add pagination configs for HTTP API Connector

* feat: add pagination type for front configuration

* feat: add hyper_media pagination, options to page based pagination and apply review remarks

* test: add tests for better coverage

* fix: set pagination config base class as full abstract

* fix: add kind const to pagination configs and hide Noop config to end-users

* fix: mark pagination config kind as required

* fix: remove ui hidden option and default value from kind attribute

* feat(HttpAPI): use discriminator field for paginationsConfigs

* feat: move pagination config from connector to data source

* feat(HttpAPI): re-hide kind field from the UI

* doc: add pagination config feature to changelog

---------

Co-authored-by: David Nowinsky <[email protected]>
  • Loading branch information
julien-pinchelimouroux and davinov authored Oct 28, 2024
1 parent 70e5201 commit 1c555cf
Show file tree
Hide file tree
Showing 8 changed files with 712 additions and 102 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

### Changed

- HTTP API: Add a `PaginationConfig` to `HttpAPIDataSource` in order to handle API pagination and fetch all data. It supports the following kinds of pagination: page-based, cursor-based, offset-limit and hypermedia.

## [7.0.3] 2024-10-04

### Fix
Expand Down
314 changes: 311 additions & 3 deletions tests/http_api/test_http_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,18 @@
from pytest_mock import MockFixture

from toucan_connectors.common import transform_with_jq
from toucan_connectors.http_api.http_api_connector import Auth, HttpAPIConnector, HttpAPIDataSource
from toucan_connectors.http_api.http_api_connector import (
Auth,
HttpAPIConnector,
HttpAPIConnectorError,
HttpAPIDataSource,
)
from toucan_connectors.http_api.pagination_configs import (
CursorBasedPaginationConfig,
HyperMediaPaginationConfig,
OffsetLimitPaginationConfig,
PageBasedPaginationConfig,
)
from toucan_connectors.json_wrapper import JsonWrapper


Expand Down Expand Up @@ -49,6 +60,32 @@ def auth():
return Auth(type="basic", args=["username", "password"])


@pytest.fixture(scope="function")
def offset_pagination() -> OffsetLimitPaginationConfig:
    """Provide an offset/limit pagination config with custom parameter names and a limit of 5."""
    config = OffsetLimitPaginationConfig(
        kind="OffsetLimitPaginationConfig",
        offset_name="super_offset",
        limit_name="super_limit",
        limit=5,
    )
    return config


@pytest.fixture(scope="function")
def page_pagination() -> PageBasedPaginationConfig:
    """Provide a page-based pagination config starting at page 1 with 2 items per page."""
    config = PageBasedPaginationConfig(
        kind="PageBasedPaginationConfig",
        page_name="my_page",
        per_page_name="my_per_page",
        per_page=2,
        page=1,
    )
    return config


@pytest.fixture(scope="function")
def cursor_pagination() -> CursorBasedPaginationConfig:
    """Provide a cursor-based pagination config reading the cursor from `.metadata.next_cursor`."""
    config = CursorBasedPaginationConfig(
        kind="CursorBasedPaginationConfig",
        cursor_name="my_cursor",
        cursor_filter=".metadata.next_cursor",
    )
    return config


@pytest.fixture(scope="function")
def hyper_media_pagination() -> HyperMediaPaginationConfig:
    """Provide a hypermedia pagination config reading the next link from `.metadata.next_link`."""
    config = HyperMediaPaginationConfig(
        kind="HyperMediaPaginationConfig",
        next_link_filter=".metadata.next_link",
    )
    return config


def test_transform_with_jq():
assert transform_with_jq(data=[1, 2, 3], jq_filter=".[]+1") == [2, 3, 4]
assert transform_with_jq([[1, 2, 3]], ".[]") == [1, 2, 3]
Expand Down Expand Up @@ -82,7 +119,278 @@ def test_get_df_with_auth(connector, data_source, auth):


@responses.activate
def test_get_df_with_parameters(connector, data_source, mocker):
def test_get_df_with_offset_pagination(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, offset_pagination: OffsetLimitPaginationConfig
) -> None:
    """Three mocked pages (5, 5 and 2 rows) are all fetched and combined into one dataframe."""
    mocked_pages = [
        # first page
        (
            "https://jsonplaceholder.typicode.com/comments?super_offset=0&super_limit=5",
            [{"a": 1}, {"a": 2}, {"a": 3}, {"a": 4}, {"a": 5}],
        ),
        # second page
        (
            "https://jsonplaceholder.typicode.com/comments?super_offset=5&super_limit=5",
            [{"a": 6}, {"a": 7}, {"a": 8}, {"b": 9}, {"b": 10}],
        ),
        # last page
        (
            "https://jsonplaceholder.typicode.com/comments?super_offset=10&super_limit=5",
            [{"b": 11}, {"b": 12}],
        ),
    ]
    for page_url, rows in mocked_pages:
        responses.add(responses.GET, page_url, json=rows)

    data_source.http_pagination_config = offset_pagination
    frame = connector.get_df(data_source)
    assert frame.shape == (12, 2)
    assert len(responses.calls) == 3


@responses.activate
def test_get_df_with_page_pagination(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, page_pagination: PageBasedPaginationConfig
) -> None:
    """With a max-page jq filter set, both mocked pages are fetched and combined."""
    page_pagination.max_page_filter = ".metadata.number_of_pages"

    first_page = {
        "content": [{"a": 1}, {"a": 2}],
        "metadata": {"number_of_pages": 2},
    }
    next_page = {
        "content": [{"a": 3}, {"a": 4}],
        "metadata": {"number_of_pages": 2},
    }
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?my_page=1&my_per_page=2",
        json=first_page,
    )
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?my_page=2&my_per_page=2",
        json=next_page,
    )

    data_source.filter = ".content"
    data_source.http_pagination_config = page_pagination
    frame = connector.get_df(data_source)
    assert frame.shape == (4, 1)
    assert len(responses.calls) == 2


@responses.activate
def test_get_df_with_page_pagination_which_can_raise(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, page_pagination: PageBasedPaginationConfig
) -> None:
    """With `can_raise_not_found` enabled, a 404 on the third page still yields the first two pages."""
    page_pagination.can_raise_not_found = True

    first_page = {"content": [{"a": 1}, {"a": 2}]}
    next_page = {"content": [{"a": 3}, {"a": 4}]}
    not_found = {"error": "not found"}

    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?my_page=1&my_per_page=2",
        json=first_page,
    )
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?my_page=2&my_per_page=2",
        json=next_page,
    )
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?my_page=3&my_per_page=2",
        json=not_found,
        status=404,
    )

    data_source.filter = ".content"
    data_source.http_pagination_config = page_pagination
    frame = connector.get_df(data_source)
    assert frame.shape == (4, 1)
    assert len(responses.calls) == 3


@responses.activate
def test_get_df_with_cursor_pagination(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, cursor_pagination: CursorBasedPaginationConfig
) -> None:
    """The cursor returned by the first page is sent as `my_cursor` to fetch the second page."""
    first_page = {
        "content": [{"a": 1}, {"a": 2}],
        "metadata": {"next_cursor": "super_cursor_22222", "number_of_results": 4},
    }
    # The second page carries no `next_cursor` in its metadata.
    next_page = {
        "content": [{"a": 3}, {"a": 4}],
        "metadata": {"number_of_results": 4},
    }
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments",
        json=first_page,
    )
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?my_cursor=super_cursor_22222",
        json=next_page,
    )

    data_source.http_pagination_config = cursor_pagination
    data_source.filter = ".content"
    frame = connector.get_df(data_source)
    assert frame.shape == (4, 1)
    assert len(responses.calls) == 2


@responses.activate
def test_get_df_with_hyper_media_pagination(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, hyper_media_pagination: HyperMediaPaginationConfig
) -> None:
    """The `next_link` URL returned by the first page is requested to fetch the second page."""
    first_page = {
        "content": [{"a": 1}, {"a": 2}],
        "metadata": {
            "next_link": "https://jsonplaceholder.typicode.com/comments/next_link?token=12341243&custom=yes",
            "number_of_results": 4,
        },
    }
    # The second page carries no `next_link` in its metadata.
    next_page = {
        "content": [{"a": 3}, {"a": 4}],
        "metadata": {"number_of_results": 4},
    }
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?custom=yes",
        json=first_page,
    )
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments/next_link?token=12341243&custom=yes",
        json=next_page,
    )

    data_source.http_pagination_config = hyper_media_pagination
    data_source.filter = ".content"
    data_source.params = {"custom": "yes"}
    frame = connector.get_df(data_source)
    assert frame.shape == (4, 1)
    assert len(responses.calls) == 2


@responses.activate
def test_hyper_media_pagination_raise_if_bad_next_link(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, hyper_media_pagination: HyperMediaPaginationConfig
) -> None:
    """A `next_link` that resolves to an object instead of a plain value raises a ValueError."""
    first_page = {
        "content": [{"a": 1}, {"a": 2}],
        "metadata": {
            "next_link": {"real_link": "my_link"},
            "number_of_results": 4,
        },
    }
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments?custom=yes",
        json=first_page,
    )

    data_source.http_pagination_config = hyper_media_pagination
    data_source.filter = ".content"
    data_source.params = {"custom": "yes"}

    expected_message = "Invalid next link value. Link can't be a complex value, got: {'real_link': 'my_link'}"
    with pytest.raises(ValueError) as exc_info:
        connector.get_df(data_source)
    assert str(exc_info.value) == expected_message


@responses.activate
def test_ignore_if_cant_parse_next_pagination_info(
    connector: HttpAPIConnector, data_source: HttpAPIDataSource, hyper_media_pagination: HyperMediaPaginationConfig
) -> None:
    """A response without the expected `metadata` field still yields the first page's rows."""
    only_page = [{"a": 1}, {"a": 2}]
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments",
        json=only_page,
    )

    # The pagination config needs a 'metadata' field to retrieve the next link...
    data_source.http_pagination_config = hyper_media_pagination
    # ...but the call is OK even if 'metadata' is missing in the API response.
    frame = connector.get_df(data_source)
    assert frame.shape == (2, 1)
    assert len(responses.calls) == 1


@responses.activate
def test_raises_http_error_on_too_many_requests(connector: HttpAPIConnector, data_source: HttpAPIDataSource) -> None:
    """An HTTP 429 response is reported as an HttpAPIConnectorError with a rate-limit message."""
    throttled_payload = [{"a": 1}, {"a": 2}]
    responses.add(
        responses.GET,
        "https://jsonplaceholder.typicode.com/comments",
        json=throttled_payload,
        status=429,
    )

    expected_message = (
        "Failed to retrieve data: the connector tried to perform too many requests."
        " Please check your API call limitations."
    )
    with pytest.raises(HttpAPIConnectorError) as exc_info:
        connector.get_df(data_source)
    assert str(exc_info.value) == expected_message


@responses.activate
def test_get_df_with_parameters(connector, data_source):
data_source.parameters = {"first_name": "raphael"}
data_source.headers = {"name": "%(first_name)s"}

Expand Down Expand Up @@ -462,7 +770,7 @@ def test_get_cache_key(connector, auth, data_source):
data_source.parameters = {"first_name": "raphael"}
key = connector.get_cache_key(data_source)

assert key == "f24af0b5-f745-3961-8aec-a27d44543fb9"
assert key == "9ef95981-2aab-3f7f-89d1-b0a300d16f14"

data_source.headers = {"name": "{{ first_name }}"} # change the templating style
key2 = connector.get_cache_key(data_source)
Expand Down
12 changes: 9 additions & 3 deletions toucan_connectors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,15 @@ def transform_with_jq(data: object, jq_filter: str) -> list:
return data


# User-facing help text for jq filter fields; used as the `description` of `FilterSchema`.
# The value is HTML (it embeds a link to the jq manual).
FilterSchemaDescription: str = (
    "You can apply filters to json response if data is nested. As we rely on a "
    "library called jq, we suggest you refer to the dedicated "
    '<a href="https://stedolan.github.io/jq/manual/">documentation</a>'
)

# Field definition for jq filters, created with "." as its first argument.
# NOTE(review): this span is a diff hunk — the three inline description lines are the
# removed version and `description=FilterSchemaDescription` is the added replacement.
FilterSchema = Field(
".",
description="You can apply filters to json response if data is nested. As we rely on a "
"library called jq, we suggest the refer to the dedicated "
'<a href="https://stedolan.github.io/jq/manual/">documentation</a>',
description=FilterSchemaDescription,
)

XpathSchema = Field(
Expand All @@ -273,6 +277,8 @@ def transform_with_jq(data: object, jq_filter: str) -> list:
'<a href="https://developer.mozilla.org/en-US/docs/Web/XPath">documentation</a>',
)

# Reusable metadata mapping that sets the "ui.hidden" flag.
# NOTE(review): presumably attached to schema fields that must not be shown in the UI — confirm against usages.
UI_HIDDEN: dict[str, Any] = {"ui.hidden": True}


def get_loop():
"""Sets up event loop"""
Expand Down
Loading

0 comments on commit 1c555cf

Please sign in to comment.