✨(backends) add max_statements option to AsyncLRSHTTP.read
This PR adds an option allowing the user to specify the maximum number of
statements to be returned when using `AsyncLRSHTTP.read`. This feature
was requested to make the library more convenient to use.
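
A minimal usage sketch of the new option (the LRS URL and credentials below are placeholders, not values from this repository):

```python
import asyncio

from ralph.backends.http.async_lrs import AsyncLRSHTTP


async def main():
    # Placeholder connection details; point these at your own LRS.
    backend = AsyncLRSHTTP(
        base_url="http://localhost:8100", username="ralph", password="secret"
    )
    # Yield at most 100 statements, however many the LRS actually holds.
    async for statement in backend.read(max_statements=100):
        print(statement)


asyncio.run(main())
```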
Leobouloc committed Aug 29, 2023
1 parent 13075ef commit a834290
Showing 5 changed files with 109 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@ have an authority field matching that of the user
in the video and virtual classroom profiles. [BC]
- Backends: `LRSHTTP` methods must not be used in `asyncio` events loop (BC)
- Add variable to override PVC name in arnold deployment
- Backends: add `max_statements` option to `AsyncLRSHTTP`

## [3.9.0] - 2023-07-21

41 changes: 25 additions & 16 deletions src/ralph/backends/http/async_lrs.py
@@ -10,6 +10,7 @@
from httpx import AsyncClient, HTTPError, HTTPStatusError, RequestError
from more_itertools import chunked
from pydantic import AnyHttpUrl, BaseModel, parse_obj_as
from pydantic.types import PositiveInt

from ralph.conf import LRSHeaders, settings
from ralph.exceptions import BackendException, BackendParameterException
@@ -105,10 +106,11 @@ async def read( # pylint: disable=too-many-arguments
self,
query: Union[str, LRSQuery] = None,
target: str = None,
chunk_size: Union[None, int] = 500,
chunk_size: Optional[PositiveInt] = 500,
raw_output: bool = False,
ignore_errors: bool = False,
greedy: bool = True,
max_statements: Optional[PositiveInt] = None,
) -> Iterator[Union[bytes, dict]]:
"""Get statements from LRS `target` endpoint.
@@ -134,6 +136,7 @@ async def read( # pylint: disable=too-many-arguments
before the statements yielded by the generator are consumed. Caution:
this might potentially lead to large amounts of API calls and to the
memory filling up.
max_statements: The maximum number of statements to yield.
"""
if not target:
target = self.statements_endpoint
@@ -155,18 +158,24 @@ async def read( # pylint: disable=too-many-arguments
fragment="",
).geturl()

# Select the appropriate fetch function
if greedy:
statements_async_generator = self._greedy_fetch_statements(
target, raw_output, query_params
)
else:
statements_async_generator = self._fetch_statements(
target=target, raw_output=raw_output, query_params=query_params
)

# Iterate through results
counter = 0
try:
if greedy:
async for statement in self._greedy_fetch_statements(
target, raw_output, query_params
):
yield statement
else:
async for statement in self._fetch_statements(
target=target, raw_output=raw_output, query_params=query_params
):
yield statement

async for statement in statements_async_generator:
if max_statements and (counter >= max_statements):
break
yield statement
counter += 1
except HTTPError as error:
msg = "Failed to fetch statements."
logger.error("%s. %s", msg, error)
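
For reference, the capping logic added above is a plain counter-and-break guard over the selected async generator. A self-contained sketch of the same pattern, using a hypothetical `fake_statements` stand-in rather than the real backend fetchers:

```python
import asyncio
from typing import AsyncIterator, Optional


async def fake_statements(total: int) -> AsyncIterator[dict]:
    """Stand-in for _fetch_statements / _greedy_fetch_statements."""
    for i in range(total):
        yield {"id": i}


async def read_capped(max_statements: Optional[int] = None) -> list:
    """Collect statements, stopping once the optional cap is reached."""
    results = []
    counter = 0
    async for statement in fake_statements(10):
        # Same guard as in AsyncLRSHTTP.read: break once the cap is hit.
        if max_statements and (counter >= max_statements):
            break
        results.append(statement)
        counter += 1
    return results


print(len(asyncio.run(read_capped(max_statements=4))))  # 4
print(len(asyncio.run(read_capped())))                  # 10 (no cap)
```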
@@ -175,12 +184,12 @@
async def write( # pylint: disable=too-many-arguments
self,
data: Union[Iterable[bytes], Iterable[dict]],
target: Union[None, str] = None,
chunk_size: Union[None, int] = 500,
target: Optional[str] = None,
chunk_size: Optional[PositiveInt] = 500,
ignore_errors: bool = False,
operation_type: Union[None, OperationType] = None,
operation_type: Optional[OperationType] = None,
simultaneous: bool = False,
max_num_simultaneous: Union[None, int] = None,
max_num_simultaneous: Optional[PositiveInt] = None,
) -> int:
"""Write `data` records to the `target` endpoint and return their count.
13 changes: 9 additions & 4 deletions src/ralph/backends/http/base.py
@@ -7,6 +7,7 @@
from typing import Iterator, List, Optional, Union

from pydantic import BaseModel, ValidationError
from pydantic.types import PositiveInt

from ralph.exceptions import BackendParameterException

@@ -114,19 +115,23 @@ async def read( # pylint: disable=too-many-arguments
self,
query: Union[str, BaseQuery] = None,
target: str = None,
chunk_size: Union[None, int] = 500,
chunk_size: Optional[PositiveInt] = 500,
raw_output: bool = False,
ignore_errors: bool = False,
greedy: bool = False,
max_statements: Optional[PositiveInt] = None,
) -> Iterator[Union[bytes, dict]]:
"""Yield records read from the HTTP response results."""

@abstractmethod
async def write( # pylint: disable=too-many-arguments
self,
data: Union[List[bytes], List[dict]],
target: Union[None, str] = None,
chunk_size: Union[None, int] = 500,
target: Optional[str] = None,
chunk_size: Optional[PositiveInt] = 500,
ignore_errors: bool = False,
operation_type: Union[None, OperationType] = None,
operation_type: Optional[OperationType] = None,
simultaneous: bool = False,
max_num_simultaneous: Optional[int] = None,
) -> int:
"""Writes statements into the HTTP server given an input endpoint."""
73 changes: 70 additions & 3 deletions tests/backends/http/test_async_lrs.py
@@ -195,12 +195,79 @@ async def test_backends_http_lrs_list(caplog):
) in caplog.record_tuples


@pytest.mark.parametrize("max_statements", [None, 2, 4, 8])
@pytest.mark.anyio
async def test_backends_http_lrs_read_max_statements(
httpx_mock: HTTPXMock, max_statements: int
):
"""Test the LRS backend `read` method `max_statements` property."""

base_url = "http://fake-lrs.com"
target = "/xAPI/statements/"
more_target = "/xAPI/statements/?pit_id=fake-pit-id"

chunk_size = 3

statements = {
"statements": [_gen_statement() for _ in range(chunk_size)],
"more": more_target,
}
more_statements = {
"statements": [_gen_statement() for _ in range(chunk_size)],
}

# Mock GET response of HTTPX for target and "more" target without query parameter
params = {"limit": 500}
httpx_mock.add_response(
url=ParseResult(
scheme=urlparse(base_url).scheme,
netloc=urlparse(base_url).netloc,
path=target,
query=urlencode(params),
params="",
fragment="",
).geturl(),
method="GET",
json=statements,
)

if (max_statements is None) or (max_statements > chunk_size):
params.update(dict(parse_qsl(urlparse(more_target).query)))
httpx_mock.add_response(
url=ParseResult(
scheme=urlparse(base_url).scheme,
netloc=urlparse(base_url).netloc,
path=urlparse(more_target).path,
query=urlencode(params),
params="",
fragment="",
).geturl(),
method="GET",
json=more_statements,
)

backend = AsyncLRSHTTP(base_url=base_url, username="user", password="pass")

# Return an iterable of dict
result = await _unpack_async_generator(
backend.read(target=target, max_statements=max_statements)
)
all_statements = statements["statements"] + more_statements["statements"]
assert len(all_statements) == 6

# Assert that result is of the proper length
if max_statements is None:
assert result == all_statements
else:
assert result == all_statements[:max_statements]


@pytest.mark.parametrize("greedy", [False, True])
@pytest.mark.anyio
async def test_backends_http_lrs_read_without_target(
httpx_mock: HTTPXMock, greedy: bool
):
"""Tests the LRS backend `read` method without target parameter value fetches
"""Test that the LRS backend `read` method without target parameter value fetches
statements from '/xAPI/statements' default endpoint.
"""

@@ -743,7 +810,7 @@ async def test_backends_http_lrs_write_backend_exception(
# Asynchronicity tests for dev purposes (skip in CI)


@pytest.mark.skip(reason="Timing based tests are too unstable to run in CI")
@pytest.mark.skip(reason="Timing based tests are too unstable to run in CI.")
@pytest.mark.anyio
@pytest.mark.parametrize(
"num_pages,chunk_size,network_latency_time", [(3, 3, 0.2), (10, 3, 0.2)]
@@ -829,7 +896,7 @@ async def _simulate_slow_processing():
await _simulate_slow_processing()
duration_greedy = time.time() - time_2

# Assert gains are close enough to theoritical gains
# Assert gains are close enough to theoretical gains
proximity_ratio = 0.9
assert (
duration_non_greedy
22 changes: 4 additions & 18 deletions tests/backends/http/test_base.py
@@ -1,8 +1,8 @@
"""Tests for Ralph base HTTP backend."""

from typing import Iterator, List, Union
from typing import Iterator, Union

from ralph.backends.http.base import BaseHTTP, BaseQuery, OperationType
from ralph.backends.http.base import BaseHTTP, BaseQuery


def test_backends_http_base_abstract_interface_with_implemented_abstract_method():
@@ -21,24 +21,10 @@ async def list(
) -> Iterator[Union[str, dict]]:
"""Fakes the list method."""

async def read( # pylint: disable=too-many-arguments
self,
query: Union[str, BaseQuery] = None,
target: str = None,
chunk_size: Union[None, int] = None,
raw_output: bool = False,
ignore_errors: bool = False,
):
async def read(self): # pylint: disable=arguments-differ
"""Fakes the read method."""

async def write( # pylint: disable=too-many-arguments
self,
data: Union[List[bytes], List[dict]],
target: Union[None, str] = None,
chunk_size: Union[None, int] = 500,
ignore_errors: bool = False,
operation_type: Union[None, OperationType] = None,
):
async def write(self): # pylint: disable=arguments-differ
"""Fakes the write method."""

GoodStorage()
