Skip to content

Commit

Permalink
fix: forward the session token to s3fs (#374)
Browse files Browse the repository at this point in the history
* fix: forward the session token to s3fs

* this edge case can be ignore

* feat: update the codebase + added a unit test

* fix: uncomment while testing

* fix: pass None instead of empty dict on client_kwargs when it's empty

* keep client inputs as they want it to be passed

* feat: more more more test cases

* fix: do not mutate user input

Signed-off-by: Luka Peschke <[email protected]>

* fxi tests

Signed-off-by: Luka Peschke <[email protected]>

---------

Signed-off-by: Luka Peschke <[email protected]>
Co-authored-by: Luka Peschke <[email protected]>
  • Loading branch information
Sanix-Darker and lukapeschke authored Apr 3, 2023
1 parent 9ca53c8 commit 6590482
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
9 changes: 8 additions & 1 deletion peakina/io/s3/s3_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,14 @@ def _s3_open_file_with_retries(fs: s3fs.S3FileSystem, path: str, retries: int) -
def s3_open(url: str, *, client_kwargs: dict[str, Any] | None = None) -> IO[bytes]:
"""opens a s3 url and returns a file-like object"""
access_key, secret, bucketname, objectname = parse_s3_url(url)
fs = s3fs.S3FileSystem(key=access_key, secret=secret, client_kwargs=client_kwargs)

token = None
if client_kwargs is not None and "session_token" in client_kwargs:
token = client_kwargs["session_token"]
client_kwargs = {k: v for k, v in client_kwargs.items() if k != "session_token"} or None

fs = s3fs.S3FileSystem(key=access_key, secret=secret, client_kwargs=client_kwargs, token=token)

path = f"{bucketname}/{objectname}"
ret = tempfile.NamedTemporaryFile(suffix=".s3tmp")
file = _s3_open_file_with_retries(fs, path, 3)
Expand Down
4 changes: 3 additions & 1 deletion tests/io/s3/test_s3_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def test_s3_fetcher_open_retry(s3_fetcher, s3_endpoint_url, mocker):
s3_client.upload_file("tests/fixtures/for_retry_0_0.csv", "mybucket", "for_retry_0_0.csv")

class S3FileSystemThatFailsOpen(S3FileSystem): # type:ignore[misc]
def __init__(self, key: str, secret: str, client_kwargs: dict[str, Any]) -> None:
def __init__(
self, key: str, secret: str, client_kwargs: dict[str, Any], token: str | None
) -> None:
super().__init__(key=key, secret=secret, client_kwargs=client_kwargs)
self.invalidated_cache = False

Expand Down
37 changes: 37 additions & 0 deletions tests/io/s3/test_s3_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import io
from unittest.mock import MagicMock

from pytest import raises
from pytest_mock import MockerFixture

from peakina.io.s3.s3_utils import parse_s3_url as pu, s3_open

Expand Down Expand Up @@ -54,3 +56,38 @@ def test_s3_open(mocker):
logger_mock.info.assert_called_once_with("opening mybucket/file.csv")
assert tmpfile.name.endswith(".s3tmp")
assert tmpfile.read() == b"a,b\n0,1\n"


def test_s3_open_with_token(mocker: MockerFixture) -> None:
tempfile_mock = MagicMock()
mocker.patch("tempfile.NamedTemporaryFile", return_value=tempfile_mock)
mocker.patch("peakina.io.s3.s3_utils._s3_open_file_with_retries")
s3fs_file_system = mocker.patch("s3fs.S3FileSystem")
s3fs_file_system.return_value.open.return_value = io.BytesIO(b"a,b\n0,1\n")

# called with a session_token and something else
s3_open(
"s3://my_key:my_secret@mybucket/file.csv",
client_kwargs={"something": "else", "session_token": "xxxx"},
)
s3fs_file_system.assert_called_once_with(
token="xxxx", secret="my_secret", key="my_key", client_kwargs={"something": "else"}
)

# called with just the session token
s3_open("s3://my_key:my_secret@mybucket/file.csv", client_kwargs={"session_token": "xxxx"})
s3fs_file_system.assert_called_with(
token="xxxx", secret="my_secret", key="my_key", client_kwargs=None
)

# called with an empty dict
s3_open("s3://my_key:my_secret@mybucket/file.csv", client_kwargs={})
s3fs_file_system.assert_called_with(
secret="my_secret", key="my_key", token=None, client_kwargs={}
)

# called with None
s3_open("s3://my_key:my_secret@mybucket/file.csv", client_kwargs=None)
s3fs_file_system.assert_called_with(
secret="my_secret", key="my_key", token=None, client_kwargs=None
)

0 comments on commit 6590482

Please sign in to comment.