Skip to content

Commit

Permalink
Implement end-user-facing Python lib wrapping ObjectStore (#240)
Browse files Browse the repository at this point in the history
* sketch of python object-store api

* put in arc

* Progress on Python API

* Added Error enum to pyo3-object_store

* cleaner signer impl

* add delete

* Add list

* Update get and list

* get

* copy

* Working put!!

* generalize put a bit

* Head

* Rename

* remove empty file

* Rename back to put_file

* Configurable chunk size for multipart upload

* multipart visibility
  • Loading branch information
kylebarron authored Oct 17, 2024
1 parent 45be4a1 commit 246c135
Show file tree
Hide file tree
Showing 36 changed files with 1,429 additions and 31 deletions.
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ members = [
"arro3-compute",
"arro3-core",
"arro3-io",
"object-store-rs",
"pyo3-arrow",
"pyo3-object_store",
]
Expand All @@ -29,12 +30,16 @@ arrow-csv = "53"
arrow-ipc = { version = "53", features = ["lz4", "zstd"] }
arrow-schema = "53"
arrow-select = "53"
bytes = "1.7.0"
half = "2"
indexmap = "2"
numpy = "0.22"
object_store = "0.11"
parquet = "53"
pyo3 = { version = "0.22", features = ["macros", "indexmap"] }
pyo3-async-runtimes = { git = "https://github.com/PyO3/pyo3-async-runtimes", features = [
"tokio-runtime",
] }
pyo3-file = "0.9"
thiserror = "1"

Expand Down
4 changes: 2 additions & 2 deletions arro3-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ arrow-buffer = { workspace = true }
arrow-csv = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
bytes = "1.7.0"
bytes = { workspace = true }
futures = { version = "0.3.30", optional = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true }
pyo3 = { workspace = true }
pyo3-arrow = { path = "../pyo3-arrow" }
pyo3-async-runtimes = { git = "https://github.com/PyO3/pyo3-async-runtimes", features = [
pyo3-async-runtimes = { workspace = true, features = [
"tokio-runtime",
], optional = true }
pyo3-file = { workspace = true }
Expand Down
36 changes: 36 additions & 0 deletions object-store-rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
[package]
name = "object-store-rs"
version = { workspace = true }
authors = { workspace = true }
edition = { workspace = true }
description = "Core library for representing Arrow data in Python."
readme = "README.md"
repository = { workspace = true }
homepage = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
categories = { workspace = true }
rust-version = { workspace = true }

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "_object_store_rs"
crate-type = ["cdylib"]

[dependencies]
bytes = { workspace = true }
chrono = "0.4.38"
futures = "0.3.31"
http = "1.1"
object_store = { workspace = true }
pyo3 = { workspace = true, features = ["chrono"] }
pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] }
pyo3-file = { workspace = true }
pyo3-object_store = { path = "../pyo3-object_store" }
tokio = { version = "1.40", features = [
"macros",
"rt",
"rt-multi-thread",
"sync",
] }
url = "2"
1 change: 1 addition & 0 deletions object-store-rs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# object-store-rs
20 changes: 20 additions & 0 deletions object-store-rs/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[build-system]
requires = ["maturin>=1.4.0,<2.0"]
build-backend = "maturin"

[project]
name = "object-store-rs"
requires-python = ">=3.9"
dependencies = []
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]

[tool.maturin]
features = ["pyo3/extension-module"]
module-name = "object_store_rs._object_store_rs"
python-source = "python"
strip = true
4 changes: 4 additions & 0 deletions object-store-rs/python/object_store_rs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._object_store_rs import *
from ._object_store_rs import ___version

__version__: str = ___version()
6 changes: 6 additions & 0 deletions object-store-rs/python/object_store_rs/_copy.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ._pyo3_object_store import ObjectStore

def copy(store: ObjectStore, from_: str, to: str) -> None: ...
async def copy_async(store: ObjectStore, from_: str, to: str) -> None: ...
def copy_if_not_exists(store: ObjectStore, from_: str, to: str) -> None: ...
async def copy_if_not_exists_async(store: ObjectStore, from_: str, to: str) -> None: ...
4 changes: 4 additions & 0 deletions object-store-rs/python/object_store_rs/_delete.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._pyo3_object_store import ObjectStore

def delete(store: ObjectStore, location: str) -> None: ...
async def delete_async(store: ObjectStore, location: str) -> None: ...
114 changes: 114 additions & 0 deletions object-store-rs/python/object_store_rs/_get.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from datetime import datetime
from typing import Sequence, TypedDict

from ._list import ObjectMeta
from ._pyo3_object_store import ObjectStore
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign_url as sign_url
from ._sign import sign_url_async as sign_url_async

class GetOptions(TypedDict):
"""Options for a get request, such as range"""

if_match: str | None
"""
Request will succeed if the `ObjectMeta::e_tag` matches
otherwise returning [`Error::Precondition`]
See <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match>
Examples:
```text
If-Match: "xyzzy"
If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
If-Match: *
```
"""

if_none_match: str | None
"""
Request will succeed if the `ObjectMeta::e_tag` does not match
otherwise returning [`Error::NotModified`]
See <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2>
Examples:
```text
If-None-Match: "xyzzy"
If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
If-None-Match: *
```
"""

if_unmodified_since: datetime | None
"""
Request will succeed if the object has been modified since
<https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.3>
"""

if_modified_since: datetime | None
"""
Request will succeed if the object has not been modified since
otherwise returning [`Error::Precondition`]
Some stores, such as S3, will only return `NotModified` for exact
timestamp matches, instead of for any timestamp greater than or equal.
<https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.4>
"""

# range:
"""
Request transfer of only the specified range of bytes
otherwise returning [`Error::NotModified`]
<https://datatracker.ietf.org/doc/html/rfc9110#name-range>
"""

version: str | None
"""
Request a particular object version
"""

head: bool
"""
Request transfer of no content
<https://datatracker.ietf.org/doc/html/rfc9110#name-head>
"""

class GetResult:
def bytes(self) -> bytes:
"""
Collects the data into bytes
"""

async def bytes_async(self) -> bytes:
"""
Collects the data into bytes
"""

@property
def meta(self) -> ObjectMeta:
"""The ObjectMeta for this object"""

def get(
store: ObjectStore, location: str, *, options: GetOptions | None = None
) -> GetResult: ...
async def get_async(
store: ObjectStore, location: str, *, options: GetOptions | None = None
) -> GetResult: ...
def get_range(store: ObjectStore, location: str, offset: int, length: int) -> bytes: ...
async def get_range_async(
store: ObjectStore, location: str, offset: int, length: int
) -> bytes: ...
def get_ranges(
store: ObjectStore, location: str, offset: Sequence[int], length: Sequence[int]
) -> bytes: ...
async def get_ranges_async(
store: ObjectStore, location: str, offset: Sequence[int], length: Sequence[int]
) -> bytes: ...
5 changes: 5 additions & 0 deletions object-store-rs/python/object_store_rs/_head.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from ._list import ObjectMeta
from ._pyo3_object_store import ObjectStore

def head(store: ObjectStore, location: str) -> ObjectMeta: ...
async def head_async(store: ObjectStore, location: str) -> ObjectMeta: ...
45 changes: 45 additions & 0 deletions object-store-rs/python/object_store_rs/_list.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import datetime
from typing import List, TypedDict

from ._pyo3_object_store import ObjectStore
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign_url as sign_url
from ._sign import sign_url_async as sign_url_async

class ObjectMeta(TypedDict):
location: str
"""The full path to the object"""

last_modified: datetime
"""The last modified time"""

size: int
"""The size in bytes of the object"""

e_tag: str | None
"""The unique identifier for the object
<https://datatracker.ietf.org/doc/html/rfc9110#name-etag>
"""

version: str | None
"""A version indicator for this object"""

class ListResult(TypedDict):
common_prefixes: List[str]
"""Prefixes that are common (like directories)"""

objects: List[ObjectMeta]
"""Object metadata for the listing"""

def list(store: ObjectStore, prefix: str | None = None) -> List[ObjectMeta]: ...
async def list_async(
store: ObjectStore, prefix: str | None = None
) -> List[ObjectMeta]: ...
def list_with_delimiter(
store: ObjectStore, prefix: str | None = None
) -> ListResult: ...
async def list_with_delimiter_async(
store: ObjectStore, prefix: str | None = None
) -> ListResult: ...
32 changes: 32 additions & 0 deletions object-store-rs/python/object_store_rs/_object_store_rs.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from ._copy import copy as copy
from ._copy import copy_async as copy_async
from ._copy import copy_if_not_exists as copy_if_not_exists
from ._copy import copy_if_not_exists_async as copy_if_not_exists_async
from ._delete import delete as delete
from ._delete import delete_async as delete_async
from ._get import GetOptions as GetOptions
from ._get import GetResult as GetResult
from ._get import get as get
from ._get import get_async as get_async
from ._get import get_range as get_range
from ._get import get_range_async as get_range_async
from ._get import get_ranges as get_ranges
from ._get import get_ranges_async as get_ranges_async
from ._head import head as head
from ._head import head_async as head_async
from ._list import ListResult as ListResult
from ._list import ObjectMeta as ObjectMeta
from ._list import list as list
from ._list import list_async as list_async
from ._list import list_with_delimiter as list_with_delimiter
from ._list import list_with_delimiter_async as list_with_delimiter_async
from ._put import put_file as put_file
from ._put import put_file_async as put_file_async
from ._rename import rename as rename
from ._rename import rename_async as rename_async
from ._rename import rename_if_not_exists as rename_if_not_exists
from ._rename import rename_if_not_exists_async as rename_if_not_exists_async
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign_url as sign_url
from ._sign import sign_url_async as sign_url_async
21 changes: 21 additions & 0 deletions object-store-rs/python/object_store_rs/_put.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path
from typing import IO

from ._pyo3_object_store import ObjectStore

def put_file(
store: ObjectStore,
location: str,
file: IO[bytes] | Path | bytes,
*,
chunk_size: int = 5 * 1024,
max_concurrency: int = 12,
) -> None: ...
async def put_file_async(
store: ObjectStore,
location: str,
file: IO[bytes] | Path | bytes,
*,
chunk_size: int = 5 * 1024,
max_concurrency: int = 12,
) -> None: ...
Loading

0 comments on commit 246c135

Please sign in to comment.