Skip to content

Commit

Permalink
Add test case for PyArrowFileIO.new_input multi region
Browse files Browse the repository at this point in the history
  • Loading branch information
jiakai-li committed Dec 24, 2024
1 parent 48bb811 commit 8404e6b
Showing 1 changed file with 25 additions and 0 deletions.
25 changes: 25 additions & 0 deletions tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2108,3 +2108,28 @@ def test_pyarrow_file_io_fs_by_scheme_cache() -> None:
filesystem_ap_southeast_2_cached = pyarrow_file_io.fs_by_scheme("s3", "ap-southeast-2-bucket")
assert filesystem_ap_southeast_2_cached.region == ap_southeast_2_region
assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 2 # type: ignore


def test_pyarrow_io_new_input_multi_region() -> None:
bucket_regions = [
("us-east-2-bucket", "us-east-2"),
("ap-southeast-2-bucket", "ap-southeast-2"),
]

def _s3_region_map(bucket: str) -> str:
for bucket_region in bucket_regions:
if bucket_region[0] == bucket:
return bucket_region[1]
raise OSError("Unknown bucket")

# For one single pyarrow io instance with configured default s3 region
pyarrow_file_io = PyArrowFileIO({"s3.region": "ap-southeast-2"})
with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver:
mock_s3_region_resolver.side_effect = _s3_region_map

# The filesystem region is set by provided property by default (when bucket region cannot be resolved)
assert pyarrow_file_io.new_input("s3://non-exist-bucket/path/to/file")._filesystem.region == "ap-southeast-2"

# The filesystem region is overwritten by provided bucket region (when bucket region resolves to a different one)
for bucket_region in bucket_regions:
assert pyarrow_file_io.new_input(f"s3://{bucket_region[0]}/path/to/file")._filesystem.region == bucket_region[1]

0 comments on commit 8404e6b

Please sign in to comment.