From 8404e6b9c36daf1a71316efededfe4d99bfe08d2 Mon Sep 17 00:00:00 2001
From: Jiakai Li
Date: Tue, 24 Dec 2024 14:51:48 +1300
Subject: [PATCH] Add test case for `PyArrowFileIO.new_input` multi region
---
tests/io/test_pyarrow.py | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index e960374f7..2c863611c 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -2108,3 +2108,28 @@ def test_pyarrow_file_io_fs_by_scheme_cache() -> None:
filesystem_ap_southeast_2_cached = pyarrow_file_io.fs_by_scheme("s3", "ap-southeast-2-bucket")
assert filesystem_ap_southeast_2_cached.region == ap_southeast_2_region
assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 2 # type: ignore
+
+
+def test_pyarrow_io_new_input_multi_region() -> None:
+    """Check that `new_input` picks the S3 region per bucket, falling back to the configured `s3.region`."""
+    # Deliberately different from every resolvable bucket region, so an override is distinguishable from the fallback
+    user_provided_region = "ap-southeast-1"
+    bucket_regions = {
+        "us-east-2-bucket": "us-east-2",
+        "ap-southeast-2-bucket": "ap-southeast-2",
+    }
+
+    def _s3_region_map(bucket: str) -> str:
+        if bucket not in bucket_regions:
+            raise OSError("Unknown bucket")
+        return bucket_regions[bucket]
+
+    # One single pyarrow io instance, configured with a default s3 region, serves every bucket below
+    pyarrow_file_io = PyArrowFileIO({"s3.region": user_provided_region})
+    with patch("pyarrow.fs.resolve_s3_region", side_effect=_s3_region_map):
+        # The filesystem region falls back to the provided property (when bucket region cannot be resolved)
+        assert pyarrow_file_io.new_input("s3://non-exist-bucket/path/to/file")._filesystem.region == user_provided_region
+
+        # The filesystem region is overwritten by the resolved bucket region (which differs from the provided one)
+        for bucket, region in bucket_regions.items():
+            assert pyarrow_file_io.new_input(f"s3://{bucket}/path/to/file")._filesystem.region == region