From 8404e6b9c36daf1a71316efededfe4d99bfe08d2 Mon Sep 17 00:00:00 2001 From: Jiakai Li Date: Tue, 24 Dec 2024 14:51:48 +1300 Subject: [PATCH] Add test case for `PyArrowFileIO.new_input` multi region --- tests/io/test_pyarrow.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index e960374f7..2c863611c 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -2108,3 +2108,28 @@ def test_pyarrow_file_io_fs_by_scheme_cache() -> None: filesystem_ap_southeast_2_cached = pyarrow_file_io.fs_by_scheme("s3", "ap-southeast-2-bucket") assert filesystem_ap_southeast_2_cached.region == ap_southeast_2_region assert pyarrow_file_io.fs_by_scheme.cache_info().hits == 2 # type: ignore + + +def test_pyarrow_io_new_input_multi_region() -> None: + bucket_regions = [ + ("us-east-2-bucket", "us-east-2"), + ("ap-southeast-2-bucket", "ap-southeast-2"), + ] + + def _s3_region_map(bucket: str) -> str: + for bucket_region in bucket_regions: + if bucket_region[0] == bucket: + return bucket_region[1] + raise OSError("Unknown bucket") + + # For one single pyarrow io instance with configured default s3 region + pyarrow_file_io = PyArrowFileIO({"s3.region": "ap-southeast-2"}) + with patch("pyarrow.fs.resolve_s3_region") as mock_s3_region_resolver: + mock_s3_region_resolver.side_effect = _s3_region_map + + # The filesystem region is set by provided property by default (when bucket region cannot be resolved) + assert pyarrow_file_io.new_input("s3://non-exist-bucket/path/to/file")._filesystem.region == "ap-southeast-2" + + # The filesystem region is overwritten by provided bucket region (when bucket region resolves to a different one) + for bucket_region in bucket_regions: + assert pyarrow_file_io.new_input(f"s3://{bucket_region[0]}/path/to/file")._filesystem.region == bucket_region[1]