Skip to content

Commit

Permalink
Support execution in Windows using Local File System and NFS
Browse files: browse the repository at this point in the history
  • Loading branch information
rfung-tla committed Aug 4, 2024
1 parent 846713b commit 2608578
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
8 changes: 8 additions & 0 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import itertools
import logging
import os
import platform
import re
import uuid
from abc import ABC, abstractmethod
Expand All @@ -55,6 +56,7 @@
cast,
)
from urllib.parse import urlparse
from urllib.request import url2pathname

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -339,6 +341,12 @@ def parse_location(location: str) -> Tuple[str, str, str]:
return "file", uri.netloc, os.path.abspath(location)
elif uri.scheme in ("hdfs", "viewfs"):
return uri.scheme, uri.netloc, uri.path
elif uri.scheme and uri.scheme.lower() in "abcdefghijklmnopqrstuvwxyz" and platform.system() == "Windows":
return "file", "", os.path.abspath(location)
elif uri.scheme == "file" and platform.system() == "Windows":
netloc = rf"\\{uri.netloc}" if uri.netloc else uri.netloc
path = url2pathname(f"{netloc}{uri.path}")
return uri.scheme, uri.netloc, path
else:
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"

Expand Down
12 changes: 12 additions & 0 deletions tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# pylint: disable=protected-access,unused-argument,redefined-outer-name

import os
import platform
import tempfile
import uuid
from datetime import date
Expand Down Expand Up @@ -1703,6 +1704,17 @@ def check_results(location: str, expected_schema: str, expected_netloc: str, exp

check_results("/root/foo.txt", "file", "", "/root/foo.txt")
check_results("/root/tmp/foo.txt", "file", "", "/root/tmp/foo.txt")
with (patch("pyiceberg.io.pyarrow.platform") as mock_platform,
patch("pyiceberg.io.pyarrow.os") as mock_os,
patch("pyiceberg.io.pyarrow.url2pathname") as mock_url2pathname):
windows_paths = [r"\\nfs_server\root\tmp\foo.txt", r"C:\root\tmp\foo.txt"]
mock_platform.system.return_value = "Windows"
mock_os.path.abspath.side_effect = windows_paths
mock_url2pathname.side_effect = windows_paths
check_results(r"\\nfs_server\root\tmp\foo.txt", "file", "", r"\\nfs_server\root\tmp\foo.txt")
check_results(r"file://nfs_server/root/tmp/foo.txt", "file", "nfs_server", r"\\nfs_server\root\tmp\foo.txt")
check_results(r"C:\root\tmp\foo.txt", "file", "", r"C:\root\tmp\foo.txt")
check_results(r"file:///C:/root/tmp/foo.txt", "file", "", r"C:\root\tmp\foo.txt")


def test_make_compatible_name() -> None:
Expand Down

0 comments on commit 2608578

Please sign in to comment.