Skip to content

Commit 731075a

Browse files
author
Tom McCormick
committed
use catalog env configs and update to use default scheme and netloc from properties
1 parent cd939c7 commit 731075a

File tree

2 files changed

+24
-30
lines changed

2 files changed

+24
-30
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ def __init__(self, properties: Properties = EMPTY_DICT):
392392
super().__init__(properties=properties)
393393

394394
@staticmethod
395-
def parse_location(location: str) -> Tuple[str, str, str]:
395+
def parse_location(location: str, properties: Properties=EMPTY_DICT) -> Tuple[str, str, str]:
396396
"""Return (scheme, netloc, path) for the given location.
397397
398398
Uses the DEFAULT_SCHEME and DEFAULT_NETLOC entries of ``properties``
@@ -401,8 +401,8 @@ def parse_location(location: str) -> Tuple[str, str, str]:
401401
uri = urlparse(location)
402402

403403
# Load defaults from the supplied properties
404-
default_scheme = os.getenv("DEFAULT_SCHEME", "file")
405-
default_netloc = os.getenv("DEFAULT_NETLOC", "")
404+
default_scheme = properties.get("DEFAULT_SCHEME", "file")
405+
default_netloc = properties.get("DEFAULT_NETLOC", "")
406406

407407
# Apply logic
408408
scheme = uri.scheme or default_scheme
@@ -629,7 +629,7 @@ def new_input(self, location: str) -> PyArrowFile:
629629
Returns:
630630
PyArrowFile: A PyArrowFile instance for the given location.
631631
"""
632-
scheme, netloc, path = self.parse_location(location)
632+
scheme, netloc, path = self.parse_location(location, self.properties)
633633
return PyArrowFile(
634634
fs=self.fs_by_scheme(scheme, netloc),
635635
location=location,
@@ -646,7 +646,7 @@ def new_output(self, location: str) -> PyArrowFile:
646646
Returns:
647647
PyArrowFile: A PyArrowFile instance for the given location.
648648
"""
649-
scheme, netloc, path = self.parse_location(location)
649+
scheme, netloc, path = self.parse_location(location, self.properties)
650650
return PyArrowFile(
651651
fs=self.fs_by_scheme(scheme, netloc),
652652
location=location,
@@ -667,7 +667,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
667667
an AWS error code 15.
668668
"""
669669
str_location = location.location if isinstance(location, (InputFile, OutputFile)) else location
670-
scheme, netloc, path = self.parse_location(str_location)
670+
scheme, netloc, path = self.parse_location(str_location, self.properties)
671671
fs = self.fs_by_scheme(scheme, netloc)
672672

673673
try:

tests/io/test_pyarrow.py

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2798,27 +2798,21 @@ def test_parse_location_environment_defaults():
27982798
assert netloc == ""
27992799
assert path == "/foo/bar"
28002800

2801-
try:
2802-
# Test with environment variables set
2803-
os.environ["DEFAULT_SCHEME"] = "scheme"
2804-
os.environ["DEFAULT_NETLOC"] = "netloc:8000"
2805-
2806-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2807-
assert scheme == "scheme"
2808-
assert netloc == "netloc:8000"
2809-
assert path == "netloc:8000/foo/bar"
2810-
2811-
# Set environment variables
2812-
os.environ["DEFAULT_SCHEME"] = "hdfs"
2813-
os.environ["DEFAULT_NETLOC"] = "netloc:8000"
2814-
2815-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2816-
assert scheme == "hdfs"
2817-
assert netloc == "netloc:8000"
2818-
assert path == "/foo/bar"
2819-
finally:
2820-
# Clean up environment variables
2821-
if "DEFAULT_SCHEME" in os.environ:
2822-
del os.environ["DEFAULT_SCHEME"]
2823-
if "DEFAULT_NETLOC" in os.environ:
2824-
del os.environ["DEFAULT_NETLOC"]
2801+
# Test with properties set
2802+
properties = dict()
2803+
properties["DEFAULT_SCHEME"] = "scheme"
2804+
properties["DEFAULT_NETLOC"] = "netloc:8000"
2805+
2806+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", properties=properties)
2807+
assert scheme == "scheme"
2808+
assert netloc == "netloc:8000"
2809+
assert path == "netloc:8000/foo/bar"
2810+
2811+
# Set properties
2812+
properties["DEFAULT_SCHEME"] = "hdfs"
2813+
properties["DEFAULT_NETLOC"] = "netloc:8000"
2814+
2815+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", properties=properties)
2816+
assert scheme == "hdfs"
2817+
assert netloc == "netloc:8000"
2818+
assert path == "/foo/bar"

0 commit comments

Comments
 (0)