Skip to content

Commit f4d16f2

Browse files
author
Tom McCormick
committed
add get_str config support and get val from config
1 parent ae22e64 commit f4d16f2

File tree

3 files changed

+26
-18
lines changed

3 files changed

+26
-18
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -381,13 +381,15 @@ def to_input_file(self) -> PyArrowFile:
381381

382382
class PyArrowFileIO(FileIO):
383383
fs_by_scheme: Callable[[str, Optional[str]], FileSystem]
384+
config: Config
384385

385386
def __init__(self, properties: Properties = EMPTY_DICT):
386387
self.fs_by_scheme: Callable[[str, Optional[str]], FileSystem] = lru_cache(self._initialize_fs)
388+
self.config = Config()
387389
super().__init__(properties=properties)
388390

389391
@staticmethod
390-
def parse_location(location: str) -> Tuple[str, str, str]:
392+
def parse_location(location: str, config: Config) -> Tuple[str, str, str]:
391393
"""Return (scheme, netloc, path) for the given location.
392394
393395
Uses the config values default-scheme and default-netloc (env: PYICEBERG_DEFAULT_SCHEME / PYICEBERG_DEFAULT_NETLOC)
@@ -396,8 +398,8 @@ def parse_location(location: str) -> Tuple[str, str, str]:
396398
uri = urlparse(location)
397399

398400
# Load defaults from config (e.g. the PYICEBERG_DEFAULT_SCHEME / PYICEBERG_DEFAULT_NETLOC environment variables)
399-
default_scheme = os.getenv("DEFAULT_SCHEME", "file")
400-
default_netloc = os.getenv("DEFAULT_NETLOC", "")
401+
default_scheme = config.get_str("default-scheme") or "file"
402+
default_netloc = config.get_str("default-netloc") or ""
401403

402404
# Apply logic
403405
scheme = uri.scheme or default_scheme
@@ -599,7 +601,7 @@ def new_input(self, location: str) -> PyArrowFile:
599601
Returns:
600602
PyArrowFile: A PyArrowFile instance for the given location.
601603
"""
602-
scheme, netloc, path = self.parse_location(location)
604+
scheme, netloc, path = self.parse_location(location, self.config)
603605
return PyArrowFile(
604606
fs=self.fs_by_scheme(scheme, netloc),
605607
location=location,
@@ -616,7 +618,7 @@ def new_output(self, location: str) -> PyArrowFile:
616618
Returns:
617619
PyArrowFile: A PyArrowFile instance for the given location.
618620
"""
619-
scheme, netloc, path = self.parse_location(location)
621+
scheme, netloc, path = self.parse_location(location, self.config)
620622
return PyArrowFile(
621623
fs=self.fs_by_scheme(scheme, netloc),
622624
location=location,
@@ -637,7 +639,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
637639
an AWS error code 15.
638640
"""
639641
str_location = location.location if isinstance(location, (InputFile, OutputFile)) else location
640-
scheme, netloc, path = self.parse_location(str_location)
642+
scheme, netloc, path = self.parse_location(str_location, self.config)
641643
fs = self.fs_by_scheme(scheme, netloc)
642644

643645
try:

pyiceberg/utils/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,8 @@ def get_bool(self, key: str) -> Optional[bool]:
180180
except ValueError as err:
181181
raise ValueError(f"{key} should be a boolean or left unset. Current value: {val}") from err
182182
return None
183+
184+
def get_str(self, key: str) -> Optional[str]:
    """Return the string stored under ``key`` in ``self.config``.

    Args:
        key: Name of the configuration entry to look up.

    Returns:
        The stored string, or ``None`` when the key is absent or its
        value is ``None``.

    Raises:
        ValueError: If a value is present but is not a ``str``.
    """
    val = self.config.get(key)
    if val is None:
        return None
    if not isinstance(val, str):
        # Fail loudly, in the same style as get_bool, instead of handing a
        # non-string to callers that rely on the declared return type.
        raise ValueError(f"{key} should be a string or left unset. Current value: {val}")
    return val

tests/io/test_pyarrow.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
TimestamptzType,
107107
TimeType,
108108
)
109+
from pyiceberg.utils.config import Config
109110
from tests.catalog.test_base import InMemoryCatalog
110111
from tests.conftest import UNIFIED_AWS_SESSION_PROPERTIES
111112

@@ -2024,7 +2025,7 @@ def test_writing_avro_file_adls(generated_manifest_entry_file: str, pyarrow_file
20242025

20252026
def test_parse_location() -> None:
20262027
def check_results(location: str, expected_schema: str, expected_netloc: str, expected_uri: str) -> None:
2027-
schema, netloc, uri = PyArrowFileIO.parse_location(location)
2028+
schema, netloc, uri = PyArrowFileIO.parse_location(location, Config())
20282029
assert schema == expected_schema
20292030
assert netloc == expected_netloc
20302031
assert uri == expected_uri
@@ -2647,32 +2648,32 @@ def test_parse_location_environment_defaults() -> None:
26472648
from pyiceberg.io.pyarrow import PyArrowFileIO
26482649

26492650
# Test with default environment (no env vars set)
2650-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2651+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", Config())
26512652
assert scheme == "file"
26522653
assert netloc == ""
26532654
assert path == "/foo/bar"
26542655

26552656
try:
26562657
# Test with environment variables set
2657-
os.environ["DEFAULT_SCHEME"] = "scheme"
2658-
os.environ["DEFAULT_NETLOC"] = "netloc:8000"
2658+
os.environ["PYICEBERG_DEFAULT_SCHEME"] = "scheme"
2659+
os.environ["PYICEBERG_DEFAULT_NETLOC"] = "netloc:8000"
26592660

2660-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2661+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", Config())
26612662
assert scheme == "scheme"
26622663
assert netloc == "netloc:8000"
26632664
assert path == "netloc:8000/foo/bar"
26642665

26652666
# Set environment variables
2666-
os.environ["DEFAULT_SCHEME"] = "hdfs"
2667-
os.environ["DEFAULT_NETLOC"] = "netloc:8000"
2667+
os.environ["PYICEBERG_DEFAULT_SCHEME"] = "hdfs"
2668+
os.environ["PYICEBERG_DEFAULT_NETLOC"] = "netloc:8000"
26682669

2669-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2670+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", Config())
26702671
assert scheme == "hdfs"
26712672
assert netloc == "netloc:8000"
26722673
assert path == "/foo/bar"
26732674
finally:
26742675
# Clean up environment variables
2675-
if "DEFAULT_SCHEME" in os.environ:
2676-
del os.environ["DEFAULT_SCHEME"]
2677-
if "DEFAULT_NETLOC" in os.environ:
2678-
del os.environ["DEFAULT_NETLOC"]
2676+
if "PYICEBERG_DEFAULT_SCHEME" in os.environ:
2677+
del os.environ["PYICEBERG_DEFAULT_SCHEME"]
2678+
if "PYICEBERG_DEFAULT_NETLOC" in os.environ:
2679+
del os.environ["PYICEBERG_DEFAULT_NETLOC"]

0 commit comments

Comments
 (0)