Skip to content

Commit b40849e

Browse files
author
Tom McCormick
committed
Fix file system location parsing: use environment variables to set the default scheme and netloc when they are not specified in the file path
1 parent 904c0b7 commit b40849e

File tree

1 file changed

+20
-6
lines changed

1 file changed

+20
-6
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -388,14 +388,28 @@ def __init__(self, properties: Properties = EMPTY_DICT):
388388

389389
@staticmethod
390390
def parse_location(location: str) -> Tuple[str, str, str]:
391-
"""Return the path without the scheme."""
391+
"""Return (scheme, netloc, path) for the given location.
392+
Uses environment variables DEFAULT_SCHEME and DEFAULT_NETLOC
393+
if scheme/netloc are missing.
394+
"""
392395
uri = urlparse(location)
393-
if not uri.scheme:
394-
return "file", uri.netloc, os.path.abspath(location)
395-
elif uri.scheme in ("hdfs", "viewfs"):
396-
return uri.scheme, uri.netloc, uri.path
396+
397+
# Load defaults from environment
398+
default_scheme = os.getenv("DEFAULT_SCHEME", "file")
399+
default_netloc = os.getenv("DEFAULT_NETLOC", "")
400+
401+
# Apply logic
402+
scheme = uri.scheme or default_scheme
403+
netloc = uri.netloc or default_netloc
404+
405+
if scheme in ("hdfs", "viewfs"):
406+
return scheme, netloc, uri.path
397407
else:
398-
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
408+
# For non-HDFS URIs, include netloc in the path if present
409+
path = uri.path if uri.scheme else os.path.abspath(location)
410+
if netloc and not path.startswith(netloc):
411+
path = f"{netloc}{path}"
412+
return scheme, netloc, path
399413

400414
def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSystem:
401415
"""Initialize FileSystem for different scheme."""

0 commit comments

Comments
 (0)