Skip to content

Commit 7e2c890

Browse files
committed
FileIO must use absolute paths.
1 parent 4cac691 commit 7e2c890

3 files changed

Lines changed: 25 additions & 2 deletions

File tree

pyiceberg/io/fsspec.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,10 @@ def new_input(self, location: str) -> FsspecInputFile:
351351
FsspecInputFile: An FsspecInputFile instance for the given location.
352352
"""
353353
uri = urlparse(location)
354+
if uri.scheme in ("", "file"):
355+
path_to_check = uri.path if uri.scheme else location
356+
if not os.path.isabs(path_to_check):
357+
raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}")
354358
fs = self.get_fs(uri.scheme)
355359
return FsspecInputFile(location=location, fs=fs)
356360

@@ -364,6 +368,10 @@ def new_output(self, location: str) -> FsspecOutputFile:
364368
FsspecOutputFile: An FsspecOutputFile instance for the given location.
365369
"""
366370
uri = urlparse(location)
371+
if uri.scheme in ("", "file"):
372+
path_to_check = uri.path if uri.scheme else location
373+
if not os.path.isabs(path_to_check):
374+
raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}")
367375
fs = self.get_fs(uri.scheme)
368376
return FsspecOutputFile(location=location, fs=fs)
369377

pyiceberg/io/pyarrow.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,13 @@ def parse_location(location: str) -> Tuple[str, str, str]:
391391
"""Return the path without the scheme."""
392392
uri = urlparse(location)
393393
if not uri.scheme:
394-
return "file", uri.netloc, os.path.abspath(location)
394+
if not os.path.isabs(location):
395+
raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}")
396+
return "file", uri.netloc, location
397+
elif uri.scheme == "file":
398+
if not os.path.isabs(uri.path):
399+
raise ValueError(f"FileIO implementation for local files requires absolute paths: {location}")
400+
return uri.scheme, uri.netloc, uri.path
395401
elif uri.scheme in ("hdfs", "viewfs"):
396402
return uri.scheme, uri.netloc, uri.path
397403
else:

tests/io/test_io.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,6 @@ def test_output_file_to_input_file() -> None:
170170
@pytest.mark.parametrize(
171171
"string_uri",
172172
[
173-
"foo/bar/baz.parquet",
174173
"file:/foo/bar/baz.parquet",
175174
"file:/foo/bar/baz.parquet",
176175
],
@@ -185,6 +184,16 @@ def test_custom_file_io_locations(string_uri: str) -> None:
185184
output_file = file_io.new_output(location=string_uri)
186185
assert output_file.location == string_uri
187186

187+
def test_custom_file_io_location_relative_path() -> None:
188+
string_uri = "foo/bar/baz.parquet"
189+
# Instantiate the file-io; creating an input file from a relative path must raise
190+
file_io = PyArrowFileIO()
191+
with pytest.raises(ValueError) as exc_info:
192+
file_io.new_input(location=string_uri)
193+
194+
assert "FileIO implementation for local files requires absolute paths" in str(exc_info.value)
195+
196+
188197

189198
def test_deleting_local_file_using_file_io() -> None:
190199
"""Test deleting a local file using FileIO.delete(...)"""

0 commit comments

Comments
 (0)