Skip to content

Commit 996978a

Browse files
authored
fix(fsspec): _info() should honor self.dircache (#663)
1 parent 52cb32b commit 996978a

2 files changed

Lines changed: 94 additions & 0 deletions

File tree

obstore/python/obstore/fsspec.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,16 @@ async def _get_file(self, rpath: str, lpath: str, **_kwargs: Any) -> None:
487487
await self._local_store._pipe_file(lpath, resp) # noqa: SLF001
488488

489489
async def _info(self, path: str, **_kwargs: Any) -> dict[str, Any]:
490+
# Consult `self.dircache` before issuing a HEAD request. An empty
491+
# filter means the cache has evidence `path` is a directory but no
492+
# entry under that exact name, so we return a synthetic directory.
493+
cached = self._ls_from_cache(path)
494+
if cached is not None:
495+
match = next((entry for entry in cached if entry["name"] == path), None)
496+
if match is not None:
497+
return match
498+
return {"name": path, "size": 0, "type": "directory"}
499+
490500
bucket, path_no_bucket = self._split_path(path)
491501
store = self._construct_store(bucket)
492502

tests/test_fsspec.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,90 @@ def test_register():
7171
assert issubclass(fsspec.get_filesystem_class("abfs"), FsspecStore)
7272

7373

74+
@pytest.mark.asyncio
async def test_info_returns_cached_entry_without_constructing_store():
    """`_info` returns the matching `dircache` entry without building a store."""
    register("file")
    store_fs: FsspecStore = fsspec.filesystem("file", asynchronous=True)

    target = "bucket/file.parquet"
    # Seed the parent listing with a complete entry for the queried path.
    listing_entry = {
        "name": target,
        "size": 12345,
        "type": "file",
        "e_tag": None,
        "last_modified": None,
        "version": None,
    }
    store_fs.dircache["bucket"] = [listing_entry]

    # Patch the store factory so any store construction would be recorded.
    with patch.object(store_fs, "_construct_store") as construct_spy:
        info = await store_fs._info(target)

    assert info == listing_entry
    assert construct_spy.call_count == 0
94+
95+
96+
@pytest.mark.asyncio
async def test_info_falls_through_to_head_on_dircache_miss():
    """With an empty `dircache`, `_info` goes on to construct a store."""
    register("file")
    store_fs: FsspecStore = fsspec.filesystem("file", asynchronous=True)
    assert store_fs.dircache == {}

    # The patched factory raises, which proves `_info` got past the cache
    # lookup and reached the store-construction step.
    head_reached = RuntimeError("HEAD path entered")
    with (
        patch.object(
            store_fs,
            "_construct_store",
            side_effect=head_reached,
        ) as construct_spy,
        pytest.raises(RuntimeError, match="HEAD path entered"),
    ):
        await store_fs._info("bucket/some/file.parquet")

    assert construct_spy.call_count == 1
108+
109+
110+
@pytest.mark.asyncio
async def test_info_raises_filenotfound_when_parent_cached_and_child_absent():
    """A cached parent listing that lacks the child yields `FileNotFoundError`."""
    register("file")
    store_fs: FsspecStore = fsspec.filesystem("file", asynchronous=True)

    # Only a sibling is listed under the parent; the queried child is absent.
    sibling = {"name": "bucket/other.parquet", "type": "file", "size": 1}
    store_fs.dircache["bucket"] = [sibling]

    with patch.object(store_fs, "_construct_store") as construct_spy:
        with pytest.raises(FileNotFoundError):
            await store_fs._info("bucket/missing.parquet")

    # The error must come from the cache lookup, not from a store request.
    assert construct_spy.call_count == 0
125+
126+
127+
@pytest.mark.asyncio
async def test_info_synthesizes_directory_when_path_is_cached_dir_key():
    """A path that is itself a `dircache` key is reported as a directory."""
    register("file")
    store_fs: FsspecStore = fsspec.filesystem("file", asynchronous=True)

    dir_path = "bucket/sub"
    # The listing holds only children, so no entry is named `dir_path` itself.
    children = [
        {"name": "bucket/sub/child1", "type": "file", "size": 1},
        {"name": "bucket/sub/child2", "type": "file", "size": 2},
    ]
    store_fs.dircache[dir_path] = children

    with patch.object(store_fs, "_construct_store") as construct_spy:
        info = await store_fs._info(dir_path)

    expected = {"name": "bucket/sub", "size": 0, "type": "directory"}
    assert info == expected
    assert construct_spy.call_count == 0
141+
142+
143+
@pytest.mark.asyncio
async def test_info_synthesizes_directory_for_trailing_slash_query():
    """A trailing-slash query for a cached directory returns a directory entry."""
    register("file")
    store_fs: FsspecStore = fsspec.filesystem("file", asynchronous=True)

    # The parent listing records the directory without a trailing slash.
    cached_dir = {"name": "bucket/sub", "type": "directory", "size": 0}
    store_fs.dircache["bucket"] = [cached_dir]

    with patch.object(store_fs, "_construct_store") as construct_spy:
        info = await store_fs._info("bucket/sub/")

    # The returned entry keeps the queried name, trailing slash included.
    expected = {"name": "bucket/sub/", "size": 0, "type": "directory"}
    assert info == expected
    assert construct_spy.call_count == 0
156+
157+
74158
def test_construct_store_cache_diff_bucket_name(
75159
minio_bucket: tuple[S3Config, ClientConfig],
76160
):

0 commit comments

Comments
 (0)