33
44use futures:: StreamExt ;
55use futures:: TryStreamExt ;
6+ use futures:: stream;
67use futures:: stream:: BoxStream ;
78use vortex_error:: VortexResult ;
89use vortex_error:: vortex_bail;
@@ -22,14 +23,18 @@ impl dyn FileSystem + '_ {
2223 pub fn glob ( & self , pattern : & str ) -> VortexResult < BoxStream < ' _ , VortexResult < FileListing > > > {
2324 validate_glob ( pattern) ?;
2425
25- // If there are no glob characters, the pattern is an exact file path.
26- // Return it directly without listing the filesystem.
26+ // If there are no glob characters, the pattern is an exact file path. `list` enumerates
27+ // entries *under* a prefix on a path-segment basis and never yields the prefix itself, so
28+ // listing an exact path would report an existing file as missing (and could surface prefix
29+ // collisions such as `foo.vortex.backup` when the caller asked for `foo.vortex`). Use
30+ // `head` to confirm the file exists and capture its size, yielding a single-element stream
31+ // when it does and an empty stream when it does not.
2732 if !pattern. contains ( [ '*' , '?' , '[' ] ) {
28- let listing = FileListing {
29- path : pattern . to_string ( ) ,
30- size : None ,
31- } ;
32- return Ok ( futures :: stream:: once ( async { Ok ( listing ) } ) . boxed ( ) ) ;
33+ let pattern = pattern . to_string ( ) ;
34+ let stream = stream :: once ( async move { self . head ( & pattern ) . await } )
35+ . try_filter_map ( |listing| async move { Ok ( listing ) } )
36+ . boxed ( ) ;
37+ return Ok ( stream) ;
3338 }
3439
3540 let glob_pattern = glob:: Pattern :: new ( pattern)
@@ -93,14 +98,41 @@ mod tests {
9398 use crate :: VortexReadAt ;
9499 use crate :: filesystem:: FileSystem ;
95100
96- /// A mock filesystem that panics if `list` is called.
101+ /// A mock filesystem that resolves exact paths through [`head`](FileSystem::head) and
102+ /// panics if [`list`](FileSystem::list) is called. This encodes the invariant the fix
103+ /// depends on: the exact-path glob branch must never list, because an object store's `list`
104+ /// does not return the exact path of a file.
97105 #[ derive( Debug ) ]
98- struct NoListFileSystem ;
106+ struct HeadFileSystem {
107+ files : Vec < FileListing > ,
108+ }
109+
110+ impl HeadFileSystem {
111+ fn new ( files : & [ ( & str , u64 ) ] ) -> Self {
112+ Self {
113+ files : files
114+ . iter ( )
115+ . map ( |& ( path, size) | FileListing {
116+ path : path. to_string ( ) ,
117+ size : Some ( size) ,
118+ } )
119+ . collect ( ) ,
120+ }
121+ }
122+ }
99123
100124 #[ async_trait]
101- impl FileSystem for NoListFileSystem {
125+ impl FileSystem for HeadFileSystem {
102126 fn list ( & self , _prefix : & str ) -> BoxStream < ' _ , VortexResult < FileListing > > {
103- vortex_panic ! ( "list() should not be called for exact paths" )
127+ vortex_panic ! ( "list() must not be called for an exact path; glob should use head()" )
128+ }
129+
130+ async fn head ( & self , path : & str ) -> VortexResult < Option < FileListing > > {
131+ Ok ( self
132+ . files
133+ . iter ( )
134+ . find ( |listing| listing. path == path)
135+ . cloned ( ) )
104136 }
105137
106138 async fn open_read ( & self , _path : & str ) -> VortexResult < Arc < dyn VortexReadAt > > {
@@ -113,12 +145,43 @@ mod tests {
113145 }
114146
115147 #[ tokio:: test]
116- async fn test_glob_exact_path_skips_list ( ) -> VortexResult < ( ) > {
117- let fs: & dyn FileSystem = & NoListFileSystem ;
118- let results: Vec < FileListing > = fs. glob ( "data/file.vortex" ) ?. try_collect ( ) . await ?;
148+ async fn test_glob_exact_path_existing_returns_listing_with_size ( ) -> VortexResult < ( ) > {
149+ let fs = HeadFileSystem :: new ( & [ ( "data/file.vortex" , 1024 ) ] ) ;
150+ let fs_dyn: & dyn FileSystem = & fs;
151+ let results: Vec < FileListing > = fs_dyn. glob ( "data/file.vortex" ) ?. try_collect ( ) . await ?;
119152 assert_eq ! ( results. len( ) , 1 ) ;
120153 assert_eq ! ( results[ 0 ] . path, "data/file.vortex" ) ;
121- assert_eq ! ( results[ 0 ] . size, None ) ;
154+ assert_eq ! (
155+ results[ 0 ] . size,
156+ Some ( 1024 ) ,
157+ "exact-path glob should propagate the size reported by head"
158+ ) ;
159+ Ok ( ( ) )
160+ }
161+
162+ #[ tokio:: test]
163+ async fn test_glob_exact_path_missing_returns_empty_stream ( ) -> VortexResult < ( ) > {
164+ let fs = HeadFileSystem :: new ( & [ ] ) ;
165+ let fs_dyn: & dyn FileSystem = & fs;
166+ let results: Vec < FileListing > = fs_dyn. glob ( "data/missing.vortex" ) ?. try_collect ( ) . await ?;
167+ assert ! (
168+ results. is_empty( ) ,
169+ "missing exact path should yield an empty stream"
170+ ) ;
171+ Ok ( ( ) )
172+ }
173+
174+ #[ tokio:: test]
175+ async fn test_glob_exact_path_ignores_prefix_siblings ( ) -> VortexResult < ( ) > {
176+ // A real object store lists by prefix and would surface `foo.vortex.backup` when asked to
177+ // list `foo.vortex`. Resolving the exact path via head sidesteps that: only the requested
178+ // key is returned, and the panicking `list` proves the branch never enumerated.
179+ let fs = HeadFileSystem :: new ( & [ ( "foo.vortex" , 10 ) , ( "foo.vortex.backup" , 20 ) ] ) ;
180+ let fs_dyn: & dyn FileSystem = & fs;
181+ let results: Vec < FileListing > = fs_dyn. glob ( "foo.vortex" ) ?. try_collect ( ) . await ?;
182+ assert_eq ! ( results. len( ) , 1 ) ;
183+ assert_eq ! ( results[ 0 ] . path, "foo.vortex" ) ;
184+ assert_eq ! ( results[ 0 ] . size, Some ( 10 ) ) ;
122185 Ok ( ( ) )
123186 }
124187
0 commit comments