Skip to content

Commit 682cdd4

Browse files
jackye1995claude
andauthored
fix: add dir_listing_to_manifest_migration_enabled flag to avoid extra object store calls (lance-format#6507)
## Summary - Adds `dir_listing_to_manifest_migration_enabled` flag (default: `false`) to `DirectoryNamespaceBuilder` and `DirectoryNamespace` - When `false` and both `manifest_enabled` and `dir_listing_enabled` are `true`, root-level table operations (`table_exists`, `describe_table`, `list_tables`) skip the manifest check and use directory listing directly, avoiding extra object store listing calls - When `true`, preserves the existing hybrid behavior of checking manifest first then falling back to directory listing - Includes a test with a counting object store wrapper verifying only a single `list_with_delimiter` call is made for root-level table operations without migration mode --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0108b96 commit 682cdd4

4 files changed

Lines changed: 464 additions & 24 deletions

File tree

python/python/tests/test_namespace_dir.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,7 @@ def test_connect_with_properties(self, use_custom):
815815
"root": f"memory://test_connect_{unique_id}",
816816
"manifest_enabled": "true",
817817
"dir_listing_enabled": "true",
818+
"dir_listing_to_manifest_migration_enabled": "true",
818819
}
819820

820821
# Connect via lance.namespace.connect

rust/lance-core/src/utils/testing.rs

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ use std::collections::HashMap;
1717
use std::fmt::Debug;
1818
use std::future;
1919
use std::ops::Range;
20-
use std::sync::{Arc, Mutex};
20+
use std::pin::Pin;
21+
use std::sync::{
22+
Arc, Mutex,
23+
atomic::{AtomicUsize, Ordering},
24+
};
2125

2226
// A policy function takes in the name of the operation (e.g. "put") and the location
2327
// that is being accessed / modified and returns an optional error.
@@ -121,6 +125,42 @@ impl std::fmt::Display for ProxyObjectStore {
121125
}
122126
}
123127

128+
/// An object store wrapper that counts listing operations.
129+
///
130+
/// This increments the shared counter for both `list` and `list_with_delimiter`
131+
/// so tests can observe all listing-based directory and version discovery calls.
132+
#[derive(Debug)]
133+
pub struct CountingObjectStore {
134+
target: Arc<dyn ObjectStore>,
135+
listing_count: Arc<AtomicUsize>,
136+
}
137+
138+
impl CountingObjectStore {
139+
pub fn new(target: Arc<dyn ObjectStore>, listing_count: Arc<AtomicUsize>) -> Self {
140+
Self {
141+
target,
142+
listing_count,
143+
}
144+
}
145+
146+
fn record_listing(&self) {
147+
self.listing_count.fetch_add(1, Ordering::SeqCst);
148+
}
149+
150+
fn delegate_list(
151+
&self,
152+
prefix: Option<&Path>,
153+
) -> Pin<Box<dyn futures::Stream<Item = OSResult<ObjectMeta>> + Send>> {
154+
self.target.list(prefix)
155+
}
156+
}
157+
158+
impl std::fmt::Display for CountingObjectStore {
159+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160+
write!(f, "CountingObjectStore({})", self.target)
161+
}
162+
}
163+
124164
#[async_trait]
125165
impl ObjectStore for ProxyObjectStore {
126166
async fn put_opts(
@@ -204,3 +244,65 @@ impl ObjectStore for ProxyObjectStore {
204244
self.target.copy_if_not_exists(from, to).await
205245
}
206246
}
247+
248+
#[async_trait]
249+
impl ObjectStore for CountingObjectStore {
250+
async fn put_opts(
251+
&self,
252+
location: &Path,
253+
bytes: PutPayload,
254+
opts: PutOptions,
255+
) -> OSResult<PutResult> {
256+
self.target.put_opts(location, bytes, opts).await
257+
}
258+
259+
async fn put_multipart_opts(
260+
&self,
261+
location: &Path,
262+
opts: PutMultipartOptions,
263+
) -> OSResult<Box<dyn MultipartUpload>> {
264+
self.target.put_multipart_opts(location, opts).await
265+
}
266+
267+
async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult<GetResult> {
268+
self.target.get_opts(location, options).await
269+
}
270+
271+
async fn get_range(&self, location: &Path, range: Range<u64>) -> OSResult<Bytes> {
272+
self.target.get_range(location, range).await
273+
}
274+
275+
async fn get_ranges(&self, location: &Path, ranges: &[Range<u64>]) -> OSResult<Vec<Bytes>> {
276+
self.target.get_ranges(location, ranges).await
277+
}
278+
279+
async fn head(&self, location: &Path) -> OSResult<ObjectMeta> {
280+
self.target.head(location).await
281+
}
282+
283+
async fn delete(&self, location: &Path) -> OSResult<()> {
284+
self.target.delete(location).await
285+
}
286+
287+
fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, OSResult<ObjectMeta>> {
288+
self.record_listing();
289+
self.delegate_list(prefix).boxed()
290+
}
291+
292+
async fn list_with_delimiter(&self, prefix: Option<&Path>) -> OSResult<ListResult> {
293+
self.record_listing();
294+
self.target.list_with_delimiter(prefix).await
295+
}
296+
297+
async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> {
298+
self.target.copy(from, to).await
299+
}
300+
301+
async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> {
302+
self.target.rename(from, to).await
303+
}
304+
305+
async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> {
306+
self.target.copy_if_not_exists(from, to).await
307+
}
308+
}

0 commit comments

Comments
 (0)