Skip to content

Commit ac10a1b

Browse files
authored
feat: support enable/disable auto-cleanup (#4037)
1 parent b936329 commit ac10a1b

3 files changed

Lines changed: 79 additions & 0 deletions

File tree

python/python/lance/dataset.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2697,6 +2697,17 @@ def update_config(self, upsert_values: Dict[str, str]) -> None:
26972697
"""
26982698
self._ds.update_config(upsert_values)
26992699

2700+
def delete_config_keys(self, keys: list[str]) -> None:
2701+
"""Delete specified configuration keys from the dataset.
2702+
2703+
Parameters
2704+
----------
2705+
keys : list of str
2706+
A list of configuration keys to remove from the dataset.
2707+
Non-existent keys will be silently ignored.
2708+
"""
2709+
self._ds.delete_config_keys(keys)
2710+
27002711
@property
27012712
def optimize(self) -> "DatasetOptimizer":
27022713
return DatasetOptimizer(self)
@@ -3836,6 +3847,29 @@ def optimize_indices(self, **kwargs):
38363847
"""
38373848
self._dataset._ds.optimize_indices(**kwargs)
38383849

3850+
def enable_auto_cleanup(self, auto_cleanup_config: AutoCleanupConfig, **kwargs):
3851+
"""Enable autocleaning for an existing dataset.
3852+
3853+
Parameters
3854+
----------
3855+
auto_cleanup_config: AutoCleanupConfig
3856+
Config options for automatic cleanup of the dataset.
3857+
If set, dataset's old versions will be automatically
3858+
cleaned up according to this parameter.
3859+
"""
3860+
self._dataset._ds.update_config(
3861+
{
3862+
"lance.auto_cleanup.interval": str(auto_cleanup_config["interval"]),
3863+
"lance.auto_cleanup.older_than": f"{auto_cleanup_config['older_than_seconds']}s", # noqa E501
3864+
}
3865+
)
3866+
3867+
def disable_auto_cleanup(self, **kwargs):
3868+
"""Disable autocleaning via delete related keys."""
3869+
self._dataset._ds.delete_config_keys(
3870+
["lance.auto_cleanup.interval", "lance.auto_cleanup.older_than"]
3871+
)
3872+
38393873

38403874
class Tags:
38413875
"""

python/python/tests/test_dataset.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,6 +1166,41 @@ def test_auto_cleanup_invalid(tmp_path):
11661166
assert len(dataset.versions()) == 4
11671167

11681168

1169+
def test_enable_disable_auto_cleanup(tmp_path):
1170+
table = pa.Table.from_pydict({"a": range(100), "b": range(100)})
1171+
base_dir = tmp_path / "test"
1172+
ds = lance.write_dataset(table, base_dir, mode="create")
1173+
auto_cleanup_options = AutoCleanupConfig(
1174+
interval=1,
1175+
older_than_seconds=1,
1176+
)
1177+
# enable auto cleanup
1178+
ds.optimize.enable_auto_cleanup(auto_cleanup_options)
1179+
lance.write_dataset(table, base_dir, mode="append")
1180+
lance.write_dataset(table, base_dir, mode="append")
1181+
lance.write_dataset(table, base_dir, mode="append")
1182+
1183+
time.sleep(5)
1184+
1185+
# trigger cleanup
1186+
lance.write_dataset(table, base_dir, mode="append")
1187+
assert len(ds.versions()) == 2
1188+
1189+
# this is a transactional commit, so will increase a version
1190+
ds.optimize.disable_auto_cleanup()
1191+
1192+
lance.write_dataset(table, base_dir, mode="append")
1193+
lance.write_dataset(table, base_dir, mode="append")
1194+
lance.write_dataset(table, base_dir, mode="append")
1195+
1196+
time.sleep(5)
1197+
1198+
# wait to see if cleanup would be trigger
1199+
lance.write_dataset(table, base_dir, mode="append")
1200+
1201+
assert len(ds.versions()) == 7
1202+
1203+
11691204
def test_create_from_commit(tmp_path: Path):
11701205
table = pa.Table.from_pydict({"a": range(100), "b": range(100)})
11711206
base_dir = tmp_path / "test"

python/src/dataset.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1768,6 +1768,16 @@ impl Dataset {
17681768
self.ds = Arc::new(new_self);
17691769
Ok(())
17701770
}
1771+
1772+
#[pyo3(signature = (keys))]
1773+
fn delete_config_keys(&mut self, keys: Vec<String>) -> PyResult<()> {
1774+
let key_refs: Vec<&str> = keys.iter().map(|k| k.as_str()).collect();
1775+
let mut new_self = self.ds.as_ref().clone();
1776+
RT.block_on(None, new_self.delete_config_keys(&key_refs))?
1777+
.map_err(|err| PyIOError::new_err(err.to_string()))?;
1778+
self.ds = Arc::new(new_self);
1779+
Ok(())
1780+
}
17711781
}
17721782

17731783
#[derive(FromPyObject)]

0 commit comments

Comments
 (0)