Skip to content

Commit 2091f00

Browse files
committed
Added cli command to delete file.
1 parent 5ee0ca6 commit 2091f00

File tree

2 files changed

+91
-1
lines changed

2 files changed

+91
-1
lines changed

pyiceberg/cli/console.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from pyiceberg.cli.output import ConsoleOutput, JsonOutput, Output
3232
from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchPropertyException, NoSuchTableError
3333
from pyiceberg.table import TableProperties
34-
from pyiceberg.table.refs import SnapshotRef, SnapshotRefType
34+
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
3535
from pyiceberg.utils.properties import property_as_int
3636

3737

@@ -179,6 +179,59 @@ def files(ctx: Context, identifier: str, history: bool) -> None:
179179
output.files(catalog_table, history)
180180

181181

182+
@run.command("delete-files")
@click.argument("identifier")
@click.argument("file_paths", nargs=-1)
@click.option("--branch", default=None, help="Branch to delete files from (default: main).")
@click.option(
    "--property",
    "-p",
    "properties",
    multiple=True,
    help="Snapshot property key=value (repeatable).",
)
@click.pass_context
@catch_exception()
def delete_files(
    ctx: Context,
    identifier: str,
    file_paths: tuple[str, ...],
    branch: str | None,
    properties: tuple[str, ...],
) -> None:
    """Remove one or more data files from the table by path."""
    # The docstring is kept to a single line on purpose: click reuses it as the
    # command's --help text, so parameter notes live in comments instead.
    #
    # identifier  -- table identifier handed to ``catalog.load_table``.
    # file_paths  -- paths of the files to remove; at least one is required.
    # branch      -- target branch; falls back to MAIN_BRANCH when not given.
    # properties  -- repeatable ``key=value`` strings forwarded as snapshot
    #                properties.
    #
    # NOTE(review): the usage errors below are raised inside ``catch_exception``;
    # confirm that wrapper lets ``click.UsageError`` propagate if exit code 2 is
    # expected by callers/tests.
    if not file_paths:
        raise click.UsageError("At least one file path is required.")

    catalog, output = _catalog_and_output(ctx)

    snapshot_properties: dict[str, str] = {}
    for prop in properties:
        # ``partition`` returns an empty separator when "=" is absent, which
        # doubles as the validity check; only the first "=" splits, so values
        # may themselves contain "=".
        key, separator, value = prop.partition("=")
        if not separator:
            raise click.UsageError(f"Property must be in key=value form, got: {prop!r}")
        snapshot_properties[key] = value

    table = catalog.load_table(identifier)
    table.delete_files(
        file_paths=list(file_paths),
        branch=branch or MAIN_BRANCH,
        snapshot_properties=snapshot_properties,
    )
    output.text(f"Deleted {len(file_paths)} file(s) from {identifier}")
233+
234+
182235
@run.command()
183236
@click.argument("identifier")
184237
@click.pass_context
@@ -470,3 +523,7 @@ def _retention_properties(ref: SnapshotRef, table_properties: dict[str, str]) ->
470523
retention_properties["max_ref_age_ms"] = str(ref.max_ref_age_ms) if ref.max_ref_age_ms else "forever"
471524

472525
return retention_properties
526+
527+
528+
if __name__ == "__main__":
    # Executing this module as a script invokes the ``run`` command group.
    run()

tests/cli/test_console.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,3 +1029,36 @@ def test_log_level_cli_overrides_env(mocker: MockFixture) -> None:
10291029
mock_basicConfig.assert_called_once()
10301030
call_kwargs = mock_basicConfig.call_args[1]
10311031
assert call_kwargs["level"] == logging.ERROR
1032+
1033+
def test_delete_files_requires_at_least_one_path(catalog: InMemoryCatalog) -> None:
    """``delete-files`` invoked with no file paths must exit with status 2 and explain why."""
    runner = CliRunner()
    result = runner.invoke(run, ["delete-files", "default.my_table"])
    assert result.exit_code == 2
    # Only ``result.output`` is inspected. With a default CliRunner it already
    # includes what was written to stderr, whereas ``result.stderr`` raises
    # ValueError on Click releases before 8.2 (stderr is not captured
    # separately there) -- and ``getattr``'s default only covers AttributeError.
    out = result.output or ""
    assert "file path" in out.lower() or "At least one" in out
1039+
1040+
1041+
def test_delete_files_invalid_property_format(catalog: InMemoryCatalog, mocker: MockFixture) -> None:
    """A ``--property`` value without ``=`` must be rejected with exit status 2."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )
    runner = CliRunner()
    result = runner.invoke(
        run,
        ["delete-files", "default.my_table", "s3://bucket/file.parquet", "--property", "invalid_no_equals"],
    )
    assert result.exit_code == 2
    # Only ``result.output`` is inspected. With a default CliRunner it already
    # includes what was written to stderr, whereas ``result.stderr`` raises
    # ValueError on Click releases before 8.2 (stderr is not captured
    # separately there) -- and ``getattr``'s default only covers AttributeError.
    out = result.output or ""
    assert "key=value" in out or "invalid_no_equals" in out
1056+
1057+
1058+
def test_delete_files_table_does_not_exist(catalog: InMemoryCatalog) -> None:
    """Deleting files from a table that is not in the catalog must exit with status 1."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    runner = CliRunner()
    result = runner.invoke(run, ["delete-files", "default.doesnotexist", "s3://bucket/file.parquet"])
    assert result.exit_code == 1
    # Only ``result.output`` is inspected. With a default CliRunner it already
    # includes what was written to stderr, whereas ``result.stderr`` raises
    # ValueError on Click releases before 8.2 (stderr is not captured
    # separately there) -- and ``getattr``'s default only covers AttributeError.
    out = result.output or ""
    assert "default.doesnotexist" in out
    # "Table does not exist" already contains "does not exist", so one
    # substring check covers both phrasings the original accepted.
    assert "does not exist" in out

0 commit comments

Comments
 (0)