|
21 | 21 | from unittest import mock |
22 | 22 | from unittest.mock import MagicMock |
23 | 23 |
|
| 24 | +import pyarrow as pa |
24 | 25 | import pytest |
25 | 26 | from click.testing import CliRunner |
26 | 27 | from pytest_mock import MockFixture |
@@ -1071,3 +1072,118 @@ def test_warehouse_cli_option_forwarded_to_catalog(mocker: MockFixture) -> None: |
1071 | 1072 | assert result.exit_code == 0 |
1072 | 1073 | mock_basicConfig.assert_called_once() |
1073 | 1074 | mock_load_catalog.assert_called_once_with("rest", uri="https://catalog.service", warehouse="example-warehouse") |
| 1075 | + |
| 1076 | + |
def _create_table_with_expirable_snapshot(catalog: InMemoryCatalog) -> int:
    """Create a table with two snapshots and return the ID of the older (non-HEAD) one.

    The HEAD snapshot of a branch is protected from expiration, so to test the
    expire-snapshots command we need a snapshot that has been superseded.

    Args:
        catalog: Catalog in which the test namespace and table are created.

    Returns:
        The ID of the snapshot produced by the first append, captured before
        the second append is committed.
    """
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    table = catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )
    arrow_schema = pa.schema(
        [
            pa.field("x", pa.int64(), nullable=False),
            pa.field("y", pa.int64(), nullable=False),
            pa.field("z", pa.int64(), nullable=False),
        ]
    )
    table.append(pa.Table.from_pylist([{"x": 1, "y": 2, "z": 3}], schema=arrow_schema))
    table.refresh()
    first_snapshot = table.current_snapshot()
    # Guard the lookup so a missing snapshot fails with a clear message instead
    # of an AttributeError on None (and so the Optional narrows for type checkers).
    assert first_snapshot is not None, "expected a current snapshot after the first append"
    older_snapshot_id = first_snapshot.snapshot_id
    # The second append moves HEAD forward, leaving the first snapshot superseded.
    table.append(pa.Table.from_pylist([{"x": 4, "y": 5, "z": 6}], schema=arrow_schema))
    return older_snapshot_id
| 1101 | + |
| 1102 | + |
def test_expire_snapshots_requires_option(catalog: InMemoryCatalog) -> None:
    """Running expire-snapshots with neither --snapshot-id nor --older-than exits with code 1."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )

    # No selector option is passed on purpose.
    cli_args = ["maintenance", "expire-snapshots", "default.my_table"]
    outcome = CliRunner().invoke(run, cli_args)

    assert outcome.exit_code == 1
    assert "Must provide at least one of --snapshot-id or --older-than." in outcome.output
| 1116 | + |
| 1117 | + |
def test_expire_snapshots_table_does_not_exists(catalog: InMemoryCatalog) -> None:
    """Targeting a table that was never created exits with code 1 and a 'does not exist' message."""
    # pylint: disable=unused-argument

    cli_args = ["maintenance", "expire-snapshots", "default.doesnotexist", "--snapshot-id", "1"]
    outcome = CliRunner().invoke(run, cli_args)

    assert outcome.exit_code == 1
    assert outcome.output == "Table does not exist: default.doesnotexist\n"
| 1126 | + |
| 1127 | + |
def test_expire_snapshots_by_id(catalog: InMemoryCatalog) -> None:
    """Expiring a superseded snapshot by ID succeeds and removes it from the table metadata."""
    expirable_id = _create_table_with_expirable_snapshot(catalog)

    cli_args = [
        "maintenance",
        "expire-snapshots",
        "default.my_table",
        "--snapshot-id",
        str(expirable_id),
    ]
    outcome = CliRunner().invoke(run, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output == "Expired snapshots on default.my_table\n"

    # Reload from the catalog so the check reflects the committed metadata.
    reloaded_table = catalog.load_table(TEST_TABLE_IDENTIFIER)
    assert reloaded_table.metadata.snapshot_by_id(expirable_id) is None
| 1139 | + |
| 1140 | + |
def test_expire_snapshots_older_than(catalog: InMemoryCatalog) -> None:
    """Expiring with an --older-than cutoff one day in the future removes the superseded snapshot."""
    expirable_id = _create_table_with_expirable_snapshot(catalog)
    # Cutoff is one day after "now", i.e. later than the moment the snapshots were created above.
    one_day = datetime.timedelta(days=1)
    cutoff_text = (datetime.datetime.now() + one_day).strftime("%Y-%m-%dT%H:%M:%S")

    cli_args = ["maintenance", "expire-snapshots", "default.my_table", "--older-than", cutoff_text]
    outcome = CliRunner().invoke(run, cli_args)

    assert outcome.exit_code == 0
    assert outcome.output == "Expired snapshots on default.my_table\n"

    # Reload from the catalog so the check reflects the committed metadata.
    reloaded_table = catalog.load_table(TEST_TABLE_IDENTIFIER)
    assert reloaded_table.metadata.snapshot_by_id(expirable_id) is None
| 1162 | + |
| 1163 | + |
def test_expire_snapshots_unknown_snapshot_id(catalog: InMemoryCatalog) -> None:
    """Passing a snapshot ID that the table does not contain exits with code 1 and an error message."""
    catalog.create_namespace(TEST_TABLE_NAMESPACE)
    catalog.create_table(
        identifier=TEST_TABLE_IDENTIFIER,
        schema=TEST_TABLE_SCHEMA,
        partition_spec=TEST_TABLE_PARTITION_SPEC,
    )

    # The table was just created with no appends; 999 is an arbitrary ID.
    cli_args = ["maintenance", "expire-snapshots", "default.my_table", "--snapshot-id", "999"]
    outcome = CliRunner().invoke(run, cli_args)

    assert outcome.exit_code == 1
    assert "Snapshot with ID 999 does not exist." in outcome.output
| 1177 | + |
| 1178 | + |
def test_json_expire_snapshots_by_id(catalog: InMemoryCatalog) -> None:
    """Expire a superseded snapshot by ID with ``--output=json``.

    Checks both the JSON-encoded confirmation message and that the snapshot is
    actually gone from the table metadata, mirroring the text-output test.
    """
    snapshot_id = _create_table_with_expirable_snapshot(catalog)

    runner = CliRunner()
    result = runner.invoke(
        run,
        ["--output=json", "maintenance", "expire-snapshots", "default.my_table", "--snapshot-id", str(snapshot_id)],
    )

    assert result.exit_code == 0
    assert result.output == '"Expired snapshots on default.my_table"\n'

    # The output format should only change how the result is printed; verify
    # the expiration itself was committed, as the text-output test does.
    refreshed = catalog.load_table(TEST_TABLE_IDENTIFIER)
    assert refreshed.metadata.snapshot_by_id(snapshot_id) is None
0 commit comments