|
66 | 66 | from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec |
67 | 67 | from pyiceberg.schema import Schema, SchemaVisitor, visit |
68 | 68 | from pyiceberg.serializers import FromInputFile |
69 | | -from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table |
| 69 | +from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, update_table_metadata |
70 | 70 | from pyiceberg.table.metadata import new_table_metadata |
71 | 71 | from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder |
72 | 72 | from pyiceberg.typedef import EMPTY_DICT |
@@ -150,6 +150,7 @@ def _construct_hive_storage_descriptor(schema: Schema, location: Optional[str]) |
150 | 150 | PROP_TABLE_TYPE = "table_type" |
151 | 151 | PROP_METADATA_LOCATION = "metadata_location" |
152 | 152 | PROP_PREVIOUS_METADATA_LOCATION = "previous_metadata_location" |
| 153 | +DEFAULT_PROPERTIES = {'write.parquet.compression-codec': 'zstd'} |
153 | 154 |
|
154 | 155 |
|
155 | 156 | def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]: |
@@ -272,14 +273,19 @@ def create_table( |
272 | 273 | AlreadyExistsError: If a table with the name already exists. |
273 | 274 | ValueError: If the identifier is invalid. |
274 | 275 | """ |
| 276 | + properties = {**DEFAULT_PROPERTIES, **properties} |
275 | 277 | database_name, table_name = self.identifier_to_database_and_table(identifier) |
276 | 278 | current_time_millis = int(time.time() * 1000) |
277 | 279 |
|
278 | 280 | location = self._resolve_table_location(location, database_name, table_name) |
279 | 281 |
|
280 | 282 | metadata_location = self._get_metadata_location(location=location) |
281 | 283 | metadata = new_table_metadata( |
282 | | - location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties |
| 284 | + location=location, |
| 285 | + schema=schema, |
| 286 | + partition_spec=partition_spec, |
| 287 | + sort_order=sort_order, |
| 288 | + properties=properties, |
283 | 289 | ) |
284 | 290 | io = load_file_io({**self.properties, **properties}, location=location) |
285 | 291 | self._write_metadata(metadata, io, metadata_location) |
@@ -330,7 +336,37 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons |
330 | 336 | Raises: |
331 | 337 | NoSuchTableError: If a table with the given identifier does not exist. |
332 | 338 | """ |
333 | | - raise NotImplementedError |
| 339 | + identifier_tuple = self.identifier_to_tuple_without_catalog( |
| 340 | + tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) |
| 341 | + ) |
| 342 | + current_table = self.load_table(identifier_tuple) |
| 343 | + database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) |
| 344 | + base_metadata = current_table.metadata |
| 345 | + for requirement in table_request.requirements: |
| 346 | + requirement.validate(base_metadata) |
| 347 | + |
| 348 | + updated_metadata = update_table_metadata(base_metadata, table_request.updates) |
| 349 | + if updated_metadata == base_metadata: |
| 350 | + # no changes, do nothing |
| 351 | + return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location) |
| 352 | + |
| 353 | + # write new metadata |
| 354 | + new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 |
| 355 | + new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version) |
| 356 | + self._write_metadata(updated_metadata, current_table.io, new_metadata_location) |
| 357 | + |
| 358 | + # commit to hive |
| 359 | + try: |
| 360 | + with self._client as open_client: |
| 361 | + tbl = open_client.get_table(dbname=database_name, tbl_name=table_name) |
| 362 | + tbl.parameters = _construct_parameters( |
| 363 | + metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location |
| 364 | + ) |
| 365 | + open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=tbl) |
| 366 | + except NoSuchObjectException as e: |
| 367 | + raise NoSuchTableError(f"Table does not exist: {table_name}") from e |
| 368 | + |
| 369 | + return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) |
334 | 370 |
|
335 | 371 | def load_table(self, identifier: Union[str, Identifier]) -> Table: |
336 | 372 | """Load the table's metadata and return the table instance. |
|
0 commit comments