-
Notifications
You must be signed in to change notification settings - Fork 524
[Bug Fix] cast None current-snapshot-id as -1 for Backwards Compatibility
#473
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
710f913
d401045
cd38c5a
3a4ada5
7b810c7
fecbd4d
cc9ead5
a929d20
a102467
f4a302b
130b3cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,12 +18,14 @@ | |
|
|
||
| import codecs | ||
| import gzip | ||
| import json | ||
| from abc import ABC, abstractmethod | ||
| from typing import Callable | ||
|
|
||
| from pyiceberg.io import InputFile, InputStream, OutputFile | ||
| from pyiceberg.table.metadata import TableMetadata, TableMetadataUtil | ||
| from pyiceberg.table.metadata import CURRENT_SNAPSHOT_ID, TableMetadata, TableMetadataUtil | ||
| from pyiceberg.typedef import UTF8 | ||
| from pyiceberg.utils.config import Config | ||
|
|
||
| GZIP = "gzip" | ||
|
|
||
|
|
@@ -127,6 +129,11 @@ def table_metadata(metadata: TableMetadata, output_file: OutputFile, overwrite: | |
| overwrite (bool): Where to overwrite the file if it already exists. Defaults to `False`. | ||
| """ | ||
| with output_file.create(overwrite=overwrite) as output_stream: | ||
| json_bytes = metadata.model_dump_json().encode(UTF8) | ||
| model_dump = metadata.model_dump_json() | ||
| if Config().get_bool("legacy-current-snapshot-id") and metadata.current_snapshot_id is None: | ||
| model_dict = json.loads(model_dump) | ||
| model_dict[CURRENT_SNAPSHOT_ID] = -1 | ||
| model_dump = json.dumps(model_dict) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is the best place to fix this. Mostly because we have to deserialize and serialize the metadata, and the rest of the deserialization logic is part of the Pydantic model. I think option 1 is much cleaner. We can set the ignore-None to @staticmethod
def table_metadata(metadata: TableMetadata, output_file: OutputFile, overwrite: bool = False) -> None:
"""Write a TableMetadata instance to an output file.
Args:
output_file (OutputFile): A custom implementation of the iceberg.io.file.OutputFile abstract base class.
overwrite (bool): Where to overwrite the file if it already exists. Defaults to `False`.
"""
with output_file.create(overwrite=overwrite) as output_stream:
json_bytes = metadata.model_dump_json(exclude_none=False).encode(UTF8)
json_bytes = Compressor.get_compressor(output_file.location).bytes_compressor()(json_bytes)
output_stream.write(json_bytes)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh that's a good suggestion. I'll add the field_serializer and set exclude_none to False so we can print out |
||
| json_bytes = model_dump.encode(UTF8) | ||
| json_bytes = Compressor.get_compressor(output_file.location).bytes_compressor()(json_bytes) | ||
| output_stream.write(json_bytes) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -120,7 +120,7 @@ def check_sort_orders(table_metadata: TableMetadata) -> TableMetadata: | |
|
|
||
| def construct_refs(table_metadata: TableMetadata) -> TableMetadata: | ||
| """Set the main branch if missing.""" | ||
| if table_metadata.current_snapshot_id is not None: | ||
| if table_metadata.current_snapshot_id is not None and table_metadata.current_snapshot_id != -1: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this -1 check still necessary?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right! I'll submit a fix thank you!
sungwy marked this conversation as resolved.
Outdated
|
||
| if MAIN_BRANCH not in table_metadata.refs: | ||
| table_metadata.refs[MAIN_BRANCH] = SnapshotRef( | ||
| snapshot_id=table_metadata.current_snapshot_id, snapshot_ref_type=SnapshotRefType.BRANCH | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. | ||
|
|
||
| import json | ||
| import os | ||
| import uuid | ||
| from typing import Any, Dict | ||
|
|
||
| import pytest | ||
| from pytest_mock import MockFixture | ||
|
|
||
| from pyiceberg.serializers import ToOutputFile | ||
| from pyiceberg.table import StaticTable | ||
| from pyiceberg.table.metadata import TableMetadataV1 | ||
|
|
||
|
|
||
| def test_legacy_current_snapshot_id( | ||
| mocker: MockFixture, tmp_path_factory: pytest.TempPathFactory, example_table_metadata_no_snapshot_v1: Dict[str, Any] | ||
| ) -> None: | ||
| from pyiceberg.io.pyarrow import PyArrowFileIO | ||
|
|
||
| metadata_location = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.metadata.json") | ||
| metadata = TableMetadataV1(**example_table_metadata_no_snapshot_v1) | ||
| ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) | ||
| static_table = StaticTable.from_metadata(metadata_location) | ||
| assert static_table.metadata.current_snapshot_id is None | ||
|
|
||
| mocker.patch.dict(os.environ, values={"PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID": "True"}) | ||
|
|
||
| ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) | ||
| with PyArrowFileIO().new_input(location=metadata_location).open() as input_stream: | ||
| metadata_json_bytes = input_stream.read() | ||
| assert json.loads(metadata_json_bytes)['current-snapshot-id'] == -1 | ||
| backwards_compatible_static_table = StaticTable.from_metadata(metadata_location) | ||
| assert backwards_compatible_static_table.metadata.current_snapshot_id is None | ||
| assert backwards_compatible_static_table.metadata == static_table.metadata |
Uh oh!
There was an error while loading. Please reload this page.