Skip to content

Commit 14f2e14

Browse files
authored
fix(spec): clean up -1 snapshot ID sentinel usage and add deserialization test (#2294)
## Which issue does this PR close? - Closes #352. ## What changes are included in this PR? - Replaces hardcoded `-1` with `EMPTY_SNAPSHOT_ID` constant in table metadata deserialization. - Adds `test_empty_snapshot_id_is_normalized_to_none` to verify that the Java-style `-1` sentinel for `current-snapshot-id` is normalized to `None` during deserialization. - Removes the public `UNASSIGNED_SNAPSHOT_ID` constant and moving it to a private constant scoped to the manifest writer module. ## Are these changes tested? Adds a test `test_empty_snapshot_id_is_normalized_to_none` verifying the deserialization normalization.
1 parent 6ee5e71 commit 14f2e14

4 files changed

Lines changed: 26 additions & 9 deletions

File tree

crates/iceberg/src/spec/manifest/writer.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,14 @@ use crate::spec::manifest::_serde::{ManifestEntryV1, ManifestEntryV2};
3232
use crate::spec::manifest::{manifest_schema_v1, manifest_schema_v2};
3333
use crate::spec::{
3434
DataContentType, DataFile, FieldSummary, ManifestEntry, ManifestFile, ManifestMetadata,
35-
ManifestStatus, PrimitiveLiteral, SchemaRef, StructType, UNASSIGNED_SNAPSHOT_ID,
35+
ManifestStatus, PrimitiveLiteral, SchemaRef, StructType,
3636
};
3737
use crate::{Error, ErrorKind};
3838

39+
/// Placeholder for snapshot ID. The field with this value must be replaced
40+
/// with the actual snapshot ID before it is committed.
41+
const UNASSIGNED_SNAPSHOT_ID: i64 = -1;
42+
3943
/// The builder used to create a [`ManifestWriter`].
4044
pub struct ManifestWriterBuilder {
4145
output: OutputFile,

crates/iceberg/src/spec/snapshot.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ use crate::{Error, ErrorKind};
3333

3434
/// The ref name of the main branch of the table.
3535
pub const MAIN_BRANCH: &str = "main";
36-
/// Placeholder for snapshot ID. The field with this value must be replaced with the actual snapshot ID before it is committed.
37-
pub const UNASSIGNED_SNAPSHOT_ID: i64 = -1;
3836

3937
/// Reference to [`Snapshot`].
4038
pub type SnapshotRef = Arc<Snapshot>;

crates/iceberg/src/spec/table_metadata.rs

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ use crate::{Error, ErrorKind};
4747
static MAIN_BRANCH: &str = "main";
4848
pub(crate) static ONE_MINUTE_MS: i64 = 60_000;
4949

50+
/// Sentinel value used by the Java implementation and older metadata files
51+
/// to represent a missing/empty current snapshot ID. During deserialization,
52+
/// this value is normalized to `None`.
5053
pub(crate) static EMPTY_SNAPSHOT_ID: i64 = -1;
5154
pub(crate) static INITIAL_SEQUENCE_NUMBER: i64 = 0;
5255

@@ -765,8 +768,8 @@ pub(super) mod _serde {
765768
use uuid::Uuid;
766769

767770
use super::{
768-
DEFAULT_PARTITION_SPEC_ID, FormatVersion, MAIN_BRANCH, MetadataLog, SnapshotLog,
769-
TableMetadata,
771+
DEFAULT_PARTITION_SPEC_ID, EMPTY_SNAPSHOT_ID, FormatVersion, MAIN_BRANCH, MetadataLog,
772+
SnapshotLog, TableMetadata,
770773
};
771774
use crate::spec::schema::_serde::{SchemaV1, SchemaV2};
772775
use crate::spec::snapshot::_serde::{SnapshotV1, SnapshotV2, SnapshotV3};
@@ -950,7 +953,7 @@ pub(super) mod _serde {
950953
encryption_keys,
951954
snapshots,
952955
} = value;
953-
let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id {
956+
let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) {
954957
None
955958
} else {
956959
value.current_snapshot_id
@@ -1063,7 +1066,7 @@ pub(super) mod _serde {
10631066
fn try_from(value: TableMetadataV2) -> Result<Self, self::Error> {
10641067
let snapshots = value.snapshots;
10651068
let value = value.shared;
1066-
let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id {
1069+
let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) {
10671070
None
10681071
} else {
10691072
value.current_snapshot_id
@@ -1170,7 +1173,7 @@ pub(super) mod _serde {
11701173
impl TryFrom<TableMetadataV1> for TableMetadata {
11711174
type Error = Error;
11721175
fn try_from(value: TableMetadataV1) -> Result<Self, Error> {
1173-
let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id {
1176+
let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) {
11741177
None
11751178
} else {
11761179
value.current_snapshot_id
@@ -3300,6 +3303,18 @@ mod tests {
33003303
check_table_metadata_serde(&metadata, expected);
33013304
}
33023305

3306+
#[test]
3307+
fn test_empty_snapshot_id_is_normalized_to_none() {
3308+
let metadata =
3309+
fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap();
3310+
let deserialized: TableMetadata = serde_json::from_str(&metadata).unwrap();
3311+
assert_eq!(
3312+
deserialized.current_snapshot_id(),
3313+
None,
3314+
"current_snapshot_id of -1 should be deserialized as None"
3315+
);
3316+
}
3317+
33033318
#[test]
33043319
fn test_table_metadata_v1_compat() {
33053320
let metadata =

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ impl TableMetadataBuilder {
570570

571571
/// Remove a reference
572572
///
573-
/// If `ref_name='main'` the current snapshot id is set to -1.
573+
/// If `ref_name='main'` the current snapshot id is set to `None`.
574574
pub fn remove_ref(mut self, ref_name: &str) -> Self {
575575
if ref_name == MAIN_BRANCH {
576576
self.metadata.current_snapshot_id = None;

0 commit comments

Comments
 (0)