1414# KIND, either express or implied. See the License for the
1515# specific language governing permissions and limitations
1616# under the License.
17- """StandardKeyMetadata Avro serialization .
17+ """StandardKeyMetadata Avro codec .
1818
19- Wire format: ``0x01 version byte || Avro-encoded fields``
20-
21- Avro schema:
22- - encryption_key: bytes (required)
23- - aad_prefix: union[null, bytes] (optional)
24- - file_length: union[null, long] (optional)
19+ Wire: ``0x01 version`` || encryption_key (bytes) || aad_prefix (union[null,bytes])
20+ || file_length (union[null,long]).
2521"""
2622
2723from __future__ import annotations
3228
3329
3430def _read_avro_long (data : bytes , offset : int ) -> tuple [int , int ]:
35- """Read a zigzag-encoded Avro long from data at offset. Returns (value, new_offset)."""
3631 result = 0
3732 shift = 0
3833 while True :
@@ -44,12 +39,10 @@ def _read_avro_long(data: bytes, offset: int) -> tuple[int, int]:
4439 if (b & 0x80 ) == 0 :
4540 break
4641 shift += 7
47- # Zigzag decode
4842 return (result >> 1 ) ^ - (result & 1 ), offset
4943
5044
5145def _read_avro_bytes (data : bytes , offset : int ) -> tuple [bytes , int ]:
52- """Read Avro bytes (length-prefixed). Returns (bytes_value, new_offset)."""
5346 length , offset = _read_avro_long (data , offset )
5447 if length < 0 :
5548 raise ValueError (f"Negative Avro bytes length: { length } " )
@@ -61,31 +54,20 @@ def _read_avro_bytes(data: bytes, offset: int) -> tuple[bytes, int]:
6154
6255@dataclass (frozen = True )
6356class StandardKeyMetadata :
64- """Standard key metadata for Iceberg table encryption.
65-
66- Contains the plaintext encryption key (DEK), AAD prefix, and optional file length.
67- """
68-
6957 encryption_key : bytes
7058 aad_prefix : bytes = b""
7159 file_length : int | None = None
7260
7361 @staticmethod
7462 def deserialize (data : bytes ) -> StandardKeyMetadata :
75- """Deserialize from wire format: ``0x01 version || Avro-encoded fields``."""
7663 if not data :
7764 raise ValueError ("Empty key metadata buffer" )
78-
79- version = data [0 ]
80- if version != V1 :
81- raise ValueError (f"Unsupported key metadata version: { version } " )
82-
65+ if data [0 ] != V1 :
66+ raise ValueError (f"Unsupported key metadata version: { data [0 ]} " )
8367 offset = 1
8468
85- # Read encryption_key (required bytes)
8669 encryption_key , offset = _read_avro_bytes (data , offset )
8770
88- # Read aad_prefix (optional: union[null, bytes])
8971 union_index , offset = _read_avro_long (data , offset )
9072 if union_index == 0 :
9173 aad_prefix = b""
@@ -94,50 +76,30 @@ def deserialize(data: bytes) -> StandardKeyMetadata:
9476 else :
9577 raise ValueError (f"Invalid union index for aad_prefix: { union_index } " )
9678
97- # Read file_length (optional: union[null, long])
98- file_length = None
79+ file_length : int | None = None
9980 if offset < len (data ):
10081 union_index , offset = _read_avro_long (data , offset )
101- if union_index == 0 :
102- file_length = None
103- elif union_index == 1 :
82+ if union_index == 1 :
10483 file_length , offset = _read_avro_long (data , offset )
105- else :
84+ elif union_index != 0 :
10685 raise ValueError (f"Invalid union index for file_length: { union_index } " )
10786
108- return StandardKeyMetadata (
109- encryption_key = encryption_key ,
110- aad_prefix = aad_prefix ,
111- file_length = file_length ,
112- )
87+ return StandardKeyMetadata (encryption_key = encryption_key , aad_prefix = aad_prefix , file_length = file_length )
11388
def serialize(self) -> bytes:
    """Encode this key metadata into its versioned binary wire form.

    Layout, in order: the ``V1`` version byte, the length-prefixed
    ``encryption_key``, then ``aad_prefix`` and ``file_length``, each
    written as a union: tag 0 for null, or tag 1 followed by the value.

    Returns:
        The concatenated wire bytes.
    """
    null_tag = 0
    value_tag = 1

    chunks = [bytes([V1]), _encode_avro_bytes(self.encryption_key)]

    # An empty aad_prefix is falsy, so it is written as the null branch.
    if not self.aad_prefix:
        chunks.append(_encode_avro_long(null_tag))
    else:
        chunks.append(_encode_avro_long(value_tag))
        chunks.append(_encode_avro_bytes(self.aad_prefix))

    # file_length is tested against None so that a length of 0 is still
    # written as a real value.
    if self.file_length is None:
        chunks.append(_encode_avro_long(null_tag))
    else:
        chunks.append(_encode_avro_long(value_tag))
        chunks.append(_encode_avro_long(self.file_length))

    return b"".join(chunks)
136100
137101
138102def _encode_avro_long (value : int ) -> bytes :
139- """Encode a long as zigzag-encoded Avro varint."""
140- # Zigzag encode
141103 n = (value << 1 ) ^ (value >> 63 )
142104 result = bytearray ()
143105 while n & ~ 0x7F :
@@ -148,5 +110,4 @@ def _encode_avro_long(value: int) -> bytes:
148110
149111
def _encode_avro_bytes(data: bytes) -> bytes:
    """Return ``data`` preceded by its length as encoded by ``_encode_avro_long``."""
    length_prefix = _encode_avro_long(len(data))
    return length_prefix + data
0 commit comments