1515# specific language governing permissions and limitations
1616# under the License.
1717import inspect
18+ from copy import copy
1819from datetime import date , datetime , time
1920from enum import Enum
2021from tempfile import TemporaryDirectory
@@ -111,7 +112,7 @@ def todict(obj: Any) -> Any:
111112 return obj .value
112113 elif hasattr (obj , "__iter__" ) and not isinstance (obj , str ) and not isinstance (obj , bytes ):
113114 return [todict (v ) for v in obj ]
114- elif hasattr (obj , "__dict__" ):
115+ elif isinstance (obj , Record ):
115116 return {key : todict (value ) for key , value in inspect .getmembers (obj ) if not callable (value ) and not key .startswith ("_" )}
116117 else :
117118 return obj
@@ -258,8 +259,6 @@ def test_write_manifest_entry_with_fastavro_read_with_iceberg(format_version: in
258259 sort_order_id = 4 ,
259260 spec_id = 3 ,
260261 )
261- if format_version == 1 :
262- data_file .block_size_in_bytes = DEFAULT_BLOCK_SIZE
263262
264263 entry = ManifestEntry (
265264 status = ManifestEntryStatus .ADDED ,
@@ -277,16 +276,44 @@ def test_write_manifest_entry_with_fastavro_read_with_iceberg(format_version: in
277276 with open (tmp_avro_file , "wb" ) as out :
278277 writer (out , schema , [todict (entry )])
279278
279+ # Read as V2
280280 with avro .AvroFile [ManifestEntry ](
281- PyArrowFileIO ().new_input (tmp_avro_file ),
282- MANIFEST_ENTRY_SCHEMAS [format_version ],
283- {- 1 : ManifestEntry , 2 : DataFile },
281+ input_file = PyArrowFileIO ().new_input (tmp_avro_file ),
282+ read_schema = MANIFEST_ENTRY_SCHEMAS [2 ],
283+ read_types = {- 1 : ManifestEntry , 2 : DataFile },
284284 ) as avro_reader :
285285 it = iter (avro_reader )
286286 avro_entry = next (it )
287287
288288 assert entry == avro_entry
289289
290+ # Read as the original version
291+ with avro .AvroFile [ManifestEntry ](
292+ input_file = PyArrowFileIO ().new_input (tmp_avro_file ),
293+ read_schema = MANIFEST_ENTRY_SCHEMAS [format_version ],
294+ read_types = {- 1 : ManifestEntry , 2 : DataFile },
295+ ) as avro_reader :
296+ it = iter (avro_reader )
297+ avro_entry = next (it )
298+
299+ if format_version == 1 :
300+ v1_datafile = copy (data_file )
301+ # Not part of V1
302+ v1_datafile .equality_ids = None
303+
304+ assert avro_entry == ManifestEntry (
305+ status = ManifestEntryStatus .ADDED ,
306+ snapshot_id = 8638475580105682862 ,
307+ # Not part of v1
308+ data_sequence_number = None ,
309+ file_sequence_number = None ,
310+ data_file = v1_datafile ,
311+ )
312+ elif format_version == 2 :
313+ assert entry == avro_entry
314+ else :
315+ raise ValueError (f"Unsupported version: { format_version } " )
316+
290317
291318@pytest .mark .parametrize ("is_required" , [True , False ])
292319def test_all_primitive_types (is_required : bool ) -> None :
0 commit comments