@@ -47,6 +47,8 @@ class DataFileMeta:
4747 file_source : Optional [str ] = None
4848 value_stats_cols : Optional [List [str ]] = None
4949 external_path : Optional [str ] = None
50+ first_row_id : Optional [int ] = None
51+ write_cols : Optional [List [str ]] = None
5052
5153 # not a schema field, just for internal usage
5254 file_path : str = None
@@ -59,6 +61,58 @@ def set_file_path(self, table_path: Path, partition: GenericRow, bucket: int):
5961 path_builder = path_builder / ("bucket-" + str (bucket )) / self .file_name
6062 self .file_path = str (path_builder )
6163
def assign_first_row_id(self, first_row_id: int) -> 'DataFileMeta':
    """Build a new DataFileMeta that differs from this one only in first_row_id.

    This instance is only read, never modified. Attribute values are passed
    through by reference, so the returned object shares them with this one.

    Args:
        first_row_id: value stored as ``first_row_id`` on the returned object.

    Returns:
        A freshly constructed DataFileMeta.
    """
    # Everything except first_row_id is carried over unchanged.
    carried_over = dict(
        file_name=self.file_name,
        file_size=self.file_size,
        row_count=self.row_count,
        min_key=self.min_key,
        max_key=self.max_key,
        key_stats=self.key_stats,
        value_stats=self.value_stats,
        min_sequence_number=self.min_sequence_number,
        max_sequence_number=self.max_sequence_number,
        schema_id=self.schema_id,
        level=self.level,
        extra_files=self.extra_files,
        creation_time=self.creation_time,
        delete_row_count=self.delete_row_count,
        embedded_index=self.embedded_index,
        file_source=self.file_source,
        value_stats_cols=self.value_stats_cols,
        external_path=self.external_path,
        write_cols=self.write_cols,
        file_path=self.file_path,
    )
    return DataFileMeta(first_row_id=first_row_id, **carried_over)
89+
def assign_sequence_number(self, min_sequence_number: int, max_sequence_number: int) -> 'DataFileMeta':
    """Build a new DataFileMeta with the sequence-number bounds replaced.

    This instance is only read, never modified. All attributes other than
    the two sequence numbers are passed through by reference.

    Args:
        min_sequence_number: value stored as ``min_sequence_number`` on the result.
        max_sequence_number: value stored as ``max_sequence_number`` on the result.

    Returns:
        A freshly constructed DataFileMeta.
    """
    # Names of the attributes that are forwarded as-is to the new object.
    forwarded_names = (
        "file_name",
        "file_size",
        "row_count",
        "min_key",
        "max_key",
        "key_stats",
        "value_stats",
        "schema_id",
        "level",
        "extra_files",
        "creation_time",
        "delete_row_count",
        "embedded_index",
        "file_source",
        "value_stats_cols",
        "external_path",
        "first_row_id",
        "write_cols",
        "file_path",
    )
    forwarded = {name: getattr(self, name) for name in forwarded_names}
    return DataFileMeta(
        min_sequence_number=min_sequence_number,
        max_sequence_number=max_sequence_number,
        **forwarded,
    )
115+
62116
63117DATA_FILE_META_SCHEMA = {
64118 "type" : "record" ,
@@ -83,9 +137,14 @@ def set_file_path(self, table_path: Path, partition: GenericRow, bucket: int):
83137 "default" : None },
84138 {"name" : "_DELETE_ROW_COUNT" , "type" : ["null" , "long" ], "default" : None },
85139 {"name" : "_EMBEDDED_FILE_INDEX" , "type" : ["null" , "bytes" ], "default" : None },
86- {"name" : "_FILE_SOURCE" , "type" : ["null" , "int " ], "default" : None },
140+ {"name" : "_FILE_SOURCE" , "type" : ["null" , "string " ], "default" : None },
87141 {"name" : "_VALUE_STATS_COLS" ,
88142 "type" : ["null" , {"type" : "array" , "items" : "string" }],
89143 "default" : None },
144+ {"name" : "_EXTERNAL_PATH" , "type" : ["null" , "string" ], "default" : None },
145+ {"name" : "_FIRST_ROW_ID" , "type" : ["null" , "long" ], "default" : None },
146+ {"name" : "_WRITE_COLS" ,
147+ "type" : ["null" , {"type" : "array" , "items" : "string" }],
148+ "default" : None },
90149 ]
91150}
0 commit comments