@@ -52,13 +52,15 @@ fn make_top_level_fields_nullable(batch: &RecordBatch) -> RecordBatch {
5252async fn single_commit_two_add_files ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
5353 let batch = generate_simple_batch ( ) ?;
5454 let storage = Arc :: new ( InMemory :: new ( ) ) ;
55+ let parquet_bytes = record_batch_to_bytes ( & batch) ;
56+ let file_size = parquet_bytes. len ( ) as u64 ;
5557 add_commit (
5658 storage. as_ref ( ) ,
5759 0 ,
5860 actions_to_string ( vec ! [
5961 TestAction :: Metadata ,
60- TestAction :: Add ( PARQUET_FILE1 . to_string( ) ) ,
61- TestAction :: Add ( PARQUET_FILE2 . to_string( ) ) ,
62+ TestAction :: AddWithSize ( PARQUET_FILE1 . to_string( ) , file_size ) ,
63+ TestAction :: AddWithSize ( PARQUET_FILE2 . to_string( ) , file_size ) ,
6264 ] ) ,
6365 )
6466 . await ?;
@@ -99,19 +101,24 @@ async fn single_commit_two_add_files() -> Result<(), Box<dyn std::error::Error>>
99101async fn two_commits ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
100102 let batch = generate_simple_batch ( ) ?;
101103 let storage = Arc :: new ( InMemory :: new ( ) ) ;
104+ let parquet_bytes = record_batch_to_bytes ( & batch) ;
105+ let file_size = parquet_bytes. len ( ) as u64 ;
102106 add_commit (
103107 storage. as_ref ( ) ,
104108 0 ,
105109 actions_to_string ( vec ! [
106110 TestAction :: Metadata ,
107- TestAction :: Add ( PARQUET_FILE1 . to_string( ) ) ,
111+ TestAction :: AddWithSize ( PARQUET_FILE1 . to_string( ) , file_size ) ,
108112 ] ) ,
109113 )
110114 . await ?;
111115 add_commit (
112116 storage. as_ref ( ) ,
113117 1 ,
114- actions_to_string ( vec ! [ TestAction :: Add ( PARQUET_FILE2 . to_string( ) ) ] ) ,
118+ actions_to_string ( vec ! [ TestAction :: AddWithSize (
119+ PARQUET_FILE2 . to_string( ) ,
120+ file_size,
121+ ) ] ) ,
115122 )
116123 . await ?;
117124 storage
@@ -152,25 +159,33 @@ async fn two_commits() -> Result<(), Box<dyn std::error::Error>> {
152159async fn remove_action ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
153160 let batch = generate_simple_batch ( ) ?;
154161 let storage = Arc :: new ( InMemory :: new ( ) ) ;
162+ let parquet_bytes = record_batch_to_bytes ( & batch) ;
163+ let file_size = parquet_bytes. len ( ) as u64 ;
155164 add_commit (
156165 storage. as_ref ( ) ,
157166 0 ,
158167 actions_to_string ( vec ! [
159168 TestAction :: Metadata ,
160- TestAction :: Add ( PARQUET_FILE1 . to_string( ) ) ,
169+ TestAction :: AddWithSize ( PARQUET_FILE1 . to_string( ) , file_size ) ,
161170 ] ) ,
162171 )
163172 . await ?;
164173 add_commit (
165174 storage. as_ref ( ) ,
166175 1 ,
167- actions_to_string ( vec ! [ TestAction :: Add ( PARQUET_FILE2 . to_string( ) ) ] ) ,
176+ actions_to_string ( vec ! [ TestAction :: AddWithSize (
177+ PARQUET_FILE2 . to_string( ) ,
178+ file_size,
179+ ) ] ) ,
168180 )
169181 . await ?;
170182 add_commit (
171183 storage. as_ref ( ) ,
172184 2 ,
173- actions_to_string ( vec ! [ TestAction :: Remove ( PARQUET_FILE2 . to_string( ) ) ] ) ,
185+ actions_to_string ( vec ! [ TestAction :: RemoveWithSize (
186+ PARQUET_FILE2 . to_string( ) ,
187+ file_size,
188+ ) ] ) ,
174189 )
175190 . await ?;
176191 storage
@@ -209,6 +224,8 @@ async fn stats() -> Result<(), Box<dyn std::error::Error>> {
209224 TestAction :: Add ( path) => format ! ( r#"{{"{action}":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 5}},\"maxValues\":{{\"id\":7}}}}"}}}}"# , action = "add" , path = path) ,
210225 TestAction :: Remove ( path) => format ! ( r#"{{"{action}":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"# , action = "remove" , path = path) ,
211226 TestAction :: Metadata => METADATA . into ( ) ,
227+ TestAction :: AddWithSize ( path, size) => format ! ( r#"{{"{action}":{{"path":"{path}","partitionValues":{{}},"size":{size},"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 5}},\"maxValues\":{{\"id\":7}}}}"}}}}"# , action = "add" , path = path) ,
228+ TestAction :: RemoveWithSize ( path, size) => format ! ( r#"{{"{action}":{{"path":"{path}","partitionValues":{{}},"size":{size},"modificationTime":1587968586000,"dataChange":true}}}}"# , action = "remove" , path = path) ,
212229 } )
213230 . fold ( String :: new ( ) , |a, b| a + & b + "\n " )
214231 }
@@ -218,22 +235,27 @@ async fn stats() -> Result<(), Box<dyn std::error::Error>> {
218235 ( "id" , vec![ 5 , 7 ] . into_array( ) ) ,
219236 ( "val" , vec![ "e" , "g" ] . into_array( ) ) ,
220237 ] ) ?) ;
238+ let file_size1 = record_batch_to_bytes ( & batch1) . len ( ) as u64 ;
239+ let file_size2 = record_batch_to_bytes ( & batch2) . len ( ) as u64 ;
221240 let storage = Arc :: new ( InMemory :: new ( ) ) ;
222241 // valid commit with min/max (0, 2)
223242 add_commit (
224243 storage. as_ref ( ) ,
225244 0 ,
226245 actions_to_string ( vec ! [
227246 TestAction :: Metadata ,
228- TestAction :: Add ( PARQUET_FILE1 . to_string( ) ) ,
247+ TestAction :: AddWithSize ( PARQUET_FILE1 . to_string( ) , file_size1 ) ,
229248 ] ) ,
230249 )
231250 . await ?;
232251 // storage.add_commit(1, &format!("{}\n", r#"{{"add":{{"path":"doesnotexist","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 0}},\"maxValues\":{{\"id\":2}}}}"}}}}"#));
233252 add_commit (
234253 storage. as_ref ( ) ,
235254 1 ,
236- generate_commit2 ( vec ! [ TestAction :: Add ( PARQUET_FILE2 . to_string( ) ) ] ) ,
255+ generate_commit2 ( vec ! [ TestAction :: AddWithSize (
256+ PARQUET_FILE2 . to_string( ) ,
257+ file_size2,
258+ ) ] ) ,
237259 )
238260 . await ?;
239261
@@ -1335,15 +1357,18 @@ async fn test_row_index_metadata_column() -> Result<(), Box<dyn std::error::Erro
13351357 ( "value" , vec![ "p" , "q" , "r" , "s" ] . into_array( ) ) ,
13361358 ] ) ?;
13371359
1360+ let file_size1 = record_batch_to_bytes ( & batch1) . len ( ) as u64 ;
1361+ let file_size2 = record_batch_to_bytes ( & batch2) . len ( ) as u64 ;
1362+ let file_size3 = record_batch_to_bytes ( & batch3) . len ( ) as u64 ;
13381363 let storage = Arc :: new ( InMemory :: new ( ) ) ;
13391364 add_commit (
13401365 storage. as_ref ( ) ,
13411366 0 ,
13421367 actions_to_string ( vec ! [
13431368 TestAction :: Metadata ,
1344- TestAction :: Add ( PARQUET_FILE1 . to_string( ) ) ,
1345- TestAction :: Add ( PARQUET_FILE2 . to_string( ) ) ,
1346- TestAction :: Add ( PARQUET_FILE3 . to_string( ) ) ,
1369+ TestAction :: AddWithSize ( PARQUET_FILE1 . to_string( ) , file_size1 ) ,
1370+ TestAction :: AddWithSize ( PARQUET_FILE2 . to_string( ) , file_size2 ) ,
1371+ TestAction :: AddWithSize ( PARQUET_FILE3 . to_string( ) , file_size3 ) ,
13471372 ] ) ,
13481373 )
13491374 . await ?;
@@ -1430,14 +1455,16 @@ async fn test_file_path_metadata_column() -> Result<(), Box<dyn std::error::Erro
14301455 ( "value" , vec![ "x" , "y" ] . into_array( ) ) ,
14311456 ] ) ?;
14321457
1458+ let file_size1 = record_batch_to_bytes ( & batch1) . len ( ) as u64 ;
1459+ let file_size2 = record_batch_to_bytes ( & batch2) . len ( ) as u64 ;
14331460 let storage = Arc :: new ( InMemory :: new ( ) ) ;
14341461 add_commit (
14351462 storage. as_ref ( ) ,
14361463 0 ,
14371464 actions_to_string ( vec ! [
14381465 TestAction :: Metadata ,
1439- TestAction :: Add ( PARQUET_FILE1 . to_string( ) ) ,
1440- TestAction :: Add ( PARQUET_FILE2 . to_string( ) ) ,
1466+ TestAction :: AddWithSize ( PARQUET_FILE1 . to_string( ) , file_size1 ) ,
1467+ TestAction :: AddWithSize ( PARQUET_FILE2 . to_string( ) , file_size2 ) ,
14411468 ] ) ,
14421469 )
14431470 . await ?;
0 commit comments