2121use crate :: cast:: AsArray ;
2222use crate :: { Array , ArrayRef , StructArray , new_empty_array} ;
2323use arrow_schema:: { ArrowError , DataType , Field , FieldRef , Schema , SchemaBuilder , SchemaRef } ;
24+ use std:: collections:: HashMap ;
2425use std:: ops:: Index ;
2526use std:: sync:: Arc ;
2627
@@ -229,6 +230,12 @@ pub struct RecordBatch {
229230 ///
230231 /// This is stored separately from the columns to handle the case of no columns
231232 row_count : usize ,
233+
234+ /// Per-batch custom metadata
235+ ///
236+ /// This corresponds to the `custom_metadata` field on the IPC `Message`
237+ /// flatbuffer, allowing per-batch metadata separate from schema-level metadata.
238+ custom_metadata : HashMap < String , String > ,
232239}
233240
234241impl RecordBatch {
@@ -289,6 +296,7 @@ impl RecordBatch {
289296 schema,
290297 columns,
291298 row_count,
299+ custom_metadata : HashMap :: new ( ) ,
292300 }
293301 }
294302
@@ -316,6 +324,7 @@ impl RecordBatch {
316324 schema,
317325 columns,
318326 row_count : 0 ,
327+ custom_metadata : HashMap :: new ( ) ,
319328 }
320329 }
321330
@@ -390,14 +399,30 @@ impl RecordBatch {
390399 schema,
391400 columns,
392401 row_count,
402+ custom_metadata : HashMap :: new ( ) ,
393403 } )
394404 }
395405
396406 /// Return the schema, columns and row count of this [`RecordBatch`]
407+ ///
408+ /// Note: this discards any [`Self::custom_metadata`]. Use
409+ /// [`Self::into_parts_with_custom_metadata`] to also retrieve it.
397410 pub fn into_parts ( self ) -> ( SchemaRef , Vec < ArrayRef > , usize ) {
398411 ( self . schema , self . columns , self . row_count )
399412 }
400413
414+ /// Return the schema, columns, row count and custom metadata of this [`RecordBatch`]
415+ pub fn into_parts_with_custom_metadata (
416+ self ,
417+ ) -> ( SchemaRef , Vec < ArrayRef > , usize , HashMap < String , String > ) {
418+ (
419+ self . schema ,
420+ self . columns ,
421+ self . row_count ,
422+ self . custom_metadata ,
423+ )
424+ }
425+
401426 /// Override the schema of this [`RecordBatch`]
402427 ///
403428 /// Returns an error if `schema` is not a superset of the current schema
@@ -416,6 +441,7 @@ impl RecordBatch {
416441 schema,
417442 columns : self . columns ,
418443 row_count : self . row_count ,
444+ custom_metadata : self . custom_metadata ,
419445 } )
420446 }
421447
@@ -451,6 +477,25 @@ impl RecordBatch {
451477 & mut schema. metadata
452478 }
453479
480+ /// Returns a reference to the per-batch custom metadata.
481+ ///
482+ /// This metadata corresponds to the `custom_metadata` field on the IPC
483+ /// `Message` flatbuffer, separate from schema-level metadata.
484+ pub fn custom_metadata ( & self ) -> & HashMap < String , String > {
485+ & self . custom_metadata
486+ }
487+
488+ /// Returns a mutable reference to the per-batch custom metadata.
489+ pub fn custom_metadata_mut ( & mut self ) -> & mut HashMap < String , String > {
490+ & mut self . custom_metadata
491+ }
492+
493+ /// Sets the per-batch custom metadata, returning `self`.
494+ pub fn with_custom_metadata ( mut self , metadata : HashMap < String , String > ) -> Self {
495+ self . custom_metadata = metadata;
496+ self
497+ }
498+
454499 /// Projects the schema onto the specified columns
455500 pub fn project ( & self , indices : & [ usize ] ) -> Result < RecordBatch , ArrowError > {
456501 let projected_schema = self . schema . project ( indices) ?;
@@ -475,7 +520,8 @@ impl RecordBatch {
475520 SchemaRef :: new ( projected_schema) ,
476521 batch_fields,
477522 self . row_count ,
478- ) )
523+ )
524+ . with_custom_metadata ( self . custom_metadata . clone ( ) ) )
479525 }
480526 }
481527
@@ -570,7 +616,9 @@ impl RecordBatch {
570616 }
571617 }
572618 }
619+ let custom_metadata = self . custom_metadata . clone ( ) ;
573620 RecordBatch :: try_new ( Arc :: new ( Schema :: new ( fields) ) , columns)
621+ . map ( |b| b. with_custom_metadata ( custom_metadata) )
574622 }
575623
576624 /// Returns the number of columns in the record batch.
@@ -691,6 +739,7 @@ impl RecordBatch {
691739 schema : self . schema . clone ( ) ,
692740 columns,
693741 row_count : length,
742+ custom_metadata : self . custom_metadata . clone ( ) ,
694743 }
695744 }
696745
@@ -864,6 +913,7 @@ impl From<StructArray> for RecordBatch {
864913 schema : Arc :: new ( Schema :: new ( fields) ) ,
865914 row_count,
866915 columns,
916+ custom_metadata : HashMap :: new ( ) ,
867917 }
868918 }
869919}
@@ -1792,4 +1842,81 @@ mod tests {
17921842 assert ! ( col. is_null( 1 ) ) ;
17931843 assert ! ( col. is_valid( 2 ) ) ;
17941844 }
1845+
#[test]
fn test_with_custom_metadata() {
    // A freshly built batch starts out with no per-batch metadata.
    let plain = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
    assert!(plain.custom_metadata().is_empty());

    // Attaching metadata makes it observable through the getter.
    let expected = HashMap::from([("key".to_string(), "value".to_string())]);
    let tagged = plain.with_custom_metadata(expected.clone());
    assert_eq!(tagged.custom_metadata(), &expected);
}
1856+
#[test]
fn test_custom_metadata_mut() {
    let mut batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();

    // Mutate through the mutable accessor...
    let entries = batch.custom_metadata_mut();
    entries.insert("key".to_string(), "value".to_string());

    // ...and confirm the change is visible through the shared accessor.
    let stored = batch.custom_metadata().get("key");
    assert_eq!(stored, Some(&"value".to_string()));
}
1868+
#[test]
fn test_slice_preserves_custom_metadata() {
    let expected = HashMap::from([("k".to_string(), "v".to_string())]);
    let tagged = record_batch!(("a", Int32, [1, 2, 3]))
        .unwrap()
        .with_custom_metadata(expected.clone());

    // Slicing must carry the per-batch metadata over to the new batch.
    let head = tagged.slice(0, 2);
    assert_eq!(head.custom_metadata(), &expected);
}
1879+
#[test]
fn test_project_preserves_custom_metadata() {
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let strings: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
    let expected = HashMap::from([("k".to_string(), "v".to_string())]);

    let tagged = RecordBatch::try_from_iter(vec![("a", ints), ("b", strings)])
        .unwrap()
        .with_custom_metadata(expected.clone());

    // Projecting down to the first column must keep the per-batch metadata.
    let first_only = tagged.project(&[0]).unwrap();
    assert_eq!(first_only.custom_metadata(), &expected);
}
1893+
#[test]
fn test_into_parts_with_custom_metadata() {
    let expected = HashMap::from([("k".to_string(), "v".to_string())]);
    let tagged = record_batch!(("a", Int32, [1, 2, 3]))
        .unwrap()
        .with_custom_metadata(expected.clone());

    // Every component, including the metadata, must survive decomposition.
    let (schema, columns, rows, recovered) = tagged.into_parts_with_custom_metadata();
    assert_eq!(schema.fields().len(), 1);
    assert_eq!(columns.len(), 1);
    assert_eq!(rows, 3);
    assert_eq!(recovered, expected);
}
1907+
#[test]
fn test_custom_metadata_equality() {
    let left = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
    let right = record_batch!(("a", Int32, [1, 2, 3])).unwrap();

    // Identical data and empty metadata on both sides compare equal.
    assert_eq!(left, right);

    // Once only one side carries metadata, the batches must differ.
    let tag = HashMap::from([("k".to_string(), "v".to_string())]);
    let left = left.with_custom_metadata(tag);
    assert_ne!(left, right);
}
17951922}
0 commit comments