2121use crate :: cast:: AsArray ;
2222use crate :: { Array , ArrayRef , StructArray , new_empty_array} ;
2323use arrow_schema:: { ArrowError , DataType , Field , FieldRef , Schema , SchemaBuilder , SchemaRef } ;
24+ use std:: collections:: HashMap ;
2425use std:: ops:: Index ;
2526use std:: sync:: Arc ;
2627
@@ -207,6 +208,12 @@ pub struct RecordBatch {
207208 ///
208209 /// This is stored separately from the columns to handle the case of no columns
209210 row_count : usize ,
211+
212+ /// Per-batch custom metadata
213+ ///
214+ /// This corresponds to the `custom_metadata` field on the IPC `Message`
215+ /// flatbuffer, allowing per-batch metadata separate from schema-level metadata.
216+ custom_metadata : HashMap < String , String > ,
210217}
211218
212219impl RecordBatch {
@@ -267,6 +274,7 @@ impl RecordBatch {
267274 schema,
268275 columns,
269276 row_count,
277+ custom_metadata : HashMap :: new ( ) ,
270278 }
271279 }
272280
@@ -294,6 +302,7 @@ impl RecordBatch {
294302 schema,
295303 columns,
296304 row_count : 0 ,
305+ custom_metadata : HashMap :: new ( ) ,
297306 }
298307 }
299308
@@ -368,14 +377,30 @@ impl RecordBatch {
368377 schema,
369378 columns,
370379 row_count,
380+ custom_metadata : HashMap :: new ( ) ,
371381 } )
372382 }
373383
374384 /// Return the schema, columns and row count of this [`RecordBatch`]
385+ ///
386+ /// Note: this discards any [`Self::custom_metadata`]. Use
387+ /// [`Self::into_parts_with_custom_metadata`] to also retrieve it.
375388 pub fn into_parts ( self ) -> ( SchemaRef , Vec < ArrayRef > , usize ) {
376389 ( self . schema , self . columns , self . row_count )
377390 }
378391
392+ /// Return the schema, columns, row count and custom metadata of this [`RecordBatch`]
393+ pub fn into_parts_with_custom_metadata (
394+ self ,
395+ ) -> ( SchemaRef , Vec < ArrayRef > , usize , HashMap < String , String > ) {
396+ (
397+ self . schema ,
398+ self . columns ,
399+ self . row_count ,
400+ self . custom_metadata ,
401+ )
402+ }
403+
379404 /// Override the schema of this [`RecordBatch`]
380405 ///
381406 /// Returns an error if `schema` is not a superset of the current schema
@@ -394,6 +419,7 @@ impl RecordBatch {
394419 schema,
395420 columns : self . columns ,
396421 row_count : self . row_count ,
422+ custom_metadata : self . custom_metadata ,
397423 } )
398424 }
399425
@@ -429,6 +455,25 @@ impl RecordBatch {
429455 & mut schema. metadata
430456 }
431457
458+ /// Returns a reference to the per-batch custom metadata.
459+ ///
460+ /// This metadata corresponds to the `custom_metadata` field on the IPC
461+ /// `Message` flatbuffer, separate from schema-level metadata.
462+ pub fn custom_metadata ( & self ) -> & HashMap < String , String > {
463+ & self . custom_metadata
464+ }
465+
466+ /// Returns a mutable reference to the per-batch custom metadata.
467+ pub fn custom_metadata_mut ( & mut self ) -> & mut HashMap < String , String > {
468+ & mut self . custom_metadata
469+ }
470+
471+ /// Sets the per-batch custom metadata, returning `self`.
472+ pub fn with_custom_metadata ( mut self , metadata : HashMap < String , String > ) -> Self {
473+ self . custom_metadata = metadata;
474+ self
475+ }
476+
432477 /// Projects the schema onto the specified columns
433478 pub fn project ( & self , indices : & [ usize ] ) -> Result < RecordBatch , ArrowError > {
434479 let projected_schema = self . schema . project ( indices) ?;
@@ -453,7 +498,8 @@ impl RecordBatch {
453498 SchemaRef :: new ( projected_schema) ,
454499 batch_fields,
455500 self . row_count ,
456- ) )
501+ )
502+ . with_custom_metadata ( self . custom_metadata . clone ( ) ) )
457503 }
458504 }
459505
@@ -556,7 +602,9 @@ impl RecordBatch {
556602 }
557603 }
558604 }
605+ let custom_metadata = self . custom_metadata . clone ( ) ;
559606 RecordBatch :: try_new ( Arc :: new ( Schema :: new ( fields) ) , columns)
607+ . map ( |b| b. with_custom_metadata ( custom_metadata) )
560608 }
561609
562610 /// Returns the number of columns in the record batch.
@@ -677,6 +725,7 @@ impl RecordBatch {
677725 schema : self . schema . clone ( ) ,
678726 columns,
679727 row_count : length,
728+ custom_metadata : self . custom_metadata . clone ( ) ,
680729 }
681730 }
682731
@@ -836,6 +885,7 @@ impl From<StructArray> for RecordBatch {
836885 schema : Arc :: new ( Schema :: new ( fields) ) ,
837886 row_count,
838887 columns,
888+ custom_metadata : HashMap :: new ( ) ,
839889 }
840890 }
841891}
@@ -1706,4 +1756,81 @@ mod tests {
17061756 "bar"
17071757 ) ;
17081758 }
1759+
#[test]
fn test_with_custom_metadata() {
    // A freshly built batch carries no per-batch metadata.
    let plain = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
    assert!(plain.custom_metadata().is_empty());

    // Attaching metadata makes exactly that map visible through the getter.
    let expected = HashMap::from([("key".to_string(), "value".to_string())]);
    let tagged = plain.with_custom_metadata(expected.clone());
    assert_eq!(tagged.custom_metadata(), &expected);
}
1770+
#[test]
fn test_custom_metadata_mut() {
    let mut batch = record_batch!(("a", Int32, [1, 2, 3])).unwrap();

    // Edit the map through the mutable accessor.
    let entries = batch.custom_metadata_mut();
    entries.insert("key".to_string(), "value".to_string());

    // The change must be observable through the shared accessor.
    let stored = batch.custom_metadata().get("key").map(String::as_str);
    assert_eq!(stored, Some("value"));
}
1782+
#[test]
fn test_slice_preserves_custom_metadata() {
    let expected = HashMap::from([("k".to_string(), "v".to_string())]);
    let tagged = record_batch!(("a", Int32, [1, 2, 3]))
        .unwrap()
        .with_custom_metadata(expected.clone());

    // Slicing keeps the per-batch metadata of the source batch.
    let head = tagged.slice(0, 2);
    assert_eq!(head.custom_metadata(), &expected);
}
1793+
#[test]
fn test_project_preserves_custom_metadata() {
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let strings: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
    let expected = HashMap::from([("k".to_string(), "v".to_string())]);

    let tagged = RecordBatch::try_from_iter(vec![("a", ints), ("b", strings)])
        .unwrap()
        .with_custom_metadata(expected.clone());

    // Projecting down to the first column keeps the per-batch metadata.
    let first_only = tagged.project(&[0]).unwrap();
    assert_eq!(first_only.custom_metadata(), &expected);
}
1807+
#[test]
fn test_into_parts_with_custom_metadata() {
    let expected = HashMap::from([("k".to_string(), "v".to_string())]);
    let tagged = record_batch!(("a", Int32, [1, 2, 3]))
        .unwrap()
        .with_custom_metadata(expected.clone());

    // All four components come back, metadata included.
    let (schema, columns, row_count, returned) = tagged.into_parts_with_custom_metadata();
    assert_eq!(schema.fields().len(), 1);
    assert_eq!(columns.len(), 1);
    assert_eq!(row_count, 3);
    assert_eq!(returned, expected);
}
1821+
#[test]
fn test_custom_metadata_equality() {
    let plain = record_batch!(("a", Int32, [1, 2, 3])).unwrap();
    let reference = record_batch!(("a", Int32, [1, 2, 3])).unwrap();

    // Identical data and empty metadata on both sides compare equal.
    assert_eq!(plain, reference);

    // Once one side carries metadata, the batches must differ.
    let tagged = plain.with_custom_metadata(HashMap::from([("k".to_string(), "v".to_string())]));
    assert_ne!(tagged, reference);
}
17091836}
0 commit comments