1616// under the License.
1717
1818use crate :: execution:: operators:: { copy_array, copy_or_unpack_array, CopyMode } ;
19+ use crate :: parquet:: get_batch_context;
1920use crate :: {
2021 errors:: CometError ,
2122 execution:: {
@@ -79,6 +80,11 @@ pub struct ScanExec {
7980 baseline_metrics : BaselineMetrics ,
8081 /// Whether native code can assume ownership of batches that it receives
8182 arrow_ffi_safe : bool ,
83+ /// When true, data columns are read directly from the native reader's
84+ /// BatchContext instead of through JVM FFI (zero-copy).
85+ native_batch_passthrough : bool ,
86+ /// Number of data columns from native reader. Remaining are partition columns.
87+ num_data_columns : usize ,
8288}
8389
8490impl ScanExec {
@@ -88,6 +94,8 @@ impl ScanExec {
8894 input_source_description : & str ,
8995 data_types : Vec < DataType > ,
9096 arrow_ffi_safe : bool ,
97+ native_batch_passthrough : bool ,
98+ num_data_columns : usize ,
9199 ) -> Result < Self , CometError > {
92100 let metrics_set = ExecutionPlanMetricsSet :: default ( ) ;
93101 let baseline_metrics = BaselineMetrics :: new ( & metrics_set, 0 ) ;
@@ -115,6 +123,8 @@ impl ScanExec {
115123 baseline_metrics,
116124 schema,
117125 arrow_ffi_safe,
126+ native_batch_passthrough,
127+ num_data_columns,
118128 } )
119129 }
120130
@@ -143,12 +153,21 @@ impl ScanExec {
143153
144154 let mut current_batch = self . batch . try_lock ( ) . unwrap ( ) ;
145155 if current_batch. is_none ( ) {
146- let next_batch = ScanExec :: get_next (
147- self . exec_context_id ,
148- self . input_source . as_ref ( ) . unwrap ( ) . as_obj ( ) ,
149- self . data_types . len ( ) ,
150- self . arrow_ffi_safe ,
151- ) ?;
156+ let next_batch = if self . native_batch_passthrough {
157+ ScanExec :: get_next_passthrough (
158+ self . exec_context_id ,
159+ self . input_source . as_ref ( ) . unwrap ( ) . as_obj ( ) ,
160+ self . num_data_columns ,
161+ self . data_types . len ( ) ,
162+ ) ?
163+ } else {
164+ ScanExec :: get_next (
165+ self . exec_context_id ,
166+ self . input_source . as_ref ( ) . unwrap ( ) . as_obj ( ) ,
167+ self . data_types . len ( ) ,
168+ self . arrow_ffi_safe ,
169+ ) ?
170+ } ;
152171 * current_batch = Some ( next_batch) ;
153172 }
154173
@@ -259,6 +278,98 @@ impl ScanExec {
259278 Ok ( InputBatch :: new ( inputs, Some ( actual_num_rows) ) )
260279 }
261280
281+ /// Passthrough mode: data columns are read directly from native BatchContext
282+ /// (zero-copy Arc::clone). Only partition columns are imported from JVM via FFI.
283+ fn get_next_passthrough (
284+ exec_context_id : i64 ,
285+ iter : & JObject ,
286+ num_data_cols : usize ,
287+ num_total_cols : usize ,
288+ ) -> Result < InputBatch , CometError > {
289+ if exec_context_id == TEST_EXEC_CONTEXT_ID {
290+ return Ok ( InputBatch :: EOF ) ;
291+ }
292+
293+ if iter. is_null ( ) {
294+ return Err ( CometError :: from ( ExecutionError :: GeneralError ( format ! (
295+ "Null batch iterator object. Plan id: {exec_context_id}"
296+ ) ) ) ) ;
297+ }
298+
299+ let mut env = JVMClasses :: get_env ( ) ?;
300+
301+ // 1. Advance reader; get native batch handle (data stays in Rust)
302+ let handle: i64 = unsafe {
303+ jni_call ! ( & mut env,
304+ comet_batch_iterator( iter) . advance_passthrough( ) -> i64 ) ?
305+ } ;
306+ if handle == 0 {
307+ return Ok ( InputBatch :: EOF ) ;
308+ }
309+
310+ // 2. Get data columns from native BatchContext (zero-copy)
311+ let context = get_batch_context ( handle) ?;
312+ let batch = context. current_batch . as_ref ( ) . ok_or_else ( || {
313+ CometError :: from ( ExecutionError :: GeneralError (
314+ "No current batch in BatchContext" . to_string ( ) ,
315+ ) )
316+ } ) ?;
317+
318+ let num_rows = batch. num_rows ( ) ;
319+ let mut inputs: Vec < ArrayRef > = Vec :: with_capacity ( num_total_cols) ;
320+
321+ for i in 0 ..num_data_cols {
322+ // Zero-copy: just increment the Arc reference count
323+ inputs. push ( Arc :: clone ( batch. column ( i) ) ) ;
324+ }
325+
326+ // 3. Import partition columns from JVM FFI (if any)
327+ let num_partition_cols = num_total_cols - num_data_cols;
328+ if num_partition_cols > 0 {
329+ let mut array_addrs = Vec :: with_capacity ( num_partition_cols) ;
330+ let mut schema_addrs = Vec :: with_capacity ( num_partition_cols) ;
331+
332+ for _ in 0 ..num_partition_cols {
333+ let arrow_array = Rc :: new ( FFI_ArrowArray :: empty ( ) ) ;
334+ let arrow_schema = Rc :: new ( FFI_ArrowSchema :: empty ( ) ) ;
335+ array_addrs. push ( Rc :: into_raw ( arrow_array) as i64 ) ;
336+ schema_addrs. push ( Rc :: into_raw ( arrow_schema) as i64 ) ;
337+ }
338+
339+ let long_array_addrs = env. new_long_array ( num_partition_cols as jsize ) ?;
340+ let long_schema_addrs = env. new_long_array ( num_partition_cols as jsize ) ?;
341+ env. set_long_array_region ( & long_array_addrs, 0 , & array_addrs) ?;
342+ env. set_long_array_region ( & long_schema_addrs, 0 , & schema_addrs) ?;
343+
344+ let array_obj = JObject :: from ( long_array_addrs) ;
345+ let schema_obj = JObject :: from ( long_schema_addrs) ;
346+ let num_data_cols_jint = num_data_cols as i32 ;
347+
348+ let _part_rows: i32 = unsafe {
349+ jni_call ! ( & mut env,
350+ comet_batch_iterator( iter) . next_partition_columns_only(
351+ JValueGen :: Object ( array_obj. as_ref( ) ) ,
352+ JValueGen :: Object ( schema_obj. as_ref( ) ) ,
353+ JValueGen :: Int ( num_data_cols_jint)
354+ ) -> i32 ) ?
355+ } ;
356+
357+ for i in 0 ..num_partition_cols {
358+ let array_data = ArrayData :: from_spark ( ( array_addrs[ i] , schema_addrs[ i] ) ) ?;
359+ let array = make_array ( array_data) ;
360+ // Partition columns come from JVM mutable buffers, must copy
361+ inputs. push ( copy_array ( & array) ) ;
362+
363+ unsafe {
364+ Rc :: from_raw ( array_addrs[ i] as * const FFI_ArrowArray ) ;
365+ Rc :: from_raw ( schema_addrs[ i] as * const FFI_ArrowSchema ) ;
366+ }
367+ }
368+ }
369+
370+ Ok ( InputBatch :: new ( inputs, Some ( num_rows) ) )
371+ }
372+
262373 /// Allocates Arrow FFI structures and calls JNI to get the next batch data.
263374 /// Returns the number of rows and the allocated array/schema addresses.
264375 fn allocate_and_fetch_batch (
0 commit comments