1616// under the License.
1717
1818use crate :: execution:: operators:: { copy_array, copy_or_unpack_array, CopyMode } ;
19- use crate :: parquet:: get_batch_context;
2019use crate :: {
2120 errors:: CometError ,
2221 execution:: {
@@ -80,11 +79,6 @@ pub struct ScanExec {
8079 baseline_metrics : BaselineMetrics ,
8180 /// Whether native code can assume ownership of batches that it receives
8281 arrow_ffi_safe : bool ,
83- /// When true, data columns are read directly from the native reader's
84- /// BatchContext instead of through JVM FFI (zero-copy).
85- native_batch_passthrough : bool ,
86- /// Number of data columns from native reader. Remaining are partition columns.
87- num_data_columns : usize ,
8882}
8983
9084impl ScanExec {
@@ -94,8 +88,6 @@ impl ScanExec {
9488 input_source_description : & str ,
9589 data_types : Vec < DataType > ,
9690 arrow_ffi_safe : bool ,
97- native_batch_passthrough : bool ,
98- num_data_columns : usize ,
9991 ) -> Result < Self , CometError > {
10092 let metrics_set = ExecutionPlanMetricsSet :: default ( ) ;
10193 let baseline_metrics = BaselineMetrics :: new ( & metrics_set, 0 ) ;
@@ -123,8 +115,6 @@ impl ScanExec {
123115 baseline_metrics,
124116 schema,
125117 arrow_ffi_safe,
126- native_batch_passthrough,
127- num_data_columns,
128118 } )
129119 }
130120
@@ -153,21 +143,12 @@ impl ScanExec {
153143
154144 let mut current_batch = self . batch . try_lock ( ) . unwrap ( ) ;
155145 if current_batch. is_none ( ) {
156- let next_batch = if self . native_batch_passthrough {
157- ScanExec :: get_next_passthrough (
158- self . exec_context_id ,
159- self . input_source . as_ref ( ) . unwrap ( ) . as_obj ( ) ,
160- self . num_data_columns ,
161- self . data_types . len ( ) ,
162- ) ?
163- } else {
164- ScanExec :: get_next (
165- self . exec_context_id ,
166- self . input_source . as_ref ( ) . unwrap ( ) . as_obj ( ) ,
167- self . data_types . len ( ) ,
168- self . arrow_ffi_safe ,
169- ) ?
170- } ;
146+ let next_batch = ScanExec :: get_next (
147+ self . exec_context_id ,
148+ self . input_source . as_ref ( ) . unwrap ( ) . as_obj ( ) ,
149+ self . data_types . len ( ) ,
150+ self . arrow_ffi_safe ,
151+ ) ?;
171152 * current_batch = Some ( next_batch) ;
172153 }
173154
@@ -278,98 +259,6 @@ impl ScanExec {
278259 Ok ( InputBatch :: new ( inputs, Some ( actual_num_rows) ) )
279260 }
280261
281- /// Passthrough mode: data columns are read directly from native BatchContext
282- /// (zero-copy Arc::clone). Only partition columns are imported from JVM via FFI.
283- fn get_next_passthrough (
284- exec_context_id : i64 ,
285- iter : & JObject ,
286- num_data_cols : usize ,
287- num_total_cols : usize ,
288- ) -> Result < InputBatch , CometError > {
289- if exec_context_id == TEST_EXEC_CONTEXT_ID {
290- return Ok ( InputBatch :: EOF ) ;
291- }
292-
293- if iter. is_null ( ) {
294- return Err ( CometError :: from ( ExecutionError :: GeneralError ( format ! (
295- "Null batch iterator object. Plan id: {exec_context_id}"
296- ) ) ) ) ;
297- }
298-
299- let mut env = JVMClasses :: get_env ( ) ?;
300-
301- // 1. Advance reader; get native batch handle (data stays in Rust)
302- let handle: i64 = unsafe {
303- jni_call ! ( & mut env,
304- comet_batch_iterator( iter) . advance_passthrough( ) -> i64 ) ?
305- } ;
306- if handle == 0 {
307- return Ok ( InputBatch :: EOF ) ;
308- }
309-
310- // 2. Get data columns from native BatchContext (zero-copy)
311- let context = get_batch_context ( handle) ?;
312- let batch = context. current_batch . as_ref ( ) . ok_or_else ( || {
313- CometError :: from ( ExecutionError :: GeneralError (
314- "No current batch in BatchContext" . to_string ( ) ,
315- ) )
316- } ) ?;
317-
318- let num_rows = batch. num_rows ( ) ;
319- let mut inputs: Vec < ArrayRef > = Vec :: with_capacity ( num_total_cols) ;
320-
321- for i in 0 ..num_data_cols {
322- // Zero-copy: just increment the Arc reference count
323- inputs. push ( Arc :: clone ( batch. column ( i) ) ) ;
324- }
325-
326- // 3. Import partition columns from JVM FFI (if any)
327- let num_partition_cols = num_total_cols - num_data_cols;
328- if num_partition_cols > 0 {
329- let mut array_addrs = Vec :: with_capacity ( num_partition_cols) ;
330- let mut schema_addrs = Vec :: with_capacity ( num_partition_cols) ;
331-
332- for _ in 0 ..num_partition_cols {
333- let arrow_array = Rc :: new ( FFI_ArrowArray :: empty ( ) ) ;
334- let arrow_schema = Rc :: new ( FFI_ArrowSchema :: empty ( ) ) ;
335- array_addrs. push ( Rc :: into_raw ( arrow_array) as i64 ) ;
336- schema_addrs. push ( Rc :: into_raw ( arrow_schema) as i64 ) ;
337- }
338-
339- let long_array_addrs = env. new_long_array ( num_partition_cols as jsize ) ?;
340- let long_schema_addrs = env. new_long_array ( num_partition_cols as jsize ) ?;
341- env. set_long_array_region ( & long_array_addrs, 0 , & array_addrs) ?;
342- env. set_long_array_region ( & long_schema_addrs, 0 , & schema_addrs) ?;
343-
344- let array_obj = JObject :: from ( long_array_addrs) ;
345- let schema_obj = JObject :: from ( long_schema_addrs) ;
346- let num_data_cols_jint = num_data_cols as i32 ;
347-
348- let _part_rows: i32 = unsafe {
349- jni_call ! ( & mut env,
350- comet_batch_iterator( iter) . next_partition_columns_only(
351- JValueGen :: Object ( array_obj. as_ref( ) ) ,
352- JValueGen :: Object ( schema_obj. as_ref( ) ) ,
353- JValueGen :: Int ( num_data_cols_jint)
354- ) -> i32 ) ?
355- } ;
356-
357- for i in 0 ..num_partition_cols {
358- let array_data = ArrayData :: from_spark ( ( array_addrs[ i] , schema_addrs[ i] ) ) ?;
359- let array = make_array ( array_data) ;
360- // Partition columns come from JVM mutable buffers, must copy
361- inputs. push ( copy_array ( & array) ) ;
362-
363- unsafe {
364- Rc :: from_raw ( array_addrs[ i] as * const FFI_ArrowArray ) ;
365- Rc :: from_raw ( schema_addrs[ i] as * const FFI_ArrowSchema ) ;
366- }
367- }
368- }
369-
370- Ok ( InputBatch :: new ( inputs, Some ( num_rows) ) )
371- }
372-
373262 /// Allocates Arrow FFI structures and calls JNI to get the next batch data.
374263 /// Returns the number of rows and the allocated array/schema addresses.
375264 fn allocate_and_fetch_batch (
0 commit comments