@@ -12,10 +12,11 @@ use std::num::NonZeroUsize;
1212 * compatible open source license.
1313 */
1414use std:: ptr:: addr_of_mut;
15- use jni:: objects:: { JByteArray , JClass , JMap , JObject } ;
15+ use jni:: objects:: { GlobalRef , JByteArray , JClass , JMap , JObject } ;
1616use jni:: objects:: JLongArray ;
1717use jni:: sys:: { jboolean, jbyteArray, jint, jlong, jstring} ;
1818use jni:: { JNIEnv , JavaVM } ;
19+ use std:: future:: Future ;
1920use std:: sync:: { Arc , OnceLock } ;
2021use arrow_array:: { Array , RecordBatch , StructArray } ;
2122use arrow_array:: ffi:: FFI_ArrowArray ;
@@ -67,7 +68,7 @@ use tokio::runtime::Runtime;
6768use std:: result;
6869use datafusion:: execution:: disk_manager:: { DiskManagerBuilder , DiskManagerMode } ;
6970use datafusion:: physical_plan:: stream:: RecordBatchStreamAdapter ;
70- use futures:: TryStreamExt ;
71+ use futures:: { TryStreamExt , FutureExt } ;
7172
7273pub type Result < T , E = DataFusionError > = result:: Result < T , E > ;
7374
@@ -128,6 +129,59 @@ where
128129 } )
129130}
130131
/// Render a panic payload as text suitable for logs and error messages.
///
/// Payloads that are a `&str` or a `String` are returned as an owned copy of
/// that text. Any other payload type yields the fixed text
/// `"unknown panic payload"`.
fn panic_message(payload: &Box<dyn std::any::Any + Send>) -> String {
    payload
        // String-literal payload.
        .downcast_ref::<&str>()
        .map(|text| (*text).to_owned())
        // Owned `String` payload.
        .or_else(|| payload.downcast_ref::<String>().cloned())
        // Anything else cannot be rendered; fall back to a fixed message.
        .unwrap_or_else(|| "unknown panic payload".to_owned())
}
142+
143+ /// Spawn an async task on `runtime` that calls an ActionListener exactly once.
144+ ///
145+ /// The entire `task` future runs inside `catch_unwind`. Any panic is converted
146+ /// to a `DataFusionError` and surfaced to the Java caller via `listener_ref`.
147+ /// This ensures the `CompletableFuture` on the Java side is always completed,
148+ /// never left hanging.
149+ ///
150+ /// `on_ok` receives the success value and is responsible for calling the
151+ /// appropriate `set_action_listener_ok_*` variant. `T` is inferred from
152+ /// the closure, which in turn pins the `Output` type of `task`.
153+ fn spawn_jni_task < Fut , T , FOk > (
154+ runtime : & tokio:: runtime:: Handle ,
155+ task_name : & ' static str ,
156+ listener_ref : GlobalRef ,
157+ task : Fut ,
158+ on_ok : FOk ,
159+ )
160+ where
161+ Fut : Future < Output = Result < T , DataFusionError > > + Send + ' static ,
162+ T : Send + ' static ,
163+ FOk : FnOnce ( & mut JNIEnv , & GlobalRef , T ) + Send + ' static ,
164+ {
165+ let _ = runtime. spawn ( async move {
166+ let result = std:: panic:: AssertUnwindSafe ( task)
167+ . catch_unwind ( )
168+ . await
169+ . unwrap_or_else ( |panic| {
170+ let msg = panic_message ( & panic) ;
171+ log_error ! ( "{} panicked: {}" , task_name, msg) ;
172+ Err ( DataFusionError :: Execution ( format ! ( "{} panicked: {}" , task_name, msg) ) )
173+ } ) ;
174+
175+ with_jni_env ( |env| match result {
176+ Ok ( value) => on_ok ( env, & listener_ref, value) ,
177+ Err ( e) => {
178+ log_error ! ( "{} failed: {}" , task_name, e) ;
179+ set_action_listener_error_global ( env, & listener_ref, & e) ;
180+ }
181+ } ) ;
182+ } ) ;
183+ }
184+
131185/// Initialize the logger for Rust->Java logging bridge.
132186/// This should be called once when the native library is loaded.
133187#[ no_mangle]
@@ -619,9 +673,11 @@ pub extern "system" fn Java_org_opensearch_datafusion_jni_NativeBridge_executeQu
619673 let table_path = shard_view. table_path ( ) ;
620674 let files_meta = shard_view. files_metadata ( ) ;
621675
622- io_runtime. block_on ( async move {
623-
624- let result = query_executor:: execute_query_with_cross_rt_stream (
676+ spawn_jni_task (
677+ & io_runtime,
678+ "executeQueryPhaseAsync" ,
679+ listener_ref,
680+ query_executor:: execute_query_with_cross_rt_stream (
625681 table_path,
626682 files_meta,
627683 table_name,
@@ -630,22 +686,9 @@ pub extern "system" fn Java_org_opensearch_datafusion_jni_NativeBridge_executeQu
630686 target_partitions,
631687 runtime,
632688 cpu_executor,
633- ) . await ;
634-
635- match result {
636- Ok ( stream_ptr) => {
637- with_jni_env ( |env| {
638- set_action_listener_ok_global ( env, & listener_ref, stream_ptr) ;
639- } ) ;
640- }
641- Err ( e) => {
642- with_jni_env ( |env| {
643- log_error ! ( "Query execution failed: {}" , e) ;
644- set_action_listener_error_global ( env, & listener_ref, & e) ;
645- } ) ;
646- }
647- }
648- } ) ;
689+ ) ,
690+ |env, listener_ref, stream_pointer| set_action_listener_ok_global ( env, listener_ref, stream_pointer) ,
691+ ) ;
649692}
650693
651694#[ no_mangle]
@@ -680,22 +723,13 @@ pub extern "system" fn Java_org_opensearch_datafusion_jni_NativeBridge_fetchSegm
680723 let shard_view = unsafe { & * ( shard_view_ptr as * const ShardView ) } ;
681724 let files_meta = shard_view. files_metadata ( ) ;
682725
683- io_runtime. block_on ( async move {
684- let file_stats = util:: fetch_segment_statistics ( files_meta) . await ;
685- match file_stats {
686- Ok ( map) => {
687- with_jni_env ( |env| {
688- set_action_listener_ok_global_with_map ( env, & listener_ref, & map) ;
689- } ) ;
690- }
691- Err ( e) => {
692- with_jni_env ( |env| {
693- log_error ! ( "Collecting file stats failed: {}" , e) ;
694- set_action_listener_error_global ( env, & listener_ref, & e) ;
695- } ) ;
696- }
697- }
698- } ) ;
726+ spawn_jni_task (
727+ & io_runtime,
728+ "fetchSegmentStats" ,
729+ listener_ref,
730+ async move { util:: fetch_segment_statistics ( files_meta) . await } ,
731+ |env, listener_ref, stats_map| set_action_listener_ok_global_with_map ( env, listener_ref, & stats_map) ,
732+ ) ;
699733}
700734
701735#[ no_mangle]
@@ -732,41 +766,40 @@ pub extern "system" fn Java_org_opensearch_datafusion_jni_NativeBridge_streamNex
732766 let stream_ptr = stream;
733767 let io_runtime = manager. io_runtime . clone ( ) ;
734768
735- io_runtime. block_on ( async move {
736-
737- let stream = unsafe { & mut * ( stream_ptr as * mut RecordBatchStreamAdapter < CrossRtStream > ) } ;
738- let result = stream. try_next ( ) . await ;
739-
740- // Uncomment for monitoring stream next
741- // let result = STREAM_NEXT_MONITOR.instrument(async {
742- // stream.try_next().await
743- // }).await;
744-
745- // Use thread-local JNI env - auto-attaches!
746- with_jni_env ( |env| {
769+ // Ensure stream_ptr lifetime is guaranteed beyond the spawn boundary
770+ // (e.g., wrap in Arc<Mutex<...>> or ensure sequential access contract)
771+ spawn_jni_task (
772+ & io_runtime,
773+ "streamNext" ,
774+ listener_ref,
775+ async move {
776+ let stream = unsafe { & mut * ( stream_ptr as * mut RecordBatchStreamAdapter < CrossRtStream > ) } ;
778+ // Poll the stream for the next batch (monitoring is disabled; see the commented-out block below)
778+ let result = stream. try_next ( ) . await ?;
779+
780+ // Uncomment for monitoring stream next
781+ // let result = STREAM_NEXT_MONITOR.instrument(async {
782+ // stream.try_next().await
783+ // }).await;
747784 match result {
748- Ok ( Some ( batch) ) => {
785+ Some ( batch) => {
749786 log_info ! ( "[RUST streamNext] Batch produced: {} rows, {} columns, schema: {:?}" ,
750787 batch. num_rows( ) , batch. num_columns( ) , batch. schema( ) . fields( ) . iter( ) . map( |f| f. name( ) . as_str( ) ) . collect:: <Vec <_>>( ) ) ;
751788 // Convert to FFI
752789 let struct_array: StructArray = batch. into ( ) ;
753790 let array_data = struct_array. into_data ( ) ;
754791 let ffi_array = FFI_ArrowArray :: new ( & array_data) ;
755- let ffi_array_ptr = Box :: into_raw ( Box :: new ( ffi_array) ) ;
756- set_action_listener_ok_global ( env, & listener_ref, ffi_array_ptr as jlong ) ;
792+ Ok ( Box :: into_raw ( Box :: new ( ffi_array) ) as jlong )
757793 }
758- Ok ( None ) => {
794+ None => {
759795 log_info ! ( "[RUST streamNext] End of stream reached" ) ;
760796 // End of stream
761- set_action_listener_ok_global ( env, & listener_ref, 0 ) ;
762- }
763- Err ( err) => {
764- log_error ! ( "Stream next failed: {}" , err) ;
765- set_action_listener_error_global ( env, & listener_ref, & err) ;
797+ Ok ( 0 )
766798 }
767799 }
768- } ) ;
769- } ) ;
800+ } ,
801+ |env, listener_ref, data_pointer| set_action_listener_ok_global ( env, listener_ref, data_pointer) ,
802+ ) ;
770803 // Function returns immediately to java - async rust work continues in background
771804}
772805
0 commit comments