@@ -395,6 +395,41 @@ fn start_shard_positions_service(
395395 } ) ;
396396}
397397
398+ /// Waits for the shutdown signal and notifies all other services when it
399+ /// occurs.
400+ ///
401+ /// Usually called when receiving a SIGTERM signal, e.g. k8s trying to
402+ /// decomission a pod.
403+ async fn shutdown_signal_handler (
404+ shutdown_signal : BoxFutureInfaillible < ( ) > ,
405+ universe : Universe ,
406+ ingester_opt : Option < Ingester > ,
407+ grpc_shutdown_trigger_tx : oneshot:: Sender < ( ) > ,
408+ rest_shutdown_trigger_tx : oneshot:: Sender < ( ) > ,
409+ cluster : Cluster ,
410+ ) -> HashMap < String , ActorExitStatus > {
411+ shutdown_signal. await ;
412+ // We must decommission the ingester first before terminating the indexing pipelines that
413+ // may consume from it. We also need to keep the gRPC server running while doing so.
414+ if let Some ( ingester) = ingester_opt {
415+ if let Err ( error) = wait_for_ingester_decommission ( ingester) . await {
416+ error ! ( "failed to decommission ingester gracefully: {:?}" , error) ;
417+ }
418+ }
419+ let actor_exit_statuses = universe. quit ( ) . await ;
420+
421+ if grpc_shutdown_trigger_tx. send ( ( ) ) . is_err ( ) {
422+ debug ! ( "gRPC server shutdown signal receiver was dropped" ) ;
423+ }
424+ if rest_shutdown_trigger_tx. send ( ( ) ) . is_err ( ) {
425+ debug ! ( "REST server shutdown signal receiver was dropped" ) ;
426+ }
427+ if let Err ( err) = cluster. initiate_shutdown ( ) . await {
428+ debug ! ( "{err}" ) ;
429+ }
430+ actor_exit_statuses
431+ }
432+
398433pub async fn serve_quickwit (
399434 node_config : NodeConfig ,
400435 runtimes_config : RuntimesConfig ,
@@ -757,7 +792,7 @@ pub async fn serve_quickwit(
757792 // Thus readiness task is started once gRPC and REST servers are started.
758793 spawn_named_task (
759794 node_readiness_reporting_task (
760- cluster,
795+ cluster. clone ( ) ,
761796 metastore_through_control_plane,
762797 ingester_opt. clone ( ) ,
763798 grpc_readiness_signal_rx,
@@ -767,26 +802,14 @@ pub async fn serve_quickwit(
767802 "node_readiness_reporting" ,
768803 ) ;
769804
770- let shutdown_handle = tokio:: spawn ( async move {
771- shutdown_signal. await ;
772-
773- // We must decommission the ingester first before terminating the indexing pipelines that
774- // may consume from it. We also need to keep the gRPC server running while doing so.
775- if let Some ( ingester) = ingester_opt {
776- if let Err ( error) = wait_for_ingester_decommission ( ingester) . await {
777- error ! ( "failed to decommission ingester gracefully: {:?}" , error) ;
778- }
779- }
780- let actor_exit_statuses = universe. quit ( ) . await ;
781-
782- if grpc_shutdown_trigger_tx. send ( ( ) ) . is_err ( ) {
783- debug ! ( "gRPC server shutdown signal receiver was dropped" ) ;
784- }
785- if rest_shutdown_trigger_tx. send ( ( ) ) . is_err ( ) {
786- debug ! ( "REST server shutdown signal receiver was dropped" ) ;
787- }
788- actor_exit_statuses
789- } ) ;
805+ let shutdown_handle = tokio:: spawn ( shutdown_signal_handler (
806+ shutdown_signal,
807+ universe,
808+ ingester_opt,
809+ grpc_shutdown_trigger_tx,
810+ rest_shutdown_trigger_tx,
811+ cluster. clone ( ) ,
812+ ) ) ;
790813 let grpc_join_handle = async move {
791814 spawn_named_task ( grpc_server, "grpc_server" )
792815 . await
@@ -801,7 +824,9 @@ pub async fn serve_quickwit(
801824 . context ( "REST server failed" )
802825 } ;
803826
804- if let Err ( err) = tokio:: try_join!( grpc_join_handle, rest_join_handle) {
827+ let chitchat_server_handle = cluster. chitchat_server_termination_watcher ( ) . await ;
828+
829+ if let Err ( err) = tokio:: try_join!( grpc_join_handle, rest_join_handle, chitchat_server_handle) {
805830 error ! ( "server failed: {err:?}" ) ;
806831 }
807832
0 commit comments