@@ -217,8 +217,56 @@ impl RefreshCoordinator {
217217 * state = RefreshCoordinatorState :: Idle ;
218218 self . changed . notify_all ( ) ;
219219 }
220+ RefreshCoordinatorState :: Running ( active) if active. key == * key => {
221+ // Recovery path: if begin_completion() panicked, the state was
222+ // restored to Running before the unwind. Transition to Idle so
223+ // waiters are not stuck forever.
224+ * state = RefreshCoordinatorState :: Idle ;
225+ self . changed . notify_all ( ) ;
226+ }
220227 RefreshCoordinatorState :: Idle => { }
221- _ => { }
228+ _ => {
229+ // Mismatched key — another refresh owns this state. Log and
230+ // leave it alone; the owning refresh will clean up.
231+ error ! (
232+ "force_complete_request called with mismatched key; current state not owned by caller"
233+ ) ;
234+ }
235+ }
236+ }
237+ }
238+
239+ /// Safety guard created when a refresh thread takes ownership of the `Running`
240+ /// state. If the thread exits the `Start` arm without ever constructing a
241+ /// `RefreshCompletionGuard` (e.g., because `begin_completion` panics), this
242+ /// guard calls `force_complete_request` to transition the coordinator back to
243+ /// `Idle`, preventing a permanent deadlock.
244+ struct RefreshSafetyGuard < ' a > {
245+ coordinator : & ' a RefreshCoordinator ,
246+ key : RefreshKey ,
247+ disarmed : bool ,
248+ }
249+
250+ impl < ' a > RefreshSafetyGuard < ' a > {
251+ fn new ( coordinator : & ' a RefreshCoordinator , key : RefreshKey ) -> Self {
252+ Self {
253+ coordinator,
254+ key,
255+ disarmed : false ,
256+ }
257+ }
258+
259+ /// Disarm the safety guard once a `RefreshCompletionGuard` takes over
260+ /// responsibility for the state transition.
261+ fn disarm ( & mut self ) {
262+ self . disarmed = true ;
263+ }
264+ }
265+
266+ impl Drop for RefreshSafetyGuard < ' _ > {
267+ fn drop ( & mut self ) {
268+ if !self . disarmed {
269+ self . coordinator . force_complete_request ( & self . key ) ;
222270 }
223271 }
224272}
@@ -530,6 +578,11 @@ pub fn handle_configure(context: Arc<Context>, id: u32, params: Value) {
530578 state. config . cache_directory = Some ( cache_directory) ;
531579 }
532580 state. generation += 1 ;
581+ // Reset missing-env reporting so that the next refresh
582+ // after reconfiguration can trigger it again (Fixes #395).
583+ // Done inside the write lock to avoid a TOCTOU window with
584+ // concurrent refresh threads reading the generation.
585+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
533586 trace ! (
534587 "Configuring locators with generation {}: {:?}" ,
535588 state. generation,
@@ -886,6 +939,15 @@ pub fn handle_refresh(context: Arc<Context>, id: u32, params: Value) {
886939 context. refresh_coordinator . wait_until_idle ( ) ;
887940 }
888941 RefreshRegistration :: Start => {
942+ // Safety guard: if anything in this arm panics
943+ // (including begin_completion), force the
944+ // coordinator back to Idle so waiters are not
945+ // stuck forever.
946+ let mut safety_guard = RefreshSafetyGuard :: new (
947+ & context. refresh_coordinator ,
948+ refresh_key. clone ( ) ,
949+ ) ;
950+
889951 let refresh_result = panic:: catch_unwind ( AssertUnwindSafe ( || {
890952 execute_refresh (
891953 context. as_ref ( ) ,
@@ -901,6 +963,7 @@ pub fn handle_refresh(context: Arc<Context>, id: u32, params: Value) {
901963 & context. refresh_coordinator ,
902964 & refresh_key,
903965 ) ;
966+ safety_guard. disarm ( ) ;
904967 finish_refresh_replies ( & mut completion_guard, & refresh_result) ;
905968 report_refresh_follow_up ( execution) ;
906969 }
@@ -913,6 +976,7 @@ pub fn handle_refresh(context: Arc<Context>, id: u32, params: Value) {
913976 & context. refresh_coordinator ,
914977 & refresh_key,
915978 ) ;
979+ safety_guard. disarm ( ) ;
916980 finish_refresh_errors (
917981 & mut completion_guard,
918982 "Refresh failed unexpectedly" ,
@@ -1898,4 +1962,138 @@ mod tests {
18981962 assert_eq ! ( result_config. executables, Some ( vec![ executable] ) ) ;
18991963 assert ! ( matches!( search_scope, Some ( SearchScope :: Workspace ) ) ) ;
19001964 }
1965+
1966+ /// Test for #396: force_complete_request recovers from Running state.
1967+ /// When begin_completion() cannot be reached (e.g., the thread panics before
1968+ /// constructing a RefreshCompletionGuard), force_complete_request must still
1969+ /// transition Running → Idle to unblock waiters.
1970+ #[ test]
1971+ fn test_force_complete_request_recovers_from_running_state ( ) {
1972+ let coordinator = RefreshCoordinator :: default ( ) ;
1973+ let key = make_refresh_key ( 1 , RefreshOptions :: default ( ) ) ;
1974+
1975+ // State → Running(key)
1976+ assert ! ( matches!(
1977+ coordinator. register_request( 1 , key. clone( ) ) ,
1978+ RefreshRegistration :: Start
1979+ ) ) ;
1980+
1981+ // Simulate recovery: force_complete_request from Running state.
1982+ coordinator. force_complete_request ( & key) ;
1983+
1984+ // Verify we're back to Idle and can start a new refresh.
1985+ assert ! ( matches!(
1986+ coordinator. register_request( 2 , key. clone( ) ) ,
1987+ RefreshRegistration :: Start
1988+ ) ) ;
1989+ }
1990+
1991+ /// Test for #396: RefreshSafetyGuard transitions Running → Idle on drop
1992+ /// when begin_completion is never reached.
1993+ #[ test]
1994+ fn test_safety_guard_recovers_running_state_on_drop ( ) {
1995+ let coordinator = Arc :: new ( RefreshCoordinator :: default ( ) ) ;
1996+ let key = make_refresh_key ( 1 , RefreshOptions :: default ( ) ) ;
1997+ let other_key = make_refresh_key (
1998+ 1 ,
1999+ RefreshOptions {
2000+ search_kind : Some ( PythonEnvironmentKind :: Venv ) ,
2001+ search_paths : None ,
2002+ } ,
2003+ ) ;
2004+
2005+ assert ! ( matches!(
2006+ coordinator. register_request( 1 , key. clone( ) ) ,
2007+ RefreshRegistration :: Start
2008+ ) ) ;
2009+
2010+ let ( state_tx, state_rx) = mpsc:: channel ( ) ;
2011+ let waiter = {
2012+ let coordinator = coordinator. clone ( ) ;
2013+ let other_key = other_key. clone ( ) ;
2014+ thread:: spawn ( move || {
2015+ // Different key → returns Wait (not Joined).
2016+ assert ! ( matches!(
2017+ coordinator. register_request( 2 , other_key. clone( ) ) ,
2018+ RefreshRegistration :: Wait
2019+ ) ) ;
2020+ state_tx. send ( "waiting" ) . unwrap ( ) ;
2021+ coordinator. wait_until_idle ( ) ;
2022+ state_tx. send ( "idle" ) . unwrap ( ) ;
2023+ } )
2024+ } ;
2025+
2026+ assert_eq ! ( state_rx. recv( ) . unwrap( ) , "waiting" ) ;
2027+
2028+ // Create and immediately drop the safety guard without disarming it.
2029+ // This simulates the thread dying before begin_completion.
2030+ {
2031+ let _guard = RefreshSafetyGuard :: new ( & coordinator, key. clone ( ) ) ;
2032+ }
2033+
2034+ // Waiter should be unblocked.
2035+ assert_eq ! ( state_rx. recv( ) . unwrap( ) , "idle" ) ;
2036+ waiter. join ( ) . unwrap ( ) ;
2037+ }
2038+
2039+ /// Test for #396: RefreshSafetyGuard does NOT interfere when disarmed
2040+ /// (normal path where RefreshCompletionGuard takes over).
2041+ #[ test]
2042+ fn test_safety_guard_disarmed_does_not_interfere ( ) {
2043+ let coordinator = RefreshCoordinator :: default ( ) ;
2044+ let key = make_refresh_key ( 1 , RefreshOptions :: default ( ) ) ;
2045+
2046+ assert ! ( matches!(
2047+ coordinator. register_request( 1 , key. clone( ) ) ,
2048+ RefreshRegistration :: Start
2049+ ) ) ;
2050+
2051+ {
2052+ let mut safety_guard = RefreshSafetyGuard :: new ( & coordinator, key. clone ( ) ) ;
2053+ let mut completion_guard = RefreshCompletionGuard :: begin ( & coordinator, & key) ;
2054+ safety_guard. disarm ( ) ;
2055+ let ids = completion_guard. drain_request_ids ( ) ;
2056+ assert_eq ! ( ids, vec![ 1 ] ) ;
2057+ assert ! ( completion_guard. finish_if_no_pending( ) ) ;
2058+ }
2059+
2060+ // Should be Idle — can start a new refresh.
2061+ assert ! ( matches!(
2062+ coordinator. register_request( 2 , key. clone( ) ) ,
2063+ RefreshRegistration :: Start
2064+ ) ) ;
2065+ }
2066+
2067+ /// Test for #395: configure resets MISSING_ENVS_REPORTING_STATE so that
2068+ /// subsequent refreshes can trigger missing-env reporting again.
2069+ #[ test]
2070+ fn test_configure_resets_completed_missing_env_reporting ( ) {
2071+ let _guard = MISSING_ENVS_TEST_LOCK . lock ( ) . unwrap ( ) ;
2072+
2073+ let configuration = Arc :: new ( RwLock :: new ( ConfigurationState {
2074+ generation : 1 ,
2075+ config : Configuration :: default ( ) ,
2076+ } ) ) ;
2077+
2078+ // Simulate a completed first refresh.
2079+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
2080+ assert ! ( try_begin_missing_env_reporting( configuration. as_ref( ) , 1 ) ) ;
2081+ complete_missing_env_reporting ( 1 ) ;
2082+
2083+ // Missing-env reporting is now exhausted.
2084+ assert ! ( !try_begin_missing_env_reporting( configuration. as_ref( ) , 1 ) ) ;
2085+
2086+ // Simulate what handle_configure does: bump generation and reset.
2087+ {
2088+ let mut state = configuration. write ( ) . unwrap ( ) ;
2089+ state. generation = 2 ;
2090+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
2091+ }
2092+
2093+ // Missing-env reporting should work again for the new generation.
2094+ assert ! ( try_begin_missing_env_reporting( configuration. as_ref( ) , 2 ) ) ;
2095+
2096+ // Cleanup.
2097+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
2098+ }
19012099}
0 commit comments