@@ -1451,3 +1451,71 @@ fn many_actors_same_alarm_time() {
14511451 ) ;
14521452 } ) ;
14531453}
1454+
/// Regression test: an alarm that becomes overdue while the actor is
/// transitioning to sleep must still fire.
///
/// # Scenario
///
/// The actor schedules an alarm a short time in the future and then
/// immediately sends a sleep intent. The stop flow can take long enough that
/// the alarm is already overdue by the time `handle_stopped` processes
/// `Decision::Sleep`.
///
/// # Behavior before the fix
///
/// Prior to the fix in `actor2/runtime.rs`, that window cleared
/// `state.alarm_ts` without handling the overdue alarm. The scheduled work was
/// silently dropped, the actor went to sleep, and the alarm handler never ran.
///
/// # Behavior after the fix
///
/// `Decision::Sleep` detects the overdue alarm, reallocates the actor, and
/// bumps the generation so the alarm handler runs. This test exercises that
/// path with a very short alarm offset and checks that the actor wakes at
/// generation 1 instead of sleeping forever.
///
/// # Failure mode
///
/// If the fix is reverted the alarm never triggers, and the test times out
/// while waiting for the wake.
#[test]
#[ignore = "captures alarm-during-sleep-transition race; times out if the overdue-alarm reallocation path regresses"]
fn alarm_overdue_during_sleep_transition_fires_via_reallocation() {
    common::run(
        common::TestOpts::new(1).with_timeout(15),
        |ctx| async move {
            let (ns, _) = common::setup_test_namespace(ctx.leader_dc()).await;

            // One-shot readiness signal, wrapped so the actor factory closure
            // can hand a shared handle to each actor instance it builds.
            let (ready_sender, ready_rx) = tokio::sync::oneshot::channel();
            let ready_signal = Arc::new(Mutex::new(Some(ready_sender)));

            let runner = common::setup_runner(ctx.leader_dc(), &ns, |builder| {
                builder.with_actor_behavior("alarm-actor", move |_| {
                    // A 100ms offset leaves enough time to dispatch the sleep
                    // intent, yet is short enough that the alarm is
                    // near-overdue once the workflow reaches `Decision::Sleep`.
                    let alarm_offset_ms = 100;
                    let signal = Arc::clone(&ready_signal);
                    Box::new(AlarmAndSleepOnceActor::new(alarm_offset_ms, signal))
                })
            })
            .await;

            let created = common::create_actor(
                ctx.leader_dc().guard_port(),
                &ns,
                "alarm-actor",
                runner.name(),
                rivet_types::actors::CrashPolicy::Destroy,
            )
            .await;
            let actor_id = created.actor.actor_id.to_string();

            ready_rx.await.expect("actor should send ready signal");

            // NOTE(review): the subscription is taken only after the ready
            // signal. If lifecycle events are not replayed to late subscribers,
            // a very fast wake could in principle be missed — confirm against
            // `subscribe_lifecycle_events`.
            let lifecycle_rx = runner.subscribe_lifecycle_events();

            // A dropped overdue alarm would leave the actor asleep forever; a
            // successful reallocation wakes it at generation 1.
            let expected_generation = 1;
            let wake_timeout_secs = 10;
            let wake_result = wait_for_actor_wake_from_alarm(
                lifecycle_rx,
                &actor_id,
                expected_generation,
                wake_timeout_secs,
            )
            .await;
            wake_result.expect(
                "actor should wake from the overdue alarm via reallocation; \
                 if this times out, the `Decision::Sleep` overdue-alarm path was dropped",
            );

            tracing::info!(?actor_id, "overdue alarm fired via reallocation");
        },
    );
}
0 commit comments