Skip to content

Commit c3cba36

Browse files
committed
Merge tag 'sched-urgent-2026-05-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:

 - Fix the delayed dequeue negative lag increase fix in the fair scheduler (Peter Zijlstra)

 - Fix wakeup_preempt_fair() to do proper delayed dequeue (Vincent Guittot)

 - Clear sched_entity::rel_deadline when initializing forked entities; this bug can cause all tasks to be EEVDF-ineligible, leading to a NULL pointer dereference crash in pick_next_entity() (Zicheng Qu)

* tag 'sched-urgent-2026-05-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Clear rel_deadline when initializing forked entities
  sched/fair: Fix wakeup_preempt_fair() vs delayed dequeue
  sched/fair: Fix the negative lag increase fix
2 parents 66edb90 + 3da56dc commit c3cba36

2 files changed

Lines changed: 25 additions & 18 deletions

File tree

kernel/sched/core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4458,6 +4458,7 @@ static void __sched_fork(u64 clone_flags, struct task_struct *p)
44584458
p->se.nr_migrations = 0;
44594459
p->se.vruntime = 0;
44604460
p->se.vlag = 0;
4461+
p->se.rel_deadline = 0;
44614462
INIT_LIST_HEAD(&p->se.group_node);
44624463

44634464
/* A delayed task cannot be in clone(). */

kernel/sched/fair.c

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -847,13 +847,19 @@ static s64 entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 avrunt
847847
* Similarly, check that the entity didn't gain positive lag when DELAY_ZERO
848848
* is set.
849849
*
850-
* Return true if the lag has been adjusted.
850+
* Return true if the vlag has been modified. Specifically:
851+
*
852+
* se->vlag != avg_vruntime() - se->vruntime
853+
*
854+
* This can be due to clamping in entity_lag() or clamping due to
855+
* sched_delayed. Either way, when vlag is modified and the entity is
856+
* retained, the tree needs to be adjusted.
851857
*/
852858
static __always_inline
853859
bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
854860
{
855-
s64 vlag = entity_lag(cfs_rq, se, avg_vruntime(cfs_rq));
856-
bool ret;
861+
u64 avruntime = avg_vruntime(cfs_rq);
862+
s64 vlag = entity_lag(cfs_rq, se, avruntime);
857863

858864
WARN_ON_ONCE(!se->on_rq);
859865

@@ -863,10 +869,9 @@ bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
863869
if (sched_feat(DELAY_ZERO))
864870
vlag = min(vlag, 0);
865871
}
866-
ret = (vlag == se->vlag);
867872
se->vlag = vlag;
868873

869-
return ret;
874+
return avruntime - vlag != se->vruntime;
870875
}
871876

872877
/*
@@ -1099,7 +1104,7 @@ static inline void cancel_protect_slice(struct sched_entity *se)
10991104
*
11001105
* Which allows tree pruning through eligibility.
11011106
*/
1102-
static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
1107+
static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
11031108
{
11041109
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
11051110
struct sched_entity *se = __pick_first_entity(cfs_rq);
@@ -1170,11 +1175,6 @@ static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq, bool protect)
11701175
return best;
11711176
}
11721177

1173-
static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
1174-
{
1175-
return __pick_eevdf(cfs_rq, true);
1176-
}
1177-
11781178
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
11791179
{
11801180
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
@@ -5749,11 +5749,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
57495749
* 4) do not run the "skip" process, if something else is available
57505750
*/
57515751
static struct sched_entity *
5752-
pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
5752+
pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq, bool protect)
57535753
{
57545754
struct sched_entity *se;
57555755

5756-
se = pick_eevdf(cfs_rq);
5756+
se = pick_eevdf(cfs_rq, protect);
57575757
if (se->sched_delayed) {
57585758
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
57595759
/*
@@ -9027,7 +9027,7 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f
90279027
{
90289028
enum preempt_wakeup_action preempt_action = PREEMPT_WAKEUP_PICK;
90299029
struct task_struct *donor = rq->donor;
9030-
struct sched_entity *se = &donor->se, *pse = &p->se;
9030+
struct sched_entity *nse, *se = &donor->se, *pse = &p->se;
90319031
struct cfs_rq *cfs_rq = task_cfs_rq(donor);
90329032
int cse_is_idle, pse_is_idle;
90339033

@@ -9138,11 +9138,17 @@ static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_f
91389138
}
91399139

91409140
pick:
9141+
nse = pick_next_entity(rq, cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT);
9142+
/* If @p has become the most eligible task, force preemption */
9143+
if (nse == pse)
9144+
goto preempt;
9145+
91419146
/*
9142-
* If @p has become the most eligible task, force preemption.
9147+
* Because p is enqueued, nse being null can only mean that we
9148+
* dequeued a delayed task.
91439149
*/
9144-
if (__pick_eevdf(cfs_rq, preempt_action != PREEMPT_WAKEUP_SHORT) == pse)
9145-
goto preempt;
9150+
if (!nse)
9151+
goto pick;
91469152

91479153
if (sched_feat(RUN_TO_PARITY))
91489154
update_protect_slice(cfs_rq, se);
@@ -9179,7 +9185,7 @@ static struct task_struct *pick_task_fair(struct rq *rq, struct rq_flags *rf)
91799185

91809186
throttled |= check_cfs_rq_runtime(cfs_rq);
91819187

9182-
se = pick_next_entity(rq, cfs_rq);
9188+
se = pick_next_entity(rq, cfs_rq, true);
91839189
if (!se)
91849190
goto again;
91859191
cfs_rq = group_cfs_rq(se);

0 commit comments

Comments
 (0)