Skip to content

Commit fcb7031

Browse files
arighi authored and github-actions[bot] committed
sched/fair: Attach sched_domain_shared to sd_asym_cpucapacity
BugLink: https://bugs.launchpad.net/bugs/2150671

On asymmetric CPU capacity systems, the wakeup path uses select_idle_capacity(), which scans the span of sd_asym_cpucapacity rather than sd_llc.

The has_idle_cores hint, however, lives on sd_llc->shared, so the wakeup-time read of has_idle_cores operates on an LLC-scoped blob while the actual scan/decision spans the wider asym domain; nr_busy_cpus also lives in the same shared sched_domain data, but it's never used in the asym CPU capacity scenario.

Therefore, move the sched_domain_shared object to sd_asym_cpucapacity whenever the CPU has an SD_ASYM_CPUCAPACITY_FULL ancestor and that ancestor is non-overlapping (i.e., not built from SD_NUMA). In that case the scope of has_idle_cores matches the scope of the wakeup scan.

Fall back to attaching the shared object to sd_llc in three cases:

1) plain symmetric systems (no SD_ASYM_CPUCAPACITY_FULL anywhere);

2) CPUs in an exclusive cpuset that carves out a symmetric capacity island: has_asym is system-wide but those CPUs have no SD_ASYM_CPUCAPACITY_FULL ancestor in their hierarchy and follow the symmetric LLC path in select_idle_sibling();

3) exotic topologies where SD_ASYM_CPUCAPACITY_FULL lands on an SD_NUMA-built domain. init_sched_domain_shared() keys the shared blob off cpumask_first(span), which on overlapping NUMA domains would alias unrelated spans onto the same blob. Keep the shared object on the LLC there; select_idle_capacity() gracefully skips the has_idle_cores preference when sd->shared is NULL.

While at it, also rename the per-CPU sd_llc_shared to sd_balance_shared, as it is no longer strictly tied to the LLC.
Co-developed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://patch.msgid.link/20260516055850.1345932-1-arighi@nvidia.com
(backported from fdfe5a8 linux-next)
[ arighi:
  - backport full logic to attach sd->shared in build_sched_domains()
  - do not rename sd_llc_shared to reduce the risk of conflicts ]
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Acked-by: Seth Forshee <sforshee@nvidia.com>
Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
Acked-by: Matthew R. Ochs <mochs@nvidia.com>
Signed-off-by: Seth Forshee <sforshee@nvidia.com>
1 parent 77bb67d commit fcb7031

2 files changed

Lines changed: 90 additions & 18 deletions

File tree

kernel/sched/fair.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12663,7 +12663,8 @@ static void set_cpu_sd_state_busy(int cpu)
1266312663
goto unlock;
1266412664
sd->nohz_idle = 0;
1266512665

12666-
atomic_inc(&sd->shared->nr_busy_cpus);
12666+
if (sd->shared)
12667+
atomic_inc(&sd->shared->nr_busy_cpus);
1266712668
unlock:
1266812669
rcu_read_unlock();
1266912670
}
@@ -12693,7 +12694,8 @@ static void set_cpu_sd_state_idle(int cpu)
1269312694
goto unlock;
1269412695
sd->nohz_idle = 1;
1269512696

12696-
atomic_dec(&sd->shared->nr_busy_cpus);
12697+
if (sd->shared)
12698+
atomic_dec(&sd->shared->nr_busy_cpus);
1269712699
unlock:
1269812700
rcu_read_unlock();
1269912701
}

kernel/sched/topology.c

Lines changed: 86 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -675,16 +675,38 @@ static void update_top_cache_domain(int cpu)
675675
int id = cpu;
676676
int size = 1;
677677

678+
sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
679+
/*
680+
* The shared object is attached to sd_asym_cpucapacity only when the
681+
* asym domain is non-overlapping (i.e., not built from SD_NUMA).
682+
* On overlapping (NUMA) asym domains we fall back to letting the
683+
* SD_SHARE_LLC path own the shared object, so sd->shared may be NULL
684+
* here.
685+
*/
686+
if (sd && sd->shared)
687+
sds = sd->shared;
688+
689+
rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
690+
678691
sd = highest_flag_domain(cpu, SD_SHARE_LLC);
679692
if (sd) {
680693
id = cpumask_first(sched_domain_span(sd));
681694
size = cpumask_weight(sched_domain_span(sd));
682-
sds = sd->shared;
695+
696+
/*
697+
* If sd_asym_cpucapacity didn't claim the shared object,
698+
* sd_llc must have one linked.
699+
*/
700+
if (!sds) {
701+
WARN_ON_ONCE(!sd->shared);
702+
sds = sd->shared;
703+
}
683704
}
684705

685706
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
686707
per_cpu(sd_llc_size, cpu) = size;
687708
per_cpu(sd_llc_id, cpu) = id;
709+
688710
rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
689711

690712
sd = lowest_flag_domain(cpu, SD_CLUSTER);
@@ -703,9 +725,6 @@ static void update_top_cache_domain(int cpu)
703725

704726
sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
705727
rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
706-
707-
sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
708-
rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
709728
}
710729

711730
/*
@@ -1628,13 +1647,12 @@ sd_init(struct sched_domain_topology_level *tl,
16281647
{
16291648
struct sd_data *sdd = &tl->data;
16301649
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1631-
int sd_id, sd_weight, sd_flags = 0;
1650+
int sd_weight, sd_flags = 0;
16321651
struct cpumask *sd_span;
16331652

16341653
sd_span = sched_domain_span(sd);
16351654
cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
16361655
sd_weight = cpumask_weight(sd_span);
1637-
sd_id = cpumask_first(sd_span);
16381656

16391657
if (tl->sd_flags)
16401658
sd_flags = (*tl->sd_flags)();
@@ -1713,16 +1731,6 @@ sd_init(struct sched_domain_topology_level *tl,
17131731
sd->cache_nice_tries = 1;
17141732
}
17151733

1716-
/*
1717-
* For all levels sharing cache; connect a sched_domain_shared
1718-
* instance.
1719-
*/
1720-
if (sd->flags & SD_SHARE_LLC) {
1721-
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
1722-
atomic_inc(&sd->shared->ref);
1723-
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
1724-
}
1725-
17261734
sd->private = sdd;
17271735

17281736
return sd;
@@ -2477,6 +2485,16 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
24772485
return true;
24782486
}
24792487

2488+
/*
 * init_sched_domain_shared - link @sd to a sched_domain_shared instance.
 * @sd: the sched_domain to attach a shared object to.
 *
 * Picks the shared object stored in the per-CPU ->sds slot of the first CPU
 * in @sd's span (looked up through sd->private, the sd_data that sd_init()
 * stores there), seeds its nr_busy_cpus with the span weight and bumps its
 * reference count.
 *
 * Because the object is keyed off cpumask_first(span), overlapping (SD_NUMA)
 * domains could alias unrelated spans onto the same object; the caller in
 * build_sched_domains() therefore skips SD_NUMA asym domains.
 */
static void init_sched_domain_shared(struct sched_domain *sd)
2489+
{
2490+
struct sd_data *sdd = sd->private;
2491+
/* The first CPU of the span selects which per-CPU shared object is used. */
int sd_id = cpumask_first(sched_domain_span(sd));
2492+
2493+
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
2494+
/* Every CPU in the span starts out accounted as busy. */
atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
2495+
atomic_inc(&sd->shared->ref);
2496+
}
2497+
24802498
/*
24812499
* Build sched domains for a given set of CPUs and attach the sched domains
24822500
* to the individual CPUs
@@ -2534,6 +2552,58 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
25342552
}
25352553
}
25362554

2555+
for_each_cpu(i, cpu_map) {
2556+
struct sched_domain *sd_asym = NULL;
2557+
bool asym_claimed = false;
2558+
2559+
sd = *per_cpu_ptr(d.sd, i);
2560+
if (!sd)
2561+
continue;
2562+
2563+
/*
2564+
* In case of ASYM_CPUCAPACITY, attach sd->shared to
2565+
* sd_asym_cpucapacity for wakeup stat tracking.
2566+
*
2567+
* Caveats:
2568+
*
2569+
* 1) has_asym is system-wide, but a given CPU may still
2570+
* lack an SD_ASYM_CPUCAPACITY_FULL ancestor (e.g., an
2571+
* exclusive cpuset carving out a symmetric capacity island).
2572+
* Such CPUs must fall through to the LLC seeding path below.
2573+
*
2574+
* 2) Skip the asym attach if the asym ancestor is an
2575+
* overlapping domain (SD_NUMA). On those topologies let the
2576+
* LLC path own the shared object instead.
2577+
*
2578+
* XXX: This assumes SD_ASYM_CPUCAPACITY_FULL domain
2579+
* always has more than one group else it is prone to
2580+
* degeneration.
2581+
*/
2582+
if (has_asym) {
2583+
sd_asym = sd;
2584+
while (sd_asym && !(sd_asym->flags & SD_ASYM_CPUCAPACITY_FULL))
2585+
sd_asym = sd_asym->parent;
2586+
2587+
if (sd_asym && !(sd_asym->flags & SD_NUMA)) {
2588+
init_sched_domain_shared(sd_asym);
2589+
asym_claimed = true;
2590+
}
2591+
}
2592+
2593+
/* First, find the topmost SD_SHARE_LLC domain */
2594+
while (sd->parent && (sd->parent->flags & SD_SHARE_LLC))
2595+
sd = sd->parent;
2596+
2597+
if (sd->flags & SD_SHARE_LLC) {
2598+
/*
2599+
* Initialize the sd->shared for SD_SHARE_LLC unless
2600+
* the asym path above already claimed it.
2601+
*/
2602+
if (!asym_claimed)
2603+
init_sched_domain_shared(sd);
2604+
}
2605+
}
2606+
25372607
/*
25382608
* Calculate an allowed NUMA imbalance such that LLCs do not get
25392609
* imbalanced.

0 commit comments

Comments
 (0)