Skip to content

Commit be61197

Browse files
committed
NVIDIA: SAUCE: sched/fair: Host has_idle_cores/nr_busy_cpus on sd_asym_cpucapacity
On asymmetric CPU capacity systems the wakeup path uses select_idle_capacity(), which scans the span of sd_asym_cpucapacity rather than sd_llc. The has_idle_cores hint, however, lives on sd_llc->shared, so the wakeup-time read of has_idle_cores operates on an LLC-scoped blob while the actual scan/decision spans the wider asym domain. Idle cores in a sibling LLC inside the asym span are therefore invisible to the hint. nr_busy_cpus lives in the same shared sched_domain data, but it is never used in the asym CPU capacity case.

Therefore, move the sched_domain_shared object to sd_asym_cpucapacity whenever the CPU has an SD_ASYM_CPUCAPACITY_FULL ancestor and that ancestor is non-overlapping (i.e., not built from SD_NUMA). In that case the scope of has_idle_cores matches the scope of the wakeup scan.

Fall back to attaching the shared object to sd_llc in three cases:

1) plain symmetric systems (no SD_ASYM_CPUCAPACITY_FULL anywhere);

2) CPUs in an exclusive cpuset that carves out a symmetric capacity island: has_asym is system-wide but those CPUs have no SD_ASYM_CPUCAPACITY_FULL ancestor in their hierarchy and follow the symmetric LLC path in select_idle_sibling();

3) exotic topologies where SD_ASYM_CPUCAPACITY_FULL lands on an SD_NUMA-built domain. init_sched_domain_shared() keys the shared blob off cpumask_first(span), which on overlapping NUMA domains would alias unrelated spans onto the same blob. Keep the shared object on the LLC there; select_idle_capacity() gracefully skips the has_idle_cores preference when sd->shared is NULL.

On for-6.17, sched_domain_shared is still wired in a post-pass after sched groups are built (init_sched_domain_shared() uses sd->private), not from sd_init(), matching this branch's topology build path.
Co-developed-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
(backported from https://lore.kernel.org/all/20260428051720.3180182-1-arighi@nvidia.com)
[ arighi: backport full logic to attach sd->shared in build_sched_domains() ]
Signed-off-by: Andrea Righi <arighi@nvidia.com>
1 parent 93e1b6a commit be61197

2 files changed

Lines changed: 90 additions & 18 deletions

File tree

kernel/sched/fair.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12347,7 +12347,8 @@ static void set_cpu_sd_state_busy(int cpu)
1234712347
goto unlock;
1234812348
sd->nohz_idle = 0;
1234912349

12350-
atomic_inc(&sd->shared->nr_busy_cpus);
12350+
if (sd->shared)
12351+
atomic_inc(&sd->shared->nr_busy_cpus);
1235112352
unlock:
1235212353
rcu_read_unlock();
1235312354
}
@@ -12377,7 +12378,8 @@ static void set_cpu_sd_state_idle(int cpu)
1237712378
goto unlock;
1237812379
sd->nohz_idle = 1;
1237912380

12380-
atomic_dec(&sd->shared->nr_busy_cpus);
12381+
if (sd->shared)
12382+
atomic_dec(&sd->shared->nr_busy_cpus);
1238112383
unlock:
1238212384
rcu_read_unlock();
1238312385
}

kernel/sched/topology.c

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -675,16 +675,39 @@ static void update_top_cache_domain(int cpu)
675675
int id = cpu;
676676
int size = 1;
677677

678+
sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
679+
/*
680+
* The shared object is attached to sd_asym_cpucapacity only when the
681+
* asym domain is non-overlapping (i.e., not built from SD_NUMA).
682+
* On overlapping (NUMA) asym domains we fall back to letting the
683+
* SD_SHARE_LLC path own the shared object, so sd->shared may be NULL
684+
* here.
685+
*/
686+
if (sd && sd->shared)
687+
sds = sd->shared;
688+
689+
rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
690+
678691
sd = highest_flag_domain(cpu, SD_SHARE_LLC);
679692
if (sd) {
680693
id = cpumask_first(sched_domain_span(sd));
681694
size = cpumask_weight(sched_domain_span(sd));
682-
sds = sd->shared;
695+
696+
/*
697+
* If sd_asym_cpucapacity didn't claim the shared object,
698+
* sd_llc must have one linked.
699+
*/
700+
if (!sds) {
701+
WARN_ON_ONCE(!sd->shared);
702+
sds = sd->shared;
703+
}
683704
}
684705

685706
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
686707
per_cpu(sd_llc_size, cpu) = size;
687708
per_cpu(sd_llc_id, cpu) = id;
709+
710+
/* TODO: Rename sd_llc_shared to fit the new role. */
688711
rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
689712

690713
sd = lowest_flag_domain(cpu, SD_CLUSTER);
@@ -703,9 +726,6 @@ static void update_top_cache_domain(int cpu)
703726

704727
sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
705728
rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
706-
707-
sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
708-
rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
709729
}
710730

711731
/*
@@ -1628,7 +1648,7 @@ sd_init(struct sched_domain_topology_level *tl,
16281648
{
16291649
struct sd_data *sdd = &tl->data;
16301650
struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1631-
int sd_id, sd_weight, sd_flags = 0;
1651+
int sd_weight, sd_flags = 0;
16321652
struct cpumask *sd_span;
16331653

16341654
sd_weight = cpumask_weight(tl->mask(tl, cpu));
@@ -1670,7 +1690,6 @@ sd_init(struct sched_domain_topology_level *tl,
16701690

16711691
sd_span = sched_domain_span(sd);
16721692
cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
1673-
sd_id = cpumask_first(sd_span);
16741693

16751694
sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map);
16761695

@@ -1709,16 +1728,6 @@ sd_init(struct sched_domain_topology_level *tl,
17091728
sd->cache_nice_tries = 1;
17101729
}
17111730

1712-
/*
1713-
* For all levels sharing cache; connect a sched_domain_shared
1714-
* instance.
1715-
*/
1716-
if (sd->flags & SD_SHARE_LLC) {
1717-
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
1718-
atomic_inc(&sd->shared->ref);
1719-
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
1720-
}
1721-
17221731
sd->private = sdd;
17231732

17241733
return sd;
@@ -2428,6 +2437,16 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
24282437
return true;
24292438
}
24302439

2440+
/*
 * Link @sd to a sched_domain_shared instance.
 *
 * The instance is taken from the sd_data that sd->private points to,
 * indexed by the first CPU in @sd's span. Each call resets the instance's
 * nr_busy_cpus to sd->span_weight and increments its reference count.
 */
static void init_sched_domain_shared(struct sched_domain *sd)
2441+
{
2442+
struct sd_data *sdd = sd->private;
2443+
int sd_id = cpumask_first(sched_domain_span(sd));
2444+
2445+
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
2446+
atomic_set(&sd->shared->nr_busy_cpus, sd->span_weight);
2447+
atomic_inc(&sd->shared->ref);
2448+
}
2449+
24312450
/*
24322451
* Build sched domains for a given set of CPUs and attach the sched domains
24332452
* to the individual CPUs
@@ -2485,6 +2504,57 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
24852504
}
24862505
}
24872506

2507+
for_each_cpu(i, cpu_map) {
2508+
struct sched_domain *sd_asym = NULL;
2509+
bool asym_claimed = false;
2510+
2511+
sd = *per_cpu_ptr(d.sd, i);
2512+
if (!sd)
2513+
continue;
2514+
2515+
/*
2516+
* In case of ASYM_CPUCAPACITY, attach sd->shared to
2517+
* sd_asym_cpucapacity for wakeup stat tracking.
2518+
*
2519+
* Caveats:
2520+
*
2521+
* 1) has_asym is system-wide, but a given CPU may still
2522+
* lack an SD_ASYM_CPUCAPACITY_FULL ancestor (e.g., an
2523+
* exclusive cpuset carving out a symmetric capacity island).
2524+
* Such CPUs must fall through to the LLC seeding path below.
2525+
*
2526+
* 2) Skip the asym attach if the asym ancestor is an
2527+
* overlapping domain (SD_NUMA). On those topologies let the
2528+
* LLC path own the shared object instead.
2529+
*
2530+
* XXX: This assumes the SD_ASYM_CPUCAPACITY_FULL domain
2531+
* always has more than one group; otherwise it is prone to
2532+
* degeneration.
2533+
*/
2534+
sd_asym = sd;
2535+
while (sd_asym && !(sd_asym->flags & SD_ASYM_CPUCAPACITY_FULL))
2536+
sd_asym = sd_asym->parent;
2537+
2538+
if (sd_asym && !(sd_asym->flags & SD_NUMA)) {
2539+
init_sched_domain_shared(sd_asym);
2540+
asym_claimed = true;
2541+
}
2542+
2543+
/* First, find the topmost SD_SHARE_LLC domain */
2544+
sd = *per_cpu_ptr(d.sd, i);
2545+
while (sd->parent && (sd->parent->flags & SD_SHARE_LLC))
2546+
sd = sd->parent;
2547+
2548+
if (sd->flags & SD_SHARE_LLC) {
2549+
/*
2550+
* Initialize the sd->shared for SD_SHARE_LLC unless
2551+
* the asym path above already claimed it.
2552+
*/
2553+
if (!asym_claimed)
2554+
init_sched_domain_shared(sd);
2555+
}
2556+
}
2557+
24882558
/*
24892559
* Calculate an allowed NUMA imbalance such that LLCs do not get
24902560
* imbalanced.

0 commit comments

Comments
 (0)