Skip to content

Commit 2078ef2

Browse files
committed
deepin: arm64: cpufeature: disable LSE on UMA
deepin inclusion category: performance Disable LSE (Large System Extensions) atomic instructions on UMA (Uniform Memory Access) systems to improve performance of per-CPU atomic operations. LSE atomics can exhibit significant overhead on certain microarchitectures (e.g., Neoverse V2) due to "far atomic" implementations bypassing L1 cache [1]. LL/SC (Load-Link/Store-Conditional) is substantially faster for uncontended per-CPU workloads typical on UMA systems. The default value is 1 (enabled), which automatically disables LSE on single-node (UMA) systems. Set to 0 to keep using hardware-detected LSE on UMA systems regardless of performance impact. When this feature is active, the kernel logs: "LSE atomics: disabled on UMA, use lse_disable_on_uma=0 to enable." NUMA systems are unaffected and continue using hardware detected LSE capability normally. PS: Tested with byte-unixbench6 on a kp920 with 24 cores and 64GB of memory; the overall score improved by 3.8%. Link: https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop [1] Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
1 parent c075a7c commit 2078ef2

2 files changed

Lines changed: 46 additions & 1 deletion

File tree

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3065,6 +3065,27 @@
30653065
ltpc= [NET]
30663066
Format: <io>,<irq>,<dma>
30673067

3068+
lse_disable_on_uma [ARM64]
3069+
Disable LSE (Large System Extensions) atomic instructions
3070+
on UMA (Uniform Memory Access) systems to improve
3071+
performance of per-CPU atomic operations. LSE atomics can
3072+
exhibit significant overhead on certain microarchitectures
3073+
(e.g., Neoverse V2) due to "far atomic" implementations
3074+
bypassing L1 cache. LL/SC (Load-Link/Store-Conditional)
3075+
is substantially faster for uncontended per-CPU workloads
3076+
typical on UMA systems.
3077+
3078+
The default value is 1 (enabled), which automatically
3079+
disables LSE on single-node (UMA) systems. Set to 0 to
3080+
keep using hardware-detected LSE on UMA systems regardless of
3081+
performance impact.
3082+
3083+
When this feature is active, the kernel logs:
3084+
"LSE atomics: disabled on UMA, use lse_disable_on_uma=0 to enable."
3085+
3086+
NUMA systems are unaffected and continue using hardware
3087+
detected LSE capability normally.
3088+
30683089
lsm.debug [SECURITY] Enable LSM initialization debugging output.
30693090

30703091
lsm=lsm1,...,lsmN

arch/arm64/kernel/cpufeature.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1540,6 +1540,30 @@ static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
15401540
return true;
15411541
}
15421542

1543+
static bool lse_disable_on_uma __read_mostly = true; // UMA decision enabled by default
1544+
1545+
static int __init arm64_lse_disable_on_uma_setup(char *str)
1546+
{
1547+
return kstrtobool(str, &lse_disable_on_uma);
1548+
}
1549+
early_param("lse_disable_on_uma", arm64_lse_disable_on_uma_setup);
1550+
1551+
static bool has_lse_capability_uma_aware(const struct arm64_cpu_capabilities *cap,
1552+
int scope)
1553+
{
1554+
int num_nodes = num_possible_nodes();
1555+
1556+
/* UMA system: disable LSE when lse_disable_on_uma is enabled */
1557+
if (num_nodes <= 1 && lse_disable_on_uma) {
1558+
if (scope == SCOPE_SYSTEM && smp_processor_id() == 0)
1559+
pr_info("LSE atomics: disabled on UMA, use lse_disable_on_uma=0 to enable.\n");
1560+
return false;
1561+
}
1562+
1563+
/* NUMA system or user disabled the feature, use hardware capability */
1564+
return has_cpuid_feature(cap, scope);
1565+
}
1566+
15431567
static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
15441568
{
15451569
bool has_sre;
@@ -2348,7 +2372,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
23482372
.desc = "LSE atomic instructions",
23492373
.capability = ARM64_HAS_LSE_ATOMICS,
23502374
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
2351-
.matches = has_cpuid_feature,
2375+
.matches = has_lse_capability_uma_aware,
23522376
ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP)
23532377
},
23542378
#endif /* CONFIG_ARM64_LSE_ATOMICS */

0 commit comments

Comments
 (0)