From a296a2d78a2b351e2daa95aab1c049852cce4dc3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 22:36:56 +0100 Subject: [PATCH 01/95] arm64/booting: Document boot requirements for FEAT_NMI commit c8b89f40f6393a3e2bb8892e23bb7f6da787b5e7 openEuler In order to use FEAT_NMI we must be able to use ALLINT, require that it behave as though not trapped when it is present. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- Documentation/arch/arm64/booting.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index b57776a68f156..29126bf0eac9a 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -411,6 +411,12 @@ Before jumping into the kernel, the following conditions must be met: - HFGRWR_EL2.nPIRE0_EL1 (bit 57) must be initialised to 0b1. + For CPUs with Non-maskable Interrupts (FEAT_NMI): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.TALLINT must be initialised to 0b0. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. Where the values documented From ec154b39086d05c414a58bd6f85d932c762eddd0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 22:39:28 +0100 Subject: [PATCH 02/95] arm64/sysreg: Add definitions for immediate versions of MSR ALLINT commit abffad0b651f06d94330f8cadbdab00dd0c1c11e openEuler Encodings are provided for ALLINT which allow setting of ALLINT.ALLINT using an immediate rather than requiring that a register be loaded with the value to write. Since these don't currently fit within the scheme we have for sysreg generation add manual encodings like we currently do for other similar registers such as SVCR. Since it is required that these immediate versions be encoded with xzr as the source register provide asm wrapper which ensure this is the case. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/daifflags.h | 1 + arch/arm64/include/asm/nmi.h | 19 +++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 2 ++ 3 files changed, 22 insertions(+) create mode 100644 arch/arm64/include/asm/nmi.h diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 55f57dfa8e2fe..b3bed20043423 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -141,4 +141,5 @@ static inline void local_daif_inherit(struct pt_regs *regs) */ write_sysreg(flags, daif); } + #endif diff --git a/arch/arm64/include/asm/nmi.h b/arch/arm64/include/asm/nmi.h new file mode 100644 index 0000000000000..f2d20b713c488 --- /dev/null +++ b/arch/arm64/include/asm/nmi.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_NMI_H +#define __ASM_NMI_H + + +static __always_inline void _allint_clear(void) +{ + asm volatile(__msr_s(SYS_ALLINT_CLR, "xzr")); +} + +static __always_inline void _allint_set(void) +{ + asm volatile(__msr_s(SYS_ALLINT_SET, "xzr")); +} + +#endif \ No newline at end of file diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 38296579a4fd5..810e5b9e7a2ce 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -167,6 +167,8 @@ * System registers, organised loosely by encoding but grouped together * where the architected name contains an index. e.g. ID_MMFR_EL1. */ +#define SYS_ALLINT_CLR sys_reg(0, 1, 4, 0, 0) +#define SYS_ALLINT_SET sys_reg(0, 1, 4, 1, 0) #define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3) #define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3) #define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3) From ee3437343d2afdcd00bb81e148bc1c6122192924 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 7 Oct 2022 12:17:01 +0100 Subject: [PATCH 03/95] arm64/asm: Introduce assembly macros for managing ALLINT commit 1c203efa0a421d6a2368653f017143a430137be9 openEuler In order to allow assembly code to ensure that not even superpriorty interrupts can preempt it provide macros for enabling and disabling ALLINT.ALLINT. This is not integrated into the existing DAIF macros since we do not always wish to manage ALLINT along with DAIF and the use of DAIF in the naming of the existing macros might lead to surprises if ALLINT is also managed. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/assembler.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0fa067c2324de..c010f6df62e86 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -34,6 +34,22 @@ wx\n .req w\n .endr + .macro disable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_SET, xzr +alternative_else_nop_endif +#endif + .endm + + .macro enable_allint +#ifdef CONFIG_ARM64_NMI +alternative_if ARM64_HAS_NMI + msr_s SYS_ALLINT_CLR, xzr +alternative_else_nop_endif +#endif + .endm + .macro disable_daif msr daifset, #0xf .endm From e7d88893eb77a4d31fadec764aa368710d58bdeb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 6 Oct 2022 18:21:35 +0100 Subject: [PATCH 04/95] arm64/hyp-stub: Enable access to ALLINT commit 611b58ad24474484e47e99acbbcd34bce12726bd openEuler In order to use NMIs we need to ensure that traps are disabled for it so update HCRX_EL2 to ensure that TALLINT is not set when we detect support for NMIs. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/kernel/hyp-stub.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 65f76064c86b2..731344c908711 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -76,6 +76,18 @@ SYM_CODE_END(elx_sync) SYM_CODE_START_LOCAL(__finalise_el2) finalise_el2_state + // NMIs + check_override id_aa64pfr1 ID_AA64PFR1_EL1_NMI_SHIFT .Linit_nmi .Lskip_nmi x1 x2 +.Linit_nmi: + mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present? + ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 + cbz x1, .Lskip_nmi + + mrs_s x1, SYS_HCRX_EL2 + bic x1, x1, #HCRX_EL2_TALLINT_MASK // Don't trap ALLINT + msr_s SYS_HCRX_EL2, x1 +.Lskip_nmi: + // nVHE? No way! Give me the real thing! // Sanity check: MMU *must* be off mrs x1, sctlr_el2 From 40efa4993566d9d68431f5c643e09eb17314d074 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 4 Nov 2022 18:08:06 +0000 Subject: [PATCH 05/95] arm64/idreg: Add an override for FEAT_NMI commit 7c694a36239a15ccdb2806ad7a32a43d7a26fe3f openEuler Add a named override for FEAT_NMI, allowing it to be explicitly disabled in case of problems. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/kernel/idreg-override.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 3addc09f87461..6f248ef8c9d26 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -100,6 +100,7 @@ static const struct ftr_set_desc pfr1 __initconst = { .fields = { FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ), FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL), + FIELD("nmi", ID_AA64PFR1_EL1_NMI_SHIFT, NULL), FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter), {} }, From f9171c8d6bdd4fa738bddb37a865345acfc77058 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 3 Nov 2022 15:50:08 +0000 Subject: [PATCH 06/95] arm64/cpufeature: Detect PE support for FEAT_NMI commit f72387e1f8bc992a1b8c6ecc37c90c458e92d455 openEuler Use of FEAT_NMI requires that all the PEs in the system and the GIC have NMI support. This patch implements the PE part of that detection. In order to avoid problematic interactions between real and pseudo NMIs we disable the architected feature if the user has enabled pseudo NMIs on the command line. If this is done on a system where support for the architected feature is detected then a warning is printed during boot in order to help users spot what is likely to be a misconfiguration. In order to allow KVM to offer the feature to guests even if pseudo NMIs are in use by the host we have a separate feature for the raw feature which is used in KVM. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/cpufeature.h | 6 +++ arch/arm64/kernel/cpufeature.c | 66 ++++++++++++++++++++++++++++- arch/arm64/tools/cpucaps | 2 + 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5bba393760557..53343c6cba583 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -808,6 +808,12 @@ static __always_inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_GIC_PRIO_MASKING); } +static __always_inline bool system_uses_nmi(void) +{ + return IS_ENABLED(CONFIG_ARM64_NMI) && + cpus_have_const_cap(ARM64_USES_NMI); +} + static inline bool system_supports_mte(void) { return IS_ENABLED(CONFIG_ARM64_MTE) && diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 7debd940c470a..f569da53f440d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #include @@ -255,6 +256,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_NMI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0), @@ -2153,9 +2155,11 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_E0PD */ -#ifdef CONFIG_ARM64_PSEUDO_NMI +#if IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) || IS_ENABLED(CONFIG_ARM64_NMI) static bool enable_pseudo_nmi; +#endif +#ifdef CONFIG_ARM64_PSEUDO_NMI static int __init early_enable_pseudo_nmi(char *p) { return kstrtobool(p, &enable_pseudo_nmi); @@ -2205,6 +2209,41 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry } #endif +#ifdef CONFIG_ARM64_NMI +static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope) +{ + if (!has_cpuid_feature(entry, scope)) + return false; + + /* + * Having both real and pseudo NMIs enabled simultaneously is + * likely to cause confusion. Since pseudo NMIs must be + * enabled with an explicit command line option, if the user + * has set that option on a system with real NMIs for some + * reason assume they know what they're doing. + */ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) { + pr_info("Pseudo NMI enabled, not using architected NMI\n"); + return false; + } + + return true; +} + +static void nmi_enable(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Enable use of NMIs controlled by ALLINT, SPINTMASK should + * be clear by default but make it explicit that we are using + * this mode. Ensure that ALLINT is clear first in order to + * avoid leaving things masked. + */ + _allint_clear(); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPINTMASK, SCTLR_EL1_NMI); + isb(); +} +#endif + #ifdef CONFIG_ARM64_BTI static void bti_enable(const struct arm64_cpu_capabilities *__unused) { @@ -2799,6 +2838,31 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, +#ifdef CONFIG_ARM64_NMI + { + .desc = "Non-maskable Interrupts present", + .capability = ARM64_HAS_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_EL1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_EL1_NMI_IMP, + .matches = has_cpuid_feature, + }, + { + .desc = "Non-maskable Interrupts enabled", + .capability = ARM64_USES_NMI, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR1_EL1_NMI_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64PFR1_EL1_NMI_IMP, + .matches = use_nmi, + .cpu_enable = nmi_enable, + }, +#endif {}, }; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index c251ef3caae56..686b95b15ccea 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -39,6 +39,7 @@ HAS_LDAPR HAS_LSE_ATOMICS HAS_MOPS HAS_NESTED_VIRT +HAS_NMI HAS_NO_FPSIMD HAS_NO_HW_PREFETCH HAS_PAN @@ -68,6 +69,7 @@ SPECTRE_BHB SSBS SVE UNMAP_KERNEL_AT_EL0 +USES_NMI WORKAROUND_834220 WORKAROUND_843419 WORKAROUND_845719 From 2a0c41fc2e6a0769230ad25df95a4ea6aeae7b40 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 11 Nov 2022 18:45:29 +0000 Subject: [PATCH 07/95] KVM: arm64: Hide FEAT_NMI from guests commit 032d8169dbdc3c12142f3dc5b58b9c6d1a52fa59 openEuler FEAT_NMI is not yet useful to guests pending implementation of vGIC support. Mask out the feature from the ID register and prevent guests creating state in ALLINT.ALLINT by activating the trap on write provided in HCRX_EL2.TALLINT when they are running. There is no trap available for reads from ALLINT. We do not need to check for FEAT_HCRX since it is mandatory since v8.7 and FEAT_NMI is a v8.8 feature. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ++++++ arch/arm64/kvm/sys_regs.c | 1 + 2 files changed, 7 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 526085401f663..84c2d5873d2c0 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -194,6 +194,9 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } + if (cpus_have_final_cap(ARM64_HAS_NMI)) + sysreg_clear_set_s(SYS_HCRX_EL2, 0, HCRX_EL2_TALLINT); + vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); @@ -218,6 +221,9 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + if (cpus_have_final_cap(ARM64_HAS_NMI)) + sysreg_clear_set_s(SYS_HCRX_EL2, HCRX_EL2_TALLINT, 0); + write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) { struct kvm_cpu_context *hctxt; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 475eab52b4788..2e8306d0f7793 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1338,6 +1338,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); break; case SYS_ID_AA64ISAR1_EL1: From 99b0d70c6188ce222bddc7857734465f98d8d441 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 11 Nov 2022 13:35:25 +0000 Subject: [PATCH 08/95] arm64/nmi: Manage masking for superpriority interrupts along with DAIF commit dd8b74f042237727edef1187ffa936c3686d300b openEuler As we do for pseudo NMIs add code to our DAIF management which keeps superpriority interrupts unmasked when we have asynchronous exceptions enabled. Since superpriority interrupts are not masked through DAIF like pseduo NMIs are we also need to modify the assembler macros for managing DAIF to ensure that the masking is done in the assembly code. At present users of the assembly macros always mask pseudo NMIs. There is a difference to the actual handling between pseudo NMIs and superpriority interrupts in the assembly save_and_disable_irq and restore_irq macros, these cover both interrupts and FIQs using DAIF without regard for the use of pseudo NMIs so also mask those but are not updated here to mask superpriority interrupts. Given the names it is not clear that the behaviour with pseudo NMIs is particularly intentional, and in any case these macros are only used in the implementation of alternatives for software PAN while hardware PAN has been mandatory since v8.1 so it is not anticipated that practical systems with support for FEAT_NMI will ever execute the affected code. This should be a conservative set of masked regions, we may be able to relax this in future, but this should represent a good starting point. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/assembler.h | 2 ++ arch/arm64/include/asm/daifflags.h | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c010f6df62e86..979ba9ddee414 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -51,11 +51,13 @@ alternative_else_nop_endif .endm .macro disable_daif + disable_allint msr daifset, #0xf .endm .macro enable_daif msr daifclr, #0xf + enable_allint .endm /* diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index b3bed20043423..2417cc6b1631a 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #define DAIF_PROCCTX 0 @@ -35,6 +36,9 @@ static inline void local_daif_mask(void) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + if (system_uses_nmi()) + _allint_set(); + trace_hardirqs_off(); } @@ -116,6 +120,14 @@ static inline void local_daif_restore(unsigned long flags) write_sysreg(flags, daif); + /* If we can take asynchronous errors we can take NMIs */ + if (system_uses_nmi()) { + if (flags & PSR_A_BIT) + _allint_set(); + else + _allint_clear(); + } + if (irq_disabled) trace_hardirqs_off(); } @@ -140,6 +152,14 @@ static inline void local_daif_inherit(struct pt_regs *regs) * use the pmr instead. */ write_sysreg(flags, daif); + + /* The ALLINT field is at the same position in pstate and ALLINT */ + if (system_uses_nmi()) { + if (regs->pstate & ALLINT_ALLINT) + _allint_set(); + else + _allint_clear(); + } } #endif From 39970c76df7b5a50bee61e2b374ea4fbddbe34d1 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 12 Dec 2022 13:43:36 +0000 Subject: [PATCH 09/95] arm64/entry: Don't call preempt_schedule_irq() with NMIs masked commit 2117a37e18658d788b7eee9904b488570086b987 openEuler As we do for pseudo NMIs don't call preempt_schedule_irq() when architechted NMIs are masked. If they are masked then we are calling from a preempting context so skip preemption. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/kernel/entry-common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 61eef36d9a883..e70371ff530bb 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -250,6 +250,15 @@ static void __sched arm64_preempt_schedule_irq(void) if (READ_ONCE(current_thread_info()->preempt_count) != 0) return; + /* + * Architected NMIs are unmasked prior to handling regular + * IRQs and masked while handling FIQs. If ALLINT is set then + * we are in a NMI or other preempting context so skip + * preemption. + */ + if (system_uses_nmi() && (read_sysreg_s(SYS_ALLINT) & ALLINT_ALLINT)) + return; + /* * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC * priority masking is used the GIC irqchip driver will clear DAIF.IF From 534c87e0684d87ecfde079e113297a4ae7c4faf4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 2 Nov 2022 21:13:07 +0000 Subject: [PATCH 10/95] arm64/irq: Document handling of FEAT_NMI in irqflags.h commit feb4809ad8f4b5d14c643eff47ac208432eac95f openEuler We have documentation at the top of irqflags.h which explains the DAIF masking. Since the additional masking with NMIs is related and also covers the IF in DAIF extend the comment to note what's going on with NMIs though none of the code in irqflags.h is updated to handle NMIs. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/irqflags.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 1f31ec146d161..a9f117fb43c66 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -19,6 +19,16 @@ * always masked and unmasked together, and have no side effects for other * flags. Keeping to this order makes it easier for entry.S to know which * exceptions should be unmasked. + * + * With the addition of the FEAT_NMI extension we gain an additional + * class of superpriority IRQ/FIQ which is separately masked with a + * choice of modes controlled by SCTLR_ELn.{SPINTMASK,NMI}. Linux + * sets SPINTMASK to 0 and NMI to 1 which results in ALLINT.ALLINT + * masking both superpriority interrupts and IRQ/FIQ regardless of the + * I and F settings. Since these superpriority interrupts are being + * used as NMIs we do not include them in the interrupt masking here, + * anything that requires that NMIs be masked needs to explicitly do + * so. */ static __always_inline bool __irqflags_uses_pmr(void) From 3dccb9a654f002c8744d9e4a56016b81f8141795 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 18:53:47 +0100 Subject: [PATCH 11/95] arm64/nmi: Add handling of superpriority interrupts as NMIs commit 8f1897e1dfc2c616c9ef834494c24755b8852be2 openEuler Our goal with superpriority interrupts is to use them as NMIs, taking advantage of the much smaller regions where they are masked to allow prompt handling of the most time critical interrupts. When an interrupt configured with superpriority we will enter EL1 as normal for any interrupt, the presence of a superpriority interrupt is indicated with a status bit in ISR_EL1. We use this to check for the presence of a superpriority interrupt before we unmask anything in elX_interrupt(), reporting without unmasking any interrupts. If no superpriority interrupt is present then we handle normal interrupts as normal, superpriority interrupts will be unmasked while doing so as a result of setting DAIF_PROCCTX. Both IRQs and FIQs may be configured with superpriority so we handle both, passing an additional root handler into the elX_interrupt() function along with the mask for the bit in ISR_EL1 which indicates the presence of the relevant kind of superpriority interrupt. These root handlers can be configured by the interrupt controller similarly to the root handlers for normal interrupts using the newly added set_handle_nmi_irq() and set_handle_nmi_fiq() functions. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/irq.h | 1 + arch/arm64/kernel/entry-common.c | 51 +++++++++++++++++++++++++++----- arch/arm64/kernel/irq.c | 22 ++++++++++++++ 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index fac08e18bcd51..62fb5117ef7bf 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,6 +8,7 @@ struct pt_regs; +int set_handle_nmi_irq(void (*handle_irq)(struct pt_regs *)); int set_handle_irq(void (*handle_irq)(struct pt_regs *)); #define set_handle_irq set_handle_irq int set_handle_fiq(void (*handle_fiq)(struct pt_regs *)); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index e70371ff530bb..f32882f3e5765 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -293,6 +293,8 @@ static void do_interrupt_handler(struct pt_regs *regs, set_irq_regs(old_regs); } +extern void (*handle_arch_nmi_irq)(struct pt_regs *regs); +extern void (*handle_arch_nmi_fiq)(struct pt_regs *regs); extern void (*handle_arch_irq)(struct pt_regs *); extern void (*handle_arch_fiq)(struct pt_regs *); @@ -521,6 +523,14 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) } } +static __always_inline void __el1_nmi(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + arm64_enter_nmi(regs); + do_interrupt_handler(regs, handler); + arm64_exit_nmi(regs); +} + static __always_inline void __el1_pnmi(struct pt_regs *regs, void (*handler)(struct pt_regs *)) { @@ -542,9 +552,17 @@ static __always_inline void __el1_irq(struct pt_regs *regs, exit_to_kernel_mode(regs); } -static void noinstr el1_interrupt(struct pt_regs *regs, - void (*handler)(struct pt_regs *)) + +static void noinstr el1_interrupt(struct pt_regs *regs, u64 nmi_flag, + void (*handler)(struct pt_regs *), + void (*nmi_handler)(struct pt_regs *)) { + /* Is there a NMI to handle? */ + if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) { + __el1_nmi(regs, nmi_handler); + return; + } + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) @@ -555,12 +573,12 @@ static void noinstr el1_interrupt(struct pt_regs *regs, asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs) { - el1_interrupt(regs, handle_arch_irq); + el1_interrupt(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); } asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs) { - el1_interrupt(regs, handle_arch_fiq); + el1_interrupt(regs, ISR_EL1_FS, handle_arch_fiq, handle_arch_nmi_fiq); } asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs) @@ -783,11 +801,28 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) } } -static void noinstr el0_interrupt(struct pt_regs *regs, - void (*handler)(struct pt_regs *)) +static void noinstr el0_interrupt(struct pt_regs *regs, u64 nmi_flag, + void (*handler)(struct pt_regs *), + void (*nmi_handler)(struct pt_regs *)) { enter_from_user_mode(regs); + /* Is there a NMI to handle? */ + if (system_uses_nmi() && (read_sysreg(isr_el1) & nmi_flag)) { + /* + * Any system with FEAT_NMI should have FEAT_CSV2 and + * not be affected by Spectre v2 so we don't mitigate + * here. + */ + + arm64_enter_nmi(regs); + do_interrupt_handler(regs, nmi_handler); + arm64_exit_nmi(regs); + + exit_to_user_mode(regs); + return; + } + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); if (regs->pc & BIT(55)) @@ -802,7 +837,7 @@ static void noinstr el0_interrupt(struct pt_regs *regs, static void noinstr __el0_irq_handler_common(struct pt_regs *regs) { - el0_interrupt(regs, handle_arch_irq); + el0_interrupt(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); } asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs) @@ -812,7 +847,7 @@ asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs) static void noinstr __el0_fiq_handler_common(struct pt_regs *regs) { - el0_interrupt(regs, handle_arch_fiq); + el0_interrupt(regs, ISR_EL1_FS, handle_arch_fiq, handle_arch_nmi_fiq); } asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 85087e2df5649..b1f2a9b490393 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -87,6 +87,16 @@ void do_softirq_own_stack(void) } #endif +static void default_handle_nmi_irq(struct pt_regs *regs) +{ + panic("Superpriority IRQ taken without a root NMI IRQ handler\n"); +} + +static void default_handle_nmi_fiq(struct pt_regs *regs) +{ + panic("Superpriority FIQ taken without a root NMI FIQ handler\n"); +} + static void default_handle_irq(struct pt_regs *regs) { panic("IRQ taken without a root IRQ handler\n"); @@ -97,9 +107,21 @@ static void default_handle_fiq(struct pt_regs *regs) panic("FIQ taken without a root FIQ handler\n"); } +void (*handle_arch_nmi_irq)(struct pt_regs *) __ro_after_init = default_handle_nmi_irq; +void (*handle_arch_nmi_fiq)(struct pt_regs *) __ro_after_init = default_handle_nmi_fiq; void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq; void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq; +int __init set_handle_nmi_irq(void (*handle_nmi_irq)(struct pt_regs *)) +{ + if (handle_arch_nmi_irq != default_handle_nmi_irq) + return -EBUSY; + + handle_arch_nmi_irq = handle_nmi_irq; + pr_info("Root superpriority IRQ handler: %ps\n", handle_nmi_irq); + return 0; +} + int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq != default_handle_irq) From 16b6cc6c3615da9875e2245981d7d6a9e8d6d73f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Oct 2022 12:57:00 +0100 Subject: [PATCH 12/95] arm64/nmi: Add Kconfig for NMI commit 77ca8065cb64d1066aa2276e6aecb7c176210922 openEuler Since NMI handling is in some fairly hot paths we provide a Kconfig option which allows support to be compiled out when not needed. Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/Kconfig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 502ffb294d914..623fdedcce591 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2146,6 +2146,23 @@ config ARM64_EPAN if the cpu does not implement the feature. endmenu # "ARMv8.7 architectural features" +menu "ARMv8.8 architectural features" + +config ARM64_NMI + bool "Enable support for Non-maskable Interrupts (NMI)" + default y + help + Non-maskable interrupts are an architecture and GIC feature + which allow the system to configure some interrupts to be + configured to have superpriority, allowing them to be handled + before other interrupts and masked for shorter periods of time. + + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. It will also be + disabled if pseudo NMIs are enabled at runtime. + +endmenu # "ARMv8.8 architectural features" + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y From 8064f24f2f055095c69f50053609356d27b75b5c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 13 Oct 2022 16:05:33 +0200 Subject: [PATCH 13/95] irqchip/gic-v3: Implement FEAT_GICv3_NMI support commit 0408b5bc43005221087db0b668aed30d06be9672 openEuler The FEAT_GICv3_NMI GIC feature coupled with the CPU FEAT_NMI enables handling NMI interrupts in HW on aarch64, by adding a superpriority interrupt to the existing GIC priority scheme. Implement GIC driver support for the FEAT_GICv3_NMI feature. Rename gic_supports_nmi() helper function to gic_supports_pseudo_nmis() to make the pseudo NMIs code path clearer and more explicit. Check, through the ARM64 capabilitity infrastructure, if support for FEAT_NMI was detected on the core and the system has not overridden the detection and forced pseudo-NMIs enablement. If FEAT_NMI is detected, it was not overridden (check embedded in the system_uses_nmi() call) and the GIC supports the FEAT_GICv3_NMI feature, install an NMI handler and initialize NMIs related HW GIC registers. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Mark Brown Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/irqchip/irq-gic-v3.c | 153 ++++++++++++++++++++++++----- include/linux/irqchip/arm-gic-v3.h | 4 + 2 files changed, 130 insertions(+), 27 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 49db1a0057716..cf205ae1e17ff 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -61,6 +61,7 @@ struct gic_chip_data { u32 nr_redist_regions; u64 flags; bool has_rss; + bool has_nmi; unsigned int ppi_nr; struct partition_desc **ppi_descs; }; @@ -149,6 +150,20 @@ enum gic_intid_range { __INVALID_RANGE__ }; +#ifdef CONFIG_ARM64 +#include + +static inline bool has_v3_3_nmi(void) +{ + return gic_data.has_nmi && system_uses_nmi(); +} +#else +static inline bool has_v3_3_nmi(void) +{ + return false; +} +#endif + static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq) { switch (hwirq) { @@ -387,6 +402,42 @@ static int gic_peek_irq(struct irq_data *d, u32 offset) return !!(readl_relaxed(base + offset + (index / 32) * 4) & mask); } +static DEFINE_RAW_SPINLOCK(irq_controller_lock); + +static void gic_irq_configure_nmi(struct irq_data *d, bool enable) +{ + void __iomem *base, *addr; + u32 offset, index, mask, val; + + offset = convert_offset_index(d, GICD_INMIR, &index); + mask = 1 << (index % 32); + + if (gic_irq_in_rdist(d)) + base = gic_data_rdist_sgi_base(); + else + base = gic_data.dist_base; + + addr = base + offset + (index / 32) * 4; + + raw_spin_lock(&irq_controller_lock); + + val = readl_relaxed(addr); + val = enable ? (val | mask) : (val & ~mask); + writel_relaxed(val, addr); + + raw_spin_unlock(&irq_controller_lock); +} + +static void gic_irq_enable_nmi(struct irq_data *d) +{ + gic_irq_configure_nmi(d, true); +} + +static void gic_irq_disable_nmi(struct irq_data *d) +{ + gic_irq_configure_nmi(d, false); +} + static void gic_poke_irq(struct irq_data *d, u32 offset) { void __iomem *base; @@ -432,7 +483,7 @@ static void gic_unmask_irq(struct irq_data *d) gic_poke_irq(d, GICD_ISENABLER); } -static inline bool gic_supports_nmi(void) +static inline bool gic_supports_pseudo_nmis(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && static_branch_likely(&supports_pseudo_nmis); @@ -535,7 +586,7 @@ static int gic_irq_nmi_setup(struct irq_data *d) { struct irq_desc *desc = irq_to_desc(d->irq); - if (!gic_supports_nmi()) + if (!gic_supports_pseudo_nmis() && !has_v3_3_nmi()) return -EINVAL; if (gic_peek_irq(d, GICD_ISENABLER)) { @@ -563,7 +614,10 @@ static int gic_irq_nmi_setup(struct irq_data *d) desc->handle_irq = handle_fasteoi_nmi; } - gic_irq_set_prio(d, GICD_INT_NMI_PRI); + if (has_v3_3_nmi()) + gic_irq_enable_nmi(d); + else + gic_irq_set_prio(d, GICD_INT_NMI_PRI); return 0; } @@ -572,7 +626,7 @@ static void gic_irq_nmi_teardown(struct irq_data *d) { struct irq_desc *desc = irq_to_desc(d->irq); - if (WARN_ON(!gic_supports_nmi())) + if (WARN_ON(!gic_supports_pseudo_nmis() && !has_v3_3_nmi())) return; if (gic_peek_irq(d, GICD_ISENABLER)) { @@ -598,7 +652,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d) desc->handle_irq = handle_fasteoi_irq; } - gic_irq_set_prio(d, GICD_INT_DEF_PRI); + if (has_v3_3_nmi()) + gic_irq_disable_nmi(d); + else + gic_irq_set_prio(d, GICD_INT_DEF_PRI); } static bool gic_arm64_erratum_2941627_needed(struct irq_data *d) @@ -757,7 +814,7 @@ static inline void gic_complete_ack(u32 irqnr) static bool gic_rpr_is_nmi_prio(void) { - if (!gic_supports_nmi()) + if (!gic_supports_pseudo_nmis()) return false; return unlikely(gic_read_rpr() == GICD_INT_RPR_PRI(GICD_INT_NMI_PRI)); @@ -789,7 +846,8 @@ static void __gic_handle_nmi(u32 irqnr, struct pt_regs *regs) gic_complete_ack(irqnr); if (generic_handle_domain_nmi(gic_data.domain, irqnr)) { - WARN_ONCE(true, "Unexpected pseudo-NMI (irqnr %u)\n", irqnr); + WARN_ONCE(true, "Unexpected %sNMI (irqnr %u)\n", + gic_supports_pseudo_nmis() ? "pseudo-" : "", irqnr); gic_deactivate_unhandled(irqnr); } } @@ -865,9 +923,37 @@ static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs) __gic_handle_nmi(irqnr, regs); } +#ifdef CONFIG_ARM64 +static inline u64 gic_read_nmiar(void) +{ + u64 irqstat; + + irqstat = read_sysreg_s(SYS_ICC_NMIAR1_EL1); + + dsb(sy); + + return irqstat; +} + +static asmlinkage void __exception_irq_entry gic_handle_nmi_irq(struct pt_regs *regs) +{ + u32 irqnr = gic_read_nmiar(); + + __gic_handle_nmi(irqnr, regs); +} + +static inline void gic_setup_nmi_handler(void) +{ + if (has_v3_3_nmi()) + set_handle_nmi_irq(gic_handle_nmi_irq); +} +#else +static inline void gic_setup_nmi_handler(void) { } +#endif + static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { - if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) + if (unlikely(gic_supports_pseudo_nmis() && !interrupts_enabled(regs))) __gic_handle_irq_from_irqsoff(regs); else __gic_handle_irq_from_irqson(regs); @@ -1157,7 +1243,7 @@ static void gic_cpu_sys_reg_init(void) /* Set priority mask register */ if (!gic_prio_masking_enabled()) { write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); - } else if (gic_supports_nmi()) { + } else if (gic_supports_pseudo_nmis()) { /* * Mismatch configuration with boot CPU, the system is likely * to die as interrupt masking will not work properly on all @@ -1947,25 +2033,8 @@ static const struct gic_quirk gic_quirks[] = { } }; -static void gic_enable_nmi_support(void) +static void gic_enable_pseudo_nmis(void) { - int i; - - if (!gic_prio_masking_enabled()) - return; - - if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { - pr_warn("Skipping NMI enable due to firmware issues\n"); - return; - } - - ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL); - if (!ppi_nmi_refs) - return; - - for (i = 0; i < gic_data.ppi_nr; i++) - refcount_set(&ppi_nmi_refs[i], 0); - pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", gic_has_relaxed_pmr_sync() ? "relaxed" : "forced"); @@ -2000,6 +2069,33 @@ static void gic_enable_nmi_support(void) static_branch_enable(&gic_nonsecure_priorities); static_branch_enable(&supports_pseudo_nmis); +} + +static void gic_enable_nmi_support(void) +{ + int i; + + if (!gic_prio_masking_enabled() && !has_v3_3_nmi()) + return; + + if (gic_data.flags & FLAGS_WORKAROUND_MTK_GICR_SAVE) { + pr_warn("Skipping NMI enable due to firmware issues\n"); + return; + } + + ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL); + if (!ppi_nmi_refs) + return; + + for (i = 0; i < gic_data.ppi_nr; i++) + refcount_set(&ppi_nmi_refs[i], 0); + + /* + * Initialize pseudo-NMIs only if GIC driver cannot take advantage + * of core (FEAT_NMI) and GIC (FEAT_GICv3_NMI) in HW + */ + if (!has_v3_3_nmi()) + gic_enable_pseudo_nmis(); if (static_branch_likely(&supports_deactivate_key)) gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; @@ -2068,6 +2164,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); gic_data.has_rss = !!(typer & GICD_TYPER_RSS); + gic_data.has_nmi = !!(typer & GICD_TYPER_NMI); if (typer & GICD_TYPER_MBIS) { err = mbi_init(handle, gic_data.domain); @@ -2077,6 +2174,8 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, set_handle_irq(gic_handle_irq); + gic_setup_nmi_handler(); + gic_update_rdist_properties(); gic_dist_init(); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 002b16d034d6b..a109c3212aad3 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -30,6 +30,7 @@ #define GICD_ICFGR 0x0C00 #define GICD_IGRPMODR 0x0D00 #define GICD_NSACR 0x0E00 +#define GICD_INMIR 0x0F80 #define GICD_IGROUPRnE 0x1000 #define GICD_ISENABLERnE 0x1200 #define GICD_ICENABLERnE 0x1400 @@ -39,6 +40,7 @@ #define GICD_ICACTIVERnE 0x1C00 #define GICD_IPRIORITYRnE 0x2000 #define GICD_ICFGRnE 0x3000 +#define GICD_INMIRnE 0x3B00 #define GICD_IROUTER 0x6000 #define GICD_IROUTERnE 0x8000 #define GICD_IDREGS 0xFFD0 @@ -83,6 +85,7 @@ #define GICD_TYPER_LPIS (1U << 17) #define GICD_TYPER_MBIS (1U << 16) #define GICD_TYPER_ESPI (1U << 8) +#define GICD_TYPER_NMI (1U << 9) #define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) #define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) @@ -238,6 +241,7 @@ #define GICR_ICFGR0 GICD_ICFGR #define GICR_IGRPMODR0 GICD_IGRPMODR #define GICR_NSACR GICD_NSACR +#define GICR_INMIR0 GICD_INMIR #define GICR_TYPER_PLPIS (1U << 0) #define GICR_TYPER_VLPIS (1U << 1) From 966cba9aa94724c95a5ff266b929f8718d0368b2 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Tue, 30 Jan 2024 12:38:58 +0000 Subject: [PATCH 14/95] irqchip/gic-v3: Fix hard LOCKUP caused by NMI being masked commit eefea61569216c047964a2ee1e370b5dc4fb29da openEuler When handling an exception, both daif and allint will be set by hardware. In __gic_handle_irq_from_irqson(), it only consider the Pseudo-NMI by clear daif.I and daif.F and set PMR to GIC_PRIO_IRQOFF to enable Pseudo-NMI and mask IRQ. If the hardwire NMI is enabled, it should also clear allint to enable hardware NMI and mask IRQ before handle a IRQ, otherwise the allint will be set in softirq context and local_irq_enable() can not enable IRQ, and watchdog NMI can not enter too which will cause below hard LOCKUP. And in gic_handle_irq(), it only consider the Pseudo-NMI when an exception has been taken from a context with IRQs disabled. So add a gic_supports_nmi() helper which consider both Pseudo-NMI and hardware NMI. And define PSR_ALLINT_BIT bit and update interrupts_enabled() as well as fast_interrupts_enabled() to consider the ALLINT bit. watchdog: Watchdog detected hard LOCKUP on cpu 1 Modules linked in: Sending NMI from CPU 0 to CPUs 1: Kernel panic - not syncing: Hard LOCKUP CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.6.0-gec40ec8c5e9f #295 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x98/0xf8 show_stack+0x20/0x38 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x28 panic+0x384/0x3e0 nmi_panic+0x94/0xa0 watchdog_hardlockup_check+0x1bc/0x1c8 watchdog_buddy_check_hardlockup+0x68/0x80 watchdog_timer_fn+0x88/0x2f8 __hrtimer_run_queues+0x17c/0x368 hrtimer_run_queues+0xd4/0x158 update_process_times+0x3c/0xc0 tick_periodic+0x44/0xc8 tick_handle_periodic+0x3c/0xb0 arch_timer_handler_virt+0x3c/0x58 handle_percpu_devid_irq+0x90/0x248 generic_handle_domain_irq+0x34/0x58 gic_handle_irq+0x58/0x110 call_on_irq_stack+0x24/0x58 do_interrupt_handler+0x88/0x98 el1_interrupt+0x40/0xc0 el1h_64_irq_handler+0x24/0x30 el1h_64_irq+0x64/0x68 default_idle_call+0x5c/0x160 do_idle+0x220/0x288 cpu_startup_entry+0x40/0x50 rest_init+0xf0/0xf8 arch_call_rest_init+0x18/0x20 start_kernel+0x520/0x668 __primary_switched+0xbc/0xd0 Fixes: dd8b74f04223 ("arm64/nmi: Manage masking for superpriority interrupts along with DAIF") Signed-off-by: Jinjie Ruan Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/ptrace.h | 5 +++-- arch/arm64/include/uapi/asm/ptrace.h | 1 + drivers/irqchip/irq-gic-v3.c | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 47ec58031f11b..81bb2cb17d850 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -240,10 +240,11 @@ static inline void forget_syscall(struct pt_regs *regs) true) #define interrupts_enabled(regs) \ - (!((regs)->pstate & PSR_I_BIT) && irqs_priority_unmasked(regs)) + (!((regs)->pstate & PSR_ALLINT_BIT) && !((regs)->pstate & PSR_I_BIT) && \ + irqs_priority_unmasked(regs)) #define fast_interrupts_enabled(regs) \ - (!((regs)->pstate & PSR_F_BIT)) + (!((regs)->pstate & PSR_ALLINT_BIT) && !(regs)->pstate & PSR_F_BIT) static inline unsigned long user_stack_pointer(struct pt_regs *regs) { diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7fa2f7036aa78..8a125a1986bea 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -48,6 +48,7 @@ #define PSR_D_BIT 0x00000200 #define PSR_BTYPE_MASK 0x00000c00 #define PSR_SSBS_BIT 0x00001000 +#define PSR_ALLINT_BIT 0x00002000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_DIT_BIT 0x01000000 diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index cf205ae1e17ff..c30ec2ecf41c9 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -151,6 +151,7 @@ enum gic_intid_range { }; #ifdef CONFIG_ARM64 +#include #include static inline bool has_v3_3_nmi(void) @@ -881,6 +882,10 @@ static void __gic_handle_irq_from_irqson(struct pt_regs *regs) if (gic_prio_masking_enabled()) { gic_pmr_mask_irqs(); gic_arch_enable_irqs(); + } else if (has_v3_3_nmi()) { +#ifdef CONFIG_ARM64_NMI + _allint_clear(); +#endif } if (!is_nmi) From b60a0e5d52888f5f5f1cf59eac612a67ee7a7388 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 29 Mar 2024 16:44:23 +0800 Subject: [PATCH 15/95] arm64: Enable hardware NMI for perf events NMI commit 5cc820c154b7894835919d6a64720f198c907714 openEuler Like pseudo NMI, also select HAVE_PERF_EVENTS_NMI for hardware NMI, and update the comment for arch_perf_nmi_is_available(). Signed-off-by: Jinjie Ruan Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/Kconfig | 2 +- arch/arm64/kernel/watchdog_hld.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 623fdedcce591..2e3950f8522a0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -220,7 +220,7 @@ config ARM64 select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS - select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI + select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI || ARM64_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c index e55548cb26df0..ae0da38214fa8 100644 --- a/arch/arm64/kernel/watchdog_hld.c +++ b/arch/arm64/kernel/watchdog_hld.c @@ -28,9 +28,10 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) bool __init arch_perf_nmi_is_available(void) { /* - * hardlockup_detector_perf_init() will success even if Pseudo-NMI turns off, - * however, the pmu interrupts will act like a normal interrupt instead of - * NMI and the hardlockup detector would be broken. + * hardlockup_detector_perf_init() will success even if Pseudo-NMI or + * Hardware NMI turns off. However, the pmu interrupts will act like + * a normal interrupt instead of NMI and the hardlockup detector would + * be broken. */ return arm_pmu_irq_is_nmi(); } From 138647f5f595ee69ef0ebdc1a11622fe1f7eac7a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 16 May 2024 17:20:16 +0800 Subject: [PATCH 16/95] irqchip/gic-v3: Fix one race condition due to NMI withdraw commit 626602294dca57b664e95ba72ee32435179af627 openEuler The introduce of FEAT_NMI/FEAT_GICv3_NMI will cause a race problem that we may handle the normal interrupt in interrupt disabled context due to the withdraw of NMI interrupt. The flow will be like below: [interrupt disabled] <- normal interrupt pending, for example timer interrupt <- NMI occurs, ISR_EL1.nmi = 1 do_el1_interrupt() <- NMI withdraw, ISR_EL1.nmi = 0 ISR_EL1.nmi = 0, not an NMI interrupt gic_handle_irq() __gic_handle_irq_from_irqson() irqnr = gic_read_iar() <- Oops, ack and handle an normal interrupt in interrupt disabled context! Fix this by checking the interrupt status in __gic_handle_irq_from_irqson() and ignore the interrupt if we're in interrupt disabled context. Fixes: 2731a6c26d4e ("irqchip/gic-v3: Implement FEAT_GICv3_NMI support") Signed-off-by: Yicong Yang Signed-off-by: Jie Liu Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/irqchip/irq-gic-v3.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index c30ec2ecf41c9..67d2e4dde7286 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -869,6 +869,28 @@ static void __gic_handle_irq_from_irqson(struct pt_regs *regs) bool is_nmi; u32 irqnr; + /* + * We should enter here with interrupts disabled, otherwise we may met + * a race here with FEAT_NMI/FEAT_GICv3_NMI: + * + * [interrupt disabled] + * <- normal interrupt pending, for example timer interrupt + * <- NMI occurs, ISR_EL1.nmi = 1 + * do_el1_interrupt() + * <- NMI withdraw, ISR_EL1.nmi = 0 + * ISR_EL1.nmi = 0, not an NMI interrupt + * gic_handle_irq() + * __gic_handle_irq_from_irqson() + * irqnr = gic_read_iar() <- Oops, ack and handle an normal interrupt + * in interrupt disabled context! + * + * So if we met this case here, just return from the interrupt context. + * Since the interrupt is still pending, we can handle it once the + * interrupt re-enabled and it'll not be missing. + */ + if (!interrupts_enabled(regs)) + return; + irqnr = gic_read_iar(); is_nmi = gic_rpr_is_nmi_prio(); From 94dbcc322e913e95eb53fc8e5ba0996634ad7262 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Thu, 28 Mar 2024 20:01:32 +0800 Subject: [PATCH 17/95] irqchip/gic-v3: Fix a system stall when using pseudo NMI with CONFIG_ARM64_NMI closed commit f7cea6febbbc02eebfd80209c6b44883b3dd6a63 openEuler A system stall occurrs when using pseudo NMI with CONFIG_ARM64_NMI closed. If the hardware supports FEAT_NMI, the ALLINT bit in pstate may set or clear on exception trap whether the software enables it or not, so it is not safe to use it to check interrupts_enabled() or fast_interrupts_enabled() when FEAT_NMI not enabled in kernel, so recover it. After applying this patch, the system stall not happen again on hardware with FEAT_NMI feature. Fixes: b087eeb7a685 ("irqchip/gic-v3: Fix hard LOCKUP caused by NMI being masked") Signed-off-by: Jinjie Ruan Signed-off-by: zhaolichang <943677312@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/ptrace.h | 5 ++--- arch/arm64/include/uapi/asm/ptrace.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 81bb2cb17d850..a90ae87ebec5f 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -240,11 +240,10 @@ static inline void forget_syscall(struct pt_regs *regs) true) #define interrupts_enabled(regs) \ - (!((regs)->pstate & PSR_ALLINT_BIT) && !((regs)->pstate & PSR_I_BIT) && \ - irqs_priority_unmasked(regs)) + (!((regs)->pstate & PSR_I_BIT) && irqs_priority_unmasked(regs)) #define fast_interrupts_enabled(regs) \ - (!((regs)->pstate & PSR_ALLINT_BIT) && !(regs)->pstate & PSR_F_BIT) + (!(regs)->pstate & PSR_F_BIT) static inline unsigned long user_stack_pointer(struct pt_regs *regs) { diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 8a125a1986bea..7fa2f7036aa78 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -48,7 +48,6 @@ #define PSR_D_BIT 0x00000200 #define PSR_BTYPE_MASK 0x00000c00 #define PSR_SSBS_BIT 0x00001000 -#define PSR_ALLINT_BIT 0x00002000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 #define PSR_DIT_BIT 0x01000000 From d4738b8440de584e11350d849a7257243d0c3341 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Thu, 5 Dec 2024 11:07:16 +0800 Subject: [PATCH 18/95] drivers/perf: hisi: Add support for HiSilicon DDRC v3 PMU driver [Upstream commit 17aa34e86936d0dba4e7c05c55ffc3e12c0ccec9] On HiSilicon HIP10C platform, the DDRC PMU is a v3 PMU. And only the offset of it's interrupt registers offset quite a different tune DDRC v2 PMUs. The control registeres and events code are the same as v2 PMU. Their modification was unexpected, which caused the interrupt handler to not handle counter overflows correctly (HiSilicon Erratum 162400501). The before DDRC PMU driver will probe v3 as v2. Therefore DDRC v3 interrupt handler cannot work properly. We fixed that by adding new ID HISI0235 to correct the DDRC v3 PMU interrupt register offset (including mask/status/clear registers). Signed-off-by: Junhao He (cherry picked from commit aa58bd1c28c317c65a4196215ef29cb85aafb922) Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 146 ++++++++++-------- 1 file changed, 85 insertions(+), 61 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ffb039d05d07b..0aa68f218c5d8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -43,6 +43,11 @@ #define DDRC_V2_EVENT_TYPE 0xe74 #define DDRC_V2_PERF_CTRL 0xeA0 +/* DDRC interrupt registers definition in v3 */ +#define DDRC_V3_INT_MASK 0x534 +#define DDRC_V3_INT_STATUS 0x538 +#define DDRC_V3_INT_CLEAR 0x53C + /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 @@ -63,6 +68,12 @@ static const u32 ddrc_reg_off[] = { DDRC_PRE_CMD, DDRC_ACT_CMD, DDRC_RNK_CHG, DDRC_RW_CHG }; +struct hisi_ddrc_pmu_regs { + u32 int_mask; + u32 int_clear; + u32 int_status; +}; + /* * Select the counter register offset using the counter index. * In PMU v1, there are no programmable counter, the count @@ -223,77 +234,43 @@ static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); } -static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 0 to enable interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_INT_MASK); -} - -static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Write 1 to mask interrupt */ - val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_INT_MASK); -} - -static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + writel(val, ddrc_pmu->base + regs->int_mask); } -static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val = readl(ddrc_pmu->base + regs->int_mask); val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); + writel(val, ddrc_pmu->base + regs->int_mask); } -static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) +static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) { - return readl(ddrc_pmu->base + DDRC_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); + return readl(ddrc_pmu->base + regs->int_status); } -static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) { - return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu, - int idx) -{ - writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR); + writel(1 << idx, ddrc_pmu->base + regs->int_clear); } -static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { - { "HISI0233", }, - { "HISI0234", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); - static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { @@ -315,6 +292,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, /* DDRC PMUs only share the same SCCL */ ddrc_pmu->ccl_id = -1; + ddrc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!ddrc_pmu->dev_info) + return -ENODEV; + ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n"); @@ -428,12 +409,12 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .stop_counters = hisi_ddrc_pmu_v1_stop_counters, .enable_counter = hisi_ddrc_pmu_v1_enable_counter, .disable_counter = hisi_ddrc_pmu_v1_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int, + .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, .write_counter = hisi_ddrc_pmu_v1_write_counter, .read_counter = hisi_ddrc_pmu_v1_read_counter, - .get_int_status = hisi_ddrc_pmu_v1_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { @@ -443,12 +424,12 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { .stop_counters = hisi_ddrc_pmu_v2_stop_counters, .enable_counter = hisi_ddrc_pmu_v2_enable_counter, .disable_counter = hisi_ddrc_pmu_v2_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_v2_disable_counter_int, + .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, .write_counter = hisi_ddrc_pmu_v2_write_counter, .read_counter = hisi_ddrc_pmu_v2_read_counter, - .get_int_status = hisi_ddrc_pmu_v2_get_int_status, - .clear_int_status = hisi_ddrc_pmu_v2_clear_int_status, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -467,15 +448,14 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ddrc_pmu->identifier >= HISI_PMU_V2) { ddrc_pmu->counter_bits = 48; ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups; ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; } else { ddrc_pmu->counter_bits = 32; ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; - ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups; ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; } + ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; ddrc_pmu->num_counters = DDRC_NR_COUNTERS; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; @@ -541,6 +521,50 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) return 0; } +static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = { + .int_mask = DDRC_INT_MASK, + .int_clear = DDRC_INT_CLEAR, + .int_status = DDRC_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v1 = { + .name = "ddrc", + .attr_groups = hisi_ddrc_pmu_v1_attr_groups, + .private = &hisi_ddrc_v1_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = { + .int_mask = DDRC_V2_INT_MASK, + .int_clear = DDRC_V2_INT_CLEAR, + .int_status = DDRC_V2_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { + .name = "ddrc", + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v2_pmu_regs, +}; + +static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { + .int_mask = DDRC_V3_INT_MASK, + .int_clear = DDRC_V3_INT_CLEAR, + .int_status = DDRC_V3_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { + .name = "ddrc", + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v3_pmu_regs, +}; + +static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { + { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1}, + { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2}, + { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3}, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); + static struct platform_driver hisi_ddrc_pmu_driver = { .driver = { .name = "hisi_ddrc_pmu", From ac54b748ee46df8bb3461af9cfa7a38a24670dbd Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:51 +0800 Subject: [PATCH 19/95] drivers/perf: hisi: Define a symbol namespace for HiSilicon Uncore PMUs [Upstream commit 41729809ac8504abb7ac757105c6db2c2fbbc466] The HiSilicon Uncore PMU framework implements some common functions and exported them to the drivers. Use a specific HISI_PMU namespace for the exported symbols to avoid pollute the generic ones. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_pmu.c | 36 +++++++++---------- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 1 + drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 1 + 8 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 40f1bc9f9b913..79e6093a35fb3 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -390,6 +390,7 @@ static void __exit hisi_cpa_pmu_module_exit(void) } module_exit(hisi_cpa_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC CPA PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Qi Liu "); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 0aa68f218c5d8..424b81730f104 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -604,6 +604,7 @@ static void __exit hisi_ddrc_pmu_module_exit(void) } module_exit(hisi_ddrc_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC DDRC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 15caf99e1eefe..14062246a3162 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -582,6 +582,7 @@ static void __exit hisi_hha_pmu_module_exit(void) } module_exit(hisi_hha_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC HHA uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 794dbcd19b7a7..8b28498612767 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -616,6 +616,7 @@ static void __exit hisi_l3c_pmu_module_exit(void) } module_exit(hisi_l3c_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Anurup M "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 797cf201996a9..0c1fa866dcded 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -570,6 +570,7 @@ static void __exit hisi_pa_pmu_module_exit(void) } module_exit(hisi_pa_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index c3013059cca82..3903284f13842 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -48,7 +48,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_event_sysfs_show, HISI_PMU); /* * sysfs cpumask attributes. For uncore PMU, we only have a single CPU to show @@ -60,7 +60,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu); } -EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); static bool hisi_validate_event_group(struct perf_event *event) { @@ -110,7 +110,7 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) return idx; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_get_event_idx); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, HISI_PMU); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, @@ -120,7 +120,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, HISI_PMU); static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { @@ -179,7 +179,7 @@ int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_irq, HISI_PMU); int hisi_uncore_pmu_event_init(struct perf_event *event) { @@ -233,7 +233,7 @@ int hisi_uncore_pmu_event_init(struct perf_event *event) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_init); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_init, HISI_PMU); /* * Set the counter to count the event that we're interested in, @@ -287,7 +287,7 @@ void hisi_uncore_pmu_set_event_period(struct perf_event *event) /* Write start value to the hardware event counter */ hisi_pmu->ops->write_counter(hisi_pmu, hwc, val); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_set_event_period); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_set_event_period, HISI_PMU); void hisi_uncore_pmu_event_update(struct perf_event *event) { @@ -308,7 +308,7 @@ void hisi_uncore_pmu_event_update(struct perf_event *event) HISI_MAX_PERIOD(hisi_pmu->counter_bits); local64_add(delta, &event->count); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_event_update); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_event_update, HISI_PMU); void hisi_uncore_pmu_start(struct perf_event *event, int flags) { @@ -331,7 +331,7 @@ void hisi_uncore_pmu_start(struct perf_event *event, int flags) hisi_uncore_pmu_enable_event(event); perf_event_update_userpage(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_start); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_start, HISI_PMU); void hisi_uncore_pmu_stop(struct perf_event *event, int flags) { @@ -348,7 +348,7 @@ void hisi_uncore_pmu_stop(struct perf_event *event, int flags) hisi_uncore_pmu_event_update(event); hwc->state |= PERF_HES_UPTODATE; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_stop); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_stop, HISI_PMU); int hisi_uncore_pmu_add(struct perf_event *event, int flags) { @@ -371,7 +371,7 @@ int hisi_uncore_pmu_add(struct perf_event *event, int flags) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_add); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_add, HISI_PMU); void hisi_uncore_pmu_del(struct perf_event *event, int flags) { @@ -383,14 +383,14 @@ void hisi_uncore_pmu_del(struct perf_event *event, int flags) perf_event_update_userpage(event); hisi_pmu->pmu_events.hw_events[hwc->idx] = NULL; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_del); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_del, HISI_PMU); void hisi_uncore_pmu_read(struct perf_event *event) { /* Read hardware counter and update the perf counter statistics */ hisi_uncore_pmu_event_update(event); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_read); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_read, HISI_PMU); void hisi_uncore_pmu_enable(struct pmu *pmu) { @@ -403,7 +403,7 @@ void hisi_uncore_pmu_enable(struct pmu *pmu) hisi_pmu->ops->start_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_enable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_enable, HISI_PMU); void hisi_uncore_pmu_disable(struct pmu *pmu) { @@ -411,7 +411,7 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) hisi_pmu->ops->stop_counters(hisi_pmu); } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_disable); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_disable, HISI_PMU); /* @@ -498,7 +498,7 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_online_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_online_cpu, HISI_PMU); int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { @@ -531,7 +531,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; } -EXPORT_SYMBOL_GPL(hisi_uncore_pmu_offline_cpu); +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, HISI_PMU); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { @@ -550,6 +550,6 @@ void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) pmu->attr_groups = hisi_pmu->pmu_events.attr_groups; pmu->capabilities = PERF_PMU_CAP_NO_EXCLUDE; } -EXPORT_SYMBOL_GPL(hisi_pmu_init); +EXPORT_SYMBOL_NS_GPL(hisi_pmu_init, HISI_PMU); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index e706ca5676764..84e80b0ba529c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -508,6 +508,7 @@ static void __exit hisi_sllc_pmu_module_exit(void) } module_exit(hisi_sllc_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Shaokun Zhang "); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 481dcc9e8fbf8..9c724dab6b649 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -613,6 +613,7 @@ static void __exit hisi_uc_pmu_module_exit(void) } module_exit(hisi_uc_pmu_module_exit); +MODULE_IMPORT_NS(HISI_PMU); MODULE_DESCRIPTION("HiSilicon SoC UC uncore PMU driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Junhao He "); From 545e4bb7ae501ef0b66348090bcc302ac63dba8d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:52 +0800 Subject: [PATCH 20/95] drivers/perf: hisi: Don't update the associated_cpus on CPU offline [Upstream commit f2368a209a713267b68f7a4906a5012a29925410] Event will be scheduled on CPU of hisi_pmu::on_cpu which is selected from the intersection of hisi_pmu::associated_cpus and online CPUs. So the associated_cpus don't need to be maintained with online CPUs. This will save one update operation if one associated CPU is offlined. Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 3903284f13842..ee9b67b6b1423 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -507,9 +507,6 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) cpumask_t pmu_online_cpus; unsigned int target; - if (!cpumask_test_and_clear_cpu(cpu, &hisi_pmu->associated_cpus)) - return 0; - /* Nothing to do if this CPU doesn't own the PMU */ if (hisi_pmu->on_cpu != cpu) return 0; From 187c91f3987b62a13a587b41a0621f734c1f6d56 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:53 +0800 Subject: [PATCH 21/95] drivers/perf: hisi: Migrate to one online CPU if no associated one online [Upstream commit 83037a47d3aa5f3e35b0c02433a87806e9c34438] If the selected CPU hisi_pmu::on_cpu goes offline, driver will select a new online CPU from hisi_pmu::associated_cpus, or if no online CPU found the PMU context won't be migrated. However for uncore PMUs the associated CPUs are just a peference and it also works to schedule the events on any online CPUs. So add a fallback to choose an online CPU if no associated CPUs found. Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index ee9b67b6b1423..783ef4d21d951 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -518,6 +518,10 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) cpumask_and(&pmu_online_cpus, &hisi_pmu->associated_cpus, cpu_online_mask); target = cpumask_any_but(&pmu_online_cpus, cpu); + + if (target >= nr_cpu_ids) + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) return 0; From aaf9ac0dbdc13e27bcd5783d7f779907dd9cb54b Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:54 +0800 Subject: [PATCH 22/95] drivers/perf: hisi: Refactor the detection of associated CPUs [Upstream commit 6cd137088fdf02488ab29d11c64f66ac650ec1ad] There are two type of PMUs supported currently: 1) PMUs locate on SCCL (Super CPU Cluster [1]), associated with certain CCL (CPU cluster [1])(e.g. L3C PMU) or not (e.g. DDRC PMU) 2) PMUs locate on the SICL (Super IO Cluster [1]), which has no association with certain CPU topology (e.g. CPA PMU) Currently the associated CPUs of the PMU is detected in the cpuhp online callback as below: - for type 1) the CPUs match PMU's sccl_id and ccl_id - for type 2) PMU's sccl_id is -1 and all online CPUs will be associated Since uncore PMUs are not bound to certain CPU context and event could be counting started by any online CPU, the associated CPUs are just a preference. Below disadvantages are observed in current implementation: - the PMU cannot be used if its associated CPUs are offline - SICL PMUs are associated to all the online CPUs implicitly without the consideration of locality So refactor the way we detect the associated CPUs in below aspects: - add a clear definition of hisi_pmu::associated_cpus - initialize hisi_pmu::on_cpu based on locality if no associated CPU found, otherwise update it from associated CPUs - drop the detection with a sccl_id of -1 for SICL PMUs [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/perf/hisi-pmu.rst?h=v6.12-rc1 Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 22 +++++++++++++++------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 +++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 783ef4d21d951..22ccd47a20c81 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -460,10 +460,6 @@ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu) { int sccl_id, ccl_id; - /* If SCCL_ID is -1, the PMU is in a SICL and has no CPU affinity */ - if (hisi_pmu->sccl_id == -1) - return true; - if (hisi_pmu->ccl_id == -1) { /* If CCL_ID is -1, the PMU only shares the same SCCL */ hisi_read_sccl_and_ccl_id(&sccl_id, NULL); @@ -481,13 +477,25 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) struct hisi_pmu *hisi_pmu = hlist_entry_safe(node, struct hisi_pmu, node); - if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) + /* + * If the CPU is not associated to PMU, initialize the hisi_pmu->on_cpu + * based on the locality if it hasn't been initialized yet. For PMUs + * do have associated CPUs, it'll be updated later. + */ + if (!hisi_pmu_cpu_is_associated_pmu(hisi_pmu)) { + if (hisi_pmu->on_cpu != -1) + return 0; + + hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); return 0; + } cpumask_set_cpu(cpu, &hisi_pmu->associated_cpus); - /* If another CPU is already managing this PMU, simply return. */ - if (hisi_pmu->on_cpu != -1) + /* If another associated CPU is already managing this PMU, simply return. */ + if (hisi_pmu->on_cpu != -1 && + cpumask_test_cpu(hisi_pmu->on_cpu, &hisi_pmu->associated_cpus)) return 0; /* Use this CPU in cpumask for event counting */ diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 67d1c3d3a41c0..8290380c34c47 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -88,7 +88,11 @@ struct hisi_pmu { const struct hisi_uncore_ops *ops; const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; - /* associated_cpus: All CPUs associated with the PMU */ + /* + * CPUs associated to the PMU and are preferred to use for counting. + * Could be empty if PMU has no association (e.g. PMU on SICL), in + * which case any online CPU will be used. + */ cpumask_t associated_cpus; /* CPU used for counting */ int on_cpu; From 6b96639eb853a72e72cd4059e60dfb38e01bab23 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:55 +0800 Subject: [PATCH 23/95] drivers/perf: hisi: Extract topology information to a separate structure [Upstream commit c192026ceea793a73d4b54ed46dc1cfeb21d3853] HiSilicon Uncore PMUs are identified by the IDs of the topology element on which the PMUs are located. Add a new separate struct hisi_pmu_toplogy to encapsulate this information. Add additional documentation on the meaning of each ID. - make sccl_id and sicl_id into a union since they're exclusive. It can also be accessed by scl_id if the SICL/SCCL distinction is not relevant - make index_id and sub_id signed so -1 may be used to indicate the PMU doesn't have this topology element or it could not be retrieved. This patch should have no functional changes. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 12 +++--- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 18 ++++----- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 12 +++--- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 8 ++-- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 14 +++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 7 ++-- drivers/perf/hisilicon/hisi_uncore_pmu.h | 38 +++++++++++++++---- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 10 ++--- drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 11 +++--- 9 files changed, 78 insertions(+), 52 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 79e6093a35fb3..0154a3dfde991 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -181,19 +181,19 @@ static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->sicl_id)) { + &cpa_pmu->topo.sicl_id)) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->index_id)) { + &cpa_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->ccl_id = -1; - cpa_pmu->sccl_id = -1; + cpa_pmu->topo.ccl_id = -1; + cpa_pmu->topo.sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); @@ -311,8 +311,8 @@ static int hisi_cpa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%u", - cpa_pmu->sicl_id, cpa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_cpa%d", + cpa_pmu->topo.sicl_id, cpa_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 424b81730f104..8f31721935dae 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -279,18 +279,18 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,ch-id", - &ddrc_pmu->index_id)) { + &ddrc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Can not read ddrc channel-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->sccl_id)) { + &ddrc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->ccl_id = -1; + ddrc_pmu->topo.ccl_id = -1; ddrc_pmu->dev_info = device_get_match_data(&pdev->dev); if (!ddrc_pmu->dev_info) @@ -305,7 +305,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->sub_id)) { + &ddrc_pmu->topo.sub_id)) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -481,13 +481,13 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) if (ddrc_pmu->identifier >= HISI_PMU_V2) name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u_%u", - ddrc_pmu->sccl_id, ddrc_pmu->index_id, - ddrc_pmu->sub_id); + "hisi_sccl%d_ddrc%d_%d", + ddrc_pmu->topo.sccl_id, ddrc_pmu->topo.index_id, + ddrc_pmu->topo.sub_id); else name = devm_kasprintf(&pdev->dev, GFP_KERNEL, - "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id, - ddrc_pmu->index_id); + "hisi_sccl%d_ddrc%d", ddrc_pmu->topo.sccl_id, + ddrc_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 14062246a3162..12f19745e472b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -300,7 +300,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * SCCL_ID is in MPIDR[aff2]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->sccl_id)) { + &hha_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -310,7 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * both "hisilicon, idx-id" as preference, if available. */ if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->index_id)) { + &hha_pmu->topo.index_id)) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -318,10 +318,10 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - hha_pmu->index_id = id; + hha_pmu->topo.index_id = id; } /* HHA PMUs only share the same SCCL */ - hha_pmu->ccl_id = -1; + hha_pmu->topo.ccl_id = -1; hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { @@ -510,8 +510,8 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_hha%u", - hha_pmu->sccl_id, hha_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_hha%d", + hha_pmu->topo.sccl_id, hha_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 8b28498612767..0e26d10841bc2 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -360,13 +360,13 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->sccl_id)) { + &l3c_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->ccl_id)) { + &l3c_pmu->topo.ccl_id)) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } @@ -544,8 +544,8 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->ccl_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 0c1fa866dcded..b91bec0d7d628 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -274,19 +274,19 @@ static int hisi_pa_pmu_init_data(struct platform_device *pdev, * to identify the PA PMU. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &pa_pmu->sicl_id)) { + &pa_pmu->topo.sicl_id)) { dev_err(&pdev->dev, "Cannot read sicl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &pa_pmu->index_id)) { + &pa_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - pa_pmu->ccl_id = -1; - pa_pmu->sccl_id = -1; + pa_pmu->topo.ccl_id = -1; + pa_pmu->topo.sccl_id = -1; pa_pmu->dev_info = device_get_match_data(&pdev->dev); if (!pa_pmu->dev_info) @@ -488,9 +488,9 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%u", - pa_pmu->sicl_id, pa_pmu->dev_info->name, - pa_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%d_%s%d", + pa_pmu->topo.sicl_id, pa_pmu->dev_info->name, + pa_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 22ccd47a20c81..aa841a3595bce 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -458,18 +458,19 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) */ static bool hisi_pmu_cpu_is_associated_pmu(struct hisi_pmu *hisi_pmu) { + struct hisi_pmu_topology *topo = &hisi_pmu->topo; int sccl_id, ccl_id; - if (hisi_pmu->ccl_id == -1) { + if (topo->ccl_id == -1) { /* If CCL_ID is -1, the PMU only shares the same SCCL */ hisi_read_sccl_and_ccl_id(&sccl_id, NULL); - return sccl_id == hisi_pmu->sccl_id; + return sccl_id == topo->sccl_id; } hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id); - return sccl_id == hisi_pmu->sccl_id && ccl_id == hisi_pmu->ccl_id; + return sccl_id == topo->sccl_id && ccl_id == topo->ccl_id; } int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 8290380c34c47..1fc436ccda5c7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -82,12 +82,43 @@ struct hisi_pmu_hwevents { const struct attribute_group **attr_groups; }; +/** + * struct hisi_pmu_topology - Describe the topology hierarchy on which the PMU + * is located. + * @sccl_id: ID of the SCCL on which the PMU locate is located. + * @sicl_id: ID of the SICL on which the PMU locate is located. + * @scl_id: ID used by the core which is unaware of the SCCL/SICL. + * @ccl_id: ID of the CCL (CPU cluster) on which the PMU is located. + * @index_id: the ID of the PMU module if there're several PMUs at a + * particularly location in the topology. + * @sub_id: submodule ID of the PMU. For example we use this for DDRC PMU v2 + * since each DDRC has more than one DMC + * + * The ID will be -1 if the PMU isn't located on a certain topology. + */ +struct hisi_pmu_topology { + /* + * SCCL (Super CPU CLuster) and SICL (Super I/O Cluster) are parallel + * so a PMU cannot locate on a SCCL and a SICL. If the SCCL/SICL + * distinction is not relevant, use scl_id instead. + */ + union { + int sccl_id; + int sicl_id; + int scl_id; + }; + int ccl_id; + int index_id; + int sub_id; +}; + /* Generic pmu struct for different pmu types */ struct hisi_pmu { struct pmu pmu; const struct hisi_uncore_ops *ops; const struct hisi_pmu_dev_info *dev_info; struct hisi_pmu_hwevents pmu_events; + struct hisi_pmu_topology topo; /* * CPUs associated to the PMU and are preferred to use for counting. * Could be empty if PMU has no association (e.g. PMU on SICL), in @@ -99,14 +130,7 @@ struct hisi_pmu { int irq; struct device *dev; struct hlist_node node; - int sccl_id; - int sicl_id; - int ccl_id; void __iomem *base; - /* the ID of the PMU modules */ - u32 index_id; - /* For DDRC PMU v2: each DDRC has more than one DMC */ - u32 sub_id; int num_counters; int counter_bits; /* check event code range */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 84e80b0ba529c..3b18e355cf7b8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -293,19 +293,19 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, * while SCCL_ID is from MPIDR_EL1 by CPU. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->sccl_id)) { + &sllc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->index_id)) { + &sllc_pmu->topo.index_id)) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } /* SLLC PMUs only share the same SCCL */ - sllc_pmu->ccl_id = -1; + sllc_pmu->topo.ccl_id = -1; sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { @@ -433,8 +433,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", - sllc_pmu->sccl_id, sllc_pmu->index_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_sllc%d", + sllc_pmu->topo.sccl_id, sllc_pmu->topo.index_id); if (!name) return -ENOMEM; diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 9c724dab6b649..19a44498758f0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -372,19 +372,19 @@ static int hisi_uc_pmu_init_data(struct platform_device *pdev, * They have some CCLs per SCCL and then 4 UC PMU per CCL. */ if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->sccl_id)) { + &uc_pmu->topo.sccl_id)) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->ccl_id)) { + &uc_pmu->topo.ccl_id)) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->sub_id)) { + &uc_pmu->topo.sub_id)) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } @@ -538,8 +538,9 @@ static int hisi_uc_pmu_probe(struct platform_device *pdev) if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%u", - uc_pmu->sccl_id, uc_pmu->ccl_id, uc_pmu->sub_id); + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_uc%d_%d", + uc_pmu->topo.sccl_id, uc_pmu->topo.ccl_id, + uc_pmu->topo.sub_id); if (!name) return -ENOMEM; From c26479ee60189af78bda48d131976e1a7ee97705 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:56 +0800 Subject: [PATCH 24/95] drivers/perf: hisi: Add a common function to retrieve topology from firmware [Upstream commit 32528b165ea1266ee25afe6a29be0107b3c5e76a] Currently each type of uncore PMU driver uses almost the same routine and the same firmware interface (or properties) to retrieve the topology information, then reset the unused IDs to -1. Extract the common parts to the framework in hisi_uncore_pmu_init_topology(). Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 10 +++---- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 8 ++--- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 11 +++---- drivers/perf/hisilicon/hisi_uncore_pmu.c | 30 +++++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 11 +++---- drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 12 ++++---- 9 files changed, 60 insertions(+), 43 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 0154a3dfde991..ee314252e91a4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -180,20 +180,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_cpa_pmu_acpi_match); static int hisi_cpa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *cpa_pmu) { - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &cpa_pmu->topo.sicl_id)) { + hisi_uncore_pmu_init_topology(cpa_pmu, &pdev->dev); + + if (cpa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Can not read sicl-id\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &cpa_pmu->topo.index_id)) { + if (cpa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id\n"); return -EINVAL; } - cpa_pmu->topo.ccl_id = -1; - cpa_pmu->topo.sccl_id = -1; cpa_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cpa_pmu->base)) return PTR_ERR(cpa_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 8f31721935dae..4a19cb97dce2b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -274,6 +274,8 @@ static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { + hisi_uncore_pmu_init_topology(ddrc_pmu, &pdev->dev); + /* * Use the SCCL_ID and DDRC channel ID to identify the * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. @@ -284,13 +286,10 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &ddrc_pmu->topo.sccl_id)) { + if (ddrc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read ddrc sccl-id!\n"); return -EINVAL; } - /* DDRC PMUs only share the same SCCL */ - ddrc_pmu->topo.ccl_id = -1; ddrc_pmu->dev_info = device_get_match_data(&pdev->dev); if (!ddrc_pmu->dev_info) @@ -304,8 +303,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); if (ddrc_pmu->identifier >= HISI_PMU_V2) { - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &ddrc_pmu->topo.sub_id)) { + if (ddrc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 12f19745e472b..8d2c0f8088759 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -295,12 +295,13 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, unsigned long long id; acpi_status status; + hisi_uncore_pmu_init_topology(hha_pmu, &pdev->dev); + /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &hha_pmu->topo.sccl_id)) { + if (hha_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } @@ -309,8 +310,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, * Early versions of BIOS support _UID by mistake, so we support * both "hisilicon, idx-id" as preference, if available. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &hha_pmu->topo.index_id)) { + if (hha_pmu->topo.index_id < 0) { status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), "_UID", NULL, &id); if (ACPI_FAILURE(status)) { @@ -320,8 +320,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, hha_pmu->topo.index_id = id; } - /* HHA PMUs only share the same SCCL */ - hha_pmu->topo.ccl_id = -1; hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 0e26d10841bc2..9be03dbbdc5ef 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -355,18 +355,18 @@ MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + hisi_uncore_pmu_init_topology(l3c_pmu, &pdev->dev); + /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &l3c_pmu->topo.sccl_id)) { + if (l3c_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &l3c_pmu->topo.ccl_id)) { + if (l3c_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read l3c ccl-id!\n"); return -EINVAL; } diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index b91bec0d7d628..fd9dc6ac306ab 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -269,25 +269,22 @@ static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx) static int hisi_pa_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *pa_pmu) { + hisi_uncore_pmu_init_topology(pa_pmu, &pdev->dev); + /* * As PA PMU is in a SICL, use the SICL_ID and the index ID * to identify the PA PMU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &pa_pmu->topo.sicl_id)) { + if (pa_pmu->topo.sicl_id < 0) { dev_err(&pdev->dev, "Cannot read sicl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &pa_pmu->topo.index_id)) { + if (pa_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - pa_pmu->topo.ccl_id = -1; - pa_pmu->topo.sccl_id = -1; - pa_pmu->dev_info = device_get_match_data(&pdev->dev); if (!pa_pmu->dev_info) return -ENODEV; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index aa841a3595bce..dbe0227622ca1 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -543,6 +544,35 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) } EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_offline_cpu, HISI_PMU); +/* + * Retrieve the topology information from the firmware for the hisi_pmu device. + * The topology ID will be -1 if we cannot initialize it, it may either due to + * the PMU doesn't locate on this certain topology or the firmware needs to be + * fixed. + */ +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev) +{ + struct hisi_pmu_topology *topo = &hisi_pmu->topo; + + topo->sccl_id = -1; + topo->ccl_id = -1; + topo->index_id = -1; + topo->sub_id = -1; + + if (device_property_read_u32(dev, "hisilicon,scl-id", &topo->sccl_id)) + dev_dbg(dev, "no scl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,ccl-id", &topo->ccl_id)) + dev_dbg(dev, "no ccl-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,idx-id", &topo->index_id)) + dev_dbg(dev, "no idx-id present\n"); + + if (device_property_read_u32(dev, "hisilicon,sub-id", &topo->sub_id)) + dev_dbg(dev, "no sub-id present\n"); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_init_topology, HISI_PMU); + void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module) { struct pmu *pmu = &hisi_pmu->pmu; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 1fc436ccda5c7..8fc5e42a38031 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -163,6 +163,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, char *page); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); +void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); void hisi_pmu_init(struct hisi_pmu *hisi_pmu, struct module *module); #endif /* __HISI_UNCORE_PMU_H__ */ diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 3b18e355cf7b8..7d77c21326f01 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -288,25 +288,22 @@ MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); + /* * Use the SCCL_ID and the index ID to identify the SLLC PMU, * while SCCL_ID is from MPIDR_EL1 by CPU. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &sllc_pmu->topo.sccl_id)) { + if (sllc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Cannot read sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", - &sllc_pmu->topo.index_id)) { + if (sllc_pmu->topo.index_id < 0) { dev_err(&pdev->dev, "Cannot read idx-id!\n"); return -EINVAL; } - /* SLLC PMUs only share the same SCCL */ - sllc_pmu->topo.ccl_id = -1; - sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 19a44498758f0..2df8976ab4be9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "hisi_uncore_pmu.h" @@ -366,25 +365,24 @@ static void hisi_uc_pmu_clear_int_status(struct hisi_pmu *uc_pmu, int idx) static int hisi_uc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *uc_pmu) { + hisi_uncore_pmu_init_topology(uc_pmu, &pdev->dev); + /* * Use SCCL (Super CPU Cluster) ID and CCL (CPU Cluster) ID to * identify the topology information of UC PMU devices in the chip. * They have some CCLs per SCCL and then 4 UC PMU per CCL. */ - if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", - &uc_pmu->topo.sccl_id)) { + if (uc_pmu->topo.sccl_id < 0) { dev_err(&pdev->dev, "Can not read uc sccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id", - &uc_pmu->topo.ccl_id)) { + if (uc_pmu->topo.ccl_id < 0) { dev_err(&pdev->dev, "Can not read uc ccl-id!\n"); return -EINVAL; } - if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", - &uc_pmu->topo.sub_id)) { + if (uc_pmu->topo.sub_id < 0) { dev_err(&pdev->dev, "Can not read sub-id!\n"); return -EINVAL; } From f08e39965ca7742b8175dcc30e76917d51adf277 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:57 +0800 Subject: [PATCH 25/95] drivers/perf: hisi: Provide a generic implementation of cpumask/identifier [Upstream commit 8688c01e313d542124fae82e82c8d6d5c073899f] Each type of HiSilicon Uncore PMU has the following sysfs attributes: - format: bitmask in perf_event_attr::config[012] of corresponding attribute - event: events name and corresponding event code - cpumask: range of CPUs the events can be opened on - identifier: the version of this PMU Different types of PMU have different implementations of the "format" and "event" but all share the same implementation of the "cpumask" and "identifier". Thus we can move cpumask and identifier to the hisi_uncore_pmu framework and drivers can use the generic implementation. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 27 +---------- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 31 ++---------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 31 ++---------- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 31 ++---------- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 35 +++----------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 47 ++++++++++++++----- drivers/perf/hisilicon/hisi_uncore_pmu.h | 4 ++ drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 27 +---------- drivers/perf/hisilicon/hisi_uncore_uc_pmu.c | 27 +---------- 9 files changed, 64 insertions(+), 196 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index ee314252e91a4..006afb4d12085 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -225,34 +225,11 @@ static const struct attribute_group hisi_cpa_pmu_events_group = { .attrs = hisi_cpa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_cpa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_cpumask_attr_group = { - .attrs = hisi_cpa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_cpa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_cpa_pmu_identifier_attrs[] = { - &hisi_cpa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_cpa_pmu_identifier_group = { - .attrs = hisi_cpa_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_cpa_pmu_attr_groups[] = { &hisi_cpa_pmu_format_group, &hisi_cpa_pmu_events_group, - &hisi_cpa_pmu_cpumask_attr_group, - &hisi_cpa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 4a19cb97dce2b..33e789154242c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -361,42 +361,19 @@ static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { .attrs = hisi_ddrc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_ddrc_pmu_cpumask_attr_group = { - .attrs = hisi_ddrc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_ddrc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { - &hisi_ddrc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_ddrc_pmu_identifier_group = { - .attrs = hisi_ddrc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { &hisi_ddrc_pmu_v1_format_group, &hisi_ddrc_pmu_v1_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { &hisi_ddrc_pmu_v2_format_group, &hisi_ddrc_pmu_v2_events_group, - &hisi_ddrc_pmu_cpumask_attr_group, - &hisi_ddrc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 8d2c0f8088759..cb928b450b97b 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -405,42 +405,19 @@ static const struct attribute_group hisi_hha_pmu_v2_events_group = { .attrs = hisi_hha_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_hha_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_hha_pmu_cpumask_attr_group = { - .attrs = hisi_hha_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_hha_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_hha_pmu_identifier_attrs[] = { - &hisi_hha_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_hha_pmu_identifier_group = { - .attrs = hisi_hha_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { &hisi_hha_pmu_v1_format_group, &hisi_hha_pmu_v1_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = { &hisi_hha_pmu_v2_format_group, &hisi_hha_pmu_v2_events_group, - &hisi_hha_pmu_cpumask_attr_group, - &hisi_hha_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 9be03dbbdc5ef..4c74eb88e8f0e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -441,42 +441,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = { - .attrs = hisi_l3c_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_l3c_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { - &hisi_l3c_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_l3c_pmu_identifier_group = { - .attrs = hisi_l3c_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL, }; static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { &hisi_l3c_pmu_v2_format_group, &hisi_l3c_pmu_v2_events_group, - &hisi_l3c_pmu_cpumask_attr_group, - &hisi_l3c_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index fd9dc6ac306ab..b78515ab7556e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -353,29 +353,6 @@ static const struct attribute_group hisi_h60pa_pmu_events_group = { .attrs = hisi_h60pa_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { - .attrs = hisi_pa_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_pa_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_pa_pmu_identifier_attrs[] = { - &hisi_pa_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_pa_pmu_identifier_group = { - .attrs = hisi_pa_pmu_identifier_attrs, -}; - static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { .mask_offset = PA_INT_MASK, .clear_offset = PA_INT_CLEAR, @@ -385,8 +362,8 @@ static struct hisi_pa_pmu_int_regs hisi_pa_pmu_regs = { static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v2_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -399,8 +376,8 @@ static const struct hisi_pmu_dev_info hisi_h32pa_v2 = { static const struct attribute_group *hisi_pa_pmu_v3_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_pa_pmu_v3_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; @@ -419,8 +396,8 @@ static struct hisi_pa_pmu_int_regs hisi_h60pa_pmu_regs = { static const struct attribute_group *hisi_h60pa_pmu_attr_groups[] = { &hisi_pa_pmu_v2_format_group, &hisi_h60pa_pmu_events_group, - &hisi_pa_pmu_cpumask_attr_group, - &hisi_pa_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index dbe0227622ca1..5e5d9b3269042 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -35,7 +35,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev, return sysfs_emit(buf, "%s\n", (char *)eattr->var); } -EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); +EXPORT_SYMBOL_NS_GPL(hisi_format_sysfs_show, HISI_PMU); /* * PMU event attributes @@ -63,6 +63,41 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, } EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +const struct attribute_group hisi_pmu_cpumask_attr_group = { + .attrs = hisi_pmu_cpumask_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_cpumask_attr_group, HISI_PMU); + +ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); +} +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, HISI_PMU); + +static struct device_attribute hisi_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pmu_identifier_attrs[] = { + &hisi_pmu_identifier_attr.attr, + NULL +}; + +const struct attribute_group hisi_pmu_identifier_group = { + .attrs = hisi_pmu_identifier_attrs, +}; +EXPORT_SYMBOL_NS_GPL(hisi_pmu_identifier_group, HISI_PMU); + static bool hisi_validate_event_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; @@ -113,16 +148,6 @@ int hisi_uncore_pmu_get_event_idx(struct perf_event *event) } EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_get_event_idx, HISI_PMU); -ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, - struct device_attribute *attr, - char *page) -{ - struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); - - return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier); -} -EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_identifier_attr_show, HISI_PMU); - static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) { clear_bit(idx, hisi_pmu->pmu_events.used_mask); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 8fc5e42a38031..6020fc1a4616d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -138,6 +138,10 @@ struct hisi_pmu { u32 identifier; }; +/* Generic implementation of cpumask/identifier group */ +extern const struct attribute_group hisi_pmu_cpumask_attr_group; +extern const struct attribute_group hisi_pmu_identifier_group; + int hisi_uncore_pmu_get_event_idx(struct perf_event *event); void hisi_uncore_pmu_read(struct perf_event *event); int hisi_uncore_pmu_add(struct perf_event *event, int flags); diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 7d77c21326f01..e99a5b32bd92f 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -344,34 +344,11 @@ static const struct attribute_group hisi_sllc_pmu_v2_events_group = { .attrs = hisi_sllc_pmu_v2_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { - .attrs = hisi_sllc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_sllc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_sllc_pmu_identifier_attrs[] = { - &hisi_sllc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_sllc_pmu_identifier_group = { - .attrs = hisi_sllc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { &hisi_sllc_pmu_v2_format_group, &hisi_sllc_pmu_v2_events_group, - &hisi_sllc_pmu_cpumask_attr_group, - &hisi_sllc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; diff --git a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c index 2df8976ab4be9..f90f752f32dd8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_uc_pmu.c @@ -437,34 +437,11 @@ static const struct attribute_group hisi_uc_pmu_events_group = { .attrs = hisi_uc_pmu_events_attr, }; -static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); - -static struct attribute *hisi_uc_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static const struct attribute_group hisi_uc_pmu_cpumask_attr_group = { - .attrs = hisi_uc_pmu_cpumask_attrs, -}; - -static struct device_attribute hisi_uc_pmu_identifier_attr = - __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); - -static struct attribute *hisi_uc_pmu_identifier_attrs[] = { - &hisi_uc_pmu_identifier_attr.attr, - NULL -}; - -static const struct attribute_group hisi_uc_pmu_identifier_group = { - .attrs = hisi_uc_pmu_identifier_attrs, -}; - static const struct attribute_group *hisi_uc_pmu_attr_groups[] = { &hisi_uc_pmu_format_group, &hisi_uc_pmu_events_group, - &hisi_uc_pmu_cpumask_attr_group, - &hisi_uc_pmu_identifier_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, NULL }; From c2f8873529008d81073c9d245bf9cad58d18b52a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:13:58 +0800 Subject: [PATCH 26/95] drivers/perf: hisi: Export associated CPUs of each PMU through sysfs [Upstream commit 3b051bb7cb4344d12b9b9b4974c77706462d4246] Although the event of the uncore PMU can only be opened on a single CPU, some PMU does have the affinity on a range of CPUs. For example the L3C PMU is associated to the CPUs sharing the L3T it monitors. Users may infer this affinity by the PMU name which may have SCCL ID and CCL ID encoded (for L3C etc), but it's not that straightforward. So export this information by adding an "associated_cpus" sysfs attribute then user can get this directly. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- Documentation/admin-guide/perf/hisi-pmu.rst | 5 ++++- drivers/perf/hisilicon/hisi_uncore_pmu.c | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index e0174d20809a1..adb67311c8113 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -36,7 +36,10 @@ e.g. hisi_sccl1_hha0/rx_operations is RX_OPERATIONS event of HHA index #0 in SCCL ID #1. The driver also provides a "cpumask" sysfs attribute, which shows the CPU core -ID used to count the uncore PMU event. +ID used to count the uncore PMU event. An "associated_cpus" sysfs attribute is +also provided to show the CPUs associated with this PMU. The "cpumask" indicates +the CPUs to open the events, usually as a hint for userspaces tools like perf. +It only contains one associated CPU from the "associated_cpus". Example usage of perf:: diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 5e5d9b3269042..94e122605b8c6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -65,8 +65,18 @@ EXPORT_SYMBOL_NS_GPL(hisi_cpumask_sysfs_show, HISI_PMU); static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); +static ssize_t hisi_associated_cpus_sysfs_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &hisi_pmu->associated_cpus); +} +static DEVICE_ATTR(associated_cpus, 0444, hisi_associated_cpus_sysfs_show, NULL); + static struct attribute *hisi_pmu_cpumask_attrs[] = { &dev_attr_cpumask.attr, + &dev_attr_associated_cpus.attr, NULL }; From f92e179401e33f5209da2a6ba76a6e9dc110e6bb Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:13:59 +0800 Subject: [PATCH 27/95] drivers/perf: hisi: Fix incorrect variable name "hha_pmu" in DDRC PMU driver [Upstream commit 4e15bcffa19acf15b6acb2cb3f4a1dd923ee4708] In the callback function write_evtype(), the variable name of struct hisi_pmu should be "ddrc_pmu" instead of "hha_pmu". Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 33e789154242c..7d2dca1f7c654 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -122,14 +122,14 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, * so there is no need to write event type, while it is programmable counter in * PMU v2. */ -static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, +static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { u32 offset; - if (hha_pmu->identifier >= HISI_PMU_V2) { + if (ddrc_pmu->identifier >= HISI_PMU_V2) { offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, hha_pmu->base + offset); + writel(type, ddrc_pmu->base + offset); } } From 5cb94faa176f965c58c1ef020e56bccbd5642d4c Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:00 +0800 Subject: [PATCH 28/95] drivers/perf: hisi: Delete redundant blank line of DDRC PMU [Upstream commit f03241fbebdf47b9b435752f7e72d3f1e96e4529] Do not add blank line at the end of a code block defined by braces. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 7d2dca1f7c654..062a2938640a0 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -575,7 +575,6 @@ static void __exit hisi_ddrc_pmu_module_exit(void) { platform_driver_unregister(&hisi_ddrc_pmu_driver); cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE); - } module_exit(hisi_ddrc_pmu_module_exit); From 65dc8b8581313c06bc7e1e985f7fe344a01e0d70 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:01 +0800 Subject: [PATCH 29/95] drivers/perf: hisi: Simplify the probe process for each DDRC version [Upstream commit dc86791ff68c38a2954c3bf2c444b6d6d9da52f3] Version 1 and 2 of DDRC PMU also use different HID. Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. In order to support this extend struct hisi_pmu_dev_info for version specific counter bits and event range. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 232 +++++++----------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 + 2 files changed, 84 insertions(+), 150 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 062a2938640a0..c51738c7f6a49 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -43,11 +43,6 @@ #define DDRC_V2_EVENT_TYPE 0xe74 #define DDRC_V2_PERF_CTRL 0xeA0 -/* DDRC interrupt registers definition in v3 */ -#define DDRC_V3_INT_MASK 0x534 -#define DDRC_V3_INT_STATUS 0x538 -#define DDRC_V3_INT_CLEAR 0x53C - /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 @@ -55,6 +50,10 @@ #define DDRC_V1_NR_EVENTS 0x7 #define DDRC_V2_NR_EVENTS 0x90 +#define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8) +#define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4) +#define DDRC_UNIMPLEMENTED_REG GENMASK(31, 0) + /* * For PMU v1, there are eight-events and every event has been mapped * to fixed-purpose counters which register offset is not consistent. @@ -69,52 +68,36 @@ static const u32 ddrc_reg_off[] = { }; struct hisi_ddrc_pmu_regs { + u32 event_cnt; + u32 event_ctrl; + u32 event_type; + u32 perf_ctrl; + u32 perf_ctrl_en; u32 int_mask; u32 int_clear; u32 int_status; }; -/* - * Select the counter register offset using the counter index. - * In PMU v1, there are no programmable counter, the count - * is read form the statistics counter register itself. - */ -static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) +static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { - return ddrc_reg_off[cntr_idx]; -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx) -{ - return DDRC_V2_EVENT_CNT + cntr_idx * 8; -} + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + return readl(ddrc_pmu->base + ddrc_reg_off[hwc->idx]); -static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - return readl(ddrc_pmu->base + - hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); + return readq(ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } -static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc, u64 val) { - writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); -} - -static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - return readq(ddrc_pmu->base + - hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); -} + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; -static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc, u64 val) -{ - writeq(val, - ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); + if (regs->event_cnt == DDRC_UNIMPLEMENTED_REG) + writel((u32)val, ddrc_pmu->base + ddrc_reg_off[hwc->idx]); + else + writeq(val, ddrc_pmu->base + DDRC_EVENT_CNTn(regs->event_cnt, hwc->idx)); } /* @@ -125,54 +108,12 @@ static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *ddrc_pmu, int idx, u32 type) { - u32 offset; - - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - offset = DDRC_V2_EVENT_TYPE + 4 * idx; - writel(type, ddrc_pmu->base + offset); - } -} - -static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val |= DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu) -{ - u32 val; - - /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ - val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val &= ~DDRC_V1_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); -} - -static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; - /* Set counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val |= (1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); -} + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return; -static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) -{ - u32 val; - - /* Clear counter index(event code) in DDRC_EVENT_CTRL register */ - val = readl(ddrc_pmu->base + DDRC_EVENT_CTRL); - val &= ~(1 << GET_DDRC_EVENTID(hwc)); - writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); + writel(type, ddrc_pmu->base + DDRC_EVENT_TYPEn(regs->event_type, idx)); } static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) @@ -191,47 +132,58 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) return idx; } -static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event) +static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) { + struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu); + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; + + if (regs->event_type == DDRC_UNIMPLEMENTED_REG) + return hisi_ddrc_pmu_v1_get_event_idx(event); + return hisi_uncore_pmu_get_event_idx(event); } -static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val |= DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val |= regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); - val &= ~DDRC_V2_PERF_CTRL_EN; - writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val = readl(ddrc_pmu->base + regs->perf_ctrl); + val &= ~regs->perf_ctrl_en; + writel(val, ddrc_pmu->base + regs->perf_ctrl); } -static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val = readl(ddrc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } -static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { + struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; - val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + + val = readl(ddrc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, ddrc_pmu->base + regs->event_ctrl); } static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, @@ -241,7 +193,7 @@ static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, u32 val; val = readl(ddrc_pmu->base + regs->int_mask); - val &= ~(1 << hwc->idx); + val &= ~BIT_ULL(hwc->idx); writel(val, ddrc_pmu->base + regs->int_mask); } @@ -251,8 +203,9 @@ static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, struct hisi_ddrc_pmu_regs *regs = ddrc_pmu->dev_info->private; u32 val; + val = readl(ddrc_pmu->base + regs->int_mask); - val |= 1 << hwc->idx; + val |= BIT_ULL(hwc->idx); writel(val, ddrc_pmu->base + regs->int_mask); } @@ -377,32 +330,17 @@ static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { NULL }; -static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { - .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, - .start_counters = hisi_ddrc_pmu_v1_start_counters, - .stop_counters = hisi_ddrc_pmu_v1_stop_counters, - .enable_counter = hisi_ddrc_pmu_v1_enable_counter, - .disable_counter = hisi_ddrc_pmu_v1_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v1_write_counter, - .read_counter = hisi_ddrc_pmu_v1_read_counter, - .get_int_status = hisi_ddrc_pmu_get_int_status, - .clear_int_status = hisi_ddrc_pmu_clear_int_status, -}; - -static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { +static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx, - .start_counters = hisi_ddrc_pmu_v2_start_counters, - .stop_counters = hisi_ddrc_pmu_v2_stop_counters, - .enable_counter = hisi_ddrc_pmu_v2_enable_counter, - .disable_counter = hisi_ddrc_pmu_v2_disable_counter, + .get_event_idx = hisi_ddrc_pmu_get_event_idx, + .start_counters = hisi_ddrc_pmu_start_counters, + .stop_counters = hisi_ddrc_pmu_stop_counters, + .enable_counter = hisi_ddrc_pmu_enable_counter, + .disable_counter = hisi_ddrc_pmu_disable_counter, .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, - .write_counter = hisi_ddrc_pmu_v2_write_counter, - .read_counter = hisi_ddrc_pmu_v2_read_counter, + .write_counter = hisi_ddrc_pmu_write_counter, + .read_counter = hisi_ddrc_pmu_read_counter, .get_int_status = hisi_ddrc_pmu_get_int_status, .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; @@ -420,15 +358,10 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (ddrc_pmu->identifier >= HISI_PMU_V2) { - ddrc_pmu->counter_bits = 48; - ddrc_pmu->check_event = DDRC_V2_NR_EVENTS; - ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops; - } else { - ddrc_pmu->counter_bits = 32; - ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; - ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; - } + ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; + ddrc_pmu->counter_bits = ddrc_pmu->dev_info->counter_bits; + ddrc_pmu->check_event = ddrc_pmu->dev_info->check_event; + ddrc_pmu->ops = &hisi_uncore_ddrc_ops; ddrc_pmu->pmu_events.attr_groups = ddrc_pmu->dev_info->attr_groups; ddrc_pmu->num_counters = DDRC_NR_COUNTERS; @@ -497,45 +430,44 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev) } static struct hisi_ddrc_pmu_regs hisi_ddrc_v1_pmu_regs = { + .event_cnt = DDRC_UNIMPLEMENTED_REG, + .event_ctrl = DDRC_EVENT_CTRL, + .event_type = DDRC_UNIMPLEMENTED_REG, + .perf_ctrl = DDRC_PERF_CTRL, + .perf_ctrl_en = DDRC_V1_PERF_CTRL_EN, .int_mask = DDRC_INT_MASK, .int_clear = DDRC_INT_CLEAR, .int_status = DDRC_INT_STATUS, }; static const struct hisi_pmu_dev_info hisi_ddrc_v1 = { - .name = "ddrc", + .counter_bits = 32, + .check_event = DDRC_V1_NR_EVENTS, .attr_groups = hisi_ddrc_pmu_v1_attr_groups, .private = &hisi_ddrc_v1_pmu_regs, }; static struct hisi_ddrc_pmu_regs hisi_ddrc_v2_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, .int_mask = DDRC_V2_INT_MASK, .int_clear = DDRC_V2_INT_CLEAR, .int_status = DDRC_V2_INT_STATUS, }; static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { - .name = "ddrc", + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, .attr_groups = hisi_ddrc_pmu_v2_attr_groups, .private = &hisi_ddrc_v2_pmu_regs, }; -static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { - .int_mask = DDRC_V3_INT_MASK, - .int_clear = DDRC_V3_INT_CLEAR, - .int_status = DDRC_V3_INT_STATUS, -}; - -static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { - .name = "ddrc", - .attr_groups = hisi_ddrc_pmu_v2_attr_groups, - .private = &hisi_ddrc_v3_pmu_regs, -}; - static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1}, { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2}, - { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3}, {} }; MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 6020fc1a4616d..01dc5ef9c7668 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -73,6 +73,8 @@ struct hisi_uncore_ops { struct hisi_pmu_dev_info { const char *name; const struct attribute_group **attr_groups; + u32 counter_bits; + u32 check_event; void *private; }; From 96d74810bfeccde23cfe589bf0371d1cd8c580ff Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:02 +0800 Subject: [PATCH 30/95] drivers/perf: hisi: Add support for HiSilicon DDRC v3 PMU driver [Upstream commit 17aa34e86936d0dba4e7c05c55ffc3e12c0ccec9] HiSilicon DDRC v3 PMU has the different interrupt register offset compared to the v2. Add device information of v3 PMU with ACPI HID HISI0235. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index c51738c7f6a49..ccf8aa399de82 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -43,6 +43,11 @@ #define DDRC_V2_EVENT_TYPE 0xe74 #define DDRC_V2_PERF_CTRL 0xeA0 +/* DDRC interrupt registers definition in v3 */ +#define DDRC_V3_INT_MASK 0x534 +#define DDRC_V3_INT_STATUS 0x538 +#define DDRC_V3_INT_CLEAR 0x53C + /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 @@ -465,9 +470,28 @@ static const struct hisi_pmu_dev_info hisi_ddrc_v2 = { .private = &hisi_ddrc_v2_pmu_regs, }; +static struct hisi_ddrc_pmu_regs hisi_ddrc_v3_pmu_regs = { + .event_cnt = DDRC_V2_EVENT_CNT, + .event_ctrl = DDRC_V2_EVENT_CTRL, + .event_type = DDRC_V2_EVENT_TYPE, + .perf_ctrl = DDRC_V2_PERF_CTRL, + .perf_ctrl_en = DDRC_V2_PERF_CTRL_EN, + .int_mask = DDRC_V3_INT_MASK, + .int_clear = DDRC_V3_INT_CLEAR, + .int_status = DDRC_V3_INT_STATUS, +}; + +static const struct hisi_pmu_dev_info hisi_ddrc_v3 = { + .counter_bits = 48, + .check_event = DDRC_V2_NR_EVENTS, + .attr_groups = hisi_ddrc_pmu_v2_attr_groups, + .private = &hisi_ddrc_v3_pmu_regs, +}; + static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { { "HISI0233", (kernel_ulong_t)&hisi_ddrc_v1}, { "HISI0234", (kernel_ulong_t)&hisi_ddrc_v2}, + { "HISI0235", (kernel_ulong_t)&hisi_ddrc_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); From 73a1dbd3d6d5e426930ea3a9cd64f4631e2eac54 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 May 2024 14:30:33 +0800 Subject: [PATCH 31/95] perf parse-events: Make legacy events lower priority than sysfs/JSON [Upstream commit a24d9d9dc096fc0d0bd85302c9a4fe4fe3b1107b] The perf tool has previously made legacy events the priority so with or without a PMU the legacy event would be opened: $ perf stat -e cpu-cycles,cpu/cpu-cycles/ true Using CPUID GenuineIntel-6-8D-1 intel_pt default config: tsc,mtc,mtc_period=3,psb_period=3,pt,branch Attempting to add event pmu 'cpu' with 'cpu-cycles,' that may result in non-fatal errors After aliases, add event pmu 'cpu' with 'cpu-cycles,' that may result in non-fatal errors Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid 833967 cpu -1 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ ... Fixes to make hybrid/BIG.little PMUs behave correctly, ie as core PMUs capable of opening legacy events on each, removing hard coded "cpu_core" and "cpu_atom" Intel PMU names, etc. caused a behavioral difference on Apple/ARM due to latent issues in the PMU driver reported in: https://lore.kernel.org/lkml/08f1f185-e259-4014-9ca4-6411d5c1bc65@marcan.st/ As part of that report Mark Rutland requested that legacy events not be higher in priority when a PMU is specified reversing what has until this change been perf's default behavior. With this change the above becomes: $ perf stat -e cpu-cycles,cpu/cpu-cycles/ true Using CPUID GenuineIntel-6-8D-1 Attempt to add: cpu/cpu-cycles=0/ ..after resolving event: cpu/event=0x3c/ Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid 827628 cpu -1 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ perf_event_attr: type 4 (PERF_TYPE_RAW) size 136 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ ... So the second event has become a raw event as /sys/devices/cpu/events/cpu-cycles exists. A fix was necessary to config_term_pmu in parse-events.c as check_alias expansion needs to happen after config_term_pmu, and config_term_pmu may need calling a second time because of this. config_term_pmu is updated to not use the legacy event when the PMU has such a named event (either from JSON or sysfs). The bulk of this change is updating all of the parse-events test expectations so that if a sysfs/JSON event exists for a PMU the test doesn't fail - a further sign, if it were needed, that the legacy event priority was a known and tested behavior of the perf tool. Reported-by: Hector Martin Signed-off-by: Ian Rogers Tested-by: Hector Martin Tested-by: Marc Zyngier Acked-by: Mark Rutland Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231123042922.834425-1-irogers@google.com [ Initialize the 'alias_rewrote_terms' variable to false to address a clang warning ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- tools/perf/tests/parse-events.c | 256 +++++++++++++++++++++++--------- tools/perf/util/parse-events.c | 50 +++++-- tools/perf/util/pmu.c | 8 +- tools/perf/util/pmu.h | 3 +- 4 files changed, 229 insertions(+), 88 deletions(-) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index d47f1f8711641..bc7b335b05f7f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -162,6 +162,22 @@ static int test__checkevent_numeric(struct evlist *evlist) return TEST_OK; } + +static int assert_hw(struct perf_evsel *evsel, enum perf_hw_id id, const char *name) +{ + struct perf_pmu *pmu; + + if (evsel->attr.type == PERF_TYPE_HARDWARE) { + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id)); + return 0; + } + pmu = perf_pmus__find_by_type(evsel->attr.type); + + TEST_ASSERT_VAL("unexpected PMU type", pmu); + TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name)); + return 0; +} + static int test__checkevent_symbolic_name(struct evlist *evlist) { struct perf_evsel *evsel; @@ -169,10 +185,12 @@ static int test__checkevent_symbolic_name(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", - test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + + if (ret) + return ret; } + return TEST_OK; } @@ -183,8 +201,10 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; /* * The period value gets configured within evlist__config, * while this test executes only parse events method. @@ -861,10 +881,14 @@ static int test__group1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions:k */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -878,8 +902,10 @@ static int test__group1(struct evlist *evlist) /* cycles:upp */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -907,6 +933,8 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist)); evlist__for_each_entry(evlist, evsel) { + int ret; + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) { /* faults + :ku modifier */ leader = evsel; @@ -939,8 +967,10 @@ static int test__group2(struct evlist *evlist) continue; } /* cycles:k */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -957,6 +987,7 @@ static int test__group2(struct evlist *evlist) static int test__group3(struct evlist *evlist __maybe_unused) { struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2)); @@ -1045,8 +1076,10 @@ static int test__group3(struct evlist *evlist __maybe_unused) continue; } /* instructions:u */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1070,10 +1103,14 @@ static int test__group4(struct evlist *evlist __maybe_unused) num_core_entries() == evlist__nr_groups(evlist)); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:u + p */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1089,8 +1126,10 @@ static int test__group4(struct evlist *evlist __maybe_unused) /* instructions:kp + p */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1108,6 +1147,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) static int test__group5(struct evlist *evlist __maybe_unused) { struct evsel *evsel = NULL, *leader; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (5 * num_core_entries())); @@ -1117,8 +1157,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles + G */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1133,8 +1175,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions + G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1148,8 +1192,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles:G */ evsel = leader = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1164,8 +1210,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions:G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1178,8 +1226,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1201,10 +1251,14 @@ static int test__group_gh1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :H group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1218,8 +1272,10 @@ static int test__group_gh1(struct evlist *evlist) /* cache-misses:G + :H group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1242,10 +1298,14 @@ static int test__group_gh2(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :G group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1259,8 +1319,10 @@ static int test__group_gh2(struct evlist *evlist) /* cache-misses:H + :G group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1283,10 +1345,14 @@ static int test__group_gh3(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :u group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1300,8 +1366,10 @@ static int test__group_gh3(struct evlist *evlist) /* cache-misses:H + :u group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1324,10 +1392,14 @@ static int test__group_gh4(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :uG group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1341,8 +1413,10 @@ static int test__group_gh4(struct evlist *evlist) /* cache-misses:H + :uG group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1363,10 +1437,14 @@ static int test__leader_sample1(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1379,8 +1457,10 @@ static int test__leader_sample1(struct evlist *evlist) /* cache-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1392,8 +1472,10 @@ static int test__leader_sample1(struct evlist *evlist) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1415,10 +1497,14 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) evlist->core.nr_entries == (2 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1431,8 +1517,10 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1472,10 +1560,14 @@ static int test__pinned_group(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. */ @@ -1484,13 +1576,18 @@ static int test__pinned_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); } return TEST_OK; @@ -1517,10 +1614,14 @@ static int test__exclusive_group(struct evlist *evlist) evlist->core.nr_entries == 3 * num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. */ @@ -1529,13 +1630,18 @@ static int test__exclusive_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); } return TEST_OK; @@ -1677,9 +1783,11 @@ static int test__checkevent_raw_pmu(struct evlist *evlist) static int test__sym_event_slash(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); return TEST_OK; } @@ -1687,9 +1795,11 @@ static int test__sym_event_slash(struct evlist *evlist) static int test__sym_event_dc(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); return TEST_OK; } @@ -1697,9 +1807,11 @@ static int test__sym_event_dc(struct evlist *evlist) static int test__term_equal_term(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0); return TEST_OK; } @@ -1707,9 +1819,11 @@ static int test__term_equal_term(struct evlist *evlist) static int test__term_equal_legacy(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0); return TEST_OK; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 65608a3cba819..ac5fea49e6dbd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -974,7 +974,7 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_error *err) { if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -984,15 +984,23 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - if (perf_pmu__supports_legacy_cache(pmu)) { + /* + * Rewrite the PMU event to a legacy cache one unless the PMU + * doesn't support legacy cache events or the event is present + * within the PMU. + */ + if (perf_pmu__supports_legacy_cache(pmu) && + !perf_pmu__have_event(pmu, term->config)) { attr->type = PERF_TYPE_HW_CACHE; return parse_events__decode_legacy_cache(term->config, pmu->type, &attr->config); - } else + } else { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } } if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -1002,10 +1010,19 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + /* + * If the PMU has a sysfs or json event prefer it over + * legacy. ARM requires this. + */ + if (perf_pmu__have_event(pmu, term->config)) { + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } else { + attr->type = PERF_TYPE_HARDWARE; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1379,6 +1396,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, struct parse_events_error *err = parse_state->error; YYLTYPE *loc = loc_; LIST_HEAD(config_terms); + bool alias_rewrote_terms = false; pmu = parse_state->fake_pmu ?: perf_pmus__find(name); @@ -1426,7 +1444,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return evsel ? 0 : -ENOMEM; } - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info, err)) + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ + if (config_attr(&attr, head_config, parse_state->error, config_term_pmu)) + return -EINVAL; + + /* Look for event names in the terms and rewrite into format based terms. */ + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, + &info, &alias_rewrote_terms, err)) return -EINVAL; if (verbose > 1) { @@ -1438,11 +1462,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, strbuf_release(&sb); } - /* - * Configure hardcoded terms first, no need to check - * return value when called with fail == 0 ;) - */ - if (config_attr(&attr, head_config, parse_state->error, config_term_pmu)) + /* Configure attr/terms again if an alias was expanded. */ + if (alias_rewrote_terms && + config_attr(&attr, head_config, parse_state->error, config_term_pmu)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2587c4b463fa8..da899baadc8e3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1542,12 +1542,14 @@ static int check_info_data(struct perf_pmu *pmu, * defined for the alias */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err) + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *rewrote_terms = false; info->per_pkg = false; /* @@ -1569,7 +1571,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, NULL); return ret; } - + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; @@ -1663,6 +1665,8 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { + if (!name) + return false; if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 5a03c361cb04c..b4de5bf86e184 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -207,7 +207,8 @@ int perf_pmu__config_terms(struct perf_pmu *pmu, __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err); + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load); From 5e4f60253a0654ba8b8b7b060eaa39b32a65ae0e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 May 2024 14:30:34 +0800 Subject: [PATCH 32/95] perf x86 test: Update hybrid expectations [Upstream commit eb00697b91646de22d6d9b4e6a5fadf4495fdf69] The legacy events cpu-cycles and instructions have sysfs event equivalents on x86 (see /sys/devices/cpu_core/events). As sysfs/JSON events are now higher in priority than legacy events this causes the hybrid test expectations not to be met. To fix this switch to legacy events that don't have sysfs versions, namely cpu-cycles becomes cycles and instructions becomes branches. Fixes: a24d9d9dc096fc0d ("perf parse-events: Make legacy events lower priority than sysfs/JSON") Reported-by: Arnaldo Carvalho de Melo Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Closes: https://lore.kernel.org/lkml/ZYbm5L7tw7bdpDpE@kernel.org/ Link: https://lore.kernel.org/r/20240103170159.1435753-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- tools/perf/arch/x86/tests/hybrid.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/arch/x86/tests/hybrid.c b/tools/perf/arch/x86/tests/hybrid.c index eb152770f1485..05a5f81e81671 100644 --- a/tools/perf/arch/x86/tests/hybrid.c +++ b/tools/perf/arch/x86/tests/hybrid.c @@ -47,7 +47,7 @@ static int test__hybrid_hw_group_event(struct evlist *evlist) evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); return TEST_OK; } @@ -102,7 +102,7 @@ static int test__hybrid_group_modifier1(struct evlist *evlist) evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong hybrid type", test_hybrid_type(evsel, PERF_TYPE_RAW)); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); @@ -171,27 +171,27 @@ struct evlist_test { static const struct evlist_test test__hybrid_events[] = { { - .name = "cpu_core/cpu-cycles/", + .name = "cpu_core/cycles/", .check = test__hybrid_hw_event_with_pmu, /* 0 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", + .name = "{cpu_core/cycles/,cpu_core/branches/}", .check = test__hybrid_hw_group_event, /* 1 */ }, { - .name = "{cpu-clock,cpu_core/cpu-cycles/}", + .name = "{cpu-clock,cpu_core/cycles/}", .check = test__hybrid_sw_hw_group_event, /* 2 */ }, { - .name = "{cpu_core/cpu-cycles/,cpu-clock}", + .name = "{cpu_core/cycles/,cpu-clock}", .check = test__hybrid_hw_sw_group_event, /* 3 */ }, { - .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", + .name = "{cpu_core/cycles/k,cpu_core/branches/u}", .check = test__hybrid_group_modifier1, /* 4 */ }, From 2e80f795849f777152b6708146918fd4585b1d75 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 29 Aug 2024 17:03:32 +0800 Subject: [PATCH 33/95] drivers/perf: hisi_pcie: Export supported Root Ports [bdf_min, bdf_max] commit 177a4b0304ae43e7b56af7446c7c188fc1e6198a openEuler Currently users can get the Root Ports supported by the PCIe PMU by "bus" sysfs attributes which indicates the PCIe bus number where Root Ports are located. This maybe insufficient since Root Ports supported by different PCIe PMUs may be located on the same PCIe bus. So export the BDF range the Root Ports additionally. Signed-off-by: Yicong Yang Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240829090332.28756-4-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhangyuyang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- .../admin-guide/perf/hisi-pcie-pmu.rst | 4 +++- drivers/perf/hisilicon/hisi_pcie_pmu.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst index 7e863662e2d42..82a8c2429dd66 100644 --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst @@ -28,7 +28,9 @@ The "identifier" sysfs file allows users to identify the version of the PMU hardware device. The "bus" sysfs file allows users to get the bus number of Root Ports -monitored by PMU. +monitored by PMU. Furthermore users can get the Root Ports range in +[bdf_min, bdf_max] from "bdf_min" and "bdf_max" sysfs attributes +respectively. Example usage of perf:: diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 4a902da5c1d49..cb9024314a911 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -152,6 +152,22 @@ static ssize_t bus_show(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR_RO(bus); +static ssize_t bdf_min_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_min); +} +static DEVICE_ATTR_RO(bdf_min); + +static ssize_t bdf_max_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(dev_get_drvdata(dev)); + + return sysfs_emit(buf, "%#04x\n", pcie_pmu->bdf_max); +} +static DEVICE_ATTR_RO(bdf_max); + static struct hisi_pcie_reg_pair hisi_pcie_parse_reg_value(struct hisi_pcie_pmu *pcie_pmu, u32 reg_off) { @@ -771,6 +787,8 @@ static const struct attribute_group hisi_pcie_pmu_format_group = { static struct attribute *hisi_pcie_pmu_bus_attrs[] = { &dev_attr_bus.attr, + &dev_attr_bdf_max.attr, + &dev_attr_bdf_min.attr, NULL }; From f0a9b9927cc54711233598c939d0fe4dccb535b1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 6 Jun 2024 23:53:43 -0700 Subject: [PATCH 34/95] perf arm: Workaround ARM PMUs cpu maps having offline cpus [Upstream commit 5518063fcb2e022411c1112b1b9b069e68380bbb] When PMUs have a cpu map in the 'cpus' or 'cpumask' file, perf will try to open events on those CPUs. ARM doesn't remove offline CPUs meaning taking a CPU offline will cause perf commands to fail unless a CPU map is passed on the command line. More context in: https://lore.kernel.org/lkml/20240603092812.46616-1-yangyicong@huawei.com/ Reported-by: Yicong Yang Closes: https://lore.kernel.org/lkml/20240603092812.46616-2-yangyicong@huawei.com/ Signed-off-by: Ian Rogers Tested-by: Yicong Yang Tested-by: Leo Yan Cc: James Clark Cc: Suzuki K Poulose Cc: Will Deacon Cc: Mike Leach Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240607065343.695369-1-irogers@google.com Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- tools/perf/arch/arm/util/pmu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index a9623b128ece2..c56d0aa2a76f4 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -11,12 +11,15 @@ #include "arm-spe.h" #include "hisi-ptt.h" +#include "../../../util/cpumap.h" #include "../../../util/pmu.h" #include "../../../util/cs-etm.h" struct perf_event_attr -*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +*perf_pmu__get_default_config(struct perf_pmu *pmu) { + struct perf_cpu_map *intersect; + #ifdef HAVE_AUXTRACE_SUPPORT if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { /* add ETM default config here */ @@ -31,5 +34,10 @@ struct perf_event_attr } #endif + /* Workaround some ARM PMU's failing to correctly set CPU maps for online processors. */ + intersect = perf_cpu_map__intersect(cpu_map__online(), pmu->cpus); + perf_cpu_map__put(pmu->cpus); + pmu->cpus = intersect; + return NULL; } From 6416186c48b92f5265e4d0ce14fd00281ba789ff Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 25 Apr 2025 11:38:45 +0800 Subject: [PATCH 35/95] perf mem: Count L2 HITM for c2c statistic [Upstream commit fa9b3578ed628641504ab74958bbe36a42c2615a] L2 HITM is not counted in c2c statistic decoding. Count it for lcl_hitm like how we handle L2 Peer snoop. Reviewed-by: Leo Yan Signed-off-by: Yicong Yang Cc: CaiJingtao Cc: Catalin Marinas Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Jonathan Cameron Cc: Junhao He Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: Yushan Wang Cc: Zeng Tao Cc: xueshan2@huawei.com Link: https://lore.kernel.org/r/20250425033845.57671-4-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Qizhi Zhang Signed-off-by: zhaolichang <943677312@qq.com> Signed-off-by: WangYuli --- tools/perf/util/mem-events.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 3a2e3687878c1..a6996d3e32c01 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -586,7 +586,10 @@ do { \ if (lvl & P(LVL, LFB)) stats->ld_fbhit++; if (lvl & P(LVL, L1 )) stats->ld_l1hit++; if (lvl & P(LVL, L2)) { - stats->ld_l2hit++; + if (snoop & P(SNOOP, HITM)) + HITM_INC(lcl_hitm); + else + stats->ld_l2hit++; if (snoopx & P(SNOOPX, PEER)) PEER_INC(lcl_peer); From b796bd42203c6b66ffecf1a3f176da36912fb21d Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 14 May 2024 17:19:01 +0800 Subject: [PATCH 36/95] arm64: arm_pmuv3: Correctly extract and check the PMUVer [Upstream commit b782e8d07baac95a5ce3f8773cc61f4ed7d0ccbc] Currently we're using "sbfx" to extract the PMUVer from ID_AA64DFR0_EL1 and skip the init/reset if no PMU present when the extracted PMUVer is negative or is zero. However for PMUv3p8 the PMUVer will be 0b1000 and PMUVer extracted by "sbfx" will always be negative and we'll skip the init/reset in __init_el2_debug/reset_pmuserenr_el0 unexpectedly. So this patch use "ubfx" instead of "sbfx" to extract the PMUVer. If the PMUVer is implementation defined (0b1111) or not implemented(0b0000) then skip the reset/init. Previously we'll also skip the init/reset if the PMUVer is higher than the version we known (currently PMUv3p9), with this patch we'll only skip if the PMU is not implemented or implementation defined. This keeps consistence with how we probe the PMU in the driver with pmuv3_implemented(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20240411123030.7201-1-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: zhaolichang <943677312@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/assembler.h | 7 ++++--- arch/arm64/include/asm/el2_setup.h | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 979ba9ddee414..f6f92c582e0f3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -521,9 +521,10 @@ alternative_endif */ .macro reset_pmuserenr_el0, tmpreg mrs \tmpreg, id_aa64dfr0_el1 - sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp \tmpreg, #1 // Skip if no PMU present - b.lt 9000f + ubfx \tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp \tmpreg, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq 9000f // Skip if no PMU present or IMP_DEF msr pmuserenr_el0, xzr // Disable PMU access from EL0 9000: .endm diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index b7afaa026842b..e4546b29dd0cb 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -59,13 +59,14 @@ .macro __init_el2_debug mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 - cmp x0, #1 - b.lt .Lskip_pmu_\@ // Skip if no PMU present + ubfx x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4 + cmp x0, #ID_AA64DFR0_EL1_PMUVer_NI + ccmp x0, #ID_AA64DFR0_EL1_PMUVer_IMP_DEF, #4, ne + b.eq .Lskip_pmu_\@ // Skip if no PMU present or IMP_DEF mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to .Lskip_pmu_\@: - csel x2, xzr, x0, lt // all PMU counters from EL1 + csel x2, xzr, x0, eq // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 From a944d2f5c62303d47323af82d1ab8ccd1b4cd022 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 25 Apr 2025 11:38:43 +0800 Subject: [PATCH 37/95] arm64: cputype: Add cputype definition for HIP12 [Upstream commit 226ff35039d05826b738160eff05844c0fa5c2a0] Add MIDR encoding for HiSilicon HIP12 which is used on HiSilicon HIP12 SoCs. Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20250425033845.57671-2-yangyicong@huawei.com Signed-off-by: Will Deacon Signed-off-by: Qizhi Zhang Signed-off-by: zhaolichang <943677312@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index fc98922de921c..a206dc8ca1f2b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -135,6 +135,7 @@ #define HISI_CPU_PART_TSV110 0xD01 #define HISI_CPU_PART_HIP09 0xD02 +#define HISI_CPU_PART_HIP12 0xD06 #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 @@ -222,6 +223,7 @@ #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09) +#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) #define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) From dd0e1a48ea2a1f079b2911b891d0d40e6c311e7d Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:03 +0800 Subject: [PATCH 38/95] drivers/perf: hisi: Use ACPI driver_data to retrieve SLLC PMU information [Upstream commit 29614c55fe6ff8e5ddee9c6247d5c3dbe05ca8bc] Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. Factor out SLLC register definition to struct hisi_sllc_pmu_regs. No functional changes intended. Signed-off-by: Junhao He Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 178 ++++++++++++------ 1 file changed, 118 insertions(+), 60 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index e99a5b32bd92f..dd17db7adc0e5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -41,6 +41,7 @@ #define SLLC_SRCID_CMD_SHIFT 1 #define SLLC_SRCID_MSK_SHIFT 12 #define SLLC_NR_EVENTS 0x80 +#define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11); @@ -48,6 +49,23 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); +struct hisi_sllc_pmu_regs { + u32 int_mask; + u32 int_clear; + u32 int_status; + u32 perf_ctrl; + u32 srcid_ctrl; + u32 srcid_cmd_shift; + u32 srcid_mask_shift; + u32 tgtid_ctrl; + u32 tgtid_min_shift; + u32 tgtid_max_shift; + u32 event_ctrl; + u32 event_type0; + u32 version; + u32 event_cnt0; +}; + static bool tgtid_is_valid(u32 max, u32 min) { return max > 0 && max >= min; @@ -56,96 +74,104 @@ static bool tgtid_is_valid(u32 max, u32 min) static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TRACETAG_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 tt_en = hisi_get_tracetag_en(event); if (tt_en) { u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { - u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT); + u32 val = (max << regs->tgtid_max_shift) | + (min << regs->tgtid_min_shift); - writel(val, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(val, sllc_pmu->base + regs->tgtid_ctrl); /* Enable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_TGTID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 min = hisi_get_tgtid_min(event); u32 max = hisi_get_tgtid_max(event); if (tgtid_is_valid(max, min)) { u32 val; - writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL); + writel(SLLC_TGTID_NONE, sllc_pmu->base + regs->tgtid_ctrl); /* Disable the tgtid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_config_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val, msk; msk = hisi_get_srcid_msk(event); - val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT); - writel(val, sllc_pmu->base + SLLC_SRCID_CTRL); + val = (cmd << regs->srcid_cmd_shift) | + (msk << regs->srcid_mask_shift); + writel(val, sllc_pmu->base + regs->srcid_ctrl); /* Enable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_SRCID_EN | SLLC_FILT_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } static void hisi_sllc_pmu_clear_srcid(struct perf_event *event) { struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 cmd = hisi_get_srcid_cmd(event); if (cmd) { u32 val; - writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL); + writel(SLLC_SRCID_NONE, sllc_pmu->base + regs->srcid_ctrl); /* Disable the srcid */ - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } } @@ -167,29 +193,27 @@ static void hisi_sllc_pmu_clear_filter(struct perf_event *event) } } -static u32 hisi_sllc_pmu_get_counter_offset(int idx) -{ - return (SLLC_EVENT_CNT0_L + idx * 8); -} - static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { - return readq(sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readq(sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, sllc_pmu->base + - hisi_sllc_pmu_get_counter_offset(hwc->idx)); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + writeq(val, sllc_pmu->base + SLLC_EVENT_CNTn(regs->event_cnt0, hwc->idx)); } static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, u32 type) { - u32 reg, reg_idx, shift, val; + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + u32 reg, val; /* * Select the appropriate event select register(SLLC_EVENT_TYPE0/1). @@ -198,96 +222,98 @@ static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, * SLLC_EVENT_TYPE1 is chosen. */ - reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4; - reg_idx = idx % 4; - shift = 8 * reg_idx; + reg = regs->event_type0 + (idx / 4) * 4; /* Write event code to SLLC_EVENT_TYPEx Register */ val = readl(sllc_pmu->base + reg); - val &= ~(SLLC_EVTYPE_MASK << shift); - val |= (type << shift); + val &= ~(SLLC_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); writel(val, sllc_pmu->base + reg); } static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val |= SLLC_PERF_CTRL_EN; - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val = readl(sllc_pmu->base + regs->perf_ctrl); val &= ~(SLLC_PERF_CTRL_EN); - writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + writel(val, sllc_pmu->base + regs->perf_ctrl); } static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); + val = readl(sllc_pmu->base + regs->event_ctrl); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->event_ctrl); } static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val &= ~BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu, struct hw_perf_event *hwc) { + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; u32 val; - val = readl(sllc_pmu->base + SLLC_INT_MASK); - /* Write 1 to mask interrupt */ - val |= 1 << hwc->idx; - writel(val, sllc_pmu->base + SLLC_INT_MASK); + val = readl(sllc_pmu->base + regs->int_mask); + val |= BIT_ULL(hwc->idx); + writel(val, sllc_pmu->base + regs->int_mask); } static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu) { - return readl(sllc_pmu->base + SLLC_INT_STATUS); + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; + + return readl(sllc_pmu->base + regs->int_status); } static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx) { - writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR); -} + struct hisi_sllc_pmu_regs *regs = sllc_pmu->dev_info->private; -static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { - { "HISI0263", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + writel(BIT_ULL(idx), sllc_pmu->base + regs->int_clear); +} static int hisi_sllc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *sllc_pmu) { + struct hisi_sllc_pmu_regs *regs; + hisi_uncore_pmu_init_topology(sllc_pmu, &pdev->dev); /* @@ -304,13 +330,18 @@ static int hisi_sllc_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + sllc_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!sllc_pmu->dev_info) + return -ENODEV; + sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sllc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); return PTR_ERR(sllc_pmu->base); } - sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION); + regs = sllc_pmu->dev_info->private; + sllc_pmu->identifier = readl(sllc_pmu->base + regs->version); return 0; } @@ -352,6 +383,27 @@ static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { NULL }; +static struct hisi_sllc_pmu_regs hisi_sllc_v2_pmu_regs = { + .int_mask = SLLC_INT_MASK, + .int_clear = SLLC_INT_CLEAR, + .int_status = SLLC_INT_STATUS, + .perf_ctrl = SLLC_PERF_CTRL, + .srcid_ctrl = SLLC_SRCID_CTRL, + .srcid_cmd_shift = SLLC_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_TGTID_CTRL, + .tgtid_min_shift = SLLC_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_EVENT_CTRL, + .event_type0 = SLLC_EVENT_TYPE0, + .version = SLLC_VERSION, + .event_cnt0 = SLLC_EVENT_CNT0_L, +}; + +static const struct hisi_pmu_dev_info hisi_sllc_v2 = { + .private = &hisi_sllc_v2_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { .write_evtype = hisi_sllc_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -444,6 +496,12 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { + { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + static struct platform_driver hisi_sllc_pmu_driver = { .driver = { .name = "hisi_sllc_pmu", From f25cc8fe039552c8461525380cc889f2f4aac739 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:04 +0800 Subject: [PATCH 39/95] drivers/perf: hisi: Add support for HiSilicon SLLC v3 PMU driver [Upstream commit 1fd20ba0a1dcaf3bf8757c0e0b8ff754ab25b228] SLLC v3 PMU has the following changes compared to previous version: a) update the register layout b) update the definition of SRCID_CTRL and TGTID_CTRL registers. To be compatible with v2, we use maximum width (11 bits) and mask the extra length for themselves. c) remove latency events (driver does not need to be adapted). SLLC v3 PMU is identified with HID HISI0264. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index dd17db7adc0e5..ca0e422adca7d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -28,6 +28,18 @@ #define SLLC_VERSION 0x1cf0 #define SLLC_EVENT_CNT0_L 0x1d00 +/* SLLC registers definition in v3 */ +#define SLLC_V3_INT_MASK 0x6834 +#define SLLC_V3_INT_STATUS 0x6838 +#define SLLC_V3_INT_CLEAR 0x683c +#define SLLC_V3_VERSION 0x6c00 +#define SLLC_V3_PERF_CTRL 0x6d00 +#define SLLC_V3_SRCID_CTRL 0x6d04 +#define SLLC_V3_TGTID_CTRL 0x6d08 +#define SLLC_V3_EVENT_CTRL 0x6d14 +#define SLLC_V3_EVENT_TYPE0 0x6d18 +#define SLLC_V3_EVENT_CNT0_L 0x6e00 + #define SLLC_EVTYPE_MASK 0xff #define SLLC_PERF_CTRL_EN BIT(0) #define SLLC_FILT_EN BIT(1) @@ -40,6 +52,12 @@ #define SLLC_TGTID_MAX_SHIFT 12 #define SLLC_SRCID_CMD_SHIFT 1 #define SLLC_SRCID_MSK_SHIFT 12 + +#define SLLC_V3_TGTID_MIN_SHIFT 1 +#define SLLC_V3_TGTID_MAX_SHIFT 10 +#define SLLC_V3_SRCID_CMD_SHIFT 1 +#define SLLC_V3_SRCID_MSK_SHIFT 10 + #define SLLC_NR_EVENTS 0x80 #define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) @@ -404,6 +422,27 @@ static const struct hisi_pmu_dev_info hisi_sllc_v2 = { .private = &hisi_sllc_v2_pmu_regs, }; +static struct hisi_sllc_pmu_regs hisi_sllc_v3_pmu_regs = { + .int_mask = SLLC_V3_INT_MASK, + .int_clear = SLLC_V3_INT_CLEAR, + .int_status = SLLC_V3_INT_STATUS, + .perf_ctrl = SLLC_V3_PERF_CTRL, + .srcid_ctrl = SLLC_V3_SRCID_CTRL, + .srcid_cmd_shift = SLLC_V3_SRCID_CMD_SHIFT, + .srcid_mask_shift = SLLC_V3_SRCID_MSK_SHIFT, + .tgtid_ctrl = SLLC_V3_TGTID_CTRL, + .tgtid_min_shift = SLLC_V3_TGTID_MIN_SHIFT, + .tgtid_max_shift = SLLC_V3_TGTID_MAX_SHIFT, + .event_ctrl = SLLC_V3_EVENT_CTRL, + .event_type0 = SLLC_V3_EVENT_TYPE0, + .version = SLLC_V3_VERSION, + .event_cnt0 = SLLC_V3_EVENT_CNT0_L, +}; + +static const struct hisi_pmu_dev_info hisi_sllc_v3 = { + .private = &hisi_sllc_v3_pmu_regs, +}; + static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { .write_evtype = hisi_sllc_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -498,6 +537,7 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev) static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { { "HISI0263", (kernel_ulong_t)&hisi_sllc_v2 }, + { "HISI0264", (kernel_ulong_t)&hisi_sllc_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); From c55ac54a41af53d077d20f68f63918b357ad39d2 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:05 +0800 Subject: [PATCH 40/95] drivers/perf: hisi: Relax the event number check of v2 PMUs [Upstream commit 35f5b36e8cc2d241083ee0f08fa8b5366bde6f22] The supported event number range of each Uncore PMUs is provided by each driver in hisi_pmu::check_event and out of range events will be rejected. A later version with expanded event number range needs to register the PMU with updated hisi_pmu::check_event even if it's the only update, which means the expanded events cannot be used unless the driver's updated. However the unsupported events won't be counted by the hardware so we can relax the event number check to allow the use the expanded events. Signed-off-by: Junhao He Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 6 +++--- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ccf8aa399de82..b22775f6f727d 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -53,7 +53,7 @@ #define DDRC_V1_PERF_CTRL_EN 0x2 #define DDRC_V2_PERF_CTRL_EN 0x1 #define DDRC_V1_NR_EVENTS 0x7 -#define DDRC_V2_NR_EVENTS 0x90 +#define DDRC_V2_NR_EVENTS 0xFF #define DDRC_EVENT_CNTn(base, n) ((base) + (n) * 8) #define DDRC_EVENT_TYPEn(base, n) ((base) + (n) * 4) diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index cb928b450b97b..a0a467cc2f49e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -47,9 +47,9 @@ #define HHA_SRCID_CMD GENMASK(16, 6) #define HHA_SRCID_MSK GENMASK(30, 20) #define HHA_DATSRC_SKT_EN BIT(23) -#define HHA_EVTYPE_NONE 0xff +#define HHA_EVTYPE_MASK GENMASK(7, 0) #define HHA_V1_NR_EVENT 0x65 -#define HHA_V2_NR_EVENT 0xCE +#define HHA_V2_NR_EVENT 0xFF HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11); @@ -197,7 +197,7 @@ static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, /* Write event code to HHA_EVENT_TYPEx register */ val = readl(hha_pmu->base + reg); - val &= ~(HHA_EVTYPE_NONE << shift); + val &= ~(HHA_EVTYPE_MASK << shift); val |= (type << shift); writel(val, hha_pmu->base + reg); } diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index b78515ab7556e..bf79ef2885647 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -440,7 +440,7 @@ static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, pa_pmu->pmu_events.attr_groups = pa_pmu->dev_info->attr_groups; pa_pmu->num_counters = PA_NR_COUNTERS; pa_pmu->ops = &hisi_uncore_pa_ops; - pa_pmu->check_event = 0xB0; + pa_pmu->check_event = PA_EVTYPE_MASK; pa_pmu->counter_bits = 64; pa_pmu->dev = &pdev->dev; pa_pmu->on_cpu = -1; diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index ca0e422adca7d..020bea8c6c7bb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -58,7 +58,7 @@ #define SLLC_V3_SRCID_CMD_SHIFT 1 #define SLLC_V3_SRCID_MSK_SHIFT 10 -#define SLLC_NR_EVENTS 0x80 +#define SLLC_NR_EVENTS 0xff #define SLLC_EVENT_CNTn(cnt0, n) ((cnt0) + (n) * 8) HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); From 2ba1dc28fd411a14d24b91502f8e23f4d10ea0f6 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:14:06 +0800 Subject: [PATCH 41/95] drivers/perf: hisi: Support PMUs with no interrupt [Upstream commit e480898e767c54a883e965fc306e2ece013cbca5] We'll have PMUs don't have an interrupt to indicate the counter overflow, but the Uncore PMU core assume all the PMUs have interrupt. So handle this case in the core. The existing PMUs won't be affected. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 94e122605b8c6..0d826b9bd5c70 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -524,7 +524,9 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) return 0; hisi_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(hisi_pmu->dev)); - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, + cpumask_of(hisi_pmu->on_cpu))); return 0; } @@ -539,7 +541,8 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) hisi_pmu->on_cpu = cpu; /* Overflow interrupt also should use the same CPU */ - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu))); return 0; } @@ -573,7 +576,9 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target); /* Use this CPU for event counting */ hisi_pmu->on_cpu = target; - WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); + + if (hisi_pmu->irq > 0) + WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target))); return 0; } From 6f634a8f44f9af87a5ac6ca9c68bbcf0d78fb1fe Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 11 Apr 2025 19:14:07 +0800 Subject: [PATCH 42/95] drivers/perf: hisi: Add support for HiSilicon NoC PMU [Upstream commit e31c0eb10388e2af0502b77e61453efbc8a4f974] Adds the support for HiSilicon NoC (Network on Chip) PMU which will be used to monitor the events on the system bus. The PMU device will be named after the SCL ID (either Super CPU cluster or Super IO cluster) and the index ID, just similar to other HiSilicon Uncore PMUs. Below PMU formats are provided besides the event: - ch: the transaction channel (data, request, response, etc) which can be used to filter the counting. - tt_en: tracetag filtering enable. Just as other HiSilicon Uncore PMUs the NoC PMU supports only counting the transactions with tracetag. The NoC PMU doesn't have an interrupt to indicate the overflow. However we have a 64 bit counter which is large enough and it's nearly impossible to overflow. Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Will Deacon Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- Documentation/admin-guide/perf/hisi-pmu.rst | 11 + drivers/perf/hisilicon/Makefile | 3 +- drivers/perf/hisilicon/hisi_uncore_noc_pmu.c | 392 +++++++++++++++++++ 3 files changed, 405 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_noc_pmu.c diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index adb67311c8113..102ea54fd64c3 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -113,6 +113,17 @@ uring channel. It is 2 bits. Some important codes are as follows: - 2'b00: default value, count the events which sent to the both uring and uring_ext channel; +6. ch: NoC PMU supports filtering the event counts of certain transaction +channel with this option. The current supported channels are as follows: + +- 3'b010: Request channel +- 3'b100: Snoop channel +- 3'b110: Response channel +- 3'b111: Data channel + +7. tt_en: NoC PMU supports counting only transactions that have tracetag set +if this option is set. See the 2nd list for more information about tracetag. + Users could configure IDs to count data come from specific CCL/ICL, by setting srcid_cmd & srcid_msk, and data desitined for specific CCL/ICL by setting tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the PMU will not diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index 48dcc8381ea75..dcec8f39719d0 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ - hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o + hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ + hisi_uncore_noc_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c new file mode 100644 index 0000000000000..bf0a6bce71c43 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_noc_pmu.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for HiSilicon Uncore NoC (Network on Chip) PMU device + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + * Author: Yicong Yang + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +#define NOC_PMU_VERSION 0x1e00 +#define NOC_PMU_GLOBAL_CTRL 0x1e04 +#define NOC_PMU_GLOBAL_CTRL_PMU_EN BIT(0) +#define NOC_PMU_GLOBAL_CTRL_TT_EN BIT(1) +#define NOC_PMU_CNT_INFO 0x1e08 +#define NOC_PMU_CNT_INFO_OVERFLOW(n) BIT(n) +#define NOC_PMU_EVENT_CTRL(n) (0x1e20 + 4 * (n)) +#define NOC_PMU_EVENT_CTRL_TYPE GENMASK(4, 0) +/* + * Note channel of 0x0 will reset the counter value, so don't do it before + * we read out the counter. + */ +#define NOC_PMU_EVENT_CTRL_CHANNEL GENMASK(10, 8) +#define NOC_PMU_EVENT_CTRL_EN BIT(11) +#define NOC_PMU_EVENT_COUNTER(n) (0x1e80 + 8 * (n)) + +#define NOC_PMU_NR_COUNTERS 4 +#define NOC_PMU_COUNTER_BITS 64 + +#define NOC_PMU_CH_DEFAULT 0x7 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(ch, config1, 2, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_en, config1, 3, 3); + +/* Dynamic CPU hotplug state used by this PMU driver */ +static enum cpuhp_state hisi_noc_pmu_cpuhp_state; + +/* + * Tracetag filtering is not per event and all the events should keep + * the consistence. Return true if the new comer doesn't match the + * tracetag filtering configuration of the current scheduled events. + */ +static bool hisi_noc_pmu_check_global_filter(struct perf_event *curr, + struct perf_event *new) +{ + return hisi_get_tt_en(curr) == hisi_get_tt_en(new); +} + +static void hisi_noc_pmu_write_evtype(struct hisi_pmu *noc_pmu, int idx, u32 type) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(idx)); + reg &= ~NOC_PMU_EVENT_CTRL_TYPE; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_TYPE, type); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(idx)); +} + +static int hisi_noc_pmu_get_event_idx(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hisi_pmu_hwevents *pmu_events = &noc_pmu->pmu_events; + int cur_idx; + + cur_idx = find_first_bit(pmu_events->used_mask, noc_pmu->num_counters); + if (cur_idx != noc_pmu->num_counters && + !hisi_noc_pmu_check_global_filter(pmu_events->hw_events[cur_idx], event)) + return -EAGAIN; + + return hisi_uncore_pmu_get_event_idx(event); +} + +static u64 hisi_noc_pmu_read_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + return readq(noc_pmu->base + NOC_PMU_EVENT_COUNTER(hwc->idx)); +} + +static void hisi_noc_pmu_write_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, noc_pmu->base + NOC_PMU_EVENT_COUNTER(hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + reg |= NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); +} + +static void hisi_noc_pmu_disable_counter(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_EN; + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); +} + +static void hisi_noc_pmu_enable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ + /* We don't support interrupt, so a stub here. */ +} + +static void hisi_noc_pmu_disable_counter_int(struct hisi_pmu *noc_pmu, + struct hw_perf_event *hwc) +{ +} + +static void hisi_noc_pmu_start_counters(struct hisi_pmu *noc_pmu) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg |= NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); +} + +static void hisi_noc_pmu_stop_counters(struct hisi_pmu *noc_pmu) +{ + u32 reg; + + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg &= ~NOC_PMU_GLOBAL_CTRL_PMU_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); +} + +static u32 hisi_noc_pmu_get_int_status(struct hisi_pmu *noc_pmu) +{ + return readl(noc_pmu->base + NOC_PMU_CNT_INFO); +} + +static void hisi_noc_pmu_clear_int_status(struct hisi_pmu *noc_pmu, int idx) +{ + writel(~NOC_PMU_CNT_INFO_OVERFLOW(idx), noc_pmu->base + NOC_PMU_CNT_INFO); +} + +static void hisi_noc_pmu_enable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u32 tt_en = hisi_get_tt_en(event); + u32 ch = hisi_get_ch(event); + u32 reg; + + if (!ch) + ch = NOC_PMU_CH_DEFAULT; + + reg = readl(noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + reg &= ~NOC_PMU_EVENT_CTRL_CHANNEL; + reg |= FIELD_PREP(NOC_PMU_EVENT_CTRL_CHANNEL, ch); + writel(reg, noc_pmu->base + NOC_PMU_EVENT_CTRL(hwc->idx)); + + /* + * Since tracetag filter applies to all the counters, don't touch it + * if user doesn't specify it explicitly. + */ + if (tt_en) { + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg |= NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + } +} + +static void hisi_noc_pmu_disable_filter(struct perf_event *event) +{ + struct hisi_pmu *noc_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tt_en(event); + u32 reg; + + /* + * If we're not the last counter, don't touch the global tracetag + * configuration. + */ + if (bitmap_weight(noc_pmu->pmu_events.used_mask, noc_pmu->num_counters) > 1) + return; + + if (tt_en) { + reg = readl(noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + reg &= ~NOC_PMU_GLOBAL_CTRL_TT_EN; + writel(reg, noc_pmu->base + NOC_PMU_GLOBAL_CTRL); + } +} + +static const struct hisi_uncore_ops hisi_uncore_noc_ops = { + .write_evtype = hisi_noc_pmu_write_evtype, + .get_event_idx = hisi_noc_pmu_get_event_idx, + .read_counter = hisi_noc_pmu_read_counter, + .write_counter = hisi_noc_pmu_write_counter, + .enable_counter = hisi_noc_pmu_enable_counter, + .disable_counter = hisi_noc_pmu_disable_counter, + .enable_counter_int = hisi_noc_pmu_enable_counter_int, + .disable_counter_int = hisi_noc_pmu_disable_counter_int, + .start_counters = hisi_noc_pmu_start_counters, + .stop_counters = hisi_noc_pmu_stop_counters, + .get_int_status = hisi_noc_pmu_get_int_status, + .clear_int_status = hisi_noc_pmu_clear_int_status, + .enable_filter = hisi_noc_pmu_enable_filter, + .disable_filter = hisi_noc_pmu_disable_filter, +}; + +static struct attribute *hisi_noc_pmu_format_attrs[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ch, "config1:0-2"), + HISI_PMU_FORMAT_ATTR(tt_en, "config1:3"), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_format_group = { + .name = "format", + .attrs = hisi_noc_pmu_format_attrs, +}; + +static struct attribute *hisi_noc_pmu_events_attrs[] = { + HISI_PMU_EVENT_ATTR(cycles, 0x0e), + /* Flux on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_flow_sum, 0x1a), + HISI_PMU_EVENT_ATTR(egress_flow_sum, 0x17), + /* Buffer full duration on/off the ring */ + HISI_PMU_EVENT_ATTR(ingress_buf_full, 0x19), + HISI_PMU_EVENT_ATTR(egress_buf_full, 0x12), + /* Failure packets count on/off the ring */ + HISI_PMU_EVENT_ATTR(cw_ingress_fail, 0x01), + HISI_PMU_EVENT_ATTR(cc_ingress_fail, 0x09), + HISI_PMU_EVENT_ATTR(cw_egress_fail, 0x03), + HISI_PMU_EVENT_ATTR(cc_egress_fail, 0x0b), + /* Flux of the ring */ + HISI_PMU_EVENT_ATTR(cw_main_flow_sum, 0x05), + HISI_PMU_EVENT_ATTR(cc_main_flow_sum, 0x0d), + NULL +}; + +static const struct attribute_group hisi_noc_pmu_events_group = { + .name = "events", + .attrs = hisi_noc_pmu_events_attrs, +}; + +static const struct attribute_group *hisi_noc_pmu_attr_groups[] = { + &hisi_noc_pmu_format_group, + &hisi_noc_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static int hisi_noc_pmu_dev_init(struct platform_device *pdev, struct hisi_pmu *noc_pmu) +{ + hisi_uncore_pmu_init_topology(noc_pmu, &pdev->dev); + + if (noc_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get scl-id\n"); + + if (noc_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get idx-id\n"); + + if (noc_pmu->topo.sub_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, "failed to get sub-id\n"); + + noc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(noc_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(noc_pmu->base), + "fail to remap io memory\n"); + + noc_pmu->on_cpu = -1; + noc_pmu->dev = &pdev->dev; + noc_pmu->ops = &hisi_uncore_noc_ops; + noc_pmu->pmu_events.attr_groups = hisi_noc_pmu_attr_groups; + noc_pmu->num_counters = NOC_PMU_NR_COUNTERS; + noc_pmu->counter_bits = NOC_PMU_COUNTER_BITS; + noc_pmu->check_event = NOC_PMU_EVENT_CTRL_TYPE; + noc_pmu->identifier = readl(noc_pmu->base + NOC_PMU_VERSION); + return 0; +} + +static void hisi_noc_pmu_remove_cpuhp_instance(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_noc_pmu_cpuhp_state, hotplug_node); +} + +static void hisi_noc_pmu_unregister_pmu(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_noc_pmu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct hisi_pmu *noc_pmu; + char *name; + int ret; + + noc_pmu = devm_kzalloc(dev, sizeof(*noc_pmu), GFP_KERNEL); + if (!noc_pmu) + return -ENOMEM; + + /* + * HiSilicon Uncore PMU framework needs to get common hisi_pmu device + * from device's drvdata. + */ + platform_set_drvdata(pdev, noc_pmu); + + ret = hisi_noc_pmu_dev_init(pdev, noc_pmu); + if (ret) + return ret; + + ret = cpuhp_state_add_instance(hisi_noc_pmu_cpuhp_state, &noc_pmu->node); + if (ret) + return dev_err_probe(dev, ret, "Fail to register cpuhp instance\n"); + + ret = devm_add_action_or_reset(dev, hisi_noc_pmu_remove_cpuhp_instance, + &noc_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(noc_pmu, THIS_MODULE); + + name = devm_kasprintf(dev, GFP_KERNEL, "hisi_scl%d_noc%d_%d", + noc_pmu->topo.scl_id, noc_pmu->topo.index_id, + noc_pmu->topo.sub_id); + if (!name) + return -ENOMEM; + + ret = perf_pmu_register(&noc_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(dev, ret, "Fail to register PMU\n"); + + return devm_add_action_or_reset(dev, hisi_noc_pmu_unregister_pmu, + &noc_pmu->pmu); +} + +const struct acpi_device_id hisi_noc_pmu_ids[] = { + { "HISI04E0", }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_noc_pmu_ids); + +static struct platform_driver hisi_noc_pmu_driver = { + .driver = { + .name = "hisi_noc_pmu", + .acpi_match_table = hisi_noc_pmu_ids, + .suppress_bind_attrs = true, + }, + .probe = hisi_noc_pmu_probe, +}; + +static int __init hisi_noc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/noc:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_noc_pmu: Fail to setup cpuhp callbacks, ret = %d\n", ret); + return ret; + } + hisi_noc_pmu_cpuhp_state = ret; + + ret = platform_driver_register(&hisi_noc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state); + + return ret; +} +module_init(hisi_noc_pmu_module_init); + +static void __exit hisi_noc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_noc_pmu_driver); + cpuhp_remove_multi_state(hisi_noc_pmu_cpuhp_state); +} +module_exit(hisi_noc_pmu_module_exit); + +MODULE_IMPORT_NS(HISI_PMU); +MODULE_DESCRIPTION("HiSilicon SoC Uncore NoC PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yicong Yang "); From 45c30545141c9dc181267e82da4b56ef20025225 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Fri, 11 Apr 2025 19:14:08 +0800 Subject: [PATCH 43/95] drivers/perf: hisi: Add support for HiSilicon MN PMU driver [Upstream commit 2257798498b3b069e5ff46ad957c32a9a06b5fc9] MN (Miscellaneous Node) is a hybrid node in ARM CHI. It broadcasts the following two types of requests: DVM operations and PCIe configuration. MN PMU devices exist on both SCCL and SICL, so we named the MN pmu driver after SCL (Super cluster) ID. The MN PMU driver using the HiSilicon uncore PMU framework. And only the event parameter is supported. Signed-off-by: Junhao He Reviewed-by: Jonathan Cameron Signed-off-by: Yicong Yang Signed-off-by: Will Deacon Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/Makefile | 2 +- drivers/perf/hisilicon/hisi_uncore_mn_pmu.c | 355 ++++++++++++++++++++ 2 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_mn_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index dcec8f39719d0..186be3d02238b 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \ hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o hisi_uncore_uc_pmu.o \ - hisi_uncore_noc_pmu.o + hisi_uncore_noc_pmu.o hisi_uncore_mn_pmu.o obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c new file mode 100644 index 0000000000000..bad06cac3f8a3 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_mn_pmu.c @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SoC MN uncore Hardware event counters support + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* Dynamic CPU hotplug state used by MN PMU */ +static enum cpuhp_state hisi_mn_pmu_online; + +/* MN register definition */ +#define HISI_MN_DYNAMIC_CTRL 0x400 +#define HISI_MN_DYNAMIC_CTRL_EN BIT(0) +#define HISI_MN_PERF_CTRL_REG 0x408 +#define HISI_MN_PERF_CTRL_EN BIT(6) +#define HISI_MN_INT_MASK_REG 0x800 +#define HISI_MN_INT_STATUS_REG 0x808 +#define HISI_MN_INT_CLEAR_REG 0x80C +#define HISI_MN_EVENT_CTRL_REG 0x1C00 +#define HISI_MN_VERSION_REG 0x1C04 +#define HISI_MN_EVTYPE_REGn(n) (0x1d00 + (n) * 4) +#define HISI_MN_EVTYPE_MASK GENMASK(7, 0) +#define HISI_MN_CNTR_REGn(n) (0x1e00 + (n) * 8) + +#define HISI_MN_NR_COUNTERS 4 +#define HISI_MN_COUNTER_BITS 48 +#define HISI_MN_TIMEOUT_US 500U + +/* + * Each event request takes a certain amount of time to complete. If + * we counting the latency related event, we need to wait for the all + * requests complete. Otherwise, the value of counter is slightly larger. + */ +static void hisi_mn_pmu_counter_flush(struct hisi_pmu *mn_pmu) +{ + int ret; + u32 val; + + val = readl(mn_pmu->base + HISI_MN_DYNAMIC_CTRL); + val |= HISI_MN_DYNAMIC_CTRL_EN; + writel(val, mn_pmu->base + HISI_MN_DYNAMIC_CTRL); + + ret = readl_poll_timeout_atomic(mn_pmu->base + HISI_MN_DYNAMIC_CTRL, + val, !(val & HISI_MN_DYNAMIC_CTRL_EN), + 1, HISI_MN_TIMEOUT_US); + if (ret) + dev_warn(mn_pmu->dev, "Counter flush timeout\n"); +} + +static u64 hisi_mn_pmu_read_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + return readq(mn_pmu->base + HISI_MN_CNTR_REGn(hwc->idx)); +} + +static void hisi_mn_pmu_write_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, mn_pmu->base + HISI_MN_CNTR_REGn(hwc->idx)); +} + +static void hisi_mn_pmu_write_evtype(struct hisi_pmu *mn_pmu, int idx, u32 type) +{ + u32 val; + + /* + * Select the appropriate event select register. + * There are 2 32-bit event select registers for the + * 8 hardware counters, each event code is 8-bit wide. + */ + val = readl(mn_pmu->base + HISI_MN_EVTYPE_REGn(idx / 4)); + val &= ~(HISI_MN_EVTYPE_MASK << HISI_PMU_EVTYPE_SHIFT(idx)); + val |= (type << HISI_PMU_EVTYPE_SHIFT(idx)); + writel(val, mn_pmu->base + HISI_MN_EVTYPE_REGn(idx / 4)); +} + +static void hisi_mn_pmu_start_counters(struct hisi_pmu *mn_pmu) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_PERF_CTRL_REG); + val |= HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + HISI_MN_PERF_CTRL_REG); +} + +static void hisi_mn_pmu_stop_counters(struct hisi_pmu *mn_pmu) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_PERF_CTRL_REG); + val &= ~HISI_MN_PERF_CTRL_EN; + writel(val, mn_pmu->base + HISI_MN_PERF_CTRL_REG); + + hisi_mn_pmu_counter_flush(mn_pmu); +} + +static void hisi_mn_pmu_enable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_EVENT_CTRL_REG); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_EVENT_CTRL_REG); +} + +static void hisi_mn_pmu_disable_counter(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_EVENT_CTRL_REG); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_EVENT_CTRL_REG); +} + +static void hisi_mn_pmu_enable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_INT_MASK_REG); + val &= ~BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_INT_MASK_REG); +} + +static void hisi_mn_pmu_disable_counter_int(struct hisi_pmu *mn_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(mn_pmu->base + HISI_MN_INT_MASK_REG); + val |= BIT(hwc->idx); + writel(val, mn_pmu->base + HISI_MN_INT_MASK_REG); +} + +static u32 hisi_mn_pmu_get_int_status(struct hisi_pmu *mn_pmu) +{ + return readl(mn_pmu->base + HISI_MN_INT_STATUS_REG); +} + +static void hisi_mn_pmu_clear_int_status(struct hisi_pmu *mn_pmu, int idx) +{ + writel(BIT(idx), mn_pmu->base + HISI_MN_INT_CLEAR_REG); +} + +static struct attribute *hisi_mn_pmu_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_format_group = { + .name = "format", + .attrs = hisi_mn_pmu_format_attr, +}; + +static struct attribute *hisi_mn_pmu_events_attr[] = { + HISI_PMU_EVENT_ATTR(req_eobarrier_num, 0x00), + HISI_PMU_EVENT_ATTR(req_ecbarrier_num, 0x01), + HISI_PMU_EVENT_ATTR(req_dvmop_num, 0x02), + HISI_PMU_EVENT_ATTR(req_dvmsync_num, 0x03), + HISI_PMU_EVENT_ATTR(req_retry_num, 0x04), + HISI_PMU_EVENT_ATTR(req_writenosnp_num, 0x05), + HISI_PMU_EVENT_ATTR(req_readnosnp_num, 0x06), + HISI_PMU_EVENT_ATTR(snp_dvm_num, 0x07), + HISI_PMU_EVENT_ATTR(snp_dvmsync_num, 0x08), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_num, 0x09), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_num, 0x0A), + HISI_PMU_EVENT_ATTR(mn_req_dvm_num, 0x0B), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_num, 0x0C), + HISI_PMU_EVENT_ATTR(pa_req_dvm_num, 0x0D), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_num, 0x0E), + HISI_PMU_EVENT_ATTR(snp_dvm_latency, 0x80), + HISI_PMU_EVENT_ATTR(snp_dvmsync_latency, 0x81), + HISI_PMU_EVENT_ATTR(l3t_req_dvm_latency, 0x82), + HISI_PMU_EVENT_ATTR(l3t_req_dvmsync_latency, 0x83), + HISI_PMU_EVENT_ATTR(mn_req_dvm_latency, 0x84), + HISI_PMU_EVENT_ATTR(mn_req_dvmsync_latency, 0x85), + HISI_PMU_EVENT_ATTR(pa_req_dvm_latency, 0x86), + HISI_PMU_EVENT_ATTR(pa_req_dvmsync_latency, 0x87), + NULL +}; + +static const struct attribute_group hisi_mn_pmu_events_group = { + .name = "events", + .attrs = hisi_mn_pmu_events_attr, +}; + +static const struct attribute_group *hisi_mn_pmu_attr_groups[] = { + &hisi_mn_pmu_format_group, + &hisi_mn_pmu_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_mn_ops = { + .write_evtype = hisi_mn_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_mn_pmu_start_counters, + .stop_counters = hisi_mn_pmu_stop_counters, + .enable_counter = hisi_mn_pmu_enable_counter, + .disable_counter = hisi_mn_pmu_disable_counter, + .enable_counter_int = hisi_mn_pmu_enable_counter_int, + .disable_counter_int = hisi_mn_pmu_disable_counter_int, + .write_counter = hisi_mn_pmu_write_counter, + .read_counter = hisi_mn_pmu_read_counter, + .get_int_status = hisi_mn_pmu_get_int_status, + .clear_int_status = hisi_mn_pmu_clear_int_status, +}; + +static int hisi_mn_pmu_dev_init(struct platform_device *pdev, + struct hisi_pmu *mn_pmu) +{ + int ret; + + hisi_uncore_pmu_init_topology(mn_pmu, &pdev->dev); + + if (mn_pmu->topo.scl_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN scl id\n"); + + if (mn_pmu->topo.index_id < 0) + return dev_err_probe(&pdev->dev, -EINVAL, + "Failed to read MN index id\n"); + + mn_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(mn_pmu->base)) + return dev_err_probe(&pdev->dev, PTR_ERR(mn_pmu->base), + "Failed to ioremap resource\n"); + + ret = hisi_uncore_pmu_init_irq(mn_pmu, pdev); + if (ret) + return ret; + + mn_pmu->on_cpu = -1; + mn_pmu->dev = &pdev->dev; + mn_pmu->ops = &hisi_uncore_mn_ops; + mn_pmu->pmu_events.attr_groups = hisi_mn_pmu_attr_groups; + mn_pmu->check_event = HISI_MN_EVTYPE_MASK; + mn_pmu->num_counters = HISI_MN_NR_COUNTERS; + mn_pmu->counter_bits = HISI_MN_COUNTER_BITS; + mn_pmu->identifier = readl(mn_pmu->base + HISI_MN_VERSION_REG); + return 0; +} + +static void hisi_mn_pmu_remove_cpuhp(void *hotplug_node) +{ + cpuhp_state_remove_instance_nocalls(hisi_mn_pmu_online, hotplug_node); +} + +static void hisi_mn_pmu_unregister(void *pmu) +{ + perf_pmu_unregister(pmu); +} + +static int hisi_mn_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *mn_pmu; + char *name; + int ret; + + mn_pmu = devm_kzalloc(&pdev->dev, sizeof(*mn_pmu), GFP_KERNEL); + if (!mn_pmu) + return -ENOMEM; + + platform_set_drvdata(pdev, mn_pmu); + + ret = hisi_mn_pmu_dev_init(pdev, mn_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_scl%d_mn%d", + mn_pmu->topo.scl_id, mn_pmu->topo.index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(hisi_mn_pmu_online, &mn_pmu->node); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to register cpu hotplug\n"); + + ret = devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_remove_cpuhp, &mn_pmu->node); + if (ret) + return ret; + + hisi_pmu_init(mn_pmu, THIS_MODULE); + + ret = perf_pmu_register(&mn_pmu->pmu, name, -1); + if (ret) + return dev_err_probe(mn_pmu->dev, ret, "Failed to register MN PMU\n"); + + return devm_add_action_or_reset(&pdev->dev, hisi_mn_pmu_unregister, &mn_pmu->pmu); +} + +static const struct acpi_device_id hisi_mn_pmu_acpi_match[] = { + { "HISI0222", }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_mn_pmu_acpi_match); + +static struct platform_driver hisi_mn_pmu_driver = { + .driver = { + .name = "hisi_mn_pmu", + .acpi_match_table = hisi_mn_pmu_acpi_match, + /* + * We have not worked out a safe bind/unbind process, + * Forcefully unbinding during sampling will lead to a + * kernel panic, so this is not supported yet. + */ + .suppress_bind_attrs = true, + }, + .probe = hisi_mn_pmu_probe, +}; + +static int __init hisi_mn_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/hisi/mn:online", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret < 0) { + pr_err("hisi_mn_pmu: Failed to setup MN PMU hotplug: %d\n", ret); + return ret; + } + hisi_mn_pmu_online = ret; + + ret = platform_driver_register(&hisi_mn_pmu_driver); + if (ret) + cpuhp_remove_multi_state(hisi_mn_pmu_online); + + return ret; +} +module_init(hisi_mn_pmu_module_init); + +static void __exit hisi_mn_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_mn_pmu_driver); + cpuhp_remove_multi_state(hisi_mn_pmu_online); +} +module_exit(hisi_mn_pmu_module_exit); + +MODULE_IMPORT_NS(HISI_PMU); +MODULE_DESCRIPTION("HiSilicon SoC MN uncore PMU driver"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Junhao He "); From a08ce982a44dce518fd15efe994cebc8166e7b15 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:42 +0800 Subject: [PATCH 44/95] drivers/perf: hisi: Export hisi_uncore_pmu_isr() commit 38cd3d496695927d10a0675517eabd88d96b75ad openEuler Currently Uncore PMU framework assume one PMU device only have one interrupt and will help register the interrupt handler. It cannot support a PMU with multiple interrupt resources. However the interrupt handling for the Uncore PMU maybe the same and could share the same interrupt handler. Export hisi_uncore_pmu_isr() to allow drivers register the irq handler by their own routine. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 3 ++- drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 0d826b9bd5c70..6d5a10d903b36 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -163,7 +163,7 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) clear_bit(idx, hisi_pmu->pmu_events.used_mask); } -static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) +irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) { struct hisi_pmu *hisi_pmu = data; struct perf_event *event; @@ -192,6 +192,7 @@ static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) return IRQ_HANDLED; } +EXPORT_SYMBOL_NS_GPL(hisi_uncore_pmu_isr, HISI_PMU); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 01dc5ef9c7668..000e4027b9694 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -167,6 +167,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, char *page); +irqreturn_t hisi_uncore_pmu_isr(int irq, void *data); int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, struct platform_device *pdev); void hisi_uncore_pmu_init_topology(struct hisi_pmu *hisi_pmu, struct device *dev); From 4507e8afd27643f977fbd8b09152e8aef3d9f899 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:43 +0800 Subject: [PATCH 45/95] drivers/perf: hisi: Simplify the probe process of each L3C PMU version commit e4003998b8e13554db94190a61cd2a312a59bc6c openEuler Version 1 and 2 of L3C PMU also use different HID. Make use of struct acpi_device_id::driver_data for version specific information rather than judge the version register. This will help to simplify the probe process and also a bit easier for extension. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 43 ++++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 4c74eb88e8f0e..298687d1e09ab 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -345,13 +345,6 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); } -static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { - { "HISI0213", }, - { "HISI0214", }, - {} -}; -MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); - static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { @@ -371,6 +364,10 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return -EINVAL; } + l3c_pmu->dev_info = device_get_match_data(&pdev->dev); + if (!l3c_pmu->dev_info) + return -ENODEV; + l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(l3c_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n"); @@ -457,6 +454,18 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { NULL }; +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { + .attr_groups = hisi_l3c_pmu_v1_attr_groups, + .counter_bits = 48, + .check_event = L3C_V1_NR_EVENTS, +}; + +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { + .attr_groups = hisi_l3c_pmu_v2_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -487,16 +496,9 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - if (l3c_pmu->identifier >= HISI_PMU_V2) { - l3c_pmu->counter_bits = 64; - l3c_pmu->check_event = L3C_V2_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups; - } else { - l3c_pmu->counter_bits = 48; - l3c_pmu->check_event = L3C_V1_NR_EVENTS; - l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups; - } - + l3c_pmu->pmu_events.attr_groups = l3c_pmu->dev_info->attr_groups; + l3c_pmu->counter_bits = l3c_pmu->dev_info->counter_bits; + l3c_pmu->check_event = l3c_pmu->dev_info->check_event; l3c_pmu->num_counters = L3C_NR_COUNTERS; l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; @@ -555,6 +557,13 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) return 0; } +static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { + { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, + { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); + static struct platform_driver hisi_l3c_pmu_driver = { .driver = { .name = "hisi_l3c_pmu", From 9ccbbee24c0f16269b95fc699f9b395a8344a088 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:44 +0800 Subject: [PATCH 46/95] drivers/perf: hisi: Extract the event filter check of L3C PMU commit edb401884a9442a4f04c581ff6a2e9bed272b9e4 openEuler L3C PMU has 4 filter options which are sharing perf_event_attr::config1. Driver will check config1 to see whether a certain event has a filter setting. It'll be incorrect if we make use of other bits in config1 for non-filter options. So check whether each filter options are set directly in a separate function instead. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 298687d1e09ab..760195be9e6ed 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -204,9 +204,15 @@ static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) } } +static bool hisi_l3c_pmu_have_filter(struct perf_event *event) +{ + return hisi_get_tt_req(event) || hisi_get_tt_core(event) || + hisi_get_datasrc_cfg(event) || hisi_get_datasrc_skt(event); +} + static void hisi_l3c_pmu_enable_filter(struct perf_event *event) { - if (event->attr.config1 != 0x0) { + if (hisi_l3c_pmu_have_filter(event)) { hisi_l3c_pmu_config_req_tracetag(event); hisi_l3c_pmu_config_core_tracetag(event); hisi_l3c_pmu_config_ds(event); @@ -215,7 +221,7 @@ static void hisi_l3c_pmu_enable_filter(struct perf_event *event) static void hisi_l3c_pmu_disable_filter(struct perf_event *event) { - if (event->attr.config1 != 0x0) { + if (hisi_l3c_pmu_have_filter(event)) { hisi_l3c_pmu_clear_ds(event); hisi_l3c_pmu_clear_core_tracetag(event); hisi_l3c_pmu_clear_req_tracetag(event); From 813833131b9e256656f1ea02ecc53a0ae20e8188 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:45 +0800 Subject: [PATCH 47/95] drivers/perf: hisi: Extend the field of tt_core commit f0c966398117236ff1bd926f3912aace1aeecbda openEuler Currently the tt_core's using config1's bit [7, 0] and can not be extended. For some platforms there's more the 8 CPUs sharing the L3 cache. So make tt_core use config2's bit [15, 0] and the remaining bits in config2 is reserved for extension. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 760195be9e6ed..915413e7f77d4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -55,10 +55,10 @@ #define L3C_V1_NR_EVENTS 0x59 #define L3C_V2_NR_EVENTS 0xFF -HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) { @@ -397,7 +397,7 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = { static struct attribute *hisi_l3c_pmu_v2_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), - HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"), HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"), From 94f70c4c7d5a14b35fab3bddcd38a659c279191a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:46 +0800 Subject: [PATCH 48/95] drivers/perf: hisi: Refactor the event configuration of L3C PMU commit 43c50937a4f7171351fd0c021d342505f6c55cba openEuler The event register is configured using hisi_pmu::base directly since only one address space is supported for L3C PMU. It'll be hard to extend if events configuration locates in different address space. In order to make preparation for such hardware, extract the event register configuration to separate function using hw_perf_event::event_base as each event's base address. Implement a private hisi_uncore_ops::get_event_idx() callback for initialize the event_base besides get the hardware index. No functional changes intended. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 128 ++++++++++++------- 1 file changed, 83 insertions(+), 45 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 915413e7f77d4..9ae7563ec98e5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -60,51 +60,86 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); -static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; + u32 num_counters = l3c_pmu->num_counters; + int idx; + + idx = find_first_zero_bit(used_mask, num_counters); + if (idx == num_counters) + return -EAGAIN; + + set_bit(idx, used_mask); + event->hw.event_base = (unsigned long)l3c_pmu->base; + return idx; +} + +static u32 hisi_l3c_pmu_event_readl(struct hw_perf_event *hwc, u32 reg) +{ + return readl((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writel(struct hw_perf_event *hwc, u32 reg, u32 val) +{ + writel(val, (void __iomem *)hwc->event_base + reg); +} + +static u64 hisi_l3c_pmu_event_readq(struct hw_perf_event *hwc, u32 reg) +{ + return readq((void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_event_writeq(struct hw_perf_event *hwc, u32 reg, u64 val) +{ + writeq(val, (void __iomem *)hwc->event_base + reg); +} + +static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Set request-type for tracetag */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= tt_req << L3C_TRACETAG_REQ_SHIFT; val |= L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Enable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 tt_req = hisi_get_tt_req(event); if (tt_req) { u32 val; /* Clear request-type */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); val &= ~L3C_TRACETAG_REQ_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Disable request-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_TRACETAG_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); } } static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; int idx = hwc->idx; @@ -120,15 +155,15 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) reg_idx = idx % 4; shift = 8 * reg_idx; - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_DATSRC_MASK << shift); val |= ds_cfg << shift; - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_config_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -138,15 +173,15 @@ static void hisi_l3c_pmu_config_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val |= L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_clear_ds(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 ds_cfg = hisi_get_datasrc_cfg(event); u32 ds_skt = hisi_get_datasrc_skt(event); @@ -156,51 +191,51 @@ static void hisi_l3c_pmu_clear_ds(struct perf_event *event) if (ds_skt) { u32 val; - val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_DATSRC_CTRL); val &= ~L3C_DATSRC_SKT_EN; - writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_DATSRC_CTRL, val); } } static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 core = hisi_get_tt_core(event); if (core) { u32 val; /* Config and enable core information */ - writel(core, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, core); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val |= L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Enable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) { - struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; u32 core = hisi_get_tt_core(event); if (core) { u32 val; /* Clear core information */ - writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL); - val = readl(l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_CORE_CTRL, L3C_COER_NONE); + val = hisi_l3c_pmu_event_readl(hwc, L3C_PERF_CTRL); val &= ~L3C_CORE_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_PERF_CTRL, val); /* Disable core-tracetag statistics */ - val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~L3C_TRACETAG_CORE_EN; - writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); } } @@ -239,18 +274,19 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + return hisi_l3c_pmu_event_readq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); + hisi_l3c_pmu_event_writeq(hwc, hisi_l3c_pmu_get_counter_offset(hwc->idx), val); } static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, u32 type) { + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; /* @@ -265,10 +301,10 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, shift = 8 * reg_idx; /* Write event code to L3C_EVENT_TYPEx Register */ - val = readl(l3c_pmu->base + reg); + val = hisi_l3c_pmu_event_readl(hwc, reg); val &= ~(L3C_EVTYPE_NONE << shift); val |= (type << shift); - writel(val, l3c_pmu->base + reg); + hisi_l3c_pmu_event_writel(hwc, reg, val); } static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) @@ -303,9 +339,9 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Enable counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, @@ -314,9 +350,9 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, u32 val; /* Clear counter index in L3C_EVENT_CTRL register */ - val = readl(l3c_pmu->base + L3C_EVENT_CTRL); + val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_EVENT_CTRL); + hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, @@ -324,10 +360,10 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ val &= ~(1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, @@ -335,10 +371,10 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, { u32 val; - val = readl(l3c_pmu->base + L3C_INT_MASK); + val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ val |= (1 << hwc->idx); - writel(val, l3c_pmu->base + L3C_INT_MASK); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) @@ -348,7 +384,9 @@ static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { - writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); + struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; + + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << idx); } static int hisi_l3c_pmu_init_data(struct platform_device *pdev, @@ -474,7 +512,7 @@ static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, - .get_event_idx = hisi_uncore_pmu_get_event_idx, + .get_event_idx = hisi_l3c_pmu_get_event_idx, .start_counters = hisi_l3c_pmu_start_counters, .stop_counters = hisi_l3c_pmu_stop_counters, .enable_counter = hisi_l3c_pmu_enable_counter, From aed26b725081484a360566745886422f7c6dd13b Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 16 Apr 2025 15:04:47 +0800 Subject: [PATCH 49/95] drivers/perf: hisi: Add support for L3C PMU v3 commit 89711c8962cccf3b96060623f8a85152dbe73238 openEuler This patch adds support for L3C PMU v3. The v3 L3C PMU supports an extended events space which can be controlled in a second address space with a separate overflow interrupt. The offset of the control/event registers keep the same. The extended events with original ones together cover the monitoring of all the L3C transactions. The extended events is specified with `ext[=1]` option for the driver to distinguish like: perf stat -e hisi_sccl0_l3c0_0/event=,ext/ Currently only event option using config bit [7, 0]. There's still lots of field unused. Make ext using config 16 and reserve bit [15, 8] for event option for future extension. The hw_perf_event::event_base is initialized to the base MMIO address of the event and will be used for later control, overflow handling and counts readout. We still make use of the Uncore PMU framework for handling the events and interrupt migration on CPU hotplug. The framework's cpuhp callback will handle the event migration and interrupt migration of orginial event, if PMU supports extended events then the interrupt of extended events is migrated to the same CPU choosed by the framework. A new HID of HISI0215 is used for this version of L3C PMU. Signed-off-by: Yicong Yang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 350 +++++++++++++++++-- 1 file changed, 328 insertions(+), 22 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 9ae7563ec98e5..f4afd8c7bf379 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -55,24 +55,73 @@ #define L3C_V1_NR_EVENTS 0x59 #define L3C_V2_NR_EVENTS 0xFF +HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); +struct hisi_l3c_pmu { + struct hisi_pmu l3c_pmu; + unsigned long feature; +#define L3C_PMU_FEAT_EXT 0x1 + + /* MMIO and IRQ resources for extension events */ + void __iomem *ext_base; + int ext_irq; +}; + +#define to_hisi_l3c_pmu(_l3c_pmu) \ + container_of(_l3c_pmu, struct hisi_l3c_pmu, l3c_pmu) + +/* + * The hardware counter idx used in counter enable/disable, + * interrupt enable/disable and status check, etc. + */ +#define L3C_HW_IDX(_idx) ((_idx) % L3C_NR_COUNTERS) + static int hisi_l3c_pmu_get_event_idx(struct perf_event *event) { struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; u32 num_counters = l3c_pmu->num_counters; + struct hisi_l3c_pmu *hisi_l3c_pmu; int idx; - idx = find_first_zero_bit(used_mask, num_counters); + hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + + /* + * For an L3C PMU that supports extension events, we can monitor + * maximum 2 * num_counters events. Thus use bit [0, num_counters - 1] + * for normal events and bit [num_counters, 2 * num_counters - 1] for + * extension events. The idx allocation will keep unchanged for normal + * events and we can also use the idx to distinguish whether it's an + * extension event or not. + * + * Since normal events and extension events locates on the different + * address space, save the base address to the event->hw.event_base. + */ + if (hisi_get_ext(event)) { + if (!(hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT)) + return -EOPNOTSUPP; + + event->hw.event_base = (unsigned long)hisi_l3c_pmu->ext_base; + idx = find_next_zero_bit(used_mask, num_counters, L3C_NR_COUNTERS); + } else { + event->hw.event_base = (unsigned long)l3c_pmu->base; + idx = find_next_zero_bit(used_mask, L3C_NR_COUNTERS, 0); + if (idx == L3C_NR_COUNTERS) + idx = num_counters; + } + if (idx == num_counters) return -EAGAIN; set_bit(idx, used_mask); - event->hw.event_base = (unsigned long)l3c_pmu->base; + + WARN_ON(hisi_get_ext(event) && idx < L3C_NR_COUNTERS); + WARN_ON(!hisi_get_ext(event) && idx >= L3C_NR_COUNTERS); + return idx; } @@ -142,7 +191,7 @@ static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) { struct hw_perf_event *hwc = &event->hw; u32 reg, reg_idx, shift, val; - int idx = hwc->idx; + int idx = L3C_HW_IDX(hwc->idx); /* * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1). @@ -268,7 +317,7 @@ static void hisi_l3c_pmu_disable_filter(struct perf_event *event) */ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) { - return (L3C_CNTR0_LOWER + (cntr_idx * 8)); + return (L3C_CNTR0_LOWER + (L3C_HW_IDX(cntr_idx) * 8)); } static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, @@ -289,6 +338,8 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; u32 reg, reg_idx, shift, val; + idx = L3C_HW_IDX(idx); + /* * Select the appropriate event select register(L3C_EVENT_TYPE0/1). * There are 2 event select registers for the 8 hardware counters. @@ -309,28 +360,48 @@ static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx, static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; u32 val; /* * Set perf_enable bit in L3C_PERF_CTRL register to start counting * for all enabled counters. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val |= L3C_PERF_CTRL_EN; - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (find_first_bit(used_mask, l3c_pmu->num_counters) < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + if (find_next_bit(used_mask, l3c_pmu->num_counters, L3C_NR_COUNTERS) != l3c_pmu->num_counters) { + val = readl(hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + val |= L3C_PERF_CTRL_EN; + writel(val, hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + unsigned long *used_mask = l3c_pmu->pmu_events.used_mask; u32 val; /* * Clear perf_enable bit in L3C_PERF_CTRL register to stop counting * for all enabled counters. */ - val = readl(l3c_pmu->base + L3C_PERF_CTRL); - val &= ~(L3C_PERF_CTRL_EN); - writel(val, l3c_pmu->base + L3C_PERF_CTRL); + if (find_first_bit(used_mask, l3c_pmu->num_counters) < L3C_NR_COUNTERS) { + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~(L3C_PERF_CTRL_EN); + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } + + if (find_next_bit(used_mask, l3c_pmu->num_counters, L3C_NR_COUNTERS) != l3c_pmu->num_counters) { + val = readl(hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + val &= ~(L3C_PERF_CTRL_EN); + writel(val, hisi_l3c_pmu->ext_base + L3C_PERF_CTRL); + } } static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, @@ -340,7 +411,7 @@ static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu, /* Enable counter index in L3C_EVENT_CTRL register */ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); - val |= (1 << hwc->idx); + val |= (1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } @@ -351,7 +422,7 @@ static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu, /* Clear counter index in L3C_EVENT_CTRL register */ val = hisi_l3c_pmu_event_readl(hwc, L3C_EVENT_CTRL); - val &= ~(1 << hwc->idx); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_EVENT_CTRL, val); } @@ -362,7 +433,7 @@ static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu, val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 0 to enable interrupt */ - val &= ~(1 << hwc->idx); + val &= ~(1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } @@ -373,20 +444,27 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, val = hisi_l3c_pmu_event_readl(hwc, L3C_INT_MASK); /* Write 1 to mask interrupt */ - val |= (1 << hwc->idx); + val |= (1 << L3C_HW_IDX(hwc->idx)); hisi_l3c_pmu_event_writel(hwc, L3C_INT_MASK, val); } static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) { - return readl(l3c_pmu->base + L3C_INT_STATUS); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + u32 status, status_ext = 0; + + status = readl(l3c_pmu->base + L3C_INT_STATUS); + if (hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT) + status_ext = readl(hisi_l3c_pmu->ext_base + L3C_INT_STATUS); + + return status | (status_ext << L3C_NR_COUNTERS); } static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { struct hw_perf_event *hwc = &l3c_pmu->pmu_events.hw_events[idx]->hw; - hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << idx); + hisi_l3c_pmu_event_writel(hwc, L3C_INT_CLEAR, 1 << L3C_HW_IDX(idx)); } static int hisi_l3c_pmu_init_data(struct platform_device *pdev, @@ -423,6 +501,41 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return 0; } +static int hisi_l3c_pmu_init_ext(struct hisi_pmu *l3c_pmu, struct platform_device *pdev) +{ + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + char *irqname; + int ret, irq; + + hisi_l3c_pmu->ext_base = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(hisi_l3c_pmu->ext_base)) + return PTR_ERR(hisi_l3c_pmu->ext_base); + + irq = platform_get_irq(pdev, 1); + /* + * We may don't need to handle -EPROBDEFER since we should have already + * handle it when probling irq[0]. + */ + if (irq < 0) + return irq; + + irqname = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s ext", dev_name(&pdev->dev)); + if (!irqname) + return -ENOMEM; + + ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, + irqname, l3c_pmu); + if (ret < 0) { + dev_err(&pdev->dev, + "Fail to request EXT IRQ: %d ret: %d.\n", irq, ret); + return ret; + } + + hisi_l3c_pmu->ext_irq = irq; + return 0; +} + static struct attribute *hisi_l3c_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, @@ -447,6 +560,19 @@ static const struct attribute_group hisi_l3c_pmu_v2_format_group = { .attrs = hisi_l3c_pmu_v2_format_attr, }; +static struct attribute *hisi_l3c_pmu_v3_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(ext, "config:16"), + HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v3_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_v3_format_attr, +}; + static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), @@ -482,6 +608,105 @@ static const struct attribute_group hisi_l3c_pmu_v2_events_group = { .attrs = hisi_l3c_pmu_v2_events_attr, }; +struct hisi_l3c_pmu_v3_event { + unsigned long event_id; + bool ext; +}; + +static ssize_t hisi_l3c_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct hisi_l3c_pmu_v3_event *event; + struct dev_ext_attribute *eattr; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + event = eattr->var; + + if (!event->ext) + return sysfs_emit(page, "event=0x%lx\n", event->event_id); + else + return sysfs_emit(page, "event=0x%lx,ext=1\n", event->event_id); +} + +#define HISI_L3C_PMU_EVENT_ATTR(_name, _event, _ext) \ +static struct hisi_l3c_pmu_v3_event hisi_l3c_##_name = { _event, _ext }; \ +static struct dev_ext_attribute hisi_l3c_##_name##_attr = \ + { __ATTR(_name, 0444, hisi_l3c_pmu_event_show, NULL), (void *) &hisi_l3c_##_name } + +HISI_L3C_PMU_EVENT_ATTR(rd_cpipe, 0x00, true); +HISI_L3C_PMU_EVENT_ATTR(rd_hit_cpipe, 0x01, true); +HISI_L3C_PMU_EVENT_ATTR(wr_cpipe, 0x02, true); +HISI_L3C_PMU_EVENT_ATTR(wr_hit_cpipe, 0x03, true); +HISI_L3C_PMU_EVENT_ATTR(io_rd_cpipe, 0x04, true); +HISI_L3C_PMU_EVENT_ATTR(io_rd_hit_cpipe, 0x05, true); +HISI_L3C_PMU_EVENT_ATTR(io_wr_cpipe, 0x06, true); +HISI_L3C_PMU_EVENT_ATTR(io_wr_hit_cpipe, 0x07, true); +HISI_L3C_PMU_EVENT_ATTR(victim_num, 0x0c, true); +HISI_L3C_PMU_EVENT_ATTR(rd_spipe, 0x18, false); +HISI_L3C_PMU_EVENT_ATTR(rd_hit_spipe, 0x19, false); +HISI_L3C_PMU_EVENT_ATTR(wr_spipe, 0x1a, false); +HISI_L3C_PMU_EVENT_ATTR(wr_hit_spipe, 0x1b, false); +HISI_L3C_PMU_EVENT_ATTR(io_rd_spipe, 0x1c, false); +HISI_L3C_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d, false); +HISI_L3C_PMU_EVENT_ATTR(io_wr_spipe, 0x1e, false); +HISI_L3C_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f, false); +HISI_L3C_PMU_EVENT_ATTR(cycles, 0x7f, false); +HISI_L3C_PMU_EVENT_ATTR(l3t_comp_sum, 0x80, false); +HISI_L3C_PMU_EVENT_ATTR(l3t_rdnotram, 0x83, true); +HISI_L3C_PMU_EVENT_ATTR(l3c_ref, 0xbc, false); +HISI_L3C_PMU_EVENT_ATTR(l3c2ring, 0xbd, true); + +static struct attribute *hisi_l3c_pmu_v3_events_attr[] = { + &hisi_l3c_rd_cpipe_attr.attr.attr, + &hisi_l3c_rd_hit_cpipe_attr.attr.attr, + &hisi_l3c_wr_cpipe_attr.attr.attr, + &hisi_l3c_wr_hit_cpipe_attr.attr.attr, + &hisi_l3c_io_rd_cpipe_attr.attr.attr, + &hisi_l3c_io_rd_hit_cpipe_attr.attr.attr, + &hisi_l3c_io_wr_cpipe_attr.attr.attr, + &hisi_l3c_io_wr_hit_cpipe_attr.attr.attr, + &hisi_l3c_victim_num_attr.attr.attr, + &hisi_l3c_rd_spipe_attr.attr.attr, + &hisi_l3c_rd_hit_spipe_attr.attr.attr, + &hisi_l3c_wr_spipe_attr.attr.attr, + &hisi_l3c_wr_hit_spipe_attr.attr.attr, + &hisi_l3c_io_rd_spipe_attr.attr.attr, + &hisi_l3c_io_rd_hit_spipe_attr.attr.attr, + &hisi_l3c_io_wr_spipe_attr.attr.attr, + &hisi_l3c_io_wr_hit_spipe_attr.attr.attr, + &hisi_l3c_cycles_attr.attr.attr, + &hisi_l3c_l3t_comp_sum_attr.attr.attr, + &hisi_l3c_l3t_rdnotram_attr.attr.attr, + &hisi_l3c_l3c_ref_attr.attr.attr, + &hisi_l3c_l3c2ring_attr.attr.attr, + NULL +}; + +static umode_t hisi_l3c_pmu_v3_events_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct hisi_pmu *l3c_pmu = to_hisi_pmu(pmu); + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + struct hisi_l3c_pmu_v3_event *event; + struct dev_ext_attribute *ext_attr; + + ext_attr = container_of(attr, struct dev_ext_attribute, attr.attr); + event = ext_attr->var; + + if (!event->ext || (hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT)) + return attr->mode; + + return 0; +} + +static const struct attribute_group hisi_l3c_pmu_v3_events_group = { + .name = "events", + .is_visible = hisi_l3c_pmu_v3_events_visible, + .attrs = hisi_l3c_pmu_v3_events_attr, +}; + static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { &hisi_l3c_pmu_v1_format_group, &hisi_l3c_pmu_v1_events_group, @@ -498,6 +723,14 @@ static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { NULL }; +static const struct attribute_group *hisi_l3c_pmu_v3_attr_groups[] = { + &hisi_l3c_pmu_v3_format_group, + &hisi_l3c_pmu_v3_events_group, + &hisi_pmu_cpumask_attr_group, + &hisi_pmu_identifier_group, + NULL +}; + static const struct hisi_pmu_dev_info hisi_l3c_pmu_v1 = { .attr_groups = hisi_l3c_pmu_v1_attr_groups, .counter_bits = 48, @@ -510,6 +743,13 @@ static const struct hisi_pmu_dev_info hisi_l3c_pmu_v2 = { .check_event = L3C_V2_NR_EVENTS, }; +static const struct hisi_pmu_dev_info hisi_l3c_pmu_v3 = { + .attr_groups = hisi_l3c_pmu_v3_attr_groups, + .counter_bits = 64, + .check_event = L3C_V2_NR_EVENTS, + .private = (void *) L3C_PMU_FEAT_EXT, +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_l3c_pmu_get_event_idx, @@ -530,6 +770,7 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { + struct hisi_l3c_pmu *hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); int ret; ret = hisi_l3c_pmu_init_data(pdev, l3c_pmu); @@ -548,27 +789,50 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; + if ((unsigned long)l3c_pmu->dev_info->private & L3C_PMU_FEAT_EXT) { + ret = hisi_l3c_pmu_init_ext(l3c_pmu, pdev); + if (ret) { + dev_warn(&pdev->dev, "ext event is unavailable, ret = %d\n", ret); + } else { + /* + * The extension events have their own counters with the + * same number of the normal events counters. So we can + * have at maximum num_counters * 2 events monitored. + */ + l3c_pmu->num_counters <<= 1; + + hisi_l3c_pmu->feature |= L3C_PMU_FEAT_EXT; + } + } + return 0; } static int hisi_l3c_pmu_probe(struct platform_device *pdev) { + struct hisi_l3c_pmu *hisi_l3c_pmu; struct hisi_pmu *l3c_pmu; char *name; int ret; - l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL); - if (!l3c_pmu) + hisi_l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*hisi_l3c_pmu), GFP_KERNEL); + if (!hisi_l3c_pmu) return -ENOMEM; + l3c_pmu = &hisi_l3c_pmu->l3c_pmu; platform_set_drvdata(pdev, l3c_pmu); ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu); if (ret) return ret; - name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", - l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); + if (l3c_pmu->topo.sub_id >= 0) + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d_%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id, + l3c_pmu->topo.sub_id); + else + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%d_l3c%d", + l3c_pmu->topo.sccl_id, l3c_pmu->topo.ccl_id); if (!name) return -ENOMEM; @@ -604,6 +868,7 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev) static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { { "HISI0213", (kernel_ulong_t)&hisi_l3c_pmu_v1 }, { "HISI0214", (kernel_ulong_t)&hisi_l3c_pmu_v2 }, + { "HISI0215", (kernel_ulong_t)&hisi_l3c_pmu_v3 }, {} }; MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); @@ -618,14 +883,55 @@ static struct platform_driver hisi_l3c_pmu_driver = { .remove = hisi_l3c_pmu_remove, }; +static int hisi_l3c_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu; + int ret; + + hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + + /* + * Invoking the framework's online function for doing the core logic + * of CPU, interrupt and perf context migrating. Then return directly + * if we don't support L3C_PMU_FEAT_EXT. Otherwise migrate the ext_irq + * using the migrated CPU. + * + * Same logic for CPU offline. + */ + ret = hisi_uncore_pmu_online_cpu(cpu, node); + if (!(hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT) || + l3c_pmu->on_cpu >= nr_cpu_ids) + return ret; + + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq, cpumask_of(l3c_pmu->on_cpu))); + return ret; +} + +static int hisi_l3c_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct hisi_pmu *l3c_pmu = hlist_entry_safe(node, struct hisi_pmu, node); + struct hisi_l3c_pmu *hisi_l3c_pmu; + int ret; + + hisi_l3c_pmu = to_hisi_l3c_pmu(l3c_pmu); + ret = hisi_uncore_pmu_offline_cpu(cpu, node); + if (!(hisi_l3c_pmu->feature & L3C_PMU_FEAT_EXT) || + l3c_pmu->on_cpu >= nr_cpu_ids) + return ret; + + WARN_ON(irq_set_affinity(hisi_l3c_pmu->ext_irq, cpumask_of(l3c_pmu->on_cpu))); + return ret; +} + static int __init hisi_l3c_pmu_module_init(void) { int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, "AP_PERF_ARM_HISI_L3_ONLINE", - hisi_uncore_pmu_online_cpu, - hisi_uncore_pmu_offline_cpu); + hisi_l3c_pmu_online_cpu, + hisi_l3c_pmu_offline_cpu); if (ret) { pr_err("L3C PMU: Error setup hotplug, ret = %d\n", ret); return ret; From 9adbc0cf85678ff8f5ab91368658a5b6dcf728e9 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 16 Jun 2025 18:28:31 +0800 Subject: [PATCH 50/95] drivers/perf: hisi: Clarifying event names and fix event ID for pa_pmu commit 9b279935679b7ca8aac0250468c5b767a8456cd7 openEuler The rx_req and tx_req events were only counting the number of flit package requests for link0. Hisilicon PA supports 4 links (0-3), and the original event names could be misinterpreted as counting the total flit packages across all links. Additionally, the tx_req event ID was incorrect. Fixes this by: 1) Adding the "link0" suffix and "pa" prefix to the event names to clearly indicate that they are specific to link0 and belong to the PA PMU. 2) Updating the tx_req event ID to 0x60 to reflect the correct identifier. These changes ensure accurate event naming and counting for link0 and flit packages. Fixes: a0ab25cd82ee ("drivers/perf: hisi: Add support for HiSilicon PA PMU driver") Signed-off-by: Junhao He Signed-off-by: Qizhi Zhang Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index bf79ef2885647..e09822d67920e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -316,8 +316,8 @@ static const struct attribute_group hisi_pa_pmu_v2_format_group = { }; static struct attribute *hisi_pa_pmu_v2_events_attr[] = { - HISI_PMU_EVENT_ATTR(rx_req, 0x40), - HISI_PMU_EVENT_ATTR(tx_req, 0x5c), + HISI_PMU_EVENT_ATTR(pa_rx_req_link0, 0x40), + HISI_PMU_EVENT_ATTR(pa_tx_req_link0, 0x60), HISI_PMU_EVENT_ATTR(cycle, 0x78), NULL }; From e9e199fde4372168b6093f0f2ea48509c14457cb Mon Sep 17 00:00:00 2001 From: Yushan Wang Date: Mon, 16 Jun 2025 18:47:47 +0800 Subject: [PATCH 51/95] perf: Remove unstable events for uncore L3C PMU commit f1987fadd0f0f632fd24bd5eab355036f281bdc8 openEuler Currently when taking event 0x80, 0x83, the value of PMU counters will be unstable if the value overflows and cause IRQs. Remove these events in sysfs then. Fixes: 89711c8962cc ("drivers/perf: hisi: Add support for L3C PMU v3") Signed-off-by: Yushan Wang Signed-off-by: Qizhi Zhang Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index f4afd8c7bf379..6950ccfa0a33e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -651,8 +651,6 @@ HISI_L3C_PMU_EVENT_ATTR(io_rd_hit_spipe, 0x1d, false); HISI_L3C_PMU_EVENT_ATTR(io_wr_spipe, 0x1e, false); HISI_L3C_PMU_EVENT_ATTR(io_wr_hit_spipe, 0x1f, false); HISI_L3C_PMU_EVENT_ATTR(cycles, 0x7f, false); -HISI_L3C_PMU_EVENT_ATTR(l3t_comp_sum, 0x80, false); -HISI_L3C_PMU_EVENT_ATTR(l3t_rdnotram, 0x83, true); HISI_L3C_PMU_EVENT_ATTR(l3c_ref, 0xbc, false); HISI_L3C_PMU_EVENT_ATTR(l3c2ring, 0xbd, true); @@ -675,8 +673,6 @@ static struct attribute *hisi_l3c_pmu_v3_events_attr[] = { &hisi_l3c_io_wr_spipe_attr.attr.attr, &hisi_l3c_io_wr_hit_spipe_attr.attr.attr, &hisi_l3c_cycles_attr.attr.attr, - &hisi_l3c_l3t_comp_sum_attr.attr.attr, - &hisi_l3c_l3t_rdnotram_attr.attr.attr, &hisi_l3c_l3c_ref_attr.attr.attr, &hisi_l3c_l3c2ring_attr.attr.attr, NULL From 19d4e9ccd7d3648248457a9f4a17a0f1840986f9 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 16 Jun 2025 18:47:51 +0800 Subject: [PATCH 52/95] drivers/perf: hisi: Add cacheable option for L3C PMU commit ce7f8102e76b61cab1d00ec4eaa261345247d13d openEuler L3C PMU v3 implement additional control for tracetag which may influence the filter of certain events. Add below options: - cacheable: whether to filter the cacheable or noncacheable operation Fixes: 89711c8962cc ("drivers/perf: hisi: Add support for L3C PMU v3") Signed-off-by: Yicong Yang Signed-off-by: Qizhi Zhang Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 6950ccfa0a33e..00ed571a30306 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -46,6 +46,7 @@ #define L3C_TRACETAG_MARK_EN BIT(0) #define L3C_TRACETAG_REQ_EN (L3C_TRACETAG_MARK_EN | BIT(2)) #define L3C_TRACETAG_CORE_EN (L3C_TRACETAG_MARK_EN | BIT(3)) +#define L3C_TRACETAG_CACHEABLE_EN BIT(12) #define L3C_CORE_EN BIT(20) #define L3C_COER_NONE 0x0 #define L3C_DATSRC_MASK 0xFF @@ -59,6 +60,7 @@ HISI_PMU_EVENT_ATTR_EXTRACTOR(ext, config, 16, 16); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_cacheable, config1, 17, 17); HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config2, 15, 0); struct hisi_l3c_pmu { @@ -157,6 +159,10 @@ static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val |= tt_req << L3C_TRACETAG_REQ_SHIFT; val |= L3C_TRACETAG_REQ_EN; + + if (hisi_get_tt_cacheable(event)) + val |= L3C_TRACETAG_CACHEABLE_EN; + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Enable request-tracetag statistics */ @@ -178,6 +184,10 @@ static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) val = hisi_l3c_pmu_event_readl(hwc, L3C_TRACETAG_CTRL); val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); val &= ~L3C_TRACETAG_REQ_EN; + + if (hisi_get_tt_cacheable(event)) + val &= ~L3C_TRACETAG_CACHEABLE_EN; + hisi_l3c_pmu_event_writel(hwc, L3C_TRACETAG_CTRL, val); /* Disable request-tracetag statistics */ @@ -564,6 +574,7 @@ static struct attribute *hisi_l3c_pmu_v3_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), HISI_PMU_FORMAT_ATTR(ext, "config:16"), HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(tt_cacheable, "config1:17"), HISI_PMU_FORMAT_ATTR(tt_core, "config2:0-15"), NULL }; From 2b84ce41173b04044f1d7344ce780062e4948785 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Mon, 16 Jun 2025 21:25:59 +0800 Subject: [PATCH 53/95] drivers/perf: hisi: Fixes the incorrect bitmask limit for the CPA event sysfs interface commit 2548d13070513efd7a79120d69f54ff19798ef17 openEuler However, when users input events in the range of 0x1ff to 0xffff, the driver does not return an error. Therefore, the bitmask limit should be adjusted to bitmask(0-7) to ensure proper functionality. before the patch: [root@localhost ~]# perf stat -e hisi_sicl0_cpa0/event=0x1FF/ sleep 1 Performance counter stats for 'system wide': hisi_sicl0_cpa0/event=0x1FF/ after the patch: [root@localhost ~]# perf stat -e hisi_sicl0_cpa0/event=0x1FF/ sleep 1 event syntax error: '..cpa0/event=0x1FF/' \___ value too big for format, maximum is 255 Fixes: 6b79738b6ed9 ("drivers/perf: hisi: Add Support for CPA PMU") Signed-off-by: Junhao He Signed-off-by: Qizhi Zhang Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c index 006afb4d12085..cfc3ed2a79af5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_cpa_pmu.c @@ -202,7 +202,7 @@ static int hisi_cpa_pmu_init_data(struct platform_device *pdev, } static struct attribute *hisi_cpa_pmu_format_attr[] = { - HISI_PMU_FORMAT_ATTR(event, "config:0-15"), + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL }; From c7bbce14577d659d7a852ac54acfd1854c1c8817 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 17 Jun 2025 17:17:31 +0800 Subject: [PATCH 54/95] drivers/perf: hisi: Add events and rename event "cycle" for pa_pmu commit 76da7c37267a2ef04171f0659ddd836218d6d3d6 openEuler 1) Add events to count the number of request flit packages for links 1-3 and data flit packages for all links (0-3). 2) rename event "cycle" to pa_cycles to clearly indicate that it belongs to the PA PMU Fixes: a0ab25cd82ee ("drivers/perf: hisi: Add support for HiSilicon PA PMU driver") Signed-off-by: Junhao He Signed-off-by: Qizhi Zhang Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index e09822d67920e..294746e11e395 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -317,8 +317,22 @@ static const struct attribute_group hisi_pa_pmu_v2_format_group = { static struct attribute *hisi_pa_pmu_v2_events_attr[] = { HISI_PMU_EVENT_ATTR(pa_rx_req_link0, 0x40), + HISI_PMU_EVENT_ATTR(pa_rx_req_link1, 0x41), + HISI_PMU_EVENT_ATTR(pa_rx_req_link2, 0x42), + HISI_PMU_EVENT_ATTR(pa_rx_req_link3, 0x43), + HISI_PMU_EVENT_ATTR(pa_rx_data_link0, 0x44), + HISI_PMU_EVENT_ATTR(pa_rx_data_link1, 0x45), + HISI_PMU_EVENT_ATTR(pa_rx_data_link2, 0x46), + HISI_PMU_EVENT_ATTR(pa_rx_data_link3, 0x47), HISI_PMU_EVENT_ATTR(pa_tx_req_link0, 0x60), - HISI_PMU_EVENT_ATTR(cycle, 0x78), + HISI_PMU_EVENT_ATTR(pa_tx_req_link1, 0x61), + HISI_PMU_EVENT_ATTR(pa_tx_req_link2, 0x62), + HISI_PMU_EVENT_ATTR(pa_tx_req_link3, 0x63), + HISI_PMU_EVENT_ATTR(pa_tx_data_link0, 0x64), + HISI_PMU_EVENT_ATTR(pa_tx_data_link1, 0x65), + HISI_PMU_EVENT_ATTR(pa_tx_data_link2, 0x66), + HISI_PMU_EVENT_ATTR(pa_tx_data_link3, 0x67), + HISI_PMU_EVENT_ATTR(pa_cycles, 0x78), NULL }; From b43819cbae2f8213c9fd8cacb6edde316ed77f12 Mon Sep 17 00:00:00 2001 From: Miguel Luis Date: Fri, 20 Sep 2024 15:52:49 +0800 Subject: [PATCH 55/95] arm64: Add missing _EL12 encodings [Upstream commit d5cb781b77416a9ed4129960712932dbcf8a9f30] Some _EL12 encodings are missing. Add them. Reviewed-by: Eric Auger Signed-off-by: Miguel Luis Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231023095444.1587322-2-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/sysreg.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 810e5b9e7a2ce..7dc930bdfedea 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -569,19 +569,30 @@ #define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) /* VHE encodings for architectural EL0/1 system registers */ +#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) +#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1) +#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6) #define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) #define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) #define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3) #define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) #define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) #define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) #define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) #define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) #define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0) #define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) #define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) #define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7) #define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) #define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) #define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) From fd16ce1af6c71ad7b81f095a15ffbb71e0c02108 Mon Sep 17 00:00:00 2001 From: Miguel Luis Date: Fri, 20 Sep 2024 15:57:45 +0800 Subject: [PATCH 56/95] arm64: Add missing _EL2 encodings [Upstream commit 41f6c9344713e6b895a77f2bdd596cf56d4cc197] Some _EL2 encodings are missing. Add them. Signed-off-by: Miguel Luis Reviewed-by: Eric Auger [maz: dropped secure encodings] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231023095444.1587322-3-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 7dc930bdfedea..cad681476a33b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -272,6 +272,8 @@ /* ETM */ #define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) +#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -486,6 +488,7 @@ #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1) +#define SYS_SCTLR2_EL2 sys_reg(3, 4, 1, 0, 3) #define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0) #define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1) #define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2) @@ -499,6 +502,7 @@ #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) +#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) @@ -564,9 +568,23 @@ #define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1) #define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2) +#define SYS_SCXTNUM_EL2 sys_reg(3, 4, 13, 0, 7) + +#define __AMEV_op2(m) (m & 0x7) +#define __AMEV_CRm(n, m) (n | ((m & 0x8) >> 3)) +#define __SYS__AMEVCNTVOFF0n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF0n_EL2(m) __SYS__AMEVCNTVOFF0n_EL2(m) +#define __SYS__AMEVCNTVOFF1n_EL2(m) sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m)) +#define SYS_AMEVCNTVOFF1n_EL2(m) __SYS__AMEVCNTVOFF1n_EL2(m) #define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3) #define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0) +#define SYS_CNTHP_TVAL_EL2 sys_reg(3, 4, 14, 2, 0) +#define SYS_CNTHP_CTL_EL2 sys_reg(3, 4, 14, 2, 1) +#define SYS_CNTHP_CVAL_EL2 sys_reg(3, 4, 14, 2, 2) +#define SYS_CNTHV_TVAL_EL2 sys_reg(3, 4, 14, 3, 0) +#define SYS_CNTHV_CTL_EL2 sys_reg(3, 4, 14, 3, 1) +#define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) /* VHE encodings for architectural EL0/1 system registers */ #define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) From 350a46ed961ce62ece0ce221d78ba82f93b7b283 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Feb 2024 14:54:17 +0000 Subject: [PATCH 57/95] KVM: arm64: Fix TRFCR_EL1/PMSCR_EL1 access in hVHE mode [Upstream commit 9a3bfb27ef65ad41d994765c031ca18217afb058] When running in hVHE mode, EL1 accesses are performed with the EL12 accessor, as we run with HCR_EL2.E2H=1. Unfortunately, both PMSCR_EL1 and TRFCR_EL1 are used with the EL1 accessor, meaning that we actually affect the EL2 state. Duh. Switch to using the {read,write}_sysreg_el1() helpers that will do the right thing in all circumstances. Note that the 'Fixes:' tag doesn't represent the point where the bug was introduced (there is no such point), but the first practical point where the hVHE feature is usable. Cc: James Clark Cc: Anshuman Khandual Fixes: 38cba55008e5 ("KVM: arm64: Force HCR_E2H in guest context when ARM64_KVM_HVHE is set") Signed-off-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240229145417.3606279-1-maz@kernel.org Signed-off-by: Oliver Upton Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 4558c02eb352d..7746ea507b6f0 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -31,8 +31,8 @@ static void __debug_save_spe(u64 *pmscr_el1) return; /* Yes; save the control register and disable data generation */ - *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1); - write_sysreg_s(0, SYS_PMSCR_EL1); + *pmscr_el1 = read_sysreg_el1(SYS_PMSCR); + write_sysreg_el1(0, SYS_PMSCR); isb(); /* Now drain all buffered data to memory */ @@ -48,7 +48,7 @@ static void __debug_restore_spe(u64 pmscr_el1) isb(); /* Re-enable data generation */ - write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); + write_sysreg_el1(pmscr_el1, SYS_PMSCR); } static void __debug_save_trace(u64 *trfcr_el1) @@ -63,8 +63,8 @@ static void __debug_save_trace(u64 *trfcr_el1) * Since access to TRFCR_EL1 is trapped, the guest can't * modify the filtering set by the host. */ - *trfcr_el1 = read_sysreg_s(SYS_TRFCR_EL1); - write_sysreg_s(0, SYS_TRFCR_EL1); + *trfcr_el1 = read_sysreg_el1(SYS_TRFCR); + write_sysreg_el1(0, SYS_TRFCR); isb(); /* Drain the trace buffer to memory */ tsb_csync(); @@ -76,7 +76,7 @@ static void __debug_restore_trace(u64 trfcr_el1) return; /* Restore trace filter controls */ - write_sysreg_s(trfcr_el1, SYS_TRFCR_EL1); + write_sysreg_el1(trfcr_el1, SYS_TRFCR); } void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) From 865b459c686968a093136b60a845843c1017b8d3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 11:16:56 +0000 Subject: [PATCH 58/95] KVM: arm64: Add accessor for per-CPU state [Upstream commit 87f842c6c6543cf0dd66161fdf4b62cec804479b] In order to facilitate the introduction of new per-CPU state, add a new host_data_ptr() helped that hides some of the per-CPU verbosity, and make it easier to move that state around in the future. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 37 +++++++++++++++++++++++ arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 +-- arch/arm64/kvm/hyp/include/hyp/switch.h | 8 ++--- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 2 +- arch/arm64/kvm/hyp/nvhe/setup.c | 3 +- arch/arm64/kvm/hyp/nvhe/switch.c | 4 +-- arch/arm64/kvm/hyp/vhe/switch.c | 4 +-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 +-- arch/arm64/kvm/pmu.c | 2 +- 10 files changed, 53 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index df73e452d2cc0..05cb02aae21ca 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -429,6 +429,17 @@ struct kvm_cpu_context { struct kvm_vcpu *__hyp_running_vcpu; }; +/* + * This structure is instantiated on a per-CPU basis, and contains + * data that is: + * + * - tied to a single physical CPU, and + * - either have a lifetime that does not extend past vcpu_put() + * - or is an invariant for the lifetime of the system + * + * Use host_data_ptr(field) as a way to access a pointer to such a + * field. + */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; }; @@ -1037,6 +1048,32 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); +/* + * How we access per-CPU host data depends on the where we access it from, + * and the mode we're in: + * + * - VHE and nVHE hypervisor bits use their locally defined instance + * + * - the rest of the kernel use either the VHE or nVHE one, depending on + * the mode we're running in. + * + * Unless we're in protected mode, fully deprivileged, and the nVHE + * per-CPU stuff is exclusively accessible to the protected EL2 code. + * In this case, the EL1 code uses the *VHE* data as its private state + * (which makes sense in a way as there shouldn't be any shared state + * between the host and the hypervisor). + * + * Yes, this is all totally trivial. Shoot me now. + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) +#define host_data_ptr(f) (&this_cpu_ptr(&kvm_host_data)->f) +#else +#define host_data_ptr(f) \ + (static_branch_unlikely(&kvm_protected_mode_initialized) ? \ + &this_cpu_ptr(&kvm_host_data)->f : \ + &this_cpu_ptr_hyp_sym(kvm_host_data)->f) +#endif + static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe4314af8eecc..7d761a20e8c04 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1832,7 +1832,7 @@ static void cpu_set_hyp_vector(void) static void cpu_hyp_init_context(void) { - kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); + kvm_init_host_cpu_context(host_data_ptr(host_ctxt)); if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 961bbef104a63..eec0f8ccda562 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -135,7 +135,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); @@ -154,7 +154,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) return; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; host_dbg = &vcpu->arch.host_debug_state.regs; guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 84c2d5873d2c0..cf6f05218484c 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -81,7 +81,7 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp; u64 r_val, w_val; @@ -156,7 +156,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt); if (!cpus_have_final_cap(ARM64_HAS_FGT)) return; @@ -188,7 +188,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, pmselr_el0); - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); @@ -228,7 +228,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) if (kvm_arm_support_pmu_v3()) { struct kvm_cpu_context *hctxt; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab94d2..dfe8fe0f7eaff 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 0d5e0a89ddce5..c6edba9c66b68 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -257,8 +257,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 27ad6933bec00..19d4b0dc82c90 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -302,7 +302,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -405,7 +405,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 31304da0d6173..52750ab7570d9 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -206,7 +206,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt; u64 exit_code; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -301,7 +301,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; __deactivate_traps(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index b35a178e7e0db..5ca8f74853180 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -67,7 +67,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); __sysreg_save_user_state(host_ctxt); /* @@ -112,7 +112,7 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu) struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); deactivate_traps_vhe_put(vcpu); __sysreg_save_el1_state(guest_ctxt); diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index a243934c5568b..329819806096b 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val) if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) return false; - hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + hctxt = host_data_ptr(host_ctxt); ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; return true; } From a2d7853e2748dfba7ad70968bcf7453497677ec6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Feb 2024 15:58:46 +0000 Subject: [PATCH 59/95] KVM: arm64: Exclude host_debug_data from vcpu_arch [Upstream commit 6db55734ec4008da39e10d2fffa913fd9751ccaa] Keeping host_debug_state on a per-vcpu basis is completely pointless. The lifetime of this data is only that of the inner run-loop, which means it is never accessed outside of the core EL2 code. Move the structure into kvm_host_data, and save over 500 bytes per vcpu. Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 31 +++++++++++++---------- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 4 +-- arch/arm64/kvm/hyp/nvhe/debug-sr.c | 8 +++--- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 05cb02aae21ca..9f84f0b52a005 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -442,6 +442,19 @@ struct kvm_cpu_context { */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + + /* + * host_debug_state contains the host registers which are + * saved and restored during world switches. + */ + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + /* Self-hosted trace */ + u64 trfcr_el1; + } host_debug_state; }; struct kvm_host_psci_config { @@ -534,11 +547,10 @@ struct kvm_vcpu_arch { * We maintain more than a single set of debug registers to support * debugging the guest from the host and to maintain separate host and * guest state during world switches. vcpu_debug_state are the debug - * registers of the vcpu as the guest sees them. host_debug_state are - * the host registers which are saved and restored during - * world switches. external_debug_state contains the debug - * values we want to debug the guest. This is set via the - * KVM_SET_GUEST_DEBUG ioctl. + * registers of the vcpu as the guest sees them. + * + * external_debug_state contains the debug values we want to debug the + * guest. This is set via the KVM_SET_GUEST_DEBUG ioctl. * * debug_ptr points to the set of debug registers that should be loaded * onto the hardware when running the guest. @@ -549,15 +561,6 @@ struct kvm_vcpu_arch { struct task_struct *parent_task; - struct { - /* {Break,watch}point registers */ - struct kvm_guest_debug_arch regs; - /* Statistical profiling extension */ - u64 pmscr_el1; - /* Self-hosted trace */ - u64 trfcr_el1; - } host_debug_state; - /* VGIC state */ struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index eec0f8ccda562..d00093699aaf1 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -137,7 +137,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(host_dbg, host_ctxt); @@ -156,7 +156,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - host_dbg = &vcpu->arch.host_debug_state.regs; + host_dbg = host_data_ptr(host_debug_state.regs); guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); __debug_save_state(guest_dbg, guest_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7746ea507b6f0..53efda0235cfe 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) From 72b8f60e01d29211b961dabe902a6bbd6fc3e066 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Feb 2024 08:27:54 +0000 Subject: [PATCH 60/95] KVM: arm64: Exclude mdcr_el2_host from kvm_vcpu_arch [Upstream commit 4bacd723705a6b6c8386daf3d5148aca66135f3c] As for the rest of the host debug state, the host copy of mdcr_el2 has little to do in the vcpu, and is better placed in the host_data structure. Reviewed-by : Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 5 ++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9f84f0b52a005..2c60d75746937 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -454,6 +454,8 @@ struct kvm_host_data { u64 pmscr_el1; /* Self-hosted trace */ u64 trfcr_el1; + /* Values of trap registers for the host before guest entry. */ + u64 mdcr_el2; } host_debug_state; }; @@ -512,9 +514,6 @@ struct kvm_vcpu_arch { u64 hcr_el2; u64 mdcr_el2; - /* Values of trap registers for the host before guest entry. */ - u64 mdcr_el2_host; - /* Exception Information */ struct kvm_vcpu_fault_info fault; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index cf6f05218484c..437de0b2d8166 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -197,7 +197,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) if (cpus_have_final_cap(ARM64_HAS_NMI)) sysreg_clear_set_s(SYS_HCRX_EL2, 0, HCRX_EL2_TALLINT); - vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_HCX)) { @@ -219,7 +219,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) { - write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2); if (cpus_have_final_cap(ARM64_HAS_NMI)) sysreg_clear_set_s(SYS_HCRX_EL2, HCRX_EL2_TALLINT, 0); From 90075ab90f2c41abaf592ff00bbf3dfa976edbd4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 12:06:44 +0000 Subject: [PATCH 61/95] KVM: arm64: Exclude host_fpsimd_state pointer from kvm_vcpu_arch [Upstream commit 51e09b5572d665645ce394f94f24a7d6ec32bda9] As the name of the field indicates, host_fpsimd_state is strictly a host piece of data, and we reset this pointer on each PID change. So let's move it where it belongs, and set it at load-time. Although this is slightly more often, it is a well defined life-cycle which matches other pieces of data. Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2c60d75746937..c25d827c9c4de 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -442,6 +442,7 @@ struct kvm_cpu_context { */ struct kvm_host_data { struct kvm_cpu_context host_ctxt; + struct user_fpsimd_state *fpsimd_state; /* hyp VA */ /* * host_debug_state contains the host registers which are From 36028409aef165339f479d6e454b5142b1b9c79c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 28 May 2022 12:38:20 +0100 Subject: [PATCH 62/95] KVM: arm64: Move vcpu SVE/SME flags to the state flag set [Upstream commit 0affa37fcd1d6f701a0fe805c4ceb7f348d377d5] The two HOST_{SVE,SME}_ENABLED are only used for the host kernel to track its own state across a vcpu run so that it can be fully restored. Move these flags to the so called state set. Reviewed-by: Fuad Tabba Reviewed-by: Reiji Watanabe Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/arm64/kvm/fpsimd.c | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c25d827c9c4de..eaf0a12349c14 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -746,6 +746,11 @@ struct kvm_vcpu_arch { #define IN_WFI __vcpu_single_flag(sflags, BIT(7)) +/* SVE enabled for host EL0 */ +#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0)) +/* SME enabled for EL0 */ +#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1)) + /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ sve_ffr_offset((vcpu)->arch.sve_max_vl)) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index e57db49ea468e..9c75f74e48c5a 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -87,6 +87,17 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) */ fpsimd_save_and_flush_cpu_state(); vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fpsimd_state) = NULL; + + vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); + if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) + vcpu_set_flag(vcpu, HOST_SVE_ENABLED); + + if (system_supports_sme()) { + vcpu_clear_flag(vcpu, HOST_SME_ENABLED); + if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) + vcpu_set_flag(vcpu, HOST_SME_ENABLED); + } } /* @@ -151,6 +162,22 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); + /* + * If we have VHE then the Hyp code will reset CPACR_EL1 to + * CPACR_EL1_DEFAULT and we need to reenable SME. + */ + if (has_vhe() && system_supports_sme()) { + /* Also restore EL0 state seen on entry */ + if (vcpu_get_flag(vcpu, HOST_SME_ENABLED)) + sysreg_clear_set(CPACR_EL1, 0, + CPACR_EL1_SMEN_EL0EN | + CPACR_EL1_SMEN_EL1EN); + else + sysreg_clear_set(CPACR_EL1, + CPACR_EL1_SMEN_EL0EN, + CPACR_EL1_SMEN_EL1EN); + } + if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { /* * Flush (save and invalidate) the fpsimd/sve state so that if From ba341b3980fd289366e9bd01f6e3242a2f89cc8b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Mar 2024 17:42:31 +0000 Subject: [PATCH 63/95] KVM: arm64: Exclude FP ownership from kvm_vcpu_arch [Upstream commit 5294afdbf45aced5295fe5941c58b40c41c23800] In retrospect, it is fairly obvious that the FP state ownership is only meaningful for a given CPU, and that locating this information in the vcpu was just a mistake. Move the ownership tracking into the host data structure, and rename it from fp_state to fp_owner, which is a better description (name suggested by Mark Brown). Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_emulate.h | 4 ++-- arch/arm64/include/asm/kvm_host.h | 14 +++++++------- arch/arm64/kvm/arm.c | 6 ------ arch/arm64/kvm/fpsimd.c | 23 ++++++++++++++++++----- arch/arm64/kvm/hyp/include/hyp/switch.h | 8 ++++++-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 -- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 8 files changed, 35 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3d6725ff0bf6d..c0cb2c1748486 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -592,7 +592,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); if (!vcpu_has_sve(vcpu) || - (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) != FP_STATE_GUEST_OWNED)) val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; if (cpus_have_final_cap(ARM64_SME)) val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; @@ -600,7 +600,7 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) val = CPTR_NVHE_EL2_RES1; if (vcpu_has_sve(vcpu) && - (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) + (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED)) val |= CPTR_EL2_TZ; if (cpus_have_final_cap(ARM64_SME)) val &= ~CPTR_EL2_TSM; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index eaf0a12349c14..9c8e3661bb302 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -444,6 +444,13 @@ struct kvm_host_data { struct kvm_cpu_context host_ctxt; struct user_fpsimd_state *fpsimd_state; /* hyp VA */ + /* Ownership of the FP regs */ + enum { + FP_STATE_FREE, + FP_STATE_HOST_OWNED, + FP_STATE_GUEST_OWNED, + } fp_owner; + /* * host_debug_state contains the host registers which are * saved and restored during world switches. @@ -518,13 +525,6 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Ownership of the FP regs */ - enum { - FP_STATE_FREE, - FP_STATE_HOST_OWNED, - FP_STATE_GUEST_OWNED, - } fp_state; - /* Configuration flags, set once and for all before the vcpu can run */ u8 cflags; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7d761a20e8c04..89492a378df7e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -370,12 +370,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; - /* - * Default value for the FP state, will be overloaded at load - * time if we support FP (pretty likely) - */ - vcpu->arch.fp_state = FP_STATE_FREE; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 9c75f74e48c5a..41760ec51745b 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -85,8 +85,9 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures * that PSTATE.{SM,ZA} == {0,0}. */ + fpsimd_save_and_flush_cpu_state(); - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; *host_data_ptr(fpsimd_state) = NULL; vcpu_clear_flag(vcpu, HOST_SVE_ENABLED); @@ -110,7 +111,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) { if (test_thread_flag(TIF_FOREIGN_FPSTATE)) - vcpu->arch.fp_state = FP_STATE_FREE; + *host_data_ptr(fp_owner) = FP_STATE_FREE; } /* @@ -126,7 +127,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) WARN_ON_ONCE(!irqs_disabled()); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { /* * Currently we do not support SME guests so SVCR is @@ -164,7 +165,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) /* * If we have VHE then the Hyp code will reset CPACR_EL1 to - * CPACR_EL1_DEFAULT and we need to reenable SME. + * the default value and we need to reenable SME. */ if (has_vhe() && system_supports_sme()) { /* Also restore EL0 state seen on entry */ @@ -176,9 +177,21 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) sysreg_clear_set(CPACR_EL1, CPACR_EL1_SMEN_EL0EN, CPACR_EL1_SMEN_EL1EN); + isb(); } - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) { + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) { + if (vcpu_has_sve(vcpu)) { + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + + /* Restore the VL that was saved when bound to the CPU */ + if (!has_vhe()) + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, + SYS_ZCR_EL1); + } + + fpsimd_save_and_flush_cpu_state(); + } else if (has_vhe() && system_supports_sve()) { /* * Flush (save and invalidate) the fpsimd/sve state so that if * the host tries to use fpsimd/sve, it's not using stale data diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 437de0b2d8166..2f08a04e57880 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -41,7 +41,7 @@ extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs are owned by the guest */ static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) { - return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; + return *host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED; } /* Save the 32-bit only FPSIMD system register state */ @@ -382,6 +382,10 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) } isb(); + /* Write out the host state if it's in the registers */ + if (*host_data_ptr(fp_owner) == FP_STATE_HOST_OWNED) + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); + /* Restore the guest state */ if (sve_guest) __hyp_sve_restore_guest(vcpu); @@ -392,7 +396,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) if (!(read_sysreg(hcr_el2) & HCR_RW)) write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); - vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; + *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 350d1775a5ce8..90c7621fa2392 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -39,7 +39,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); @@ -62,7 +61,6 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 19d4b0dc82c90..ec2f32cbd80c4 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -375,7 +375,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 52750ab7570d9..e9eb477a202d5 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -253,7 +253,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_host_state_vhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (*host_data_ptr(fp_owner) == FP_STATE_GUEST_OWNED) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); From 067c7b94d9285756944c76b108d420170526cc97 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Apr 2024 11:29:24 +0100 Subject: [PATCH 64/95] KVM: arm64: nv: Drop VCPU_HYP_CONTEXT flag [Upstream commit 6f57c6be2a0889cc0fd32b0cd2eb25dfee20dde3] It has become obvious that HCR_EL2.NV serves the exact same use as VCPU_HYP_CONTEXT, only in an architectural way. So just drop the flag for good. Reviewed-by: Joey Gouly Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20240419102935.1935571-5-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 2 -- arch/arm64/kvm/hyp/vhe/switch.c | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9c8e3661bb302..cbbe1e6868ad4 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -729,8 +729,6 @@ struct kvm_vcpu_arch { #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) /* Save TRBE context if active */ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) -/* vcpu running in HYP context */ -#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7)) /* Physical CPU not in supported_cpus */ #define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2)) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index e9eb477a202d5..a38bafc27d65a 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -180,7 +180,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) * If we were in HYP context on entry, adjust the PSTATE view * so that the usual helpers work correctly. */ - if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) { + if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) { u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); switch (mode) { @@ -233,11 +233,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); - if (is_hyp_ctxt(vcpu)) - vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT); - else - vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT); - do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu); From 32589c5d987dfcb67ef22071e24a7739c0322333 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Tue, 23 Apr 2024 16:05:09 +0100 Subject: [PATCH 65/95] KVM: arm64: Initialize the kvm host data's fpsimd_state pointer in pKVM [Upstream commit 4c22a40dd9c3dcc2156f312ffc71955e56192a76] Since the host_fpsimd_state has been removed from kvm_vcpu_arch, it isn't pointing to the hyp's version of the host fp_regs in protected mode. Initialize the host_data fpsimd_state point to the host_data's context fp_regs on pKVM initialization. Fixes: 51e09b5572d6 ("KVM: arm64: Exclude host_fpsimd_state pointer from kvm_vcpu_arch") Signed-off-by: Fuad Tabba Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240423150538.2103045-2-tabba@google.com Signed-off-by: Marc Zyngier Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 + arch/arm64/kvm/hyp/nvhe/pkvm.c | 11 +++++++++++ arch/arm64/kvm/hyp/nvhe/setup.c | 1 + 3 files changed, 13 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h index 82b3d62538a61..20c3f6e13b99f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -54,6 +54,7 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) } void pkvm_hyp_vm_table_init(void *tbl); +void pkvm_host_fpsimd_state_init(void); int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, unsigned long pgd_hva); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 03acc8343c5d1..80fe9378deac9 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -218,6 +218,17 @@ void pkvm_hyp_vm_table_init(void *tbl) vm_table = tbl; } +void pkvm_host_fpsimd_state_init(void) +{ + unsigned long i; + + for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i); + + host_data->fpsimd_state = &host_data->host_ctxt.fp_regs; + } +} + /* * Return the hyp vm structure corresponding to the handle. */ diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index c6edba9c66b68..2fb337b6e7e48 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -300,6 +300,7 @@ void __noreturn __pkvm_init_finalise(void) goto out; pkvm_hyp_vm_table_init(vm_table_base); + pkvm_host_fpsimd_state_init(); out: /* * We tail-called to here from handle___pkvm_init() and will not return, From 73a3ec034b369ee0c582adc2a83a7cd1f2e6e4b9 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 13 Jun 2024 11:47:23 +0530 Subject: [PATCH 66/95] arm64/sysreg: Add BRBE registers and fields [Upstream commit 52e4a56ab8b85a11ffc017eaec5b2f38642a43ba] This patch adds definitions related to the Branch Record Buffer Extension (BRBE) as per ARM DDI 0487K.a. These will be used by KVM and a BRBE driver in subsequent patches. Some existing BRBE definitions in asm/sysreg.h are replaced with equivalent generated definitions. Cc: Catalin Marinas Cc: Will Deacon Cc: Marc Zyngier Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Mark Rutland Reviewed-by: Mark Brown Signed-off-by: Anshuman Khandual Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/sysreg.h | 17 ++--- arch/arm64/tools/sysreg | 131 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cad681476a33b..976761b24a2f8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -197,16 +197,8 @@ #define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0) #define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0)) -#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0) #define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1)) -#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1) #define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2)) -#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2) -#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2) - -#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0) -#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1) -#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0) #define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3) #define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3))) @@ -272,8 +264,6 @@ /* ETM */ #define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4) -#define SYS_BRBCR_EL2 sys_reg(2, 4, 9, 0, 0) - #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -587,7 +577,6 @@ #define SYS_CNTHV_CVAL_EL2 sys_reg(3, 4, 14, 3, 2) /* VHE encodings for architectural EL0/1 system registers */ -#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0) #define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) #define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3) @@ -773,6 +762,12 @@ #define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5) #define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7) +/* + * BRBE Instructions + */ +#define BRB_IALL_INSN __emit_inst(0xd5000000 | OP_BRB_IALL | (0x1f)) +#define BRB_INJ_INSN __emit_inst(0xd5000000 | OP_BRB_INJ | (0x1f)) + /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_ENTP2 (BIT(60)) #define SCTLR_ELx_DSSBS (BIT(44)) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 76ce150e7347e..c97275a6adbc3 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1002,6 +1002,137 @@ UnsignedEnum 3:0 BT EndEnum EndSysreg + +SysregFields BRBINFx_EL1 +Res0 63:47 +Field 46 CCU +Field 45:32 CC +Res0 31:18 +Field 17 LASTFAILED +Field 16 T +Res0 15:14 +Enum 13:8 TYPE + 0b000000 DIRECT_UNCOND + 0b000001 INDIRECT + 0b000010 DIRECT_LINK + 0b000011 INDIRECT_LINK + 0b000101 RET + 0b000111 ERET + 0b001000 DIRECT_COND + 0b100001 DEBUG_HALT + 0b100010 CALL + 0b100011 TRAP + 0b100100 SERROR + 0b100110 INSN_DEBUG + 0b100111 DATA_DEBUG + 0b101010 ALIGN_FAULT + 0b101011 INSN_FAULT + 0b101100 DATA_FAULT + 0b101110 IRQ + 0b101111 FIQ + 0b110000 IMPDEF_TRAP_EL3 + 0b111001 DEBUG_EXIT +EndEnum +Enum 7:6 EL + 0b00 EL0 + 0b01 EL1 + 0b10 EL2 + 0b11 EL3 +EndEnum +Field 5 MPRED +Res0 4:2 +Enum 1:0 VALID + 0b00 NONE + 0b01 TARGET + 0b10 SOURCE + 0b11 FULL +EndEnum +EndSysregFields + +SysregFields BRBCR_ELx +Res0 63:24 +Field 23 EXCEPTION +Field 22 ERTN +Res0 21:10 +Field 9 FZPSS +Field 8 FZP +Res0 7 +Enum 6:5 TS + 0b01 VIRTUAL + 0b10 GUEST_PHYSICAL + 0b11 PHYSICAL +EndEnum +Field 4 MPRED +Field 3 CC +Res0 2 +Field 1 ExBRE +Field 0 E0BRE +EndSysregFields + +Sysreg BRBCR_EL1 2 1 9 0 0 +Fields BRBCR_ELx +EndSysreg + +Sysreg BRBFCR_EL1 2 1 9 0 1 +Res0 63:30 +Enum 29:28 BANK + 0b00 BANK_0 + 0b01 BANK_1 +EndEnum +Res0 27:23 +Field 22 CONDDIR +Field 21 DIRCALL +Field 20 INDCALL +Field 19 RTN +Field 18 INDIRECT +Field 17 DIRECT +Field 16 EnI +Res0 15:8 +Field 7 PAUSED +Field 6 LASTFAILED +Res0 5:0 +EndSysreg + +Sysreg BRBTS_EL1 2 1 9 0 2 +Field 63:0 TS +EndSysreg + +Sysreg BRBINFINJ_EL1 2 1 9 1 0 +Fields BRBINFx_EL1 +EndSysreg + +Sysreg BRBSRCINJ_EL1 2 1 9 1 1 +Field 63:0 ADDRESS +EndSysreg + +Sysreg BRBTGTINJ_EL1 2 1 9 1 2 +Field 63:0 ADDRESS +EndSysreg + +Sysreg BRBIDR0_EL1 2 1 9 2 0 +Res0 63:16 +Enum 15:12 CC + 0b0101 20_BIT +EndEnum +Enum 11:8 FORMAT + 0b0000 FORMAT_0 +EndEnum +Enum 7:0 NUMREC + 0b00001000 8 + 0b00010000 16 + 0b00100000 32 + 0b01000000 64 +EndEnum +EndSysreg + +Sysreg BRBCR_EL2 2 4 9 0 0 +Fields BRBCR_ELx +EndSysreg + +Sysreg BRBCR_EL12 2 5 9 0 0 +Fields BRBCR_ELx +EndSysreg + Sysreg ID_AA64ZFR0_EL1 3 0 0 4 4 Res0 63:60 UnsignedEnum 59:56 F64MM From 550e2bf14717ebd6b9754b2b78869d22dedec1ce Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Apr 2024 08:16:32 +0530 Subject: [PATCH 67/95] KVM: arm64: Explicitly handle BRBE traps as UNDEFINED [Upstream commit a7f1fa5564be565bd4bc18875bb46ffd0c01d292] The Branch Record Buffer Extension (BRBE) adds a number of system registers and instructions, which we don't currently intend to expose to guests. Our existing logic handles this safely, but this could be improved with some explicit handling of BRBE. The presence of BRBE is currently hidden from guests as the cpufeature code's ftr_id_aa64dfr0[] table doesn't have an entry for the BRBE field, and so this will be zero in the sanitised value of ID_AA64DFR0 exposed to guests via read_sanitised_id_aa64dfr0_el1(). As the ftr_id_aa64dfr0[] table may gain an entry for the BRBE field in future, for robustness we should explicitly mask out the BRBE field in read_sanitised_id_aa64dfr0_el1(). The BRBE system registers and instructions are currently trapped by the existing configuration of the fine-grained traps. As neither the registers nor the instructions are described in the sys_reg_descs[] table, emulate_sys_reg() will warn that these are unknown before injecting an UNDEFINED exception into the guest. Well-behaved guests shouldn't try to use the registers or instructions, but badly-behaved guests could use these, resulting in unnecessary warnings. To avoid those warnings, we should explicitly handle the BRBE registers and instructions as UNDEFINED. Address the above by having read_sanitised_id_aa64dfr0_el1() mask out the ID_AA64DFR0.BRBE field, and explicitly handling all of the BRBE system registers and instructions as UNDEFINED. Cc: Marc Zyngier Cc: Oliver Upton Cc: James Morse Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/kvm/sys_regs.c | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2e8306d0f7793..529fdcf82d7fb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1113,6 +1113,11 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +#define BRB_INF_SRC_TGT_EL1(n) \ + { SYS_DESC(SYS_BRBINF_EL1(n)), undef_access }, \ + { SYS_DESC(SYS_BRBSRC_EL1(n)), undef_access }, \ + { SYS_DESC(SYS_BRBTGT_EL1(n)), undef_access } \ + /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ @@ -1512,6 +1517,9 @@ static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, /* Hide SPE from guests */ val &= ~ID_AA64DFR0_EL1_PMSVer_MASK; + /* Hide BRBE from guests */ + val &= ~ID_AA64DFR0_EL1_BRBE_MASK; + return val; } @@ -1985,6 +1993,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DC_CISW), access_dcsw }, { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, + { SYS_DESC(OP_BRB_IALL), undef_access }, + { SYS_DESC(OP_BRB_INJ), undef_access }, DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), @@ -2015,6 +2025,52 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DBGCLAIMCLR_EL1), trap_raz_wi }, { SYS_DESC(SYS_DBGAUTHSTATUS_EL1), trap_dbgauthstatus_el1 }, + /* + * BRBE branch record sysreg address space is interleaved between + * corresponding BRBINF_EL1, BRBSRC_EL1, and BRBTGT_EL1. + */ + BRB_INF_SRC_TGT_EL1(0), + BRB_INF_SRC_TGT_EL1(16), + BRB_INF_SRC_TGT_EL1(1), + BRB_INF_SRC_TGT_EL1(17), + BRB_INF_SRC_TGT_EL1(2), + BRB_INF_SRC_TGT_EL1(18), + BRB_INF_SRC_TGT_EL1(3), + BRB_INF_SRC_TGT_EL1(19), + BRB_INF_SRC_TGT_EL1(4), + BRB_INF_SRC_TGT_EL1(20), + BRB_INF_SRC_TGT_EL1(5), + BRB_INF_SRC_TGT_EL1(21), + BRB_INF_SRC_TGT_EL1(6), + BRB_INF_SRC_TGT_EL1(22), + BRB_INF_SRC_TGT_EL1(7), + BRB_INF_SRC_TGT_EL1(23), + BRB_INF_SRC_TGT_EL1(8), + BRB_INF_SRC_TGT_EL1(24), + BRB_INF_SRC_TGT_EL1(9), + BRB_INF_SRC_TGT_EL1(25), + BRB_INF_SRC_TGT_EL1(10), + BRB_INF_SRC_TGT_EL1(26), + BRB_INF_SRC_TGT_EL1(11), + BRB_INF_SRC_TGT_EL1(27), + BRB_INF_SRC_TGT_EL1(12), + BRB_INF_SRC_TGT_EL1(28), + BRB_INF_SRC_TGT_EL1(13), + BRB_INF_SRC_TGT_EL1(29), + BRB_INF_SRC_TGT_EL1(14), + BRB_INF_SRC_TGT_EL1(30), + BRB_INF_SRC_TGT_EL1(15), + BRB_INF_SRC_TGT_EL1(31), + + /* Remaining BRBE sysreg addresses space */ + { SYS_DESC(SYS_BRBCR_EL1), undef_access }, + { SYS_DESC(SYS_BRBFCR_EL1), undef_access }, + { SYS_DESC(SYS_BRBTS_EL1), undef_access }, + { SYS_DESC(SYS_BRBINFINJ_EL1), undef_access }, + { SYS_DESC(SYS_BRBSRCINJ_EL1), undef_access }, + { SYS_DESC(SYS_BRBTGTINJ_EL1), undef_access }, + { SYS_DESC(SYS_BRBIDR0_EL1), undef_access }, + { SYS_DESC(SYS_MDCCSR_EL0), trap_raz_wi }, { SYS_DESC(SYS_DBGDTR_EL0), trap_raz_wi }, // DBGDTR[TR]X_EL0 share the same encoding From 9d24a96977e12336683e90a3e26af7e3357e79ff Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Apr 2024 08:16:33 +0530 Subject: [PATCH 68/95] drivers: perf: arm_pmu: Add infrastructure for branch stack sampling commit cddc555caf1eb744ec2c8c818899fd2e81793ec2 openEuler In order to support the Branch Record Buffer Extension (BRBE), we need to extend the arm_pmu framework with some basic infrastructure for branch stack sampling which arm_pmu drivers can opt-in to using. Subsequent patches will use this to add support for BRBE in the PMUv3 driver. With BRBE, the hardware records branches into a hardware FIFO, which will be sampled by software when perf events overflow. A task may be context- switched an arbitrary number of times between overflows, and to avoid losing samples we need to save the current records when a task is context- switched out. To do these we'll need to use the pmu::sched_task() callback, and we'll also need to allocate some per-task storage space via event flag PERF_ATTACH_TASK_DATA. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/arm_pmu.c | 46 +++++++++++++++++++++++++++++++++--- include/linux/perf/arm_pmu.h | 32 ++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 30d67e670603b..8d1a565390e2c 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -289,6 +289,23 @@ static void armpmu_start(struct perf_event *event, int flags) { struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *cpuc = this_cpu_ptr(armpmu->hw_events); + int idx; + + /* + * Merge all branch filter requests from different perf + * events being added into this PMU. This includes both + * privilege and branch type filters. + */ + if (armpmu->has_branch_stack) { + cpuc->branch_sample_type = 0; + for (idx = 0; idx < ARMPMU_MAX_HWEVENTS; idx++) { + struct perf_event *event_idx = cpuc->events[idx]; + + if (event_idx && has_branch_stack(event_idx)) + cpuc->branch_sample_type |= event_idx->attr.branch_sample_type; + } + } /* * ARM pmu always has to reprogram the period, so ignore @@ -317,6 +334,9 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + if (has_branch_stack(event)) + armpmu->branch_stack_del(event, hw_events); + armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; armpmu->clear_event_idx(hw_events, event); @@ -345,6 +365,13 @@ armpmu_add(struct perf_event *event, int flags) /* The newly-allocated counter should be empty */ WARN_ON_ONCE(hw_events->events[idx]); + if (has_branch_stack(event)) + armpmu->branch_stack_add(event, hw_events); + + /* + * If there is an event in the counter we are going to use then make + * sure it is disabled. + */ event->hw.idx = idx; hw_events->events[idx] = event; @@ -509,13 +536,25 @@ static int armpmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) return -ENOENT; - /* does not support taken branch sampling */ - if (has_branch_stack(event)) - return -EOPNOTSUPP; + if (has_branch_stack(event)) { + if (!armpmu->has_branch_stack) + return -EOPNOTSUPP; + + if (!armpmu->branch_stack_init(event)) + return -EOPNOTSUPP; + } return __hw_perf_event_init(event); } +static void armpmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + + if (armpmu->sched_task) + armpmu->sched_task(pmu_ctx, sched_in); +} + static void armpmu_enable(struct pmu *pmu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); @@ -862,6 +901,7 @@ struct arm_pmu *armpmu_alloc(void) } pmu->pmu = (struct pmu) { + .sched_task = armpmu_sched_task, .pmu_enable = armpmu_enable, .pmu_disable = armpmu_disable, .event_init = armpmu_event_init, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 337f01674b38b..fc02950059d5f 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -46,6 +46,18 @@ static_assert((PERF_EVENT_FLAG_ARCH & ARMPMU_EVT_63BIT) == ARMPMU_EVT_63BIT); }, \ } +/* + * Maximum branch record entries which could be processed + * for core perf branch stack sampling support, regardless + * of the hardware support available on a given ARM PMU. + */ +#define MAX_BRANCH_RECORDS 64 + +struct branch_records { + struct perf_branch_stack branch_stack; + struct perf_branch_entry branch_entries[MAX_BRANCH_RECORDS]; +}; + /* The events for a given PMU register set. */ struct pmu_hw_events { /* @@ -72,6 +84,17 @@ struct pmu_hw_events { struct arm_pmu *percpu_pmu; int irq; + + struct branch_records *branches; + + /* Active context for task events */ + void *branch_context; + + /* Active events requesting branch records */ + unsigned int branch_users; + + /* Active branch sample type filters */ + unsigned long branch_sample_type; }; enum armpmu_attr_groups { @@ -102,8 +125,15 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, bool sched_in); + bool (*branch_stack_init)(struct perf_event *event); + void (*branch_stack_add)(struct perf_event *event, struct pmu_hw_events *cpuc); + void (*branch_stack_del)(struct perf_event *event, struct pmu_hw_events *cpuc); + void (*branch_stack_reset)(void); int num_events; - bool secure_access; /* 32-bit ARM only */ + unsigned int secure_access : 1, /* 32-bit ARM only */ + has_branch_stack: 1, /* 64-bit ARM only */ + reserved : 30; #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); #define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 From e1e07d56899609d44748b671a13bbd4fc676ec13 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 5 Apr 2024 08:16:34 +0530 Subject: [PATCH 69/95] arm64/boot: Enable EL2 requirements for BRBE commit 9192278e57e60fe5c18f98d2b44f5829f7b6c854 openEuler Fine grained trap control for BRBE registers, and instructions access need to be configured in HDFGRTR_EL2, HDFGWTR_EL2 and HFGITR_EL2 registers when kernel enters at EL1 but EL2 is present. This changes __init_el2_fgt() as required. Similarly cycle and mis-prediction capture need to be enabled in BRBCR_EL1 and BRBCR_EL2 when the kernel enters either into EL1 or EL2. This adds new __init_el2_brbe() to achieve this objective. This also updates Documentation/arch/arm64/booting.rst with all the above EL2 along with MDRC_EL3.SBRBE requirements. First this replaces an existing hard encoding (1 << 62) with corresponding applicable macro HDFGRTR_EL2_nPMSNEVFR_EL1_MASK. Cc: Catalin Marinas Cc: Will Deacon Cc: Jonathan Corbet Cc: Marc Zyngier Cc: Oliver Upton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- Documentation/arch/arm64/booting.rst | 21 +++++++ arch/arm64/include/asm/el2_setup.h | 88 +++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 3 deletions(-) diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index 29126bf0eac9a..408d2e27b6418 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -349,6 +349,27 @@ Before jumping into the kernel, the following conditions must be met: - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. + For CPUs with feature Branch Record Buffer Extension (FEAT_BRBE): + + - If EL3 is present: + + - MDCR_EL3.SBRBE (bits 33:32) must be initialised to 0b11. + + - If the kernel is entered at EL1 and EL2 is present: + + - BRBCR_EL2.CC (bit 3) must be initialised to 0b1. + - BRBCR_EL2.MPRED (bit 4) must be initialised to 0b1. + + - HDFGRTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1. + - HDFGRTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1. + - HDFGRTR_EL2.nBRBIDR (bit 59) must be initialised to 0b1. + + - HDFGWTR_EL2.nBRBDATA (bit 61) must be initialised to 0b1. + - HDFGWTR_EL2.nBRBCTL (bit 60) must be initialised to 0b1. + + - HFGITR_EL2.nBRBIALL (bit 56) must be initialised to 0b1. + - HFGITR_EL2.nBRBINJ (bit 55) must be initialised to 0b1. + For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64): - If EL3 is present: diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index e4546b29dd0cb..8da13fcce6f9c 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -155,6 +155,41 @@ .Lskip_set_cptr_\@: .endm +/* + * Enable BRBE to record cycle counts and branch mispredicts. + * + * At any EL, to record cycle counts BRBE requires that both + * BRBCR_EL2.CC=1 and BRBCR_EL1.CC=1. + * + * At any EL, to record branch mispredicts BRBE requires that both + * BRBCR_EL2.MPRED=1 and BRBCR_EL1.MPRED=1. + * + * When HCR_EL2.E2H=1, the BRBCR_EL1 encoding is redirected to + * BRBCR_EL2, but the {CC,MPRED} bits in the real BRBCR_EL1 register + * still apply. + * + * Set {CC,MPRBED} in both BRBCR_EL2 and BRBCR_EL1 so that at runtime we + * only need to enable/disable thse in BRBCR_EL1 regardless of whether + * the kernel ends up executing in EL1 or EL2. + */ +.macro __init_el2_brbe + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_\@ + + mov_q x0, BRBCR_ELx_CC | BRBCR_ELx_MPRED + msr_s SYS_BRBCR_EL2, x0 + + __check_hvhe .Lset_brbe_nvhe_\@, x1 + msr_s SYS_BRBCR_EL12, x0 // VHE + + b .Lskip_brbe_\@ + +.Lset_brbe_nvhe_\@ : + msr_s SYS_BRBCR_EL1, x0 // NVHE +.Lskip_brbe_\@ : +.endm + /* Disable any fine grained traps */ .macro __init_el2_fgt mrs x1, id_aa64mmfr0_el1 @@ -162,16 +197,48 @@ cbz x1, .Lskip_fgt_\@ mov x0, xzr + mov x2, xzr mrs x1, id_aa64dfr0_el1 ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4 cmp x1, #3 b.lt .Lset_debug_fgt_\@ + /* Disable PMSNEVFR_EL1 read and write traps */ - orr x0, x0, #(1 << 62) + orr x0, x0, #HDFGRTR_EL2_nPMSNEVFR_EL1_MASK + orr x2, x2, #HDFGWTR_EL2_nPMSNEVFR_EL1_MASK .Lset_debug_fgt_\@: +#ifdef CONFIG_ARM64_BRBE + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_reg_fgt_\@ + + /* + * Disable read traps for the following registers + * + * [BRBSRC|BRBTGT|RBINF]_EL1 + * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1 + */ + orr x0, x0, #HDFGRTR_EL2_nBRBDATA_MASK + + /* + * Disable write traps for the following registers + * + * [BRBSRCINJ|BRBTGTINJ|BRBINFINJ|BRBTS]_EL1 + */ + orr x2, x2, #HDFGWTR_EL2_nBRBDATA_MASK + + /* Disable read and write traps for [BRBCR|BRBFCR]_EL1 */ + orr x0, x0, #HDFGRTR_EL2_nBRBCTL_MASK + orr x2, x2, #HDFGWTR_EL2_nBRBCTL_MASK + + /* Disable read traps for BRBIDR_EL1 */ + orr x0, x0, #HDFGRTR_EL2_nBRBIDR_MASK + +.Lskip_brbe_reg_fgt_\@ : +#endif /* CONFIG_ARM64_BRBE */ msr_s SYS_HDFGRTR_EL2, x0 - msr_s SYS_HDFGWTR_EL2, x0 + msr_s SYS_HDFGWTR_EL2, x2 mov x0, xzr mrs x1, id_aa64pfr1_el1 @@ -194,7 +261,21 @@ .Lset_fgt_\@: msr_s SYS_HFGRTR_EL2, x0 msr_s SYS_HFGWTR_EL2, x0 - msr_s SYS_HFGITR_EL2, xzr + mov x0, xzr +#ifdef CONFIG_ARM64_BRBE + mrs x1, id_aa64dfr0_el1 + ubfx x1, x1, #ID_AA64DFR0_EL1_BRBE_SHIFT, #4 + cbz x1, .Lskip_brbe_insn_fgt_\@ + + /* Disable traps for BRBIALL instruction */ + orr x0, x0, #HFGITR_EL2_nBRBIALL_MASK + + /* Disable traps for BRBINJ instruction */ + orr x0, x0, #HFGITR_EL2_nBRBINJ_MASK + +.Lskip_brbe_insn_fgt_\@ : +#endif /* CONFIG_ARM64_BRBE */ + msr_s SYS_HFGITR_EL2, x0 mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4 @@ -229,6 +310,7 @@ __init_el2_nvhe_idregs __init_el2_cptr __init_el2_fgt + __init_el2_brbe .endm #ifndef __KVM_NVHE_HYPERVISOR__ From f21229b6e4e2e2ab1baa52e6972a6a65ce2d97e7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 13 Jun 2024 11:47:27 +0530 Subject: [PATCH 70/95] drivers: perf: arm_pmuv3: Enable branch stack sampling via FEAT_BRBE commit 6c6848e7e00cdff4e22650bd2d5658a9e5c23b1d openEuler This extends recently added branch stack sampling framework in ARMV8 PMU to enable such events via new architecture feature called Branch Record Buffer Extension aka BRBE. This implements all the armv8pmu_branch_xxx() callbacks as expected at ARMV8 PMU level required to drive perf branch stack sampling events. This adds a new config option CONFIG_ARM64_BRBE to encapsulate this BRBE based implementation, available only on ARM64 platforms. BRBE hardware captures a branch record via three distinct system registers representing branch source address, branch target address, and other branch information. A BRBE buffer implementation is organized as multiple banks of 32 branch records each, which is a collection of BRBSRC_EL1, BRBTGT_EL1 and BRBINF_EL1 registers. Though total BRBE record entries i.e BRBE_MAX_ENTRIES cannot exceed MAX_BRANCH_RECORDS as defined for ARM PMU. Branch stack sampling is enabled and disabled along with regular PMU events . This adds required function callbacks in armv8pmu_branch_xxx() format, to drive the PMU branch stack hardware when supported. This also adds fallback stub definitions for these callbacks for PMUs which would not have required support. BRBE hardware attributes get captured in a new reg_brbidr element in struct arm_pmu during armv8pmu_branch_probe() which is called from broader probing function __armv8pmu_probe_pmu(). Attributes such as number of branch record entries implemented in the hardware can be derived from armpmu->reg_brbidr. BRBE gets enabled via armv8pmu_branch_enable() where it also derives branch filter, and additional requirements from event's 'attr.branch_sample_type' and configures them via BRBFCR_EL1 and BRBCR_EL1 registers. PMU event overflow triggers IRQ, where current branch records get captured, stitched along with older records available in 'task_ctx', before getting processed for core perf ring buffer. Task context switch outs incrementally save current branch records in event's 'pmu_ctx->task_ctx_data' to optimize workload's branch record samples. In case multiple events with different branch sample type requests converge on the same PMU, BRBE gets enabled for the merged branch filter accommoding all those event's branch sample type. Captured branch records get filterted in software for an overflown event if BRBE hardware config does not match its branch sample type, while handling the PMU IRQ. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Junhao He Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/Kconfig | 12 + drivers/perf/Makefile | 1 + drivers/perf/arm_brbe.c | 1198 +++++++++++++++++++++++++++++++ drivers/perf/arm_pmuv3.c | 160 ++++- drivers/perf/arm_pmuv3_branch.h | 83 +++ include/linux/perf/arm_pmu.h | 5 + 6 files changed, 1458 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/arm_brbe.c create mode 100644 drivers/perf/arm_pmuv3_branch.h diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 9c2199d3b255d..263de982cdbae 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -180,6 +180,18 @@ config ARM_SPE_PMU Extension, which provides periodic sampling of operations in the CPU pipeline and reports this via the perf AUX interface. +config ARM64_BRBE + bool "Enable support for branch stack sampling using FEAT_BRBE" + depends on PERF_EVENTS && ARM64 && ARM_PMU + depends on !FUNCTION_ALIGNMENT_64B + default y + help + Enable perf support for Branch Record Buffer Extension (BRBE) which + records all branches taken in an execution path. This supports some + branch types and privilege based filtering. It captures additional + relevant information such as cycle count, misprediction and branch + type, branch privilege level etc. + config ARM_DMC620_PMU tristate "Enable PMU support for the ARM DMC-620 memory controller" depends on (ARM64 && ACPI) || COMPILE_TEST diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 66891ae9f4bd5..26bbec14a1ddd 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_RISCV_PMU_SBI) += riscv_pmu_sbi.o obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o +obj-$(CONFIG_ARM64_BRBE) += arm_brbe.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c new file mode 100644 index 0000000000000..d795e8fd646fc --- /dev/null +++ b/drivers/perf/arm_brbe.c @@ -0,0 +1,1198 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Branch Record Buffer Extension Driver. + * + * Copyright (C) 2022-2023 ARM Limited + * + * Author: Anshuman Khandual + */ +#include "arm_pmuv3_branch.h" + +#define BRBFCR_EL1_BRANCH_FILTERS (BRBFCR_EL1_DIRECT | \ + BRBFCR_EL1_INDIRECT | \ + BRBFCR_EL1_RTN | \ + BRBFCR_EL1_INDCALL | \ + BRBFCR_EL1_DIRCALL | \ + BRBFCR_EL1_CONDDIR) + +#define BRBFCR_EL1_CONFIG_MASK (BRBFCR_EL1_BANK_MASK | \ + BRBFCR_EL1_PAUSED | \ + BRBFCR_EL1_EnI | \ + BRBFCR_EL1_BRANCH_FILTERS) + +/* + * BRBTS_EL1 is currently not used for branch stack implementation + * purpose but BRBCR_ELx.TS needs to have a valid value from all + * available options. BRBCR_ELx_TS_VIRTUAL is selected for this. + */ +#define BRBCR_ELx_DEFAULT_TS FIELD_PREP(BRBCR_ELx_TS_MASK, BRBCR_ELx_TS_VIRTUAL) + +#define BRBCR_ELx_CONFIG_MASK (BRBCR_ELx_EXCEPTION | \ + BRBCR_ELx_ERTN | \ + BRBCR_ELx_CC | \ + BRBCR_ELx_MPRED | \ + BRBCR_ELx_ExBRE | \ + BRBCR_ELx_E0BRE | \ + BRBCR_ELx_FZP | \ + BRBCR_ELx_TS_MASK) + +/* + * BRBE Buffer Organization + * + * BRBE buffer is arranged as multiple banks of 32 branch record + * entries each. An individual branch record in a given bank could + * be accessed, after selecting the bank in BRBFCR_EL1.BANK and + * accessing the registers i.e [BRBSRC, BRBTGT, BRBINF] set with + * indices [0..31]. + * + * Bank 0 + * + * --------------------------------- ------ + * | 00 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 01 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 31 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + * + * Bank 1 + * + * --------------------------------- ------ + * | 32 | BRBSRC | BRBTGT | BRBINF | | 00 | + * --------------------------------- ------ + * | 33 | BRBSRC | BRBTGT | BRBINF | | 01 | + * --------------------------------- ------ + * | .. | BRBSRC | BRBTGT | BRBINF | | .. | + * --------------------------------- ------ + * | 63 | BRBSRC | BRBTGT | BRBINF | | 31 | + * --------------------------------- ------ + */ +#define BRBE_BANK_MAX_ENTRIES 32 +#define BRBE_MAX_BANK 2 +#define BRBE_MAX_ENTRIES (BRBE_BANK_MAX_ENTRIES * BRBE_MAX_BANK) + +#define BRBE_BANK0_IDX_MIN 0 +#define BRBE_BANK0_IDX_MAX 31 +#define BRBE_BANK1_IDX_MIN 32 +#define BRBE_BANK1_IDX_MAX 63 + +struct brbe_regset { + unsigned long brbsrc; + unsigned long brbtgt; + unsigned long brbinf; +}; + +#define PERF_BR_ARM64_MAX (PERF_BR_MAX + PERF_BR_NEW_MAX) + +struct arm64_perf_task_context { + struct brbe_regset store[BRBE_MAX_ENTRIES]; + int nr_brbe_records; + + /* + * Branch Filter Mask + * + * This mask represents all branch record types i.e PERF_BR_XXX + * (as defined in core perf ABI) that can be generated with the + * event's branch_sample_type request. The mask layout could be + * found here. Although the bit 15 i.e PERF_BR_EXTEND_ABI never + * gets set in the mask. + * + * 23 (PERF_BR_MAX + PERF_BR_NEW_MAX) 0 + * | | + * --------------------------------------------------------- + * | Extended ABI section | X | ABI section | + * --------------------------------------------------------- + */ + DECLARE_BITMAP(br_type_mask, PERF_BR_ARM64_MAX); +}; + +static void branch_mask_set_all(unsigned long *event_type_mask) +{ + int idx; + + for (idx = PERF_BR_UNKNOWN; idx < PERF_BR_EXTEND_ABI; idx++) + set_bit(idx, event_type_mask); + + for (idx = PERF_BR_NEW_FAULT_ALGN; idx < PERF_BR_NEW_MAX; idx++) + set_bit(PERF_BR_MAX + idx, event_type_mask); +} + +static void branch_mask_set_arch(unsigned long *event_type_mask) +{ + set_bit(PERF_BR_MAX + PERF_BR_NEW_FAULT_ALGN, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_NEW_FAULT_DATA, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_NEW_FAULT_INST, event_type_mask); + + set_bit(PERF_BR_MAX + PERF_BR_ARM64_FIQ, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_HALT, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_EXIT, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_INST, event_type_mask); + set_bit(PERF_BR_MAX + PERF_BR_ARM64_DEBUG_DATA, event_type_mask); +} + +static void branch_entry_mask(struct perf_branch_entry *entry, + unsigned long *event_type_mask) +{ + u64 idx; + + bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX); + for (idx = PERF_BR_UNKNOWN; idx < PERF_BR_EXTEND_ABI; idx++) { + if (entry->type == idx) + set_bit(idx, event_type_mask); + } + + if (entry->type == PERF_BR_EXTEND_ABI) { + for (idx = PERF_BR_NEW_FAULT_ALGN; idx < PERF_BR_NEW_MAX; idx++) { + if (entry->new_type == idx) + set_bit(PERF_BR_MAX + idx, event_type_mask); + } + } +} + +static void prepare_event_branch_type_mask(struct perf_event *event, + unsigned long *event_type_mask) +{ + u64 branch_sample = event->attr.branch_sample_type; + + bitmap_zero(event_type_mask, PERF_BR_ARM64_MAX); + + /* + * The platform specific branch types might not follow event's + * branch filter requests accurately. Let's add all of them as + * acceptible branch types during the filtering process. + */ + branch_mask_set_arch(event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY) { + branch_mask_set_all(event_type_mask); + return; + } + + if (branch_sample & PERF_SAMPLE_BRANCH_IND_JUMP) + set_bit(PERF_BR_IND, event_type_mask); + + set_bit(PERF_BR_UNCOND, event_type_mask); + if (branch_sample & PERF_SAMPLE_BRANCH_COND) { + clear_bit(PERF_BR_UNCOND, event_type_mask); + set_bit(PERF_BR_COND, event_type_mask); + } + + if (branch_sample & PERF_SAMPLE_BRANCH_CALL) + set_bit(PERF_BR_CALL, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_IND_CALL) + set_bit(PERF_BR_IND_CALL, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY_CALL) { + set_bit(PERF_BR_CALL, event_type_mask); + set_bit(PERF_BR_IRQ, event_type_mask); + set_bit(PERF_BR_SYSCALL, event_type_mask); + set_bit(PERF_BR_SERROR, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_COND) + set_bit(PERF_BR_COND_CALL, event_type_mask); + } + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY_RETURN) { + set_bit(PERF_BR_RET, event_type_mask); + set_bit(PERF_BR_ERET, event_type_mask); + set_bit(PERF_BR_SYSRET, event_type_mask); + + if (branch_sample & PERF_SAMPLE_BRANCH_COND) + set_bit(PERF_BR_COND_RET, event_type_mask); + } +} + +struct brbe_hw_attr { + int brbe_version; + int brbe_cc; + int brbe_nr; + int brbe_format; +}; + +#define RETURN_READ_BRBSRCN(n) \ + read_sysreg_s(SYS_BRBSRC_EL1(n)) + +#define RETURN_READ_BRBTGTN(n) \ + read_sysreg_s(SYS_BRBTGT_EL1(n)) + +#define RETURN_READ_BRBINFN(n) \ + read_sysreg_s(SYS_BRBINF_EL1(n)) + +#define BRBE_REGN_CASE(n, case_macro) \ + case n: return case_macro(n); break + +#define BRBE_REGN_SWITCH(x, case_macro) \ + do { \ + switch (x) { \ + BRBE_REGN_CASE(0, case_macro); \ + BRBE_REGN_CASE(1, case_macro); \ + BRBE_REGN_CASE(2, case_macro); \ + BRBE_REGN_CASE(3, case_macro); \ + BRBE_REGN_CASE(4, case_macro); \ + BRBE_REGN_CASE(5, case_macro); \ + BRBE_REGN_CASE(6, case_macro); \ + BRBE_REGN_CASE(7, case_macro); \ + BRBE_REGN_CASE(8, case_macro); \ + BRBE_REGN_CASE(9, case_macro); \ + BRBE_REGN_CASE(10, case_macro); \ + BRBE_REGN_CASE(11, case_macro); \ + BRBE_REGN_CASE(12, case_macro); \ + BRBE_REGN_CASE(13, case_macro); \ + BRBE_REGN_CASE(14, case_macro); \ + BRBE_REGN_CASE(15, case_macro); \ + BRBE_REGN_CASE(16, case_macro); \ + BRBE_REGN_CASE(17, case_macro); \ + BRBE_REGN_CASE(18, case_macro); \ + BRBE_REGN_CASE(19, case_macro); \ + BRBE_REGN_CASE(20, case_macro); \ + BRBE_REGN_CASE(21, case_macro); \ + BRBE_REGN_CASE(22, case_macro); \ + BRBE_REGN_CASE(23, case_macro); \ + BRBE_REGN_CASE(24, case_macro); \ + BRBE_REGN_CASE(25, case_macro); \ + BRBE_REGN_CASE(26, case_macro); \ + BRBE_REGN_CASE(27, case_macro); \ + BRBE_REGN_CASE(28, case_macro); \ + BRBE_REGN_CASE(29, case_macro); \ + BRBE_REGN_CASE(30, case_macro); \ + BRBE_REGN_CASE(31, case_macro); \ + default: \ + pr_warn("unknown register index\n"); \ + return -1; \ + } \ + } while (0) + +static inline int buffer_to_brbe_idx(int buffer_idx) +{ + return buffer_idx % BRBE_BANK_MAX_ENTRIES; +} + +static inline u64 get_brbsrc_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBSRCN); +} + +static inline u64 get_brbtgt_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBTGTN); +} + +static inline u64 get_brbinf_reg(int buffer_idx) +{ + int brbe_idx = buffer_to_brbe_idx(buffer_idx); + + BRBE_REGN_SWITCH(brbe_idx, RETURN_READ_BRBINFN); +} + +static inline u64 brbe_record_valid(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_VALID_MASK, brbinf); +} + +static inline bool brbe_invalid(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_NONE; +} + +static inline bool brbe_record_is_complete(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_FULL; +} + +static inline bool brbe_record_is_source_only(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_SOURCE; +} + +static inline bool brbe_record_is_target_only(u64 brbinf) +{ + return brbe_record_valid(brbinf) == BRBINFx_EL1_VALID_TARGET; +} + +static inline int brbe_get_in_tx(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_T_MASK, brbinf); +} + +static inline int brbe_get_mispredict(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_MPRED_MASK, brbinf); +} + +static inline int brbe_get_lastfailed(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_LASTFAILED_MASK, brbinf); +} + +static inline int brbe_get_cycles(u64 brbinf) +{ + /* + * Captured cycle count is unknown and hence + * should not be passed on to the user space. + */ + if (brbinf & BRBINFx_EL1_CCU) + return 0; + + return FIELD_GET(BRBINFx_EL1_CC_MASK, brbinf); +} + +static inline int brbe_get_type(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_TYPE_MASK, brbinf); +} + +static inline int brbe_get_el(u64 brbinf) +{ + return FIELD_GET(BRBINFx_EL1_EL_MASK, brbinf); +} + +static inline int brbe_get_numrec(u64 brbidr) +{ + return FIELD_GET(BRBIDR0_EL1_NUMREC_MASK, brbidr); +} + +static inline int brbe_get_format(u64 brbidr) +{ + return FIELD_GET(BRBIDR0_EL1_FORMAT_MASK, brbidr); +} + +static inline int brbe_get_cc_bits(u64 brbidr) +{ + return FIELD_GET(BRBIDR0_EL1_CC_MASK, brbidr); +} + +void armv8pmu_branch_stack_reset(void) +{ + asm volatile(BRB_IALL_INSN); + isb(); +} + +void armv8pmu_branch_stack_add(struct perf_event *event, struct pmu_hw_events *hw_events) +{ + struct arm64_perf_task_context *task_ctx = event->pmu_ctx->task_ctx_data; + + if (event->ctx->task) + prepare_event_branch_type_mask(event, task_ctx->br_type_mask); + + /* + * Reset branch records buffer if a new CPU bound event + * gets scheduled on a PMU. Otherwise existing branch + * records present in the buffer might just leak into + * such events. + * + * Also reset current 'hw_events->branch_context' because + * any previous task bound event now would have lost an + * opportunity for continuous branch records. + */ + if (!event->ctx->task) { + hw_events->branch_context = NULL; + armv8pmu_branch_stack_reset(); + } + + /* + * Reset branch records buffer if a new task event gets + * scheduled on a PMU which might have existing records. + * Otherwise older branch records present in the buffer + * might leak into the new task event. + */ + if (event->ctx->task && hw_events->branch_context != event->ctx) { + hw_events->branch_context = event->ctx; + armv8pmu_branch_stack_reset(); + } + hw_events->branch_users++; +} + +void armv8pmu_branch_stack_del(struct perf_event *event, struct pmu_hw_events *hw_events) +{ + WARN_ON_ONCE(!hw_events->branch_users); + hw_events->branch_users--; + if (!hw_events->branch_users) { + hw_events->branch_context = NULL; + hw_events->branch_sample_type = 0; + } +} + +static bool valid_brbe_nr(int brbe_nr) +{ + return brbe_nr == BRBIDR0_EL1_NUMREC_8 || + brbe_nr == BRBIDR0_EL1_NUMREC_16 || + brbe_nr == BRBIDR0_EL1_NUMREC_32 || + brbe_nr == BRBIDR0_EL1_NUMREC_64; +} + +static bool valid_brbe_cc(int brbe_cc) +{ + return brbe_cc == BRBIDR0_EL1_CC_20_BIT; +} + +static bool valid_brbe_format(int brbe_format) +{ + return brbe_format == BRBIDR0_EL1_FORMAT_FORMAT_0; +} + +static bool valid_brbe_version(int brbe_version) +{ + return brbe_version == ID_AA64DFR0_EL1_BRBE_IMP || + brbe_version == ID_AA64DFR0_EL1_BRBE_BRBE_V1P1; +} + +static void select_brbe_bank(int bank) +{ + u64 brbfcr; + + WARN_ON(bank > 1); + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_BANK_MASK; + brbfcr |= SYS_FIELD_PREP(BRBFCR_EL1, BANK, bank); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); +} + +static bool __read_brbe_regset(struct brbe_regset *entry, int idx) +{ + entry->brbinf = get_brbinf_reg(idx); + + if (brbe_invalid(entry->brbinf)) + return false; + + entry->brbsrc = get_brbsrc_reg(idx); + entry->brbtgt = get_brbtgt_reg(idx); + return true; +} + +/* + * Read all BRBE entries in HW until the first invalid entry. + * + * The caller must ensure that the BRBE is not concurrently modifying these + * branch entries. + */ +static int capture_brbe_regset(struct brbe_regset *buf, int nr_hw_entries) +{ + int idx = 0; + + select_brbe_bank(0); + while (idx < nr_hw_entries && idx <= BRBE_BANK0_IDX_MAX) { + if (!__read_brbe_regset(&buf[idx], idx)) + return idx; + idx++; + } + + select_brbe_bank(1); + while (idx < nr_hw_entries && idx <= BRBE_BANK1_IDX_MAX) { + if (!__read_brbe_regset(&buf[idx], idx)) + return idx; + idx++; + } + return idx; +} + +/* + * This function concatenates branch records from stored and live buffer + * up to maximum nr_max records and the stored buffer holds the resultant + * buffer. The concatenated buffer contains all the branch records from + * the live buffer but might contain some from stored buffer considering + * the maximum combined length does not exceed 'nr_max'. + * + * Stored records Live records + * ------------------------------------------------^ + * | S0 | L0 | Newest | + * --------------------------------- | + * | S1 | L1 | | + * --------------------------------- | + * | S2 | L2 | | + * --------------------------------- | + * | S3 | L3 | | + * --------------------------------- | + * | S4 | L4 | nr_max + * --------------------------------- | + * | | L5 | | + * --------------------------------- | + * | | L6 | | + * --------------------------------- | + * | | L7 | | + * --------------------------------- | + * | | | | + * --------------------------------- | + * | | | Oldest | + * ------------------------------------------------V + * + * + * S0 is the newest in the stored records, where as L7 is the oldest in + * the live records. Unless the live buffer is detected as being full + * thus potentially dropping off some older records, L7 and S0 records + * are contiguous in time for a user task context. The stitched buffer + * here represents maximum possible branch records, contiguous in time. + * + * Stored records Live records + * ------------------------------------------------^ + * | L0 | L0 | Newest | + * --------------------------------- | + * | L1 | L1 | | + * --------------------------------- | + * | L2 | L2 | | + * --------------------------------- | + * | L3 | L3 | | + * --------------------------------- | + * | L4 | L4 | nr_max + * --------------------------------- | + * | L5 | L5 | | + * --------------------------------- | + * | L6 | L6 | | + * --------------------------------- | + * | L7 | L7 | | + * --------------------------------- | + * | S0 | | | + * --------------------------------- | + * | S1 | | Oldest | + * ------------------------------------------------V + * | S2 | <----| + * ----------------- | + * | S3 | <----| Dropped off after nr_max + * ----------------- | + * | S4 | <----| + * ----------------- + */ +static int stitch_stored_live_entries(struct brbe_regset *stored, + struct brbe_regset *live, + int nr_stored, int nr_live, + int nr_max) +{ + int nr_move = min(nr_stored, nr_max - nr_live); + + /* Move the tail of the buffer to make room for the new entries */ + memmove(&stored[nr_live], &stored[0], nr_move * sizeof(*stored)); + + /* Copy the new entries into the head of the buffer */ + memcpy(&stored[0], &live[0], nr_live * sizeof(*stored)); + + /* Return the number of entries in the stitched buffer */ + return min(nr_live + nr_stored, nr_max); +} + +static int brbe_branch_save(struct brbe_regset *live, int nr_hw_entries) +{ + u64 brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + int nr_live; + + write_sysreg_s(brbfcr | BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + nr_live = capture_brbe_regset(live, nr_hw_entries); + + write_sysreg_s(brbfcr & ~BRBFCR_EL1_PAUSED, SYS_BRBFCR_EL1); + isb(); + + return nr_live; +} + +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ + struct arm64_perf_task_context *task_ctx = ctx; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + + nr_hw_entries = brbe_get_numrec(arm_pmu->reg_brbidr); + nr_live = brbe_branch_save(live, nr_hw_entries); + nr_store = task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + task_ctx->nr_brbe_records = nr_store; +} + +/* + * Generic perf branch filters supported on BRBE + * + * New branch filters need to be evaluated whether they could be supported on + * BRBE. This ensures that such branch filters would not just be accepted, to + * fail silently. PERF_SAMPLE_BRANCH_HV is a special case that is selectively + * supported only on platforms where kernel is in hyp mode. + */ +#define BRBE_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX | \ + PERF_SAMPLE_BRANCH_IN_TX | \ + PERF_SAMPLE_BRANCH_NO_TX | \ + PERF_SAMPLE_BRANCH_CALL_STACK | \ + PERF_SAMPLE_BRANCH_COUNTERS) + +#define BRBE_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER | \ + PERF_SAMPLE_BRANCH_KERNEL | \ + PERF_SAMPLE_BRANCH_HV | \ + PERF_SAMPLE_BRANCH_ANY | \ + PERF_SAMPLE_BRANCH_ANY_CALL | \ + PERF_SAMPLE_BRANCH_ANY_RETURN | \ + PERF_SAMPLE_BRANCH_IND_CALL | \ + PERF_SAMPLE_BRANCH_COND | \ + PERF_SAMPLE_BRANCH_IND_JUMP | \ + PERF_SAMPLE_BRANCH_CALL | \ + PERF_SAMPLE_BRANCH_NO_FLAGS | \ + PERF_SAMPLE_BRANCH_NO_CYCLES | \ + PERF_SAMPLE_BRANCH_TYPE_SAVE | \ + PERF_SAMPLE_BRANCH_HW_INDEX | \ + PERF_SAMPLE_BRANCH_PRIV_SAVE) + +#define BRBE_PERF_BRANCH_FILTERS (BRBE_ALLOWED_BRANCH_FILTERS | \ + BRBE_EXCLUDE_BRANCH_FILTERS) + +bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + u64 branch_type = event->attr.branch_sample_type; + + /* + * Ensure both perf branch filter allowed and exclude + * masks are always in sync with the generic perf ABI. + */ + BUILD_BUG_ON(BRBE_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1)); + + if (branch_type & ~BRBE_ALLOWED_BRANCH_FILTERS) { + pr_debug_once("requested branch filter not supported 0x%llx\n", branch_type); + return false; + } + + /* + * If the event does not have at least one of the privilege + * branch filters as in PERF_SAMPLE_BRANCH_PLM_ALL, the core + * perf will adjust its value based on perf event's existing + * privilege level via attr.exclude_[user|kernel|hv]. + * + * As event->attr.branch_sample_type might have been changed + * when the event reaches here, it is not possible to figure + * out whether the event originally had HV privilege request + * or got added via the core perf. Just report this situation + * once and continue ignoring if there are other instances. + */ + if ((branch_type & PERF_SAMPLE_BRANCH_HV) && !is_kernel_in_hyp_mode()) + pr_debug_once("hypervisor privilege filter not supported 0x%llx\n", branch_type); + + return true; +} + +int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) +{ + size_t size = sizeof(struct arm64_perf_task_context); + + arm_pmu->pmu.task_ctx_cache = kmem_cache_create("arm64_brbe_task_ctx", size, 0, 0, NULL); + if (!arm_pmu->pmu.task_ctx_cache) + return -ENOMEM; + return 0; +} + +void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) +{ + kmem_cache_destroy(arm_pmu->pmu.task_ctx_cache); +} + +static int brbe_attributes_probe(struct arm_pmu *armpmu, u32 brbe) +{ + u64 brbidr = read_sysreg_s(SYS_BRBIDR0_EL1); + int brbe_version, brbe_format, brbe_cc, brbe_nr; + + brbe_version = brbe; + brbe_format = brbe_get_format(brbidr); + brbe_cc = brbe_get_cc_bits(brbidr); + brbe_nr = brbe_get_numrec(brbidr); + armpmu->reg_brbidr = brbidr; + + if (!valid_brbe_version(brbe_version) || + !valid_brbe_format(brbe_format) || + !valid_brbe_cc(brbe_cc) || + !valid_brbe_nr(brbe_nr)) + return -EOPNOTSUPP; + return 0; +} + +void armv8pmu_branch_probe(struct arm_pmu *armpmu) +{ + u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1); + u32 brbe; + + /* + * BRBE implementation's branch entries cannot exceed maximum + * branch records supported at the ARM PMU level abstraction. + * Otherwise there is always a possibility of array overflow, + * while processing BRBE branch records. + */ + BUILD_BUG_ON(BRBE_BANK_MAX_ENTRIES > MAX_BRANCH_RECORDS); + + brbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT); + if (!brbe) + return; + + if (brbe_attributes_probe(armpmu, brbe)) + return; + + armpmu->has_branch_stack = 1; +} + +/* + * BRBE supports the following functional branch type filters while + * generating branch records. These branch filters can be enabled, + * either individually or as a group i.e ORing multiple filters + * with each other. + * + * BRBFCR_EL1_CONDDIR - Conditional direct branch + * BRBFCR_EL1_DIRCALL - Direct call + * BRBFCR_EL1_INDCALL - Indirect call + * BRBFCR_EL1_INDIRECT - Indirect branch + * BRBFCR_EL1_DIRECT - Direct branch + * BRBFCR_EL1_RTN - Subroutine return + */ +static u64 branch_type_to_brbfcr(int branch_type) +{ + u64 brbfcr = 0; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbfcr |= BRBFCR_EL1_BRANCH_FILTERS; + return brbfcr; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + brbfcr |= BRBFCR_EL1_INDCALL; + brbfcr |= BRBFCR_EL1_DIRCALL; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbfcr |= BRBFCR_EL1_RTN; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL) + brbfcr |= BRBFCR_EL1_INDCALL; + + if (branch_type & PERF_SAMPLE_BRANCH_COND) + brbfcr |= BRBFCR_EL1_CONDDIR; + + if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) + brbfcr |= BRBFCR_EL1_INDIRECT; + + if (branch_type & PERF_SAMPLE_BRANCH_CALL) + brbfcr |= BRBFCR_EL1_DIRCALL; + + return brbfcr & BRBFCR_EL1_CONFIG_MASK; +} + +/* + * BRBE supports the following privilege mode filters while generating + * branch records. + * + * BRBCR_ELx_E0BRE - EL0 branch records + * BRBCR_ELx_ExBRE - EL1/EL2 branch records + * + * BRBE also supports the following additional functional branch type + * filters while generating branch records. + * + * BRBCR_ELx_EXCEPTION - Exception + * BRBCR_ELx_ERTN - Exception return + */ +static u64 branch_type_to_brbcr(int branch_type) +{ + u64 brbcr = BRBCR_ELx_DEFAULT_TS; + + /* + * BRBE should be paused on PMU interrupt while tracing kernel + * space to stop capturing further branch records. Otherwise + * interrupt handler branch records might get into the samples + * which is not desired. + * + * BRBE need not be paused on PMU interrupt while tracing only + * the user space, because it will automatically be inside the + * prohibited region. But even after PMU overflow occurs, the + * interrupt could still take much more cycles, before it can + * be taken and by that time BRBE will have been overwritten. + * Hence enable pause on PMU interrupt mechanism even for user + * only traces as well. + */ + brbcr |= BRBCR_ELx_FZP; + + if (branch_type & PERF_SAMPLE_BRANCH_USER) + brbcr |= BRBCR_ELx_E0BRE; + + /* + * When running in the hyp mode, writing into BRBCR_EL1 + * actually writes into BRBCR_EL2 instead. Field E2BRE + * is also at the same position as E1BRE. + */ + if (branch_type & PERF_SAMPLE_BRANCH_KERNEL) + brbcr |= BRBCR_ELx_ExBRE; + + if (branch_type & PERF_SAMPLE_BRANCH_HV) { + if (is_kernel_in_hyp_mode()) + brbcr |= BRBCR_ELx_ExBRE; + } + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_CYCLES)) + brbcr |= BRBCR_ELx_CC; + + if (!(branch_type & PERF_SAMPLE_BRANCH_NO_FLAGS)) + brbcr |= BRBCR_ELx_MPRED; + + /* + * The exception and exception return branches could be + * captured, irrespective of the perf event's privilege. + * If the perf event does not have enough privilege for + * a given exception level, then addresses which falls + * under that exception level will be reported as zero + * for the captured branch record, creating source only + * or target only records. + */ + if (branch_type & PERF_SAMPLE_BRANCH_ANY) { + brbcr |= BRBCR_ELx_EXCEPTION; + brbcr |= BRBCR_ELx_ERTN; + } + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) + brbcr |= BRBCR_ELx_EXCEPTION; + + if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + brbcr |= BRBCR_ELx_ERTN; + + return brbcr & BRBCR_ELx_CONFIG_MASK; +} + +void armv8pmu_branch_enable(struct arm_pmu *arm_pmu) +{ + struct pmu_hw_events *cpuc = this_cpu_ptr(arm_pmu->hw_events); + u64 brbfcr, brbcr; + + if (!(cpuc->branch_sample_type && cpuc->branch_users)) + return; + + /* + * BRBE gets configured with a new mismatched branch sample + * type request, overriding any previous branch filters. + */ + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbfcr &= ~BRBFCR_EL1_CONFIG_MASK; + brbfcr |= branch_type_to_brbfcr(cpuc->branch_sample_type); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); + + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbcr &= ~BRBCR_ELx_CONFIG_MASK; + brbcr |= branch_type_to_brbcr(cpuc->branch_sample_type); + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + isb(); +} + +void armv8pmu_branch_disable(void) +{ + u64 brbfcr, brbcr; + + brbcr = read_sysreg_s(SYS_BRBCR_EL1); + brbfcr = read_sysreg_s(SYS_BRBFCR_EL1); + brbcr &= ~(BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE); + brbfcr |= BRBFCR_EL1_PAUSED; + write_sysreg_s(brbcr, SYS_BRBCR_EL1); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); + isb(); +} + +static void brbe_set_perf_entry_type(struct perf_branch_entry *entry, u64 brbinf) +{ + int brbe_type = brbe_get_type(brbinf); + + switch (brbe_type) { + case BRBINFx_EL1_TYPE_DIRECT_UNCOND: + entry->type = PERF_BR_UNCOND; + break; + case BRBINFx_EL1_TYPE_INDIRECT: + entry->type = PERF_BR_IND; + break; + case BRBINFx_EL1_TYPE_DIRECT_LINK: + entry->type = PERF_BR_CALL; + break; + case BRBINFx_EL1_TYPE_INDIRECT_LINK: + entry->type = PERF_BR_IND_CALL; + break; + case BRBINFx_EL1_TYPE_RET: + entry->type = PERF_BR_RET; + break; + case BRBINFx_EL1_TYPE_DIRECT_COND: + entry->type = PERF_BR_COND; + break; + case BRBINFx_EL1_TYPE_CALL: + entry->type = PERF_BR_CALL; + break; + case BRBINFx_EL1_TYPE_TRAP: + entry->type = PERF_BR_SYSCALL; + break; + case BRBINFx_EL1_TYPE_ERET: + entry->type = PERF_BR_ERET; + break; + case BRBINFx_EL1_TYPE_IRQ: + entry->type = PERF_BR_IRQ; + break; + case BRBINFx_EL1_TYPE_DEBUG_HALT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_HALT; + break; + case BRBINFx_EL1_TYPE_SERROR: + entry->type = PERF_BR_SERROR; + break; + case BRBINFx_EL1_TYPE_INSN_DEBUG: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_INST; + break; + case BRBINFx_EL1_TYPE_DATA_DEBUG: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_DATA; + break; + case BRBINFx_EL1_TYPE_ALIGN_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_ALGN; + break; + case BRBINFx_EL1_TYPE_INSN_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_INST; + break; + case BRBINFx_EL1_TYPE_DATA_FAULT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_NEW_FAULT_DATA; + break; + case BRBINFx_EL1_TYPE_FIQ: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_FIQ; + break; + case BRBINFx_EL1_TYPE_DEBUG_EXIT: + entry->type = PERF_BR_EXTEND_ABI; + entry->new_type = PERF_BR_ARM64_DEBUG_EXIT; + break; + default: + pr_warn_once("%d - unknown branch type captured\n", brbe_type); + entry->type = PERF_BR_UNKNOWN; + break; + } +} + +static int brbe_get_perf_priv(u64 brbinf) +{ + int brbe_el = brbe_get_el(brbinf); + + switch (brbe_el) { + case BRBINFx_EL1_EL_EL0: + return PERF_BR_PRIV_USER; + case BRBINFx_EL1_EL_EL1: + return PERF_BR_PRIV_KERNEL; + case BRBINFx_EL1_EL_EL2: + if (is_kernel_in_hyp_mode()) + return PERF_BR_PRIV_KERNEL; + return PERF_BR_PRIV_HV; + default: + pr_warn_once("%d - unknown branch privilege captured\n", brbe_el); + return PERF_BR_PRIV_UNKNOWN; + } +} + +static void capture_brbe_flags(struct perf_branch_entry *entry, struct perf_event *event, + u64 brbinf) +{ + brbe_set_perf_entry_type(entry, brbinf); + + if (!branch_sample_no_cycles(event)) + entry->cycles = brbe_get_cycles(brbinf); + + if (!branch_sample_no_flags(event)) { + /* + * BRBINFx_EL1.LASTFAILED indicates that a TME transaction failed (or + * was cancelled) prior to this record, and some number of records + * prior to this one, may have been generated during an attempt to + * execute the transaction. + */ + entry->abort = brbe_get_lastfailed(brbinf); + + /* + * All these information (i.e transaction state and mispredicts) + * are available for source only and complete branch records. + */ + if (brbe_record_is_complete(brbinf) || + brbe_record_is_source_only(brbinf)) { + entry->mispred = brbe_get_mispredict(brbinf); + entry->predicted = !entry->mispred; + entry->in_tx = brbe_get_in_tx(brbinf); + } + + /* + * Currently TME feature is neither implemented in any hardware + * nor it is being supported in the kernel. Just warn here once + * if TME related information shows up rather unexpectedly. + */ + if (entry->abort || entry->in_tx) + pr_warn_once("Unknown transaction states %d %d\n", + entry->abort, entry->in_tx); + } + + /* + * All these information (i.e branch privilege level) are + * available for target only and complete branch records. + */ + if (brbe_record_is_complete(brbinf) || + brbe_record_is_target_only(brbinf)) + entry->priv = brbe_get_perf_priv(brbinf); +} + +static void brbe_regset_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event, + struct brbe_regset *regset, int idx) +{ + struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx]; + u64 brbinf = regset[idx].brbinf; + + perf_clear_branch_entry_bitfields(entry); + if (brbe_record_is_complete(brbinf)) { + entry->from = regset[idx].brbsrc; + entry->to = regset[idx].brbtgt; + } else if (brbe_record_is_source_only(brbinf)) { + entry->from = regset[idx].brbsrc; + entry->to = 0; + } else if (brbe_record_is_target_only(brbinf)) { + entry->from = 0; + entry->to = regset[idx].brbtgt; + } + capture_brbe_flags(entry, event, brbinf); +} + +static void process_branch_entries(struct pmu_hw_events *cpuc, struct perf_event *event, + struct brbe_regset *regset, int nr_regset) +{ + int idx; + + for (idx = 0; idx < nr_regset; idx++) + brbe_regset_branch_entries(cpuc, event, regset, idx); + + cpuc->branches->branch_stack.nr = nr_regset; + cpuc->branches->branch_stack.hw_idx = -1ULL; +} + +void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) +{ + struct arm64_perf_task_context *task_ctx = event->pmu_ctx->task_ctx_data; + struct brbe_regset live[BRBE_MAX_ENTRIES]; + int nr_live, nr_store, nr_hw_entries; + + nr_hw_entries = brbe_get_numrec(cpuc->percpu_pmu->reg_brbidr); + nr_live = capture_brbe_regset(live, nr_hw_entries); + if (event->ctx->task) { + nr_store = task_ctx->nr_brbe_records; + nr_store = stitch_stored_live_entries(task_ctx->store, live, nr_store, + nr_live, nr_hw_entries); + process_branch_entries(cpuc, event, task_ctx->store, nr_store); + task_ctx->nr_brbe_records = 0; + } else { + process_branch_entries(cpuc, event, live, nr_live); + } +} + +static bool filter_branch_privilege(struct perf_branch_entry *entry, u64 branch_sample_type) +{ + /* + * Retrieve the privilege level branch filter requests + * from the overall branch sample type. + */ + branch_sample_type &= PERF_SAMPLE_BRANCH_PLM_ALL; + + /* + * The privilege information do not always get captured + * successfully for given BRBE branch record. Hence the + * entry->priv could be analyzed for filtering when the + * information has really been captured. + */ + if (entry->priv) { + if (entry->priv == PERF_BR_PRIV_USER) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) + return false; + } + + if (entry->priv == PERF_BR_PRIV_KERNEL) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)) { + if (!is_kernel_in_hyp_mode()) + return false; + + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_HV)) + return false; + } + } + + if (entry->priv == PERF_BR_PRIV_HV) { + /* + * PERF_SAMPLE_BRANCH_HV request actually gets configured + * similar to PERF_SAMPLE_BRANCH_KERNEL when kernel is in + * hyp mode. In that case PERF_BR_PRIV_KERNEL should have + * been reported for corresponding branch records. + */ + pr_warn_once("PERF_BR_PRIV_HV should not have been captured\n"); + } + return true; + } + + if (is_ttbr0_addr(entry->from) || is_ttbr0_addr(entry->to)) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_USER)) + return false; + } + + if (is_ttbr1_addr(entry->from) || is_ttbr1_addr(entry->to)) { + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_KERNEL)) { + if (!is_kernel_in_hyp_mode()) + return false; + + if (!(branch_sample_type & PERF_SAMPLE_BRANCH_HV)) + return false; + } + } + return true; +} + +static bool filter_branch_record(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct perf_branch_entry *entry) +{ + struct arm64_perf_task_context *task_ctx = event->pmu_ctx->task_ctx_data; + u64 branch_sample = event->attr.branch_sample_type; + DECLARE_BITMAP(entry_type_mask, PERF_BR_ARM64_MAX); + DECLARE_BITMAP(event_type_mask, PERF_BR_ARM64_MAX); + + if (!filter_branch_privilege(entry, branch_sample)) + return false; + + if (entry->type == PERF_BR_UNKNOWN) + return true; + + if (branch_sample & PERF_SAMPLE_BRANCH_ANY) + return true; + + /* + * Both PMU and event branch filters match here except the privilege + * filters - which have already been tested earlier. Skip functional + * branch type test and just return success. + */ + if ((cpuc->branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) == + event->attr.branch_sample_type) + return true; + + branch_entry_mask(entry, entry_type_mask); + if (task_ctx) + return bitmap_subset(entry_type_mask, task_ctx->br_type_mask, PERF_BR_ARM64_MAX); + + prepare_event_branch_type_mask(event, event_type_mask); + return bitmap_subset(entry_type_mask, event_type_mask, PERF_BR_ARM64_MAX); +} + +void arm64_filter_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct branch_records *event_records) +{ + struct perf_branch_entry *entry; + int idx, count = 0; + + memset(event_records, 0, sizeof(*event_records)); + for (idx = 0; idx < cpuc->branches->branch_stack.nr; idx++) { + entry = &cpuc->branches->branch_entries[idx]; + if (!filter_branch_record(cpuc, event, entry)) + continue; + + memcpy(&event_records->branch_entries[count], entry, sizeof(*entry)); + count++; + } + event_records->branch_stack.nr = count; +} diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index cede85983f028..7555d8388b519 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -25,6 +25,7 @@ #include #include +#include "arm_pmuv3_branch.h" /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -833,14 +834,70 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu) /* Enable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_enable(cpu_pmu); } static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_disable(); + /* Disable all counters */ armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); } +static void read_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct perf_sample_data *data, + bool *branch_captured) +{ + struct branch_records event_records; + + /* + * CPU specific branch records buffer must have been allocated already + * for the hardware records to be captured and processed further. + */ + if (WARN_ON(!cpuc->branches)) + return; + + /* + * When the current task context does not match with the PMU overflown + * event, the captured branch records here cannot be co-related to the + * overflowed event. Report to the user - as if no branch records have + * been captured, and flush branch records. + */ + if (event->ctx->task && (cpuc->branch_context != event->ctx)) + return; + + /* + * Read the branch records from the hardware once after the PMU IRQ + * has been triggered but subsequently same records can be used for + * other events that might have been overflowed simultaneously thus + * saving much CPU cycles. + */ + if (!*branch_captured) { + armv8pmu_branch_read(cpuc, event); + *branch_captured = true; + } + + /* + * Filter captured branch records + * + * PMU captured branch records would contain samples applicable for + * the aggregated branch filters, for all events that got scheduled + * on this PMU simultaneously. Hence these branch records need to + * be filtered first so that each individual event get samples they + * had requested originally. + */ + if (cpuc->branch_sample_type != event->attr.branch_sample_type) { + arm64_filter_branch_records(cpuc, event, &event_records); + perf_sample_save_brstack(data, event, &event_records.branch_stack, NULL); + return; + } + perf_sample_save_brstack(data, event, &cpuc->branches->branch_stack, NULL); +} + static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; @@ -848,6 +905,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; + bool branch_captured = false; /* * Get and reset the IRQ flags @@ -891,6 +949,13 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + /* + * PMU IRQ should remain asserted until all branch records + * are captured and processed into struct perf_sample_data. + */ + if (has_branch_stack(event) && cpu_pmu->has_branch_stack) + read_branch_records(cpuc, event, &data, &branch_captured); + /* * Perf event overflow will queue the processing of the event as * an irq_work which will be taken care of in the handling of @@ -900,6 +965,8 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) cpu_pmu->disable(event); } armv8pmu_start(cpu_pmu); + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_stack_reset(); return IRQ_HANDLED; } @@ -990,6 +1057,40 @@ static int armv8pmu_user_event_idx(struct perf_event *event) return event->hw.idx; } +static bool armv8pmu_branch_stack_init(struct perf_event *event) +{ + if (armv8pmu_branch_attr_valid(event)) { + /* + * If a task gets scheduled out, the current branch records + * get saved in the task's context data, which can be later + * used to fill in the records upon an event overflow. Let's + * enable PERF_ATTACH_TASK_DATA in 'event->attach_state' for + * all branch stack sampling perf events. + */ + event->attach_state |= PERF_ATTACH_TASK_DATA; + return true; + } + return false; +} + +static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + void *task_ctx = pmu_ctx->task_ctx_data; + + if (armpmu->has_branch_stack) { + /* Save branch records in task_ctx on sched out */ + if (task_ctx && !sched_in) { + armv8pmu_branch_save(armpmu, task_ctx); + return; + } + + /* Reset branch records on sched in */ + if (sched_in) + armv8pmu_branch_stack_reset(); + } +} + /* * Add an event filter to a given event. */ @@ -1082,6 +1183,9 @@ static void armv8pmu_reset(void *info) pmcr |= ARMV8_PMU_PMCR_LP; armv8pmu_pmcr_write(pmcr); + + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_stack_reset(); } static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, @@ -1235,6 +1339,41 @@ static void __armv8pmu_probe_pmu(void *info) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; + + /* + * BRBE is being probed on a single cpu for a + * given PMU. The remaining cpus, are assumed + * to have the exact same BRBE implementation. + */ + armv8pmu_branch_probe(cpu_pmu); +} + +static int branch_records_alloc(struct arm_pmu *armpmu) +{ + struct branch_records __percpu *records; + int cpu; + + records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL); + if (!records) + return -ENOMEM; + + /* + * percpu memory allocated for 'records' gets completely consumed + * here, and never required to be freed up later. So permanently + * losing access to this anchor i.e 'records' is acceptable. + * + * Otherwise this allocation handle would have to be saved up for + * free_percpu() release later if required. + */ + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events_cpu; + struct branch_records *records_cpu; + + events_cpu = per_cpu_ptr(armpmu->hw_events, cpu); + records_cpu = per_cpu_ptr(records, cpu); + events_cpu->branches = records_cpu; + } + return 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1251,7 +1390,21 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) if (ret) return ret; - return probe.present ? 0 : -ENODEV; + if (!probe.present) + return -ENODEV; + + if (cpu_pmu->has_branch_stack) { + ret = armv8pmu_task_ctx_cache_alloc(cpu_pmu); + if (ret) + return ret; + + ret = branch_records_alloc(cpu_pmu); + if (ret) { + armv8pmu_task_ctx_cache_free(cpu_pmu); + return ret; + } + } + return 0; } static void armv8pmu_disable_user_access_ipi(void *unused) @@ -1311,6 +1464,11 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; + cpu_pmu->sched_task = armv8pmu_sched_task; + cpu_pmu->branch_stack_init = armv8pmu_branch_stack_init; + cpu_pmu->branch_stack_add = armv8pmu_branch_stack_add; + cpu_pmu->branch_stack_del = armv8pmu_branch_stack_del; + cpu_pmu->branch_stack_reset = armv8pmu_branch_stack_reset; cpu_pmu->name = name; cpu_pmu->map_event = map_event; diff --git a/drivers/perf/arm_pmuv3_branch.h b/drivers/perf/arm_pmuv3_branch.h new file mode 100644 index 0000000000000..4142da38cfb54 --- /dev/null +++ b/drivers/perf/arm_pmuv3_branch.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Branch Record Buffer Extension Helpers. + * + * Copyright (C) 2022-2023 ARM Limited + * + * Author: Anshuman Khandual + */ +#include + +#ifdef CONFIG_ARM64_BRBE +void armv8pmu_branch_stack_add(struct perf_event *event, struct pmu_hw_events *cpuc); +void armv8pmu_branch_stack_del(struct perf_event *event, struct pmu_hw_events *cpuc); +void armv8pmu_branch_stack_reset(void); +void armv8pmu_branch_probe(struct arm_pmu *arm_pmu); +bool armv8pmu_branch_attr_valid(struct perf_event *event); +void armv8pmu_branch_enable(struct arm_pmu *arm_pmu); +void armv8pmu_branch_disable(void); +void armv8pmu_branch_read(struct pmu_hw_events *cpuc, + struct perf_event *event); +void arm64_filter_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct branch_records *event_records); +void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx); +int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu); +void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu); +#else +static inline void armv8pmu_branch_stack_add(struct perf_event *event, struct pmu_hw_events *cpuc) +{ +} + +static inline void armv8pmu_branch_stack_del(struct perf_event *event, struct pmu_hw_events *cpuc) +{ +} + +static inline void armv8pmu_branch_stack_reset(void) +{ +} + +static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) +{ +} + +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); + return false; +} + +static inline void armv8pmu_branch_enable(struct arm_pmu *arm_pmu) +{ +} + +static inline void armv8pmu_branch_disable(void) +{ +} + +static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); +} + +static inline void arm64_filter_branch_records(struct pmu_hw_events *cpuc, + struct perf_event *event, + struct branch_records *event_records) +{ + +} + +static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) +{ +} + +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) +{ + return 0; +} + +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) +{ +} +#endif diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index fc02950059d5f..504613645658c 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -147,6 +147,11 @@ struct arm_pmu { /* store the PMMIR_EL1 to expose slots */ u64 reg_pmmir; +#ifdef CONFIG_ARM64_BRBE + /* store the BRBIDR0_EL1 capturing attributes */ + u64 reg_brbidr; +#endif + /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; }; From e6c537f6577f195136c74ea8d2098e2199e7c4c9 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 13 Jun 2024 11:47:28 +0530 Subject: [PATCH 71/95] KVM: arm64: nvhe: Disable branch generation in nVHE guests [Upstream commit d7567e9b9ba53861390830ee18b9fb2035ca81c4] Cc: Marc Zyngier Cc: Oliver Upton Cc: James Morse Cc: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org CC: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/kvm/debug.c | 5 +++++ arch/arm64/kvm/hyp/nvhe/debug-sr.c | 31 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index cbbe1e6868ad4..c5d9f1a4aae7c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -464,6 +464,7 @@ struct kvm_host_data { u64 trfcr_el1; /* Values of trap registers for the host before guest entry. */ u64 mdcr_el2; + u64 brbcr_el1; } host_debug_state; }; @@ -729,6 +730,8 @@ struct kvm_vcpu_arch { #define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5)) /* Save TRBE context if active */ #define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6)) +/* Save BRBE context if active */ +#define DEBUG_STATE_SAVE_BRBE __vcpu_single_flag(iflags, BIT(7)) /* Physical CPU not in supported_cpus */ #define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2)) diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 8725291cb00a1..99f85d8acbf36 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -335,10 +335,15 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu) if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) && !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P)) vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE); + + /* Check if we have BRBE implemented and available at the host */ + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_BRBE_SHIFT)) + vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_BRBE); } void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu) { vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE); vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE); + vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_BRBE); } diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 53efda0235cfe..97e861df1b451 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -79,6 +79,32 @@ static void __debug_restore_trace(u64 trfcr_el1) write_sysreg_el1(trfcr_el1, SYS_TRFCR); } +static void __debug_save_brbe(u64 *brbcr_el1) +{ + *brbcr_el1 = 0; + + /* Check if the BRBE is enabled */ + if (!(read_sysreg_el1(SYS_BRBCR) & (BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE))) + return; + + /* + * Prohibit branch record generation while we are in guest. + * Since access to BRBCR_EL1 is trapped, the guest can't + * modify the filtering set by the host. + */ + *brbcr_el1 = read_sysreg_el1(SYS_BRBCR); + write_sysreg_el1(0, SYS_BRBCR); +} + +static void __debug_restore_brbe(u64 brbcr_el1) +{ + if (!brbcr_el1) + return; + + /* Restore BRBE controls */ + write_sysreg_el1(brbcr_el1, SYS_BRBCR); +} + void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ @@ -87,6 +113,9 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); + /* Disable BRBE branch records */ + if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_BRBE)) + __debug_save_brbe(host_data_ptr(host_debug_state.brbcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -100,6 +129,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); + if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_BRBE)) + __debug_restore_brbe(*host_data_ptr(host_debug_state.brbcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) From 0e71d35ef4937fc5e611a8f48c30eab3c7270e47 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 5 Apr 2024 08:16:37 +0530 Subject: [PATCH 72/95] perf: test: Speed up running brstack test on an Arm model commit b599a5d92686e25d082692744b8c8ae7b6f090ee openEuler The test runs quite slowly in the model, so replace "xargs -n1" with "tr ' ' '\n'" which does the same thing but in single digit minutes instead of double digit minutes. Also reduce the number of loops in the test application. Unfortunately this causes intermittent failures on x86, presumably because the sampling interval is too big to pickup any loops, so keep it the same there. Cc: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: James Clark Signed-off-by: Anshuman Khandual Signed-off-by: Junhao He Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- tools/perf/tests/shell/test_brstack.sh | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 09908d71c9941..283c9a902bbfd 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -12,7 +12,6 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev fi TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) -TESTPROG="perf test -w brstack" cleanup() { rm -rf $TMPDIR @@ -20,11 +19,21 @@ cleanup() { trap cleanup EXIT TERM INT +is_arm64() { + uname -m | grep -q aarch64 +} + +if is_arm64; then + TESTPROG="perf test -w brstack 5000" +else + TESTPROG="perf test -w brstack" +fi + test_user_branches() { echo "Testing user branch stack sampling" perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1 - perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script + perf script -i $TMPDIR/perf.data --fields brstacksym | tr ' ' '\n' > $TMPDIR/perf.script # example of branch entries: # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL @@ -53,7 +62,7 @@ test_filter() { echo "Testing branch stack filtering permutation ($test_filter_filter,$test_filter_expect)" perf record -o $TMPDIR/perf.data --branch-filter $test_filter_filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 - perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script + perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' > $TMPDIR/perf.script # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) From 626fa7d302c45e4d620e5f5ab7480e444806deaa Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 5 Apr 2024 08:16:38 +0530 Subject: [PATCH 73/95] perf: test: Remove empty lines from branch filter test output commit e9c94f8a622ba7871af155a85ffa467133d72706 openEuler In the perf script command, spaces are turned into newlines. But when there is a double space this results in empty lines which fail the following inverse grep test, so strip the empty lines. Cc: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: James Clark Signed-off-by: Anshuman Khandual Signed-off-by: Junhao He Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- tools/perf/tests/shell/test_brstack.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 283c9a902bbfd..b1fe29bb71b3e 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -62,7 +62,7 @@ test_filter() { echo "Testing branch stack filtering permutation ($test_filter_filter,$test_filter_expect)" perf record -o $TMPDIR/perf.data --branch-filter $test_filter_filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1 - perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' > $TMPDIR/perf.script + perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' | sed '/^[[:space:]]*$/d' > $TMPDIR/perf.script # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) From 63f6f3dc55e2bc5cbc7a8ba61004c55272129ed5 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 5 Apr 2024 08:16:39 +0530 Subject: [PATCH 74/95] perf: test: Extend branch stack sampling test for Arm64 BRBE commit faba2f172560d006b762ba5499d14ae65b693be5 openEuler Add Arm64 BRBE-specific testing to the existing branch stack sampling test. The test currently passes on the Arm FVP RevC model, but no hardware has been tested yet. Cc: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Co-developed-by: German Gomez Signed-off-by: German Gomez Signed-off-by: James Clark Signed-off-by: Anshuman Khandual Signed-off-by: Junhao He Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- tools/perf/tests/builtin-test.c | 1 + tools/perf/tests/shell/test_brstack.sh | 42 ++++++++++++++++++++++++-- tools/perf/tests/tests.h | 1 + tools/perf/tests/workloads/Build | 2 ++ tools/perf/tests/workloads/traploop.c | 39 ++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 tools/perf/tests/workloads/traploop.c diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cb6f1dd00dc48..7d9e0a311ef91 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -139,6 +139,7 @@ static struct test_workload *workloads[] = { &workload__sqrtloop, &workload__brstack, &workload__datasym, + &workload__traploop }; static int num_subtests(const struct test_suite *t) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index b1fe29bb71b3e..b0c96bfae3043 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -47,12 +47,43 @@ test_user_branches() { grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script + + if is_arm64; then + # in arm64 with BRBE, we get IRQ entries that correspond + # to any point in the process + grep -m1 "/IRQ/" $TMPDIR/perf.script + fi set +x # some branch types are still not being tested: # IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX } +test_arm64_trap_eret_branches() { + echo "Testing trap & eret branches (arm64 brbe)" + perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- \ + perf test -w traploop 250 + perf script -i $TMPDIR/perf.data --fields brstacksym | tr ' ' '\n' > $TMPDIR/perf.script + set -x + # BRBINF.TYPE == TRAP are mapped to PERF_BR_SYSCALL by the BRBE driver + grep -E -m1 "^trap_bench\+[^ ]*/\[unknown\][^ ]*/SYSCALL/" $TMPDIR/perf.script + grep -E -m1 "^\[unknown\][^ ]*/trap_bench\+[^ ]*/ERET/" $TMPDIR/perf.script + set +x +} + +test_arm64_kernel_branches() { + echo "Testing kernel branches (arm64 brbe)" + # skip if perf doesn't have enough privileges + if ! perf record --branch-filter any,k -o- -- true > /dev/null; then + echo "[skipped: not enough privileges]" + return 0 + fi + perf record -o $TMPDIR/perf.data --branch-filter any,k -- uname -a + perf script -i $TMPDIR/perf.data --fields brstack | tr ' ' '\n' > $TMPDIR/perf.script + grep -E -m1 "0xffff[0-9a-f]{12}" $TMPDIR/perf.script + ! egrep -E -m1 "0x0000[0-9a-f]{12}" $TMPDIR/perf.script +} + # first argument is the argument passed to "--branch-stack ,save_type,u" # second argument are the expected branch types for the given filter test_filter() { @@ -75,11 +106,16 @@ set -e test_user_branches -test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ" +if is_arm64; then + test_arm64_trap_eret_branches + test_arm64_kernel_branches +fi + +test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|FAULT_DATA|FAULT_INST" test_filter "call" "CALL|SYSCALL" test_filter "cond" "COND" test_filter "any_ret" "RET|COND_RET|SYSRET|ERET" test_filter "call,cond" "CALL|SYSCALL|COND" -test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND" -test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET" +test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND|FAULT_DATA|FAULT_INST" +test_filter "cond,any_call,any_ret" "COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET|FAULT_DATA|FAULT_INST" diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b394f3ac2d667..c65455da4eafc 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -205,6 +205,7 @@ DECLARE_WORKLOAD(leafloop); DECLARE_WORKLOAD(sqrtloop); DECLARE_WORKLOAD(brstack); DECLARE_WORKLOAD(datasym); +DECLARE_WORKLOAD(traploop); extern const char *dso_to_test; diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build index a1f34d5861e36..a9dc93d8468be 100644 --- a/tools/perf/tests/workloads/Build +++ b/tools/perf/tests/workloads/Build @@ -6,8 +6,10 @@ perf-y += leafloop.o perf-y += sqrtloop.o perf-y += brstack.o perf-y += datasym.o +perf-y += traploop.o CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE +CFLAGS_traploop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE diff --git a/tools/perf/tests/workloads/traploop.c b/tools/perf/tests/workloads/traploop.c new file mode 100644 index 0000000000000..7dac94897e49c --- /dev/null +++ b/tools/perf/tests/workloads/traploop.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "../tests.h" + +#define BENCH_RUNS 999999 + +static volatile int cnt; + +#ifdef __aarch64__ +static void trap_bench(void) +{ + unsigned long val; + + asm("mrs %0, ID_AA64ISAR0_EL1" : "=r" (val)); /* TRAP + ERET */ +} +#else +static void trap_bench(void) +{ + +} +#endif + +static int traploop(int argc, const char **argv) +{ + int num_loops = BENCH_RUNS; + + if (argc > 0) + num_loops = atoi(argv[0]); + + while (1) { + if ((cnt++) > num_loops) + break; + + trap_bench(); + } + return 0; +} + +DEFINE_WORKLOAD(traploop); From 43e26ec683fb67ed9cdec6a1b9ea9b43cc86cf9e Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Wed, 12 Mar 2025 09:38:58 +0800 Subject: [PATCH 75/95] perf: Configure BRBE correctly on VHE host commit 56a824d811e8debed95e497201217984cd51066a openEuler The BRBE support backported of v18 didn't configure the BRBCR_EL1 correctly which will miss some key functions like FZP during sampling. In VHE mode with MDCR_EL2.HPMN set to PMCR_EL0.N, the counters are controlled by BRBCR_EL1 rather than BRBCR_EL2 (which writes to BRBCR_EL1 are redirected to). Use the same value for both register except keep EL1 and EL0 recording disabled in guests. The fix is backported from the v19 version [1]. [1] https://lore.kernel.org/linux-arm-kernel/ 20250202-arm-brbe-v19-v19-0-1c1300802385@kernel.org/ T/#m887b1dcf7f6784b7293626952052f27b68784beb Fixes: 6c6848e7e00c ("drivers: perf: arm_pmuv3: Enable branch stack sampling via FEAT_BRBE") Signed-off-by: Yicong Yang Signed-off-by: Qizhi Zhang Signed-off-by: Slim6882 Signed-off-by: yeyiyang <850219375@qq.com> Signed-off-by: WangYuli --- drivers/perf/arm_brbe.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/perf/arm_brbe.c b/drivers/perf/arm_brbe.c index d795e8fd646fc..22e4828feec83 100644 --- a/drivers/perf/arm_brbe.c +++ b/drivers/perf/arm_brbe.c @@ -874,6 +874,16 @@ void armv8pmu_branch_enable(struct arm_pmu *arm_pmu) brbcr &= ~BRBCR_ELx_CONFIG_MASK; brbcr |= branch_type_to_brbcr(cpuc->branch_sample_type); write_sysreg_s(brbcr, SYS_BRBCR_EL1); + + /* + * In VHE mode with MDCR_EL2.HPMN set to PMCR_EL0.N, the counters are + * controlled by BRBCR_EL1 rather than BRBCR_EL2 (which writes to + * BRBCR_EL1 are redirected to). Use the same value for both register + * except keep EL1 and EL0 recording disabled in guests. + */ + if (is_kernel_in_hyp_mode()) + write_sysreg_s(brbcr & ~(BRBCR_ELx_ExBRE | BRBCR_ELx_E0BRE), SYS_BRBCR_EL12); + isb(); } @@ -886,6 +896,11 @@ void armv8pmu_branch_disable(void) brbcr &= ~(BRBCR_ELx_E0BRE | BRBCR_ELx_ExBRE); brbfcr |= BRBFCR_EL1_PAUSED; write_sysreg_s(brbcr, SYS_BRBCR_EL1); + + /* See the comment in armv8pmu_branch_enable() */ + if (is_kernel_in_hyp_mode()) + write_sysreg_s(brbcr, SYS_BRBCR_EL12); + write_sysreg_s(brbfcr, SYS_BRBFCR_EL1); isb(); } From 6f006a2827a43915e5fcb68ae8ef79f208af588f Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Mon, 1 Jan 2024 06:32:40 +0800 Subject: [PATCH 76/95] cpuinspect: add CPU-inspect infrastructure commit d91c6db8ca896824da23aab69dde9ce289222299 openEuler This adds the CPU-inspect infrastructure. CPU-inspect is designed to provide a framework for early detection of SDC by proactively executing CPU inspection test cases. Silent Data Corruption (SDC), sometimes referred to as Silent Data Error (SDE), is an industry-wide issue impacting not only long-protected memory, storage, and networking, but also computer CPUs. As with software issues, hardware-induced SDC can contribute to data loss and corruption. An SDC occurs when an impacted CPU inadvertently causes errors in the data it processes. For example, an impacted CPU might miscalculate data (i.e., 1+1=3). There may be no indication of these computational errors unless the software systematically checks for errors [1]. SDC issues have been around for many years, but as chips have become more advanced and compact in size, the transistors and lines have become so tiny that small electrical fluctuations can cause errors. Most of these errors are caused by defects during manufacturing and are screened out by the vendors; others are caught by hardware error detection or correction. However, some errors go undetected by hardware; therefore only detection software can protect against such errors [1]. [1] https://support.google.com/cloud/answer/10759085 To use CPU-inspect, you need to load at least one inspector (the driver that specifically executes the CPU inspection code) Here is an example using CPU-inspect: # Set the cpumask of CPU-inspect to 10-20 echo 10-20 > /sys/devices/system/cpu/cpuinspect/cpumask # set the max cpu utility of inspectiono threads to 50% echo 50 > /sys/devices/system/cpu/cpuinspect/cpu_utility # start the CPU inspection task echo 1 > /sys/devices/system/cpu/cpuinspect/start_patrol # Check the result to see if some faulty cpu are found cat /sys/devices/system/cpu/cpuinspect/result In addition to being readable, the 'result' file in cpuinspect can also be polled. The user that use poll() to monitor 'result' will return when a faulty CPU is found or the inspection task is completed. Signed-off-by: Yu Liao Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/cpuinspect/Kconfig | 13 ++ drivers/cpuinspect/Makefile | 6 + drivers/cpuinspect/cpuinspect.c | 170 ++++++++++++++++++++ drivers/cpuinspect/cpuinspect.h | 46 ++++++ drivers/cpuinspect/inspector.c | 124 ++++++++++++++ drivers/cpuinspect/sysfs.c | 276 ++++++++++++++++++++++++++++++++ include/linux/cpuinspect.h | 40 +++++ 9 files changed, 678 insertions(+) create mode 100644 drivers/cpuinspect/Kconfig create mode 100644 drivers/cpuinspect/Makefile create mode 100644 drivers/cpuinspect/cpuinspect.c create mode 100644 drivers/cpuinspect/cpuinspect.h create mode 100644 drivers/cpuinspect/inspector.c create mode 100644 drivers/cpuinspect/sysfs.c create mode 100644 include/linux/cpuinspect.h diff --git a/drivers/Kconfig b/drivers/Kconfig index efb66e25fa2dd..83ff63c256b71 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -243,4 +243,6 @@ source "drivers/hte/Kconfig" source "drivers/cdx/Kconfig" +source "drivers/cpuinspect/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 1bec7819a837a..99dbd33629138 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -133,6 +133,7 @@ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_PM_OPP) += opp/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ +obj-$(CONFIG_CPU_INSPECT) += cpuinspect/ obj-y += mmc/ obj-y += ufs/ obj-$(CONFIG_MEMSTICK) += memstick/ diff --git a/drivers/cpuinspect/Kconfig b/drivers/cpuinspect/Kconfig new file mode 100644 index 0000000000000..548412ba35782 --- /dev/null +++ b/drivers/cpuinspect/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "CPU Inspect" + +config CPU_INSPECT + tristate "CPU inspect support" + depends on SYSFS && 64BIT + default n + help + CPU-inspect is designed to provide a framework for early detection + of SDC by proactively executing CPU inspection test cases. It + includes modular inspector that can be swapped during runtime. + +endmenu diff --git a/drivers/cpuinspect/Makefile b/drivers/cpuinspect/Makefile new file mode 100644 index 0000000000000..64dcaf3e4e444 --- /dev/null +++ b/drivers/cpuinspect/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for cpuinspect. +# +obj-$(CONFIG_CPU_INSPECT) += cpu_inspect.o +cpu_inspect-y = cpuinspect.o inspector.o sysfs.o diff --git a/drivers/cpuinspect/cpuinspect.c b/drivers/cpuinspect/cpuinspect.c new file mode 100644 index 0000000000000..33eabe49866a4 --- /dev/null +++ b/drivers/cpuinspect/cpuinspect.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * cpuinspect.c - core cpuinspect infrastructure + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + * + * Author: Yu Liao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpuinspect.h" + +#define CPUINSPECT_SLEEP_TIMEOUT 1000000UL +/* + * The core struct, store the most relevant data for cpuinspect. + */ +struct cpuinspect ci_core = { + .inspect_times = 1, + .cpu_utility = 90, +}; + +static struct task_struct *cpuinspect_threads[NR_CPUS]; +static atomic_t active_threads_num; +DECLARE_BITMAP(result, NR_CPUS); +DEFINE_MUTEX(cpuinspect_lock); + +/* inspection thread function */ +static int run_inspector(void *data) +{ + unsigned int inspect_times = 0, group, ret; + unsigned int cpu = (unsigned long)data; + ktime_t start_time, duration; + unsigned long sleep_us; + + while (!kthread_should_stop()) { + if (inspect_times >= ci_core.inspect_times || !cpu_online(cpu)) + break; + + for (group = 0; group < curr_cpu_inspector->group_num; group++) { + start_time = ktime_get(); + ret = curr_cpu_inspector->start_inspect(group); + if (ret) { + set_bit(cpu, result); + cpuinspect_result_notify(); + } + + /* + * Sleep for a while if user set desired cpu utility. + */ + duration = ktime_get() - start_time; + sleep_us = (duration * 100 / ci_core.cpu_utility - duration) / 1000; + /* + * During low cpu utility in cpu inspect we might wait a + * while; let's avoid the hung task warning. + */ + sleep_us = min(sleep_us, CPUINSPECT_SLEEP_TIMEOUT); + /* + * Since usleep_range is built on top of hrtimers, + * and we don't want to introduce a large number of + * undesired interrupts, choose a range of 200us + * to balance performance and latency. This can + * cause inspection threads cpu utility is lower + * than required cpu utility. And this also prevents + * soft lockup. + */ + usleep_range(sleep_us, sleep_us + 200); + } + inspect_times++; + } + + cpuinspect_threads[cpu] = NULL; + /* + * When this condition is met, it indicate this is the final cpuinspect + * thread, mark inspect state as 0 and notify user that it has been + * completed. + */ + if (atomic_dec_and_test(&active_threads_num)) { + ci_core.inspect_on = 0; + cpuinspect_result_notify(); + } + + return 0; +} + +int start_inspect_threads(void) +{ + unsigned int cpu = 0; + + bitmap_zero(result, NR_CPUS); + + ci_core.inspect_on = 1; + for_each_cpu(cpu, &ci_core.inspect_cpumask) { + cpuinspect_threads[cpu] = kthread_create_on_node(run_inspector, + (void *)(unsigned long)cpu, + cpu_to_node(cpu), "cpuinspect/%u", cpu); + if (IS_ERR(cpuinspect_threads[cpu])) { + cpuinspect_threads[cpu] = NULL; + continue; + } + + kthread_bind(cpuinspect_threads[cpu], cpu); + wake_up_process(cpuinspect_threads[cpu]); + atomic_inc(&active_threads_num); + } + + /* + * If creating inspection threads for all CPUs in mask fails (or + * inspect_cpumask is empty), notify user, mark the inspection status + * as 0 and simply exit. + */ + if (unlikely(!atomic_read(&active_threads_num))) { + ci_core.inspect_on = 0; + cpuinspect_result_notify(); + } + + return 0; +} + +int stop_inspect_threads(void) +{ + unsigned int cpu = 0; + + /* All inspection threads has been stopped */ + if (atomic_read(&active_threads_num) == 0) + return 0; + + for_each_cpu(cpu, &ci_core.inspect_cpumask) { + if (cpuinspect_threads[cpu]) + kthread_stop(cpuinspect_threads[cpu]); + } + + return 0; +} + +/** + * cpuinspect_init - core initializer + */ +static int __init cpuinspect_init(void) +{ + cpumask_copy(&ci_core.inspect_cpumask, cpu_all_mask); + + return cpuinspect_add_interface(); +} + +static void __exit cpuinspect_exit(void) +{ + return cpuinspect_remove_interface(); +} + +module_init(cpuinspect_init); +module_exit(cpuinspect_exit); +module_param_string(inspector, param_inspector, CPUINSPECT_NAME_LEN, 0444); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuinspect/cpuinspect.h b/drivers/cpuinspect/cpuinspect.h new file mode 100644 index 0000000000000..c520cdca44864 --- /dev/null +++ b/drivers/cpuinspect/cpuinspect.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cpuinspect.h - The internal header file + */ + +#ifndef __DRIVER_CPUINSPECT_H +#define __DRIVER_CPUINSPECT_H + +#define CPUINSPECT_NAME_LEN 16 + +/* sysfs */ +int cpuinspect_add_interface(void); +void cpuinspect_remove_interface(void); +void cpuinspect_result_notify(void); + +/* inspect control */ +int start_inspect_threads(void); +int stop_inspect_threads(void); +int cpuinspect_is_running(void); + +/* switch inspector */ +int cpuinspect_switch_inspector(struct cpu_inspector *insp); + +/* for internal use only */ +extern DECLARE_BITMAP(result, NR_CPUS); +extern struct cpu_inspector *curr_cpu_inspector; +extern struct mutex cpuinspect_lock; +extern struct cpuinspect ci_core; +extern struct list_head cpu_inspectors; +extern char param_inspector[]; + +/** + * struct cpuinspect - the basic cpuinspect structure + * @cpu_utility: Maximum CPU utilization occupied by the inspection thread. + * @inspect_times: The number of times the inspection code will be executed. + * @inspect_cpumask: cpumask to indicate for which CPUs are involved in inspection. + * @inspect_on: Set if the inspection thread is running. + */ +struct cpuinspect { + unsigned int cpu_utility; + unsigned long inspect_times; + int inspect_on; + cpumask_t inspect_cpumask; +}; + +#endif /* __DRIVER_CPUINSPECT_H */ diff --git a/drivers/cpuinspect/inspector.c b/drivers/cpuinspect/inspector.c new file mode 100644 index 0000000000000..e56e35ec63252 --- /dev/null +++ b/drivers/cpuinspect/inspector.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * inspector.c - inspector support + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + * + * Author: Yu Liao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) "CPUINSPECT: " fmt + +#include +#include +#include +#include +#include + +#include "cpuinspect.h" + + +char param_inspector[CPUINSPECT_NAME_LEN]; + +LIST_HEAD(cpu_inspectors); +struct cpu_inspector *curr_cpu_inspector; +struct cpu_inspector *prev_cpu_inspector; + +/** + * cpuinspect_find_inspector - finds a inspector of the specified name + * @str: the name + */ +static struct cpu_inspector *cpuinspect_find_inspector(const char *str) +{ + struct cpu_inspector *insp; + + list_for_each_entry(insp, &cpu_inspectors, list) + if (!strncasecmp(str, insp->name, CPUINSPECT_NAME_LEN)) + return insp; + return NULL; +} + +/** + * cpuinspect_switch_inspector - changes the inspector + * @insp: the new target inspector + */ +int cpuinspect_switch_inspector(struct cpu_inspector *insp) +{ + if (!insp) + return -EINVAL; + + if (insp == curr_cpu_inspector) + return 0; + + curr_cpu_inspector = insp; + pr_info("using inspector %s, group_num: %lu\n", insp->name, insp->group_num); + + return 0; +} + +/** + * cpuinspect_register_inspector - registers a inspector + * @insp: the inspector + */ +int cpuinspect_register_inspector(struct cpu_inspector *insp) +{ + int ret = -EEXIST; + + if (!insp) + return -EINVAL; + + mutex_lock(&cpuinspect_lock); + if (cpuinspect_find_inspector(insp->name) == NULL) { + ret = 0; + list_add_tail(&insp->list, &cpu_inspectors); + + /* + * We select the inspector if current inspector is NULL or it is + * one specificed by kernel parameter. + */ + if (!curr_cpu_inspector || + !strncasecmp(param_inspector, insp->name, CPUINSPECT_NAME_LEN)) + cpuinspect_switch_inspector(insp); + } + mutex_unlock(&cpuinspect_lock); + + return ret; +} +EXPORT_SYMBOL(cpuinspect_register_inspector); + +/** + * cpuinspect_unregister_inspector - unregisters a inspector + * @insp: the inspector + */ +int cpuinspect_unregister_inspector(struct cpu_inspector *insp) +{ + if (!insp) + return -EINVAL; + + mutex_lock(&cpuinspect_lock); + if (curr_cpu_inspector == insp) { + if (ci_core.inspect_on) { + mutex_unlock(&cpuinspect_lock); + return -EBUSY; + } + + curr_cpu_inspector = NULL; + } + + if (!list_empty(&insp->list)) + list_del(&insp->list); + + mutex_unlock(&cpuinspect_lock); + + return 0; +} +EXPORT_SYMBOL(cpuinspect_unregister_inspector); diff --git a/drivers/cpuinspect/sysfs.c b/drivers/cpuinspect/sysfs.c new file mode 100644 index 0000000000000..e84a37031d2f4 --- /dev/null +++ b/drivers/cpuinspect/sysfs.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * sysfs.c - sysfs support + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + * + * Author: Yu Liao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpuinspect.h" + +static ssize_t available_inspector_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cpu_inspector *tmp; + ssize_t i = 0; + + list_for_each_entry(tmp, &cpu_inspectors, list) { + if (i >= (ssize_t) (PAGE_SIZE - (CPUINSPECT_NAME_LEN + 2))) + goto out; + + i += scnprintf(&buf[i], CPUINSPECT_NAME_LEN + 1, "%s ", tmp->name); + } + +out: + i += sprintf(&buf[i], "\n"); + return i; +} + +static ssize_t current_inspector_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret; + + if (curr_cpu_inspector) + ret = sprintf(buf, "%s\n", curr_cpu_inspector->name); + else + ret = sprintf(buf, "none\n"); + + return ret; +} + +static ssize_t current_inspector_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + char insp_name[CPUINSPECT_NAME_LEN + 1]; + int ret; + struct cpu_inspector *insp; + + ret = sscanf(buf, "%" __stringify(CPUINSPECT_NAME_LEN) "s", insp_name); + if (ret != 1) + return -EINVAL; + + if (ci_core.inspect_on) + return -EBUSY; + + ret = -EINVAL; + list_for_each_entry(insp, &cpu_inspectors, list) { + if (!strncmp(insp->name, insp_name, CPUINSPECT_NAME_LEN)) { + ret = cpuinspect_switch_inspector(insp); + break; + } + } + + return ret ? ret : count; +} + +ssize_t patrol_complete_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !ci_core.inspect_on); +} + +ssize_t cpu_utility_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", ci_core.cpu_utility); +} + +ssize_t cpu_utility_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned int cpu_util; + + if (kstrtouint(buf, 10, &cpu_util) || cpu_util < 1 || cpu_util > 100) + return -EINVAL; + + ci_core.cpu_utility = cpu_util; + + return size; +} + +ssize_t patrol_times_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", ci_core.inspect_times); +} + +ssize_t patrol_times_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + /* + * It is not allowed to modify patrol times during the CPU + * inspection operation. + */ + if (ci_core.inspect_on) + return -EBUSY; + + if (kstrtoul(buf, 10, &ci_core.inspect_times)) + return -EINVAL; + + return size; +} + +ssize_t start_patrol_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + bool start_patrol = false; + + if (strtobool(buf, &start_patrol) < 0) + return -EINVAL; + + if (!mutex_trylock(&cpuinspect_lock)) + return -EBUSY; + + /* + * It is not allowed to start the inspection again during the + * inspection process. + */ + if (start_patrol && (int) start_patrol == ci_core.inspect_on) { + mutex_unlock(&cpuinspect_lock); + return -EBUSY; + } + + if (start_patrol == 0) + stop_inspect_threads(); + else if (curr_cpu_inspector) + start_inspect_threads(); + + mutex_unlock(&cpuinspect_lock); + return size; +} + +static ssize_t result_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%*pbl\n", nr_cpu_ids, &result); +} + +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%*pbl\n", + cpumask_pr_args(&ci_core.inspect_cpumask)); +} + +static ssize_t cpumask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + ssize_t err; + + /* + * It is not allowed to modify cpumask during the CPU + * inspection operation. + */ + if (ci_core.inspect_on) + return -EBUSY; + + err = cpulist_parse(buf, &ci_core.inspect_cpumask); + if (err) + return err; + + return count; +} + +/* + * Tell userspace to handle result if one of the following conditions is met: + * - Found fault core + * - Inspection task completed + */ +void cpuinspect_result_notify(void) +{ + struct device *dev_root = bus_get_dev_root(&cpu_subsys); + + if (dev_root) { + sysfs_notify(&dev_root->kobj, "cpuinspect", "result"); + put_device(dev_root); + } +} + +static DEVICE_ATTR_RO(result); +static DEVICE_ATTR_WO(start_patrol); +static DEVICE_ATTR_RO(patrol_complete); +static DEVICE_ATTR_RW(cpu_utility); +static DEVICE_ATTR_RW(cpumask); +static DEVICE_ATTR_RW(patrol_times); + +/* show and switch inspector */ +static DEVICE_ATTR_RO(available_inspector); +static DEVICE_ATTR_RW(current_inspector); + + +static struct attribute *cpuinspect_attrs[] = { + &dev_attr_result.attr, + &dev_attr_start_patrol.attr, + &dev_attr_patrol_complete.attr, + &dev_attr_cpu_utility.attr, + &dev_attr_cpumask.attr, + &dev_attr_patrol_times.attr, + &dev_attr_available_inspector.attr, + &dev_attr_current_inspector.attr, + NULL +}; + +static struct attribute_group cpuinspect_attr_group = { + .attrs = cpuinspect_attrs, + .name = "cpuinspect", +}; + +/** + * cpuinspect_add_interface - add CPU global sysfs attributes + * @dev: the target device + */ +int cpuinspect_add_interface(void) +{ + struct device *dev_root = bus_get_dev_root(&cpu_subsys); + int retval; + + if (!dev_root) + return -EINVAL; + + retval = sysfs_create_group(&dev_root->kobj, &cpuinspect_attr_group); + put_device(dev_root); + return retval; +} + +/** + * cpuinspect_remove_interface - remove CPU global sysfs attributes + * @dev: the target device + */ +void cpuinspect_remove_interface(void) +{ + struct device *dev_root = bus_get_dev_root(&cpu_subsys); + + if (dev_root) { + sysfs_remove_group(&dev_root->kobj, &cpuinspect_attr_group); + put_device(dev_root); + } +} diff --git a/include/linux/cpuinspect.h b/include/linux/cpuinspect.h new file mode 100644 index 0000000000000..596b7d82abb4c --- /dev/null +++ b/include/linux/cpuinspect.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cpuinspect.h - a generic framework for CPU online inspection + * + * Copyright (c) 2023 Yu Liao + */ + +#ifndef __LINUX_CPUINSPECT_H +#define __LINUX_CPUINSPECT_H + +#include +#include +#include + +#define CPUINSPECT_NAME_LEN 16 + +/** + * struct cpu_inspector - CPU Inspection driver. Inspection code may run in + * kernel, BIOS, trusted OS, etc., and contains many test cases. All test + * cases can be divided into multiple or one group, provied a function + * to start inspection for a specified group. + * + * @name: Pointer to inspector name + * @list: List head for registration (internal) + * @group_num: Number of inspection code groups + * @start_inspect: Function to start inspect process, passes group + * number as a argument + */ +struct cpu_inspector { + const char name[CPUINSPECT_NAME_LEN]; + struct list_head list; + unsigned long group_num; + + int (*start_inspect)(unsigned int group); +}; + +extern int cpuinspect_register_inspector(struct cpu_inspector *insp); +extern int cpuinspect_unregister_inspector(struct cpu_inspector *insp); + +#endif /* __LINUX_CPUINSPECT_H */ From 17e9d3ad3ca7a0209c23586aff31ef6310cc7ea5 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Mon, 1 Jan 2024 06:32:41 +0800 Subject: [PATCH 77/95] cpuinspect: add ATF inspector commit 133e66ca214b9d084f01c07cd3d57f454a426afb openEuler This inspector implements the execution of inspection instructions in BIOS. Using smc to get group num and execute inspection test cases from arm-trusted-firmware. Signed-off-by: Yu Liao Signed-off-by: huwentao Signed-off-by: WangYuli --- drivers/cpuinspect/Kconfig | 11 ++++ drivers/cpuinspect/Makefile | 1 + drivers/cpuinspect/inspector-atf.c | 81 ++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 drivers/cpuinspect/inspector-atf.c diff --git a/drivers/cpuinspect/Kconfig b/drivers/cpuinspect/Kconfig index 548412ba35782..4f4144288d641 100644 --- a/drivers/cpuinspect/Kconfig +++ b/drivers/cpuinspect/Kconfig @@ -10,4 +10,15 @@ config CPU_INSPECT of SDC by proactively executing CPU inspection test cases. It includes modular inspector that can be swapped during runtime. +if CPU_INSPECT + +config CPU_INSPECTOR_ATF + tristate "ATF CPU inspector" + depends on ARM64 + default n + help + This inspector implements the execution of inspection instructions + in BIOS. + +endif endmenu diff --git a/drivers/cpuinspect/Makefile b/drivers/cpuinspect/Makefile index 64dcaf3e4e444..8429e9751ae4e 100644 --- a/drivers/cpuinspect/Makefile +++ b/drivers/cpuinspect/Makefile @@ -3,4 +3,5 @@ # Makefile for cpuinspect. # obj-$(CONFIG_CPU_INSPECT) += cpu_inspect.o +obj-$(CONFIG_CPU_INSPECTOR_ATF) += inspector-atf.o cpu_inspect-y = cpuinspect.o inspector.o sysfs.o diff --git a/drivers/cpuinspect/inspector-atf.c b/drivers/cpuinspect/inspector-atf.c new file mode 100644 index 0000000000000..00adaff5a19a1 --- /dev/null +++ b/drivers/cpuinspect/inspector-atf.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * inspector-atf: cpuinspect inspector for EFI-based systems + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022-2023. All rights reserved. + * + * Author: Yu Liao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#define PRIVATE_ARM_SMC_ID_STL_GET_MAX_GROUP 0x83000504 +#define PRIVATE_ARM_SMC_ID_STL_ONLINE_TEST 0x83000505 +#define CPUINSPECT_NOT_SUPPORTED -1 + +static struct cpu_inspector atf_inspector; + +static int atf_get_group_num(void) +{ + struct arm_smccc_res res; + + arm_smccc_smc(PRIVATE_ARM_SMC_ID_STL_GET_MAX_GROUP, 0, 0, 0, 0, + 0, 0, 0, &res); + + return res.a0; +} + +static int atf_run_chip_test(unsigned int group) +{ + struct arm_smccc_res res; + + arm_smccc_smc(PRIVATE_ARM_SMC_ID_STL_ONLINE_TEST, group, 0, 0, 0, + 0, 0, 0, &res); + + return res.a0; +} + +static struct cpu_inspector atf_inspector = { + .name = "atf", + .start_inspect = atf_run_chip_test, +}; + +/** + * init_atf_inspector - initializes the inspector + */ +static __init int init_atf_inspector(void) +{ + unsigned long ret; + + ret = atf_get_group_num(); + if (ret == CPUINSPECT_NOT_SUPPORTED) { + pr_info("BIOS does not support CPU inspect.\nFailed to register inspector %s\n", + atf_inspector.name); + return -EOPNOTSUPP; + } + + atf_inspector.group_num = ret; + + return cpuinspect_register_inspector(&atf_inspector); +} + +static __exit void exit_atf_inspector(void) +{ + cpuinspect_unregister_inspector(&atf_inspector); +} + +MODULE_LICENSE("GPL"); +module_init(init_atf_inspector); +module_exit(exit_atf_inspector); From 2f8eb2174f6213fc6505ce7b2e7a435f228fd08a Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 26 Jan 2024 10:39:43 +0800 Subject: [PATCH 78/95] blk-mq: avoid housekeeping CPUs scheduling a worker on a non-housekeeping CPU commit 5f20541757730418065c3ae77dfdbcde3f523394 openEuler When NOHZ_FULL is enabled, such as in HPC situation, CPUs are divided into housekeeping CPUs and non-housekeeping CPUs. Non-housekeeping CPUs are NOHZ_FULL CPUs and are often monopolized by the userspace process, such HPC application process. Any sort of interruption is not expected. blk_mq_hctx_next_cpu() selects each cpu in 'hctx->cpumask' alternately to schedule the work thread blk_mq_run_work_fn(). When 'hctx->cpumask' contains housekeeping CPU and non-housekeeping CPU at the same time, a housekeeping CPU, which want to request a IO, may schedule a worker on a non-housekeeping CPU. This may affect the performance of the userspace application running on non-housekeeping CPUs. So let's just schedule the worker thread on the current CPU when the current CPU is housekeeping CPU. Signed-off-by: Xiongfeng Wang Signed-off-by: huwentao Signed-off-by: WangYuli --- block/blk-mq.c | 11 ++++++++++- include/linux/sched/isolation.h | 3 +++ kernel/sched/isolation.c | 8 ++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c1c5a261d0cf..8e756a17311d9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -2259,9 +2260,17 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) */ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) { + int work_cpu; + if (unlikely(blk_mq_hctx_stopped(hctx))) return; - kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, + + if (enhanced_isolcpus && tick_nohz_full_enabled() && + housekeeping_cpu(raw_smp_processor_id(), HK_TYPE_WQ)) + work_cpu = raw_smp_processor_id(); + else + work_cpu = blk_mq_hctx_next_cpu(hctx); + kblockd_mod_delayed_work_on(work_cpu, &hctx->run_work, msecs_to_jiffies(msecs)); } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index fe1a46f30d240..3894e74e8dc51 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -19,6 +19,7 @@ enum hk_type { }; #ifdef CONFIG_CPU_ISOLATION +extern bool enhanced_isolcpus; DECLARE_STATIC_KEY_FALSE(housekeeping_overridden); extern int housekeeping_any_cpu(enum hk_type type); extern const struct cpumask *housekeeping_cpumask(enum hk_type type); @@ -29,6 +30,8 @@ extern void __init housekeeping_init(void); #else +#define enhanced_isolcpus 0 + static inline int housekeeping_any_cpu(enum hk_type type) { return smp_processor_id(); diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 82e2f7fc7c267..1179dc21873ed 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -244,3 +244,11 @@ static int __init housekeeping_isolcpus_setup(char *str) return housekeeping_setup(str, flags); } __setup("isolcpus=", housekeeping_isolcpus_setup); + +bool enhanced_isolcpus; +static int __init enhanced_isolcpus_setup(char *str) +{ + enhanced_isolcpus = true; + return 0; +} +__setup("enhanced_isolcpus", enhanced_isolcpus_setup); From 3aef22c3ca1274dc494dc0c141e6514d9a2288fb Mon Sep 17 00:00:00 2001 From: Wei Li Date: Tue, 26 Mar 2024 17:15:38 +0800 Subject: [PATCH 79/95] locking/qspinlock: Add CNA support for ARM64 without pvspinlock commit 2dfbfabfced809d6e9672a50cc9edd3435477df0 openEuler The CNA spinlock is enabled base on 'pv_ops' of pvspinlock, and is only supported on x86_64 now, add support for arm64 without pvspinlock. Signed-off-by: Wei Li Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/Kconfig | 15 ++++++++++++ arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/qspinlock.h | 38 ++++++++++++++++++++++++++++++ arch/x86/kernel/alternative.c | 4 ---- init/main.c | 4 +++- kernel/locking/qspinlock.c | 2 +- kernel/locking/qspinlock_cna.h | 12 ++++++++-- 7 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 arch/arm64/include/asm/qspinlock.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2e3950f8522a0..fd2b38037d295 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1513,6 +1513,21 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. +config NUMA_AWARE_SPINLOCKS + bool "Numa-aware spinlocks" + depends on NUMA + depends on QUEUED_SPINLOCKS + default n + help + Introduce NUMA (Non Uniform Memory Access) awareness into + the slow path of spinlocks. + + In this variant of qspinlock, the kernel will try to keep the lock + on the same node, thus reducing the number of remote cache misses, + while trading some of the short term fairness for better performance. + + Say N if you want absolute first come first serve fairness. + source "kernel/Kconfig.hz" config ARCH_SPARSEMEM_ENABLE diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 5c8ee5a541d20..d16ee80953663 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += early_ioremap.h generic-y += mcs_spinlock.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += parport.h generic-y += user.h diff --git a/arch/arm64/include/asm/qspinlock.h b/arch/arm64/include/asm/qspinlock.h new file mode 100644 index 0000000000000..8cc7d00b8c67b --- /dev/null +++ b/arch/arm64/include/asm/qspinlock.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_QSPINLOCK_H +#define _ASM_ARM64_QSPINLOCK_H + +#ifdef CONFIG_NUMA_AWARE_SPINLOCKS +#include + +extern void cna_configure_spin_lock_slowpath(void); + +extern void (*cna_queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); + +#define queued_spin_unlock queued_spin_unlock +/** + * queued_spin_unlock - release a queued spinlock + * @lock : Pointer to queued spinlock structure + * + * A smp_store_release() on the least-significant byte. + */ +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + smp_store_release(&lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + cna_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + native_queued_spin_unlock(lock); +} +#endif + +#include + +#endif /* _ASM_ARM64_QSPINLOCK_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 09fdc4ff5970f..18ce2b759dbb8 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1812,10 +1812,6 @@ void __init alternative_instructions(void) * the optimization of the custom code with a function call again. */ paravirt_set_cap(); - -#if defined(CONFIG_NUMA_AWARE_SPINLOCKS) - cna_configure_spin_lock_slowpath(); -#endif /* Keep CET-IBT disabled until caller/callee are patched */ ibt = ibt_save(/*disable*/ true); diff --git a/init/main.c b/init/main.c index c98ca410a37b0..acad549b0c7c4 100644 --- a/init/main.c +++ b/init/main.c @@ -1025,7 +1025,9 @@ void start_kernel(void) panic_param); lockdep_init(); - +#if defined(CONFIG_NUMA_AWARE_SPINLOCKS) + cna_configure_spin_lock_slowpath(); +#endif /* * Need to run this when irqs are enabled, because it wants * to self-test [hard/soft]-irqs on/off lock inversion bugs diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index d3f99060b60f1..c1818fec34f2a 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -289,7 +289,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #define pv_kick_node __pv_kick_node #define pv_wait_head_or_lock __pv_wait_head_or_lock -#ifdef CONFIG_PARAVIRT_SPINLOCKS +#if defined(CONFIG_PARAVIRT_SPINLOCKS) || defined(CONFIG_NUMA_AWARE_SPINLOCKS) #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif diff --git a/kernel/locking/qspinlock_cna.h b/kernel/locking/qspinlock_cna.h index 2d834bc8d7dd1..a7ed231e32eb6 100644 --- a/kernel/locking/qspinlock_cna.h +++ b/kernel/locking/qspinlock_cna.h @@ -376,6 +376,14 @@ static inline void cna_lock_handoff(struct mcs_spinlock *node, arch_mcs_lock_handoff(&next->locked, val); } +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define cna_queued_spin_lock_slowpath pv_ops.lock.queued_spin_lock_slowpath +#else +void (*cna_queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val) = + native_queued_spin_lock_slowpath; +EXPORT_SYMBOL(cna_queued_spin_lock_slowpath); +#endif + /* * Constant (boot-param configurable) flag selecting the NUMA-aware variant * of spinlock. Possible values: -1 (off) / 0 (auto, default) / 1 (on). @@ -413,13 +421,13 @@ void __init cna_configure_spin_lock_slowpath(void) return; if (numa_spinlock_flag == 0 && (nr_node_ids < 2 || - pv_ops.lock.queued_spin_lock_slowpath != + cna_queued_spin_lock_slowpath != native_queued_spin_lock_slowpath)) return; cna_init_nodes(); - pv_ops.lock.queued_spin_lock_slowpath = __cna_queued_spin_lock_slowpath; + cna_queued_spin_lock_slowpath = __cna_queued_spin_lock_slowpath; pr_info("Enabling CNA spinlock\n"); } From 6cfd43b560eae5999c8bd638613031fe8f30636f Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Fri, 13 Jun 2025 15:27:51 +0800 Subject: [PATCH 80/95] arm64: topology: Setup amu fie when cpu hotplugging commit 7b342952fd52859d1bd2ed4b97f7e479d7ec54bf openEuler Amu fie was set up by a cpufreq policy notifier after the policy was created. This caused some problems: 1. The cpus related to the same policy would all fail to set up amu fie if one of them couldn't pass the freq_counters_valid() check. 2. The cpus fail to set up amu fie would never have a chance to set up again. When boot with maxcpu=1 restrict, the support amu flags of the offline cpus would never be setup. After that, when cpufreq policy was being created, the online cpu might set up amu fie fail because the other cpus related to the same policy couldn't pass the freq_counters_valid() check. Hotplug the offline cpus, since the policy was already created, amu_fie_setup() would never be called again. All cpus couldn't setup amu fie in this situation. After commit 1f59a0e83600 ("arm64/amu: Use capacity_ref_freq() to set AMU ratio"), the max_freq stores in policy data is never needed when setting up amu fie. This indicates that the setting up of amu fie does not depend on the policy any more. So each cpu can set up amu fie separately during hotplug and the problems above will be solved. Fixes: 1eb5dde674f5 ("cpufreq: CPPC: Add support for frequency invariance") Signed-off-by: Lifeng Zheng Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- arch/arm64/kernel/topology.c | 53 +++++++++++------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index dc92ac21f08b1..6d0d54de73492 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -212,52 +211,28 @@ static struct scale_freq_data amu_sfd = { .set_freq_scale = amu_scale_freq_tick, }; -static void amu_fie_setup(const struct cpumask *cpus) +static void amu_fie_setup(unsigned int cpu) { - int cpu; - - /* We are already set since the last insmod of cpufreq driver */ - if (unlikely(cpumask_subset(cpus, amu_fie_cpus))) + if (cpumask_test_cpu(cpu, amu_fie_cpus)) return; - for_each_cpu(cpu, cpus) { - if (!freq_counters_valid(cpu)) - return; - } + if (!freq_counters_valid(cpu)) + return; - cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); + cpumask_set_cpu(cpu, amu_fie_cpus); topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus); - pr_debug("CPUs[%*pbl]: counters will be used for FIE.", - cpumask_pr_args(cpus)); + pr_debug("CPUs[%u]: counters will be used for FIE.", cpu); } -static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, - void *data) +static int cpuhp_topology_online(unsigned int cpu) { - struct cpufreq_policy *policy = data; - - if (val == CPUFREQ_CREATE_POLICY) - amu_fie_setup(policy->related_cpus); - - /* - * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU - * counters don't have any dependency on cpufreq driver once we have - * initialized AMU support and enabled invariance. The AMU counters will - * keep on working just fine in the absence of the cpufreq driver, and - * for the CPUs for which there are no counters available, the last set - * value of arch_freq_scale will remain valid as that is the frequency - * those CPUs are running at. - */ + amu_fie_setup(cpu); return 0; } -static struct notifier_block init_amu_fie_notifier = { - .notifier_call = init_amu_fie_callback, -}; - static int __init init_amu_fie(void) { int ret; @@ -265,12 +240,16 @@ static int __init init_amu_fie(void) if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) return -ENOMEM; - ret = cpufreq_register_notifier(&init_amu_fie_notifier, - CPUFREQ_POLICY_NOTIFIER); - if (ret) + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "arm64/topology:online", + cpuhp_topology_online, + NULL); + if (ret < 0) { free_cpumask_var(amu_fie_cpus); + return ret; + } - return ret; + return 0; } core_initcall(init_amu_fie); From 2bc92084c9f9dd10cfd939f31db9ff9459292864 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:31 +0800 Subject: [PATCH 81/95] arm64/sysreg: Update ID_AA64MMFR1_EL1 register [Upstream commit aa47dcda2708e571695dae2e3f9537d9a8eb804c] Update ID_AA64MMFR1_EL1 register fields definition per DDI0601 (ID092424) 2024-09. ID_AA64MMFR1_EL1.ETS adds definition for FEAT_ETS2 and FEAT_ETS3. ID_AA64MMFR1_EL1.HAFDBS adds definition for FEAT_HAFT and FEAT_HDBSS. Reviewed-by: Mark Brown Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241102104235.62560-2-yangyicong@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/tools/sysreg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index c97275a6adbc3..961bef1c0959f 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1618,6 +1618,8 @@ EndEnum UnsignedEnum 39:36 ETS 0b0000 NI 0b0001 IMP + 0b0010 ETS2 + 0b0011 ETS3 EndEnum UnsignedEnum 35:32 TWED 0b0000 NI @@ -1658,6 +1660,8 @@ UnsignedEnum 3:0 HAFDBS 0b0000 NI 0b0001 AF 0b0010 DBM + 0b0011 HAFT + 0b0100 HDBSS EndEnum EndSysreg From f2a7a84f2b32efe24193a98f325e0ab5efb7b011 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:32 +0800 Subject: [PATCH 82/95] arm64: setup: name 'tcr2' register [Upstream commit 926b66e2ebc8c055b9fea3fb3e5f5b67c80e8e7a] TCR2_EL1 introduced some additional controls besides TCR_EL1. Currently only PIE is supported and enabled by writing TCR2_EL1 directly if PIE detected. Introduce a named register 'tcr2' just like 'tcr' we've already had. It'll be initialized to 0 and updated if certain feature detected and needs to be enabled. Touch the TCR2_EL1 registers at last with the updated 'tcr2' value if FEAT_TCR2 supported by checking ID_AA64MMFR3_EL1.TCRX. Then we can extend the support of other features controlled by TCR2_EL1. Reviewed-by: Catalin Marinas Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241102104235.62560-3-yangyicong@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/mm/proc.S | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 14fdf645edc88..44a61b1f1a1ee 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -420,10 +420,12 @@ SYM_FUNC_START(__cpu_setup) */ mair .req x17 tcr .req x16 + tcr2 .req x15 mov_q mair, MAIR_EL1_SET mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS + mov tcr2, xzr tcr_clear_errata_bits tcr, x9, x5 @@ -464,11 +466,16 @@ SYM_FUNC_START(__cpu_setup) mov_q x0, PIE_E1 msr REG_PIR_EL1, x0 - mov x0, TCR2_EL1x_PIE - msr REG_TCR2_EL1, x0 + orr tcr2, tcr2, TCR2_EL1x_PIE .Lskip_indirection: + mrs_s x1, SYS_ID_AA64MMFR3_EL1 + ubfx x1, x1, #ID_AA64MMFR3_EL1_TCRX_SHIFT, #4 + cbz x1, 1f + msr REG_TCR2_EL1, tcr2 +1: + /* * Prepare SCTLR */ @@ -477,4 +484,5 @@ SYM_FUNC_START(__cpu_setup) .unreq mair .unreq tcr + .unreq tcr2 SYM_FUNC_END(__cpu_setup) From 0da1bdfaff8160b4a6c0ee28d9561febb733f557 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 21 Nov 2024 10:50:42 +0800 Subject: [PATCH 83/95] arm64: Add support for FEAT_HAFT [Upstream commit efe72541355d4d40a4f076af453f6533e98e058c] Armv8.9/v9.4 introduces the feature Hardware managed Access Flag for Table descriptors (FEAT_HAFT). The feature is indicated by ID_AA64MMFR1_EL1.HAFDBS == 0b0011 and can be enabled by TCR2_EL1.HAFT so it has a dependency on FEAT_TCR2. Adds the Kconfig for FEAT_HAFT and support detecting and enabling the feature. The feature is enabled in __cpu_setup() before MMU on just like HA. A CPU capability is added to notify the user of the feature. Add definition of P{G,4,U,M}D_TABLE_AF bit and set the AF bit when creating the page table, which will save the hardware from having to update them at runtime. This will be ignored if FEAT_HAFT is not enabled. The AF bit of table descriptors cannot be managed by the software per spec, unlike the HA. So this should be used only if it's supported system wide by system_supports_haft(). Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20241102104235.62560-4-yangyicong@huawei.com Reviewed-by: Catalin Marinas [catalin.marinas@arm.com: added the ID check back to __cpu_setup in case of future CPU errata] Signed-off-by: Catalin Marinas Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/Kconfig | 15 +++++++++++++++ arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/include/asm/pgalloc.h | 10 ++++++---- arch/arm64/include/asm/pgtable-hwdef.h | 3 +++ arch/arm64/kernel/cpufeature.c | 15 +++++++++++++++ arch/arm64/mm/fixmap.c | 9 ++++++--- arch/arm64/mm/mmu.c | 6 +++--- arch/arm64/mm/proc.S | 7 ++++++- arch/arm64/tools/cpucaps | 1 + 9 files changed, 61 insertions(+), 11 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd2b38037d295..fe324325b8874 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2176,6 +2176,21 @@ config ARM64_NMI if the cpu does not implement the feature. It will also be disabled if pseudo NMIs are enabled at runtime. +config ARM64_HAFT + bool "Support for Hardware managed Access Flag for Table Descriptors" + depends on ARM64_HW_AFDBM + default y + help + The ARMv8.9/ARMv9.5 introduces the feature Hardware managed Access + Flag for Table descriptors. When enabled an architectural executed + memory access will update the Access Flag in each Table descriptor + which is accessed during the translation table walk and for which + the Access Flag is 0. The Access Flag of the Table descriptor use + the same bit of PTE_AF. + + The feature will only be enabled if all the CPUs in the system + support this feature. If unsure, say Y. + endmenu # "ARMv8.8 architectural features" config ARM64_SVE diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 53343c6cba583..21f1ee1c2059a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -837,6 +837,12 @@ static inline bool system_supports_tlb_range(void) cpus_have_const_cap(ARM64_HAS_TLB_RANGE); } +static inline bool system_supports_haft(void) +{ + return IS_ENABLED(CONFIG_ARM64_HAFT) && + cpus_have_final_cap(ARM64_HAFT); +} + int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); bool try_emulate_mrs(struct pt_regs *regs, u32 isn); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 237224484d0f6..661964e99b9d5 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - pudval_t pudval = PUD_TYPE_TABLE; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_AF; pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; __pud_populate(pudp, __pa(pmdp), pudval); @@ -48,7 +48,7 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - p4dval_t p4dval = P4D_TYPE_TABLE; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_AF; p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN; __p4d_populate(p4dp, __pa(pudp), p4dval); @@ -77,14 +77,16 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { VM_BUG_ON(mm && mm != &init_mm); - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); + __pmd_populate(pmdp, __pa(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { VM_BUG_ON(mm == &init_mm); - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); + __pmd_populate(pmdp, page_to_phys(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF | PMD_TABLE_PXN); } #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e4944d517c999..f736a42221909 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,7 @@ #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_AF (_AT(p4dval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) #define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) @@ -105,6 +106,7 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_AF (_AT(pudval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ #define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) #define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) @@ -115,6 +117,7 @@ #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) +#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */ /* * Section diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f569da53f440d..170652d867232 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2587,6 +2587,21 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, +#endif +#ifdef CONFIG_ARM64_HAFT + { + .desc = "Hardware managed Access Flag for Table Descriptors", + /* + * Contrary to the page/block access flag, the table access flag + * cannot be emulated in software (no access fault will occur). + * Therefore this should be used only if it's supported system + * wide. + */ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAFT, + .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, HAFT) + }, #endif { .desc = "CRC32 instructions", diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index c0a3301203bdf..84c9f2e2b1ece 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -44,7 +44,8 @@ static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr) if (pmd_none(pmd)) { ptep = bm_pte[BM_PTE_TABLE_IDX(addr)]; - __pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE); + __pmd_populate(pmdp, __pa_symbol(ptep), + PMD_TYPE_TABLE | PMD_TABLE_AF); } } @@ -56,7 +57,8 @@ static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr, pmd_t *pmdp; if (pud_none(pud)) - __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), + PUD_TYPE_TABLE | PUD_TABLE_AF); pmdp = pmd_offset_kimg(pudp, addr); do { @@ -83,7 +85,8 @@ static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr, } if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), + P4D_TYPE_TABLE | P4D_TABLE_AF); pudp = pud_offset_kimg(p4dp, addr); early_fixmap_init_pmd(pudp, addr, end); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3a0568fe2b628..9f70744b57596 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -212,7 +212,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { - pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF; phys_addr_t pte_phys; if (flags & NO_EXEC_MAPPINGS) @@ -290,7 +290,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { - pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF; phys_addr_t pmd_phys; if (flags & NO_EXEC_MAPPINGS) @@ -329,7 +329,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t p4d = READ_ONCE(*p4dp); if (p4d_none(p4d)) { - p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN | P4D_TABLE_AF; phys_addr_t pud_phys; if (flags & NO_EXEC_MAPPINGS) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 44a61b1f1a1ee..9ef7b07349d7f 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -449,9 +449,14 @@ SYM_FUNC_START(__cpu_setup) * via capabilities. */ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK + ubfx x9, x9, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, #4 cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update +#ifdef CONFIG_ARM64_HAFT + cmp x9, ID_AA64MMFR1_EL1_HAFDBS_HAFT + b.lt 1f + orr tcr2, tcr2, TCR2_EL1x_HAFT +#endif /* CONFIG_ARM64_HAFT */ 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 686b95b15ccea..62a38a364c412 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ HAS_TIDCP1 HAS_TLB_RANGE HAS_VIRT_HOST_EXTN HAS_WFXT +HAFT HW_DBM KVM_HVHE KVM_PROTECTED_MODE From 6e24b875ae20a40452ca18275b1704a8220df34a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:34 +0800 Subject: [PATCH 84/95] arm64: Enable ARCH_HAS_NONLEAF_PMD_YOUNG [Upstream commit 62df5870ebf7cec96a51c9b9008daf167e22db14] With the support of FEAT_HAFT, the NONLEAF_PMD_YOUNG can be enabled on arm64 since the hardware is capable of updating the AF flag for PMD table descriptor. Since the AF bit of the table descriptor shares the same bit position in block descriptors, we only need to implement arch_has_hw_nonleaf_pmd_young() and select related configs. The related pmd_young test/update operations keeps the same with and already implemented for transparent page support. Currently ARCH_HAS_NONLEAF_PMD_YOUNG is used to improve the efficiency of lru-gen aging. Signed-off-by: Yicong Yang Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20241102104235.62560-5-yangyicong@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/pgtable.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fe324325b8874..eace039f470bb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -35,6 +35,7 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAF select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0212129b13d07..cbac6426ffc5b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -930,7 +930,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return young; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, @@ -938,7 +938,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, { return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, @@ -1096,6 +1096,10 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, */ #define arch_has_hw_pte_young cpu_has_hw_af +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG +#define arch_has_hw_nonleaf_pmd_young system_supports_haft +#endif + /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. From 04c92bfcda5daf9ae5f8ff631a9858c50616933b Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 2 Nov 2024 18:42:35 +0800 Subject: [PATCH 85/95] arm64: pgtable: Warn unexpected pmdp_test_and_clear_young() [Upstream commit b349a5a2b6e236b25095c6ff886b3451de5ea041] Young bit operation on PMD table entry is only supported if FEAT_HAFT enabled system wide. Add a warning for notifying the misbehaviour. Signed-off-by: Yicong Yang Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20241102104235.62560-6-yangyicong@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: huwentao Signed-off-by: WangYuli --- arch/arm64/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index cbac6426ffc5b..bfd089a21adb3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -936,6 +936,8 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { + /* Operation applies to PMD table entry only if FEAT_HAFT is enabled */ + VM_WARN_ON(pmd_table(READ_ONCE(*pmdp)) && !system_supports_haft()); return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ From 65c46f5a37aebe8a982e66b1b10bf6084cb0f7c0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2025 21:22:06 +0800 Subject: [PATCH 86/95] arm64: Support AT_HWCAP3 [Upstream commit ddadbcdaaed5c3c44cc6c36093f6bf02d942d71d] We have filled all 64 bits of AT_HWCAP2 so in order to support discovery of further features provide the framework to use the already defined AT_HWCAP3 for further CPU features. Signed-off-by: Mark Brown Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20241004-arm64-elf-hwcap3-v2-2-799d1daad8b0@kernel.org Signed-off-by: Catalin Marinas [conflicts with e218c611e32c8d4315a6d64790b9a86f3d4fa4dc] Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- Documentation/arch/arm64/elf_hwcaps.rst | 6 +++--- arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/include/asm/hwcap.h | 5 ++++- arch/arm64/include/uapi/asm/hwcap.h | 4 ++++ arch/arm64/kernel/cpufeature.c | 6 ++++++ 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index f88a24d621dd4..d1c07278913df 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The kernel exposes the presence of these features to userspace through a set of flags called hwcaps, exposed in the auxiliary vector. -Userspace software can test for features by acquiring the AT_HWCAP or -AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant -flags are set, e.g.:: +Userspace software can test for features by acquiring the AT_HWCAP, +AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing +whether the relevant flags are set, e.g.:: bool floating_point_is_present(void) { diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 21f1ee1c2059a..3eba6e1855b28 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include #include -#define MAX_CPU_FEATURES 128 +#define MAX_CPU_FEATURES 192 #define cpu_feature(x) KERNEL_HWCAP_ ## x #define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0 @@ -434,6 +434,7 @@ void cpu_set_feature(unsigned int num); bool cpu_have_feature(unsigned int num); unsigned long cpu_get_elf_hwcap(void); unsigned long cpu_get_elf_hwcap2(void); +unsigned long cpu_get_elf_hwcap3(void); #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 521267478d187..6f044e5873a98 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -140,17 +140,20 @@ #define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) #define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) +#define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128) + /* * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. */ #define ELF_HWCAP cpu_get_elf_hwcap() #define ELF_HWCAP2 cpu_get_elf_hwcap2() +#define ELF_HWCAP3 cpu_get_elf_hwcap3() #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) #define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) -extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3; #endif enum { diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 53026f45a5092..a86fca74fdcf8 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -105,4 +105,8 @@ #define HWCAP2_MOPS (1UL << 43) #define HWCAP2_HBC (1UL << 44) +/* + * HWCAP3 flags - for AT_HWCAP3 + */ + #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 170652d867232..f798ebc06ac3b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -104,6 +104,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; +unsigned int compat_elf_hwcap3 __read_mostly; #endif DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); @@ -3508,6 +3509,11 @@ unsigned long cpu_get_elf_hwcap2(void) return elf_hwcap[1]; } +unsigned long cpu_get_elf_hwcap3(void) +{ + return elf_hwcap[2]; +} + static void __init setup_system_capabilities(void) { /* From b9ea2e980853be93beca2fa8aa658b334c2ed23b Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Tue, 13 May 2025 21:22:07 +0800 Subject: [PATCH 87/95] uapi/auxvec: Define AT_HWCAP3 and AT_HWCAP4 aux vector, entries [Upstream commit 3281366a8e79a512956382885091565db1036b64] Changes from v1: - Add Acked-by lines. The powerpc toolchain keeps a copy of the HWCAP bit masks in our TCB for fast access by the __builtin_cpu_supports built-in function. The TCB space for the HWCAP entries - which are created in pairs - is an ABI extension, so waiting to create the space for HWCAP3 and HWCAP4 until we need them is problematical. Define AT_HWCAP3 and AT_HWCAP4 in the generic uapi header so they can be used in glibc to reserve space in the powerpc TCB for their future use. I scanned through the Linux and GLIBC source codes looking for unused AT_* values and 29 and 30 did not seem to be used, so they are what I went with. This has received Acked-by's from both GLIBC and Linux kernel developers and no reservations or Nacks from anyone. Arnd, we seem to have consensus on the patch below. Is this something you could take and apply to your tree? Peter Signed-off-by: Peter Bergner Acked-by: Adhemerval Zanella Acked-by: Nicholas Piggin Acked-by: Szabolcs Nagy Acked-by: Arnd Bergmann Commit: Michael Ellerman Signed-off-by: Michael Ellerman Link: https://msgid.link/a406b535-dc55-4856-8ae9-5a063644a1af@linux.ibm.com Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- include/uapi/linux/auxvec.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h index 6991c4b8ab189..cc61cb9b3e9af 100644 --- a/include/uapi/linux/auxvec.h +++ b/include/uapi/linux/auxvec.h @@ -32,6 +32,8 @@ #define AT_HWCAP2 26 /* extension of AT_HWCAP */ #define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size */ #define AT_RSEQ_ALIGN 28 /* rseq allocation alignment */ +#define AT_HWCAP3 29 /* extension of AT_HWCAP */ +#define AT_HWCAP4 30 /* extension of AT_HWCAP */ #define AT_EXECFN 31 /* filename of program */ From 68c4bb084dfc0e3a5930aa3d19031accdbc5bd5f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 13 May 2025 21:22:08 +0800 Subject: [PATCH 88/95] binfmt_elf: Wire up AT_HWCAP3 at AT_HWCAP4 [Upstream commit 4e6e8c2b757f382684abc4765202cd25c221dea1] AT_HWCAP3 and AT_HWCAP4 were recently defined for use on PowerPC in commit 3281366a8e79 ("uapi/auxvec: Define AT_HWCAP3 and AT_HWCAP4 aux vector, entries"). Since we want to start using AT_HWCAP3 on arm64 add support for exposing both these new hwcaps via binfmt_elf. Signed-off-by: Mark Brown Acked-by: Kees Cook Reviewed-by: Anshuman Khandual Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- fs/binfmt_elf.c | 6 ++++++ fs/binfmt_elf_fdpic.c | 6 ++++++ fs/compat_binfmt_elf.c | 10 ++++++++++ 3 files changed, 22 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3ff7d2e47c7e9..5dbebfeda70bd 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -257,6 +257,12 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); +#endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); #endif NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 96a8b13b57d96..7419c3a41db97 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -642,6 +642,12 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2); +#endif +#ifdef ELF_HWCAP3 + NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3); +#endif +#ifdef ELF_HWCAP4 + NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4); #endif NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE); NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index 8f0af4f626316..d5ef5469e4e62 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -80,6 +80,16 @@ #define ELF_HWCAP2 COMPAT_ELF_HWCAP2 #endif +#ifdef COMPAT_ELF_HWCAP3 +#undef ELF_HWCAP3 +#define ELF_HWCAP3 COMPAT_ELF_HWCAP3 +#endif + +#ifdef COMPAT_ELF_HWCAP4 +#undef ELF_HWCAP4 +#define ELF_HWCAP4 COMPAT_ELF_HWCAP4 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO From 6b63a6e8d0be6a4b949a411253c307638509d836 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 13 May 2025 21:22:09 +0800 Subject: [PATCH 89/95] arm64: Provide basic EL2 setup for FEAT_{LS64, LS64_V} usage at EL0/1 commit 077e22a3c65f623d6c2172a952783dd9898d0f98 openEuler Instructions introduced by FEAT_{LS64, LS64_V} is controlled by HCRX_EL2.{EnALS, EnASR}. Configure all of these to allow usage at EL0/1. This doesn't mean these instructions are always available in EL0/1 if provided. The hypervisor still have the control at runtime. Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- arch/arm64/include/asm/el2_setup.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 8da13fcce6f9c..c653ef8284306 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -27,6 +27,17 @@ ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4 cbz x0, .Lskip_hcrx_\@ mov_q x0, HCRX_HOST_FLAGS + + /* Enable LS64, LS64_V if supported */ + mrs_s x1, SYS_ID_AA64ISAR1_EL1 + ubfx x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4 + cbz x1, .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_EnALS + cmp x1, #ID_AA64ISAR1_EL1_LS64_LS64_V + b.lt .Lset_hcrx_\@ + orr x0, x0, #HCRX_EL2_EnASR + +.Lset_hcrx_\@ : msr_s SYS_HCRX_EL2, x0 .Lskip_hcrx_\@: .endm From ed4511977db91dc6eebded5f8bc22be135ab3e26 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 13 May 2025 21:22:10 +0800 Subject: [PATCH 90/95] arm64: Add support for FEAT_{LS64, LS64_V} commit 98809b6ac799cdab9f7f09452afec42fe86664af openEuler Armv8.7 introduces single-copy atomic 64-byte loads and stores instructions and its variants named under FEAT_{LS64, LS64_V}. These features are identified by ID_AA64ISAR1_EL1.LS64 and the use of such instructions in userspace (EL0) can be trapped. In order to support the use of corresponding instructions in userspace: - Make ID_AA64ISAR1_EL1.LS64 visbile to userspace - Add identifying and enabling in the cpufeature list - Expose these support of these features to userspace through HWCAP3 and cpuinfo Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- Documentation/arch/arm64/booting.rst | 12 ++++++ Documentation/arch/arm64/elf_hwcaps.rst | 6 +++ arch/arm64/include/asm/hwcap.h | 2 + arch/arm64/include/uapi/asm/hwcap.h | 2 + arch/arm64/kernel/cpufeature.c | 51 +++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 2 + arch/arm64/tools/cpucaps | 2 + 7 files changed, 77 insertions(+) diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index 408d2e27b6418..b540e0933ddeb 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -438,6 +438,18 @@ Before jumping into the kernel, the following conditions must be met: - HCRX_EL2.TALLINT must be initialised to 0b0. + For CPUs support for 64-byte loads and stores without status (FEAT_LS64): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.EnALS (bit 1) must be initialised to 0b1. + + For CPUs support for 64-byte loads and stores with status (FEAT_LS64_V): + + - If the kernel is entered at EL1 and EL2 is present: + + - HCRX_EL2.EnASR (bit 2) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. Where the values documented diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst index d1c07278913df..c6e988a07c54f 100644 --- a/Documentation/arch/arm64/elf_hwcaps.rst +++ b/Documentation/arch/arm64/elf_hwcaps.rst @@ -320,6 +320,12 @@ HWCAP2_MOPS HWCAP2_HBC Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001. +HWCAP3_LS64 + Functionality implied by ID_AA64ISAR1_EL1.LS64 == 0b0001. + +HWCAP3_LS64_V + Functionality implied by ID_AA64ISAR1_EL1.LS64 == 0b0010. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6f044e5873a98..14de5d7ecc9ca 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -141,6 +141,8 @@ #define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) #define __khwcap3_feature(x) (const_ilog2(HWCAP3_ ## x) + 128) +#define KERNEL_HWCAP_LS64 __khwcap3_feature(LS64) +#define KERNEL_HWCAP_LS64_V __khwcap3_feature(LS64_V) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a86fca74fdcf8..4cabccfd7d842 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -108,5 +108,7 @@ /* * HWCAP3 flags - for AT_HWCAP3 */ +#define HWCAP3_LS64 (1UL << 0) +#define HWCAP3_LS64_V (1UL << 1) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f798ebc06ac3b..10b784c976f29 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -200,6 +200,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LS64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), @@ -2160,6 +2161,38 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) static bool enable_pseudo_nmi; #endif +static bool has_ls64(const struct arm64_cpu_capabilities *entry, int __unused) +{ + u64 ls64; + + ls64 = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), + entry->field_pos, entry->sign); + + if (ls64 == ID_AA64ISAR1_EL1_LS64_NI || + ls64 > ID_AA64ISAR1_EL1_LS64_LS64_ACCDATA) + return false; + + if (entry->capability == ARM64_HAS_LS64 && + ls64 >= ID_AA64ISAR1_EL1_LS64_LS64) + return true; + + if (entry->capability == ARM64_HAS_LS64_V && + ls64 >= ID_AA64ISAR1_EL1_LS64_LS64_V) + return true; + + return false; +} + +static void cpu_enable_ls64(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnALS, SCTLR_EL1_EnALS); +} + +static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, SCTLR_EL1_EnASR); +} + #ifdef CONFIG_ARM64_PSEUDO_NMI static int __init early_enable_pseudo_nmi(char *p) { @@ -2879,6 +2912,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = nmi_enable, }, #endif + { + .desc = "LS64", + .capability = ARM64_HAS_LS64, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_ls64, + .cpu_enable = cpu_enable_ls64, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64) + }, + { + .desc = "LS64_V", + .capability = ARM64_HAS_LS64_V, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_ls64, + .cpu_enable = cpu_enable_ls64_v, + ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V) + }, {}, }; @@ -2994,6 +3043,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64, CAP_HWCAP, KERNEL_HWCAP_LS64), + HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64_V, CAP_HWCAP, KERNEL_HWCAP_LS64_V), HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 98fda85005353..21a18237fc4b5 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -82,6 +82,8 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SB] = "sb", [KERNEL_HWCAP_PACA] = "paca", [KERNEL_HWCAP_PACG] = "pacg", + [KERNEL_HWCAP_LS64] = "ls64", + [KERNEL_HWCAP_LS64_V] = "ls64_v", [KERNEL_HWCAP_DCPODP] = "dcpodp", [KERNEL_HWCAP_SVE2] = "sve2", [KERNEL_HWCAP_SVEAES] = "sveaes", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 62a38a364c412..1bace40af4a8c 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -104,3 +104,5 @@ WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT WORKAROUND_SPECULATIVE_SSBS WORKAROUND_SPECULATIVE_UNPRIV_LOAD +HAS_LS64 +HAS_LS64_V From f243d6fdbb06865867b0dd0e24d1fe30c7109fd4 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 13 May 2025 21:22:11 +0800 Subject: [PATCH 91/95] Workaround the issue when compile with CONFIG_FUNCTION_ALIGNMENT_64B commit db144b9863a522607c0c2769a70473494e54019b openEuler Workaround the issue when compile with CONFIG_FUNCTION_ALIGNMENT_64B Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- arch/arm64/Kconfig | 8 ++++++++ arch/arm64/include/asm/el2_setup.h | 2 ++ arch/arm64/kernel/cpufeature.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eace039f470bb..11fc63286352f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -117,6 +117,7 @@ config ARM64 select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI select ARM_PSCI_FW + select ARM64_LS64 if !FUNCTION_ALIGNMENT_64B select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK @@ -2148,6 +2149,13 @@ config ARM64_MTE endmenu # "ARMv8.5 architectural features" +menu "ARMv8.6 architectural features" + +config ARM64_LS64 + bool + +endmenu # "ARMv8.6 architectural features" + menu "ARMv8.7 architectural features" config ARM64_EPAN diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index c653ef8284306..e468e4293be97 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -28,6 +28,7 @@ cbz x0, .Lskip_hcrx_\@ mov_q x0, HCRX_HOST_FLAGS +#ifdef CONFIG_ARM64_LS64 /* Enable LS64, LS64_V if supported */ mrs_s x1, SYS_ID_AA64ISAR1_EL1 ubfx x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4 @@ -36,6 +37,7 @@ cmp x1, #ID_AA64ISAR1_EL1_LS64_LS64_V b.lt .Lset_hcrx_\@ orr x0, x0, #HCRX_EL2_EnASR +#endif .Lset_hcrx_\@ : msr_s SYS_HCRX_EL2, x0 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 10b784c976f29..6e7926724866d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2161,6 +2161,7 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) static bool enable_pseudo_nmi; #endif +#ifdef CONFIG_ARM64_LS64 static bool has_ls64(const struct arm64_cpu_capabilities *entry, int __unused) { u64 ls64; @@ -2192,6 +2193,7 @@ static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap) { sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, SCTLR_EL1_EnASR); } +#endif #ifdef CONFIG_ARM64_PSEUDO_NMI static int __init early_enable_pseudo_nmi(char *p) @@ -2912,6 +2914,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = nmi_enable, }, #endif +#ifdef CONFIG_ARM64_LS64 { .desc = "LS64", .capability = ARM64_HAS_LS64, @@ -2928,6 +2931,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_ls64_v, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V) }, +#endif {}, }; From f390b9589bb34ef454493afe0a63855f162fe8c9 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 13 May 2025 21:22:12 +0800 Subject: [PATCH 92/95] KVM: arm64: Enable FEAT_{LS64, LS64_V} in the supported guest commit 2492274e6dc947ae6d7d4dbb46c0abf6ffd533cc openEuler Using FEAT_{LS64, LS64_V} instructions in a guest is also controlled by HCRX_EL2.{EnALS, EnASR}. Enable it if guest has related feature. Signed-off-by: Yicong Yang Signed-off-by: Hongye Lin Signed-off-by: WangYuli --- arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 2f08a04e57880..6269cab564e9f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -211,6 +211,12 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hcrx &= ~clr; } + if (cpus_have_final_cap(ARM64_HAS_LS64)) + hcrx |= HCRX_EL2_EnALS; + + if (cpus_have_final_cap(ARM64_HAS_LS64_V)) + hcrx |= HCRX_EL2_EnASR; + write_sysreg_s(hcrx, SYS_HCRX_EL2); } From d78838cdc729e9eea8e07f77a439ea30b766f5cc Mon Sep 17 00:00:00 2001 From: WangYuli Date: Tue, 3 Feb 2026 20:35:14 +0800 Subject: [PATCH 93/95] Revert "deepin: qspinlock: Disable CNA by default" This reverts commit e6a498c050a7c489b79db171363f74d5d9be5499. Link: https://gitee.com/OpenCloudOS/OpenCloudOS-Kernel/pulls/567 Signed-off-by: zhaolichang Signed-off-by: WangYuli --- Documentation/admin-guide/kernel-parameters.txt | 6 ++---- kernel/locking/qspinlock_cna.h | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e4c752a068617..b3aea03fb4c41 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4075,14 +4075,12 @@ numa_spinlock= [NUMA, PV_OPS] Select the NUMA-aware variant of spinlock. The options are: auto - Enable this variant if running on a multi-node - machine in native environment. (Under this option, if - paravirt spinlock is already enabled, this variant will - not be enabled.) + machine in native environment. on - Unconditionally enable this variant. off - Unconditionally disable this variant. Not specifying this option is equivalent to - numa_spinlock=off. + numa_spinlock=auto. numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. 'node', 'default' can be specified diff --git a/kernel/locking/qspinlock_cna.h b/kernel/locking/qspinlock_cna.h index a7ed231e32eb6..5352487ccc525 100644 --- a/kernel/locking/qspinlock_cna.h +++ b/kernel/locking/qspinlock_cna.h @@ -388,7 +388,7 @@ EXPORT_SYMBOL(cna_queued_spin_lock_slowpath); * Constant (boot-param configurable) flag selecting the NUMA-aware variant * of spinlock. Possible values: -1 (off) / 0 (auto, default) / 1 (on). */ -static int numa_spinlock_flag = -1; +static int numa_spinlock_flag; static int __init numa_spinlock_setup(char *str) { From 2e6b1c453b1424ddfc49af6fc7fb26ee59ef2ca0 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 31 Oct 2023 10:52:47 +0800 Subject: [PATCH 94/95] ipmi: Errata workaround to prevent SMS message processing timeout commit 6449ff7a5a2b148bc0a60efb3f0d723aaeceaae6 openEuler Add erratum 162102203 for HIP09. Add erratum 162102203 kconfig to enable workaround for SMS message processing timeout Add a workaround method to prevent bridge commands from timeout. Because SMS_ATN bit will be cleared by specific CPU hardware before it is driven to process. sms_atn_quirk: Identify whether the SMS_ATN flag needs to be stored and do workaround operations. sms_atn_flag: This variable is used to store SMS_ATN when the driver polling enters the bt_event until the IDLE state is reached.Then reset this flag when driver finished proccessing SMS message and clear SMS_ATN bit. Identify whether the incorrect BT_B2H_ATN is possibly due to receiving an SMS message from BMC. If so, clear B2H_ATN. Prevents SMS_ATN from being discarded after processing drain stale response. Signed-off-by: Devyn Liu Signed-off-by: zhaolichang <943677312@qq.com> Signed-off-by: WangYuli --- Documentation/arch/arm64/silicon-errata.rst | 1 + arch/arm64/Kconfig | 11 +++ drivers/char/ipmi/ipmi_bt_sm.c | 78 +++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index fbc833841bef5..d18f41f3cfbbf 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -250,6 +250,7 @@ stable kernels. | | ,10C,11} | | | | | SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ +| Hisilicon | HIP09 | #162102203 | HISILICON_ERRATUM_162102203 | +----------------+-----------------+-----------------+-----------------------------+ | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 11fc63286352f..da6db8255bfa9 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1231,6 +1231,17 @@ config HISILICON_ERRATUM_165010801 To prevent starvation from triggering system watchdog hardlockup, the interrupt priority is explicitly increased in the arch_timer driver. +config HISILICON_ERRATUM_162102203 + bool "Hisilicon erratum 162102203" + default n + help + On HIP09, SMS_ATN bit may be cleared by hardware before it is driven + to process. This will causes bridge commands timeout. Avoid these issues + by modifying the IPMI driver to use variables to store SMS_ATNs that may + be lost. + + If unsure, say N. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index f41f78972b9c4..1a4985c05bb12 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -88,6 +88,13 @@ struct si_sm_data { enum bt_states complete; /* to divert the state machine */ long BT_CAP_req2rsp; int BT_CAP_retries; /* Recommended retries */ + +#ifdef CONFIG_HISILICON_ERRATUM_162102203 + /* Record sms_atn when sms_atn set && bt_state not in idle */ + int sms_atn_flag; + /* If true, need to store SMS_ATN bit */ + bool sms_atn_quirk; +#endif }; #define BT_CLR_WR_PTR 0x01 /* See IPMI 1.5 table 11.6.4 */ @@ -168,6 +175,32 @@ static char *status2txt(unsigned char status) } #define STATUS2TXT status2txt(status) +#ifdef CONFIG_HISILICON_ERRATUM_162102203 +/* + * To confirm whether the SMS_ATN flag needs to be stored and get + * quirk through the method reported by the BIOS. Because in special + * cases SMS_ATN flag bits may be lost before being processed. + */ +static bool get_sms_atn_quirk(struct si_sm_io *io) +{ + acpi_handle handle; + acpi_status status; + unsigned long long tmp; + + handle = ACPI_HANDLE(io->dev); + if (!handle) + return false; + + status = acpi_evaluate_integer(handle, "SATN", NULL, &tmp); + if (ACPI_FAILURE(status)) + return false; + else if (tmp != 1) + return false; + + return true; +} +#endif + /* called externally at insmod time, and internally on cleanup */ static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) @@ -182,6 +215,12 @@ static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) bt->complete = BT_STATE_IDLE; /* end here */ bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * USEC_PER_SEC; bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT; + +#ifdef CONFIG_HISILICON_ERRATUM_162102203 + bt->sms_atn_quirk = get_sms_atn_quirk(io); + bt->sms_atn_flag = 0; +#endif + return 3; /* We claim 3 bytes of space; ought to check SPMI table */ } @@ -283,6 +322,11 @@ static void reset_flags(struct si_sm_data *bt) BT_CONTROL(BT_H_BUSY); /* force clear */ BT_CONTROL(BT_CLR_WR_PTR); /* always reset */ BT_CONTROL(BT_SMS_ATN); /* always clear */ + +#ifdef CONFIG_HISILICON_ERRATUM_162102203 + bt->sms_atn_flag = 0; /* Reset sms_atn_flag */ +#endif + BT_INTMASK_W(BT_BMC_HWRST); } @@ -454,6 +498,36 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) int i; status = BT_STATUS; +#ifdef CONFIG_HISILICON_ERRATUM_162102203 + if (bt->sms_atn_quirk) { + /* + * Identify whether the current BT_B2H_ATN is possibly due to + * receiving an SMS message from BMC. If so, only clear the + * incorrectly set BT_B2H_ATN without returning, and continue + * to execute downwards. + */ + if ((bt->state < BT_STATE_WRITE_BYTES) && (status & BT_B2H_ATN) && + (status & BT_SMS_ATN)) { + BT_CONTROL(BT_B2H_ATN); /* Clear it */ + status &= ~BT_B2H_ATN; /* Refresh status */ + if (bt_debug) + dev_dbg(bt->io->dev, "clear wrong B2H_ATN, BT: %s\n", + status2txt(BT_STATUS)); + } + + /* + * Record the SMS_ATN by sms_atn_flag, because SMS_ATN would be clear + * incorrectly by hardware when received a SMS message from BMC if + * bt->state is not in IDLE state. And the BT_SMS_ATN will be lost. + */ + if ((bt->state >= BT_STATE_XACTION_START) && (status & BT_SMS_ATN)) + bt->sms_atn_flag = 1; + + /* Restore SMS_ATN */ + if (bt->sms_atn_flag) + status |= BT_SMS_ATN; + } +#endif bt->nonzero_status |= status; if ((bt_debug & BT_DEBUG_STATES) && (bt->state != last_printed)) { dev_dbg(bt->io->dev, "BT: %s %s TO=%ld - %ld\n", @@ -496,6 +570,10 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) case BT_STATE_IDLE: if (status & BT_SMS_ATN) { BT_CONTROL(BT_SMS_ATN); /* clear it */ + +#ifdef CONFIG_HISILICON_ERRATUM_162102203 + bt->sms_atn_flag = 0; /* Reset sms_atn_flag */ +#endif return SI_SM_ATTN; } From 9d05ee67e65e1b42e5ef987f996d5d47bec5d9b8 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 4 Feb 2026 16:57:45 +0800 Subject: [PATCH 95/95] arm64: Kconfig: Fix typo in ARCH_HAS_NONLEAF_PMD_YOUNG dependency Fix a typo in the Kconfig dependency for ARCH_HAS_NONLEAF_PMD_YOUNG. The correct config symbol is ARM64_HAFT (Hardware managed Access Flag for Table descriptors), not ARM64_HAF. Due to this typo, ARCH_HAS_NONLEAF_PMD_YOUNG is never selected even when ARM64_HAFT is enabled, which prevents the LRU generation aging optimization from being enabled on capable hardware. Fixes: 6e24b875ae20 ("arm64: Enable ARCH_HAS_NONLEAF_PMD_YOUNG") Signed-off-by: WangYuli --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index da6db8255bfa9..3696b9322f031 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -35,7 +35,7 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE - select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAF + select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS