Skip to content

Commit cd34908

Browse files
bwicaksononv authored and nvmochs committed
NVIDIA: VR: SAUCE: perf/arm_pmu: Skip PMCCNTR_EL0 on NVIDIA Olympus
PMCCNTR_EL0 may continue to increment on NVIDIA Olympus CPUs while the PE is in WFI/WFE. That does not necessarily match the CPU_CYCLES event counted by a programmable counter, so using PMCCNTR_EL0 for cycles can give results that differ from the programmable counter path.

Extend the existing PMCCNTR avoidance decision from the SMT case to also cover Olympus. Store the result in the common arm_pmu state at registration time, so arm_pmuv3 can keep using a single flag when deciding whether CPU_CYCLES may use PMCCNTR_EL0.

Use the cached MIDR from cpu_data to identify Olympus parts and avoid reading MIDR_EL1 in the event path.

Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
(backported from https://lore.kernel.org/all/20260429215614.1793131-1-bwicaksono@nvidia.com/)
Signed-off-by: Matthew R. Ochs <mochs@nvidia.com>
1 parent 5e154c9 commit cd34908

3 files changed

Lines changed: 75 additions & 13 deletions

File tree

drivers/perf/arm_pmu.c

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#include <linux/irq.h>
2525
#include <linux/irqdesc.h>
2626

27+
#include <asm/cpu.h>
28+
#include <asm/cputype.h>
2729
#include <asm/irq_regs.h>
2830

2931
static int armpmu_count_irq_users(const struct cpumask *affinity,
@@ -920,6 +922,76 @@ void armpmu_free(struct arm_pmu *pmu)
920922
kfree(pmu);
921923
}
922924

925+
#ifdef CONFIG_ARM64
926+
/*
927+
* List of CPUs that should avoid using PMCCNTR_EL0.
928+
*/
929+
static struct midr_range armpmu_avoid_pmccntr_cpus[] = {
930+
/*
931+
* The PMCCNTR_EL0 in Olympus CPU may still increment while in WFI/WFE state.
932+
* This is an implementation specific behavior and not an erratum.
933+
*
934+
* From ARM DDI0487 D14.4:
935+
* It is IMPLEMENTATION SPECIFIC whether CPU_CYCLES and PMCCNTR count
936+
* when the PE is in WFI or WFE state, even if the clocks are not stopped.
937+
*
938+
* From ARM DDI0487 D24.5.2:
939+
* All counters are subject to any changes in clock frequency, including
940+
* clock stopping caused by the WFI and WFE instructions.
941+
* This means that it is CONSTRAINED UNPREDICTABLE whether or not
942+
* PMCCNTR_EL0 continues to increment when clocks are stopped by WFI and
943+
* WFE instructions.
944+
*/
945+
MIDR_ALL_VERSIONS(MIDR_NVIDIA_OLYMPUS),
946+
{}
947+
};
948+
949+
static bool armpmu_is_in_avoid_pmccntr_cpus(int cpu)
950+
{
951+
struct midr_range const *r = armpmu_avoid_pmccntr_cpus;
952+
u32 midr = (u32)per_cpu(cpu_data, cpu).reg_midr;
953+
954+
while (r->model) {
955+
if (midr_is_cpu_model_range(midr, r->model, r->rv_min, r->rv_max))
956+
return true;
957+
r++;
958+
}
959+
960+
return false;
961+
}
962+
#else
963+
/*
 * Fallback used when CONFIG_ARM64 is not set: there is no avoid list to
 * consult here, so no CPU is ever reported as needing to avoid PMCCNTR_EL0.
 */
static bool armpmu_is_in_avoid_pmccntr_cpus(int cpu)
{
	return false;
}
967+
#endif
968+
969+
static bool armpmu_avoid_pmccntr(struct arm_pmu *pmu)
970+
{
971+
int cpu = cpumask_first(&pmu->supported_cpus);
972+
973+
/*
974+
* By this stage we know our supported CPUs on either DT/ACPI platforms,
975+
* detect the SMT implementation.
976+
* On SMT CPUs, the PMCCNTR_EL0 increments from the processor clock rather
977+
* than the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue
978+
* counting on a WFI PE if one of its SMT sibling is not idle on a
979+
* multi-threaded implementation. So don't use it on SMT cores.
980+
*/
981+
if (topology_core_has_smt(cpu))
982+
return true;
983+
984+
/*
985+
* On some CPUs, PMCCNTR_EL0 does not match the behavior of CPU_CYCLES
986+
* programmable counter, so avoid routing cycles through PMCCNTR_EL0 to
987+
* prevent inconsistency in the results.
988+
*/
989+
if (armpmu_is_in_avoid_pmccntr_cpus(cpu))
990+
return true;
991+
992+
return false;
993+
}
994+
923995
int armpmu_register(struct arm_pmu *pmu)
924996
{
925997
int ret;
@@ -928,11 +1000,7 @@ int armpmu_register(struct arm_pmu *pmu)
9281000
if (ret)
9291001
return ret;
9301002

931-
/*
932-
* By this stage we know our supported CPUs on either DT/ACPI platforms,
933-
* detect the SMT implementation.
934-
*/
935-
pmu->has_smt = topology_core_has_smt(cpumask_first(&pmu->supported_cpus));
1003+
pmu->avoid_pmccntr = armpmu_avoid_pmccntr(pmu);
9361004

9371005
if (!pmu->set_event_filter)
9381006
pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;

drivers/perf/arm_pmuv3.c

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,13 +1002,7 @@ static bool armv8pmu_can_use_pmccntr(struct pmu_hw_events *cpuc,
10021002
if (has_branch_stack(event))
10031003
return false;
10041004

1005-
/*
1006-
* The PMCCNTR_EL0 increments from the processor clock rather than
1007-
* the PE clock (ARM DDI0487 L.b D13.1.3) which means it'll continue
1008-
* counting on a WFI PE if one of its SMT sibling is not idle on a
1009-
* multi-threaded implementation. So don't use it on SMT cores.
1010-
*/
1011-
if (cpu_pmu->has_smt)
1005+
if (cpu_pmu->avoid_pmccntr)
10121006
return false;
10131007

10141008
return true;

include/linux/perf/arm_pmu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ struct arm_pmu {
119119

120120
/* PMUv3 only */
121121
int pmuver;
122-
bool has_smt;
122+
bool avoid_pmccntr;
123123
u64 reg_pmmir;
124124
u64 reg_brbidr;
125125
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40

0 commit comments

Comments
 (0)