From a27bdc89b6d05d154418cfd23da42e031ac1e8f4 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:52 +0530 Subject: [PATCH 01/19] ACPI: CPPC: Add cppc_get_perf() API to read performance controls Add cppc_get_perf() function to read values of performance control registers including desired_perf, min_perf, max_perf, energy_perf, and auto_sel. This provides a read interface to complement the existing cppc_set_perf() write interface for performance control registers. Note that auto_sel is read by cppc_get_perf() but not written by cppc_set_perf() to avoid unintended mode changes during performance updates. It can be updated with existing dedicated cppc_set_auto_sel() API. Use cppc_get_perf() in cppc_cpufreq_get_cpu_data() to initialize perf_ctrls with current hardware register values during cpufreq policy initialization. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-2-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 658fa7b1c47a857af484c5c5dff8d0164b7c7bfb) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 80 ++++++++++++++++++++++++++++++++++ drivers/cpufreq/cppc_cpufreq.c | 6 +++ include/acpi/cppc_acpi.h | 5 +++ 3 files changed, 91 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index f0e513e9ed5d3..5122e99bddb09 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1738,6 +1738,86 @@ int cppc_set_enable(int cpu, bool enable) } EXPORT_SYMBOL_GPL(cppc_set_enable); +/** + * cppc_get_perf - Get a CPU's performance controls. + * @cpu: CPU for which to get performance controls. + * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h + * + * Return: 0 for success with perf_ctrls, -ERRNO otherwise. + */ +int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); + struct cpc_register_resource *desired_perf_reg, + *min_perf_reg, *max_perf_reg, + *energy_perf_reg, *auto_sel_reg; + u64 desired_perf = 0, min = 0, max = 0, energy_perf = 0, auto_sel = 0; + int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); + struct cppc_pcc_data *pcc_ss_data = NULL; + int ret = 0, regs_in_pcc = 0; + + if (!cpc_desc) { + pr_debug("No CPC descriptor for CPU:%d\n", cpu); + return -ENODEV; + } + + if (!perf_ctrls) { + pr_debug("Invalid perf_ctrls pointer\n"); + return -EINVAL; + } + + desired_perf_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; + min_perf_reg = &cpc_desc->cpc_regs[MIN_PERF]; + max_perf_reg = &cpc_desc->cpc_regs[MAX_PERF]; + energy_perf_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; + + /* Are any of the regs PCC ?*/ + if (CPC_IN_PCC(desired_perf_reg) || CPC_IN_PCC(min_perf_reg) || + CPC_IN_PCC(max_perf_reg) || CPC_IN_PCC(energy_perf_reg) || + CPC_IN_PCC(auto_sel_reg)) { + if (pcc_ss_id < 0) { + pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); + return -ENODEV; + } + pcc_ss_data = pcc_data[pcc_ss_id]; + regs_in_pcc = 1; + down_write(&pcc_ss_data->pcc_lock); + /* Ring doorbell once to update PCC subspace */ + if (send_pcc_cmd(pcc_ss_id, CMD_READ) < 0) { + ret = -EIO; + goto out_err; + } + } + + /* Read optional elements if present */ + if (CPC_SUPPORTED(max_perf_reg)) + cpc_read(cpu, max_perf_reg, &max); + perf_ctrls->max_perf = max; + + if (CPC_SUPPORTED(min_perf_reg)) + cpc_read(cpu, min_perf_reg, &min); + perf_ctrls->min_perf = min; + + if (CPC_SUPPORTED(desired_perf_reg)) + cpc_read(cpu, desired_perf_reg, &desired_perf); + perf_ctrls->desired_perf = desired_perf; + + if (CPC_SUPPORTED(energy_perf_reg)) + cpc_read(cpu, energy_perf_reg, &energy_perf); + perf_ctrls->energy_perf = energy_perf; + + if (CPC_SUPPORTED(auto_sel_reg)) + cpc_read(cpu, auto_sel_reg, &auto_sel); + perf_ctrls->auto_sel = (bool)auto_sel; + +out_err: + if (regs_in_pcc) + up_write(&pcc_ss_data->pcc_lock); + return ret; +} +EXPORT_SYMBOL_GPL(cppc_get_perf); + /** * cppc_set_perf - Set a CPU's performance controls. * @cpu: CPU for which to set performance controls. diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 011f35cb47b94..a61a24e0dcaed 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -594,6 +594,12 @@ static struct cppc_cpudata *cppc_cpufreq_get_cpu_data(unsigned int cpu) goto free_mask; } + ret = cppc_get_perf(cpu, &cpu_data->perf_ctrls); + if (ret) { + pr_debug("Err reading CPU%d perf ctrls: ret:%d\n", cpu, ret); + goto free_mask; + } + return cpu_data; free_mask: diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 4d644f03098e3..3fc796c0d9022 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -151,6 +151,7 @@ extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); +extern int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps); @@ -193,6 +194,10 @@ static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ { return -EOPNOTSUPP; } +static inline int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) +{ + return -EOPNOTSUPP; +} static inline int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) { return -EOPNOTSUPP; From 05aa30a460306f69d62511d16aea9a938179fe45 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:53 +0530 Subject: [PATCH 02/19] ACPI: CPPC: Warn on missing mandatory DESIRED_PERF register Add a warning during CPPC processor probe if the Desired Performance register is not supported when it should be. As per 8.4.6.1.2.3 section of ACPI 6.6 specification, "The Desired Performance Register is optional only when OSPM indicates support for CPPC2 in the platform-wide _OSC capabilities and the Autonomous Selection Enable field is encoded as an Integer with a value of 1." In other words: - In CPPC v1, DESIRED_PERF is mandatory - In CPPC v2, it becomes optional only when AUTO_SEL_ENABLE is supported This helps detect firmware configuration issues early during boot. Link: https://lore.kernel.org/lkml/9fa21599-004a-4af8-acc2-190fd0404e35@nvidia.com/ Suggested-by: Pierre Gondois Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-3-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit b3e45fb2db9d8a733e94b315f1272e2c4468ed4b) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 5122e99bddb09..3f758d2944e23 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -853,6 +853,16 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } per_cpu(cpu_pcc_subspace_idx, pr->id) = pcc_subspace_id; + /* + * In CPPC v1, DESIRED_PERF is mandatory. In CPPC v2, it is optional + * only when AUTO_SEL_ENABLE is supported. + */ + if (!CPC_SUPPORTED(&cpc_ptr->cpc_regs[DESIRED_PERF]) && + (!osc_sb_cppc2_support_acked || + !CPC_SUPPORTED(&cpc_ptr->cpc_regs[AUTO_SEL_ENABLE]))) + pr_warn("Desired perf. register is mandatory if CPPC v2 is not supported " + "or autonomous selection is disabled\n"); + /* * Initialize the remaining cpc_regs as unsupported. * Example: In case FW exposes CPPC v2, the below loop will initialize From 13a90fb9f9549fa8fd43cbe52d0b15920f7b41ff Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:54 +0530 Subject: [PATCH 03/19] ACPI: CPPC: Extend cppc_set_epp_perf() for FFH/SystemMemory Extend cppc_set_epp_perf() to write both auto_sel and energy_perf registers when they are in FFH or SystemMemory address space. This keeps the behavior consistent with PCC case where both registers are already updated together, but was missing for FFH/SystemMemory. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-4-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 38428a680026c52a1fc64212325d161974c3e4cf) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 3f758d2944e23..94a7ffa8be3c3 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1571,6 +1571,8 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) struct cpc_register_resource *auto_sel_reg; struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); struct cppc_pcc_data *pcc_ss_data = NULL; + bool autosel_ffh_sysmem; + bool epp_ffh_sysmem; int ret; if (!cpc_desc) { @@ -1581,6 +1583,11 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE]; epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF]; + epp_ffh_sysmem = CPC_SUPPORTED(epp_set_reg) && + (CPC_IN_FFH(epp_set_reg) || CPC_IN_SYSTEM_MEMORY(epp_set_reg)); + autosel_ffh_sysmem = CPC_SUPPORTED(auto_sel_reg) && + (CPC_IN_FFH(auto_sel_reg) || CPC_IN_SYSTEM_MEMORY(auto_sel_reg)); + if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu); @@ -1606,11 +1613,22 @@ int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable) ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE); up_write(&pcc_ss_data->pcc_lock); } else if (osc_cpc_flexible_adr_space_confirmed && - CPC_SUPPORTED(epp_set_reg) && CPC_IN_FFH(epp_set_reg)) { - ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf); + (epp_ffh_sysmem || autosel_ffh_sysmem)) { + if (autosel_ffh_sysmem) { + ret = cpc_write(cpu, auto_sel_reg, enable); + if (ret) + return ret; + } + + if (epp_ffh_sysmem) { + ret = cpc_write(cpu, epp_set_reg, + perf_ctrls->energy_perf); + if (ret) + return ret; + } } else { ret = -ENOTSUPP; - pr_debug("_CPC in PCC and _CPC in FFH are not supported\n"); + pr_debug("_CPC in PCC/FFH/SystemMemory are not supported\n"); } return ret; From f60982b8dd6b7cc0c66f97d67b48c045b6c2b811 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:55 +0530 Subject: [PATCH 04/19] cpufreq: CPPC: Update cached perf_ctrls on sysfs write Update the cached perf_ctrls values when writing via sysfs to keep them in sync with hardware registers: - store_auto_select(): update perf_ctrls.auto_sel - store_energy_performance_preference_val(): update perf_ctrls.energy_perf This ensures consistent cached values after sysfs writes, which complements the cppc_get_perf() initialization during policy setup. Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-5-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 24ad4c6c136bdaa4c92c5c5948856752ce3e9f76) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/cppc_cpufreq.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index a61a24e0dcaed..ebb5746df220e 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -855,6 +855,7 @@ static ssize_t show_auto_select(struct cpufreq_policy *policy, char *buf) static ssize_t store_auto_select(struct cpufreq_policy *policy, const char *buf, size_t count) { + struct cppc_cpudata *cpu_data = policy->driver_data; bool val; int ret; @@ -866,6 +867,8 @@ static ssize_t store_auto_select(struct cpufreq_policy *policy, if (ret) return ret; + cpu_data->perf_ctrls.auto_sel = val; + return count; } @@ -916,8 +919,32 @@ static ssize_t store_##_name(struct cpufreq_policy *policy, \ CPPC_CPUFREQ_ATTR_RW_U64(auto_act_window, cppc_get_auto_act_window, cppc_set_auto_act_window) -CPPC_CPUFREQ_ATTR_RW_U64(energy_performance_preference_val, - cppc_get_epp_perf, cppc_set_epp) +static ssize_t +show_energy_performance_preference_val(struct cpufreq_policy *policy, char *buf) +{ + return cppc_cpufreq_sysfs_show_u64(policy->cpu, cppc_get_epp_perf, buf); +} + +static ssize_t +store_energy_performance_preference_val(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + struct cppc_cpudata *cpu_data = policy->driver_data; + u64 val; + int ret; + + ret = kstrtou64(buf, 0, &val); + if (ret) + return ret; + + ret = cppc_set_epp(policy->cpu, val); + if (ret) + return ret; + + cpu_data->perf_ctrls.energy_perf = val; + + return count; +} cpufreq_freq_attr_ro(freqdomain_cpus); cpufreq_freq_attr_rw(auto_select); From a1cf76fde511866941890ed080fde5598959d8b0 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:56 +0530 Subject: [PATCH 05/19] cpufreq: cppc: Update MIN_PERF/MAX_PERF in target callbacks Update MIN_PERF and MAX_PERF registers from policy->min and policy->max in the .target() and .fast_switch() callbacks. This allows controlling performance bounds via standard scaling_min_freq and scaling_max_freq sysfs interfaces. Similar to intel_cpufreq which updates HWP min/max limits in .target(), cppc_cpufreq now programs MIN_PERF/MAX_PERF along with DESIRED_PERF. Since MIN_PERF/MAX_PERF can be updated even when auto_sel is disabled, they are updated unconditionally. Also program MIN_PERF/MAX_PERF in store_auto_select() when enabling autonomous selection so the platform uses correct bounds immediately. Suggested-by: Rafael J. Wysocki Signed-off-by: Sumit Gupta Link: https://patch.msgid.link/20260206142658.72583-6-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit ea3db45ae476889a1ba0ab3617e6afdeeefbda3d) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/cppc_cpufreq.c | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index ebb5746df220e..8a8cf76828ee2 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -287,6 +287,21 @@ static inline void cppc_freq_invariance_exit(void) } #endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ +static void cppc_cpufreq_update_perf_limits(struct cppc_cpudata *cpu_data, + struct cpufreq_policy *policy) +{ + struct cppc_perf_caps *caps = &cpu_data->perf_caps; + u32 min_perf, max_perf; + + min_perf = cppc_khz_to_perf(caps, policy->min); + max_perf = cppc_khz_to_perf(caps, policy->max); + + cpu_data->perf_ctrls.min_perf = + clamp_t(u32, min_perf, caps->lowest_perf, caps->highest_perf); + cpu_data->perf_ctrls.max_perf = + clamp_t(u32, max_perf, caps->lowest_perf, caps->highest_perf); +} + static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -298,6 +313,8 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, cpu_data->perf_ctrls.desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); + cppc_cpufreq_update_perf_limits(cpu_data, policy); + freqs.old = policy->cur; freqs.new = target_freq; @@ -322,8 +339,9 @@ static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy, desired_perf = cppc_khz_to_perf(&cpu_data->perf_caps, target_freq); cpu_data->perf_ctrls.desired_perf = desired_perf; - ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + cppc_cpufreq_update_perf_limits(cpu_data, policy); + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); if (ret) { pr_debug("Failed to set target on CPU:%d. ret:%d\n", cpu, ret); @@ -869,6 +887,27 @@ static ssize_t store_auto_select(struct cpufreq_policy *policy, cpu_data->perf_ctrls.auto_sel = val; + if (val) { + u32 old_min_perf = cpu_data->perf_ctrls.min_perf; + u32 old_max_perf = cpu_data->perf_ctrls.max_perf; + + /* + * When enabling autonomous selection, program MIN_PERF and + * MAX_PERF from current policy limits so that the platform + * uses the correct performance bounds immediately. + */ + cppc_cpufreq_update_perf_limits(cpu_data, policy); + + ret = cppc_set_perf(policy->cpu, &cpu_data->perf_ctrls); + if (ret) { + cpu_data->perf_ctrls.min_perf = old_min_perf; + cpu_data->perf_ctrls.max_perf = old_max_perf; + cppc_set_auto_sel(policy->cpu, false); + cpu_data->perf_ctrls.auto_sel = false; + return ret; + } + } + return count; } From b1d006a988aa4c54b45d1ce85badb68fd530ef39 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:57 +0530 Subject: [PATCH 06/19] ACPI: CPPC: add APIs and sysfs interface for perf_limited Add sysfs interface to read/write the Performance Limited register. The Performance Limited register indicates to the OS that an unpredictable event (like thermal throttling) has limited processor performance. It contains two sticky bits set by the platform: - Bit 0 (Desired_Excursion): Set when delivered performance is constrained below desired performance. Not used when Autonomous Selection is enabled. - Bit 1 (Minimum_Excursion): Set when delivered performance is constrained below minimum performance. These bits remain set until OSPM explicitly clears them. The write operation accepts a bitmask of bits to clear: - Write 0x1 to clear bit 0 - Write 0x2 to clear bit 1 - Write 0x3 to clear both bits This enables users to detect if platform throttling impacted a workload. Users clear the register before execution, run the workload, then check afterward - if set, hardware throttling occurred during that time window. The interface is exposed as: /sys/devices/system/cpu/cpuX/cpufreq/perf_limited Signed-off-by: Sumit Gupta Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-7-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 13c45a26635fa51a68911aa57e6778bdad18b103) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 56 ++++++++++++++++++++++++++++++++++ drivers/cpufreq/cppc_cpufreq.c | 5 +++ include/acpi/cppc_acpi.h | 15 +++++++++ 3 files changed, 76 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 94a7ffa8be3c3..53a6ffd995a1a 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1978,6 +1978,62 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } EXPORT_SYMBOL_GPL(cppc_set_perf); +/** + * cppc_get_perf_limited - Get the Performance Limited register value. + * @cpu: CPU from which to get Performance Limited register. + * @perf_limited: Pointer to store the Performance Limited value. + * + * The returned value contains sticky status bits indicating platform-imposed + * performance limitations. + * + * Return: 0 for success, -EIO on failure, -EOPNOTSUPP if not supported. + */ +int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return cppc_get_reg_val(cpu, PERF_LIMITED, perf_limited); +} +EXPORT_SYMBOL_GPL(cppc_get_perf_limited); + +/** + * cppc_set_perf_limited() - Clear bits in the Performance Limited register. + * @cpu: CPU on which to write register. + * @bits_to_clear: Bitmask of bits to clear in the perf_limited register. + * + * The Performance Limited register contains two sticky bits set by platform: + * - Bit 0 (Desired_Excursion): Set when delivered performance is constrained + * below desired performance. Not used when Autonomous Selection is enabled. + * - Bit 1 (Minimum_Excursion): Set when delivered performance is constrained + * below minimum performance. + * + * These bits are sticky and remain set until OSPM explicitly clears them. + * This function only allows clearing bits (the platform sets them). + * + * Return: 0 for success, -EINVAL for invalid bits, -EIO on register + * access failure, -EOPNOTSUPP if not supported. + */ +int cppc_set_perf_limited(int cpu, u64 bits_to_clear) +{ + u64 current_val, new_val; + int ret; + + /* Only bits 0 and 1 are valid */ + if (bits_to_clear & ~CPPC_PERF_LIMITED_MASK) + return -EINVAL; + + if (!bits_to_clear) + return 0; + + ret = cppc_get_perf_limited(cpu, ¤t_val); + if (ret) + return ret; + + /* Clear the specified bits */ + new_val = current_val & ~bits_to_clear; + + return cppc_set_reg_val(cpu, PERF_LIMITED, new_val); +} +EXPORT_SYMBOL_GPL(cppc_set_perf_limited); + /** * cppc_get_transition_latency - returns frequency transition latency in ns * @cpu_num: CPU number for per_cpu(). diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8a8cf76828ee2..94d489a4c90d1 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -985,16 +985,21 @@ store_energy_performance_preference_val(struct cpufreq_policy *policy, return count; } +CPPC_CPUFREQ_ATTR_RW_U64(perf_limited, cppc_get_perf_limited, + cppc_set_perf_limited) + cpufreq_freq_attr_ro(freqdomain_cpus); cpufreq_freq_attr_rw(auto_select); cpufreq_freq_attr_rw(auto_act_window); cpufreq_freq_attr_rw(energy_performance_preference_val); +cpufreq_freq_attr_rw(perf_limited); static struct freq_attr *cppc_cpufreq_attr[] = { &freqdomain_cpus, &auto_select, &auto_act_window, &energy_performance_preference_val, + &perf_limited, NULL, }; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 3fc796c0d9022..f7afa20b8ad9d 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -42,6 +42,11 @@ #define CPPC_EPP_PERFORMANCE_PREF 0x00 #define CPPC_EPP_ENERGY_EFFICIENCY_PREF 0xFF +#define CPPC_PERF_LIMITED_DESIRED_EXCURSION BIT(0) +#define CPPC_PERF_LIMITED_MINIMUM_EXCURSION BIT(1) +#define CPPC_PERF_LIMITED_MASK (CPPC_PERF_LIMITED_DESIRED_EXCURSION | \ + CPPC_PERF_LIMITED_MINIMUM_EXCURSION) + /* Each register has the folowing format. */ struct cpc_reg { u8 descriptor; @@ -174,6 +179,8 @@ extern int cppc_get_auto_act_window(int cpu, u64 *auto_act_window); extern int cppc_set_auto_act_window(int cpu, u64 auto_act_window); extern int cppc_get_auto_sel(int cpu, bool *enable); extern int cppc_set_auto_sel(int cpu, bool enable); +extern int cppc_get_perf_limited(int cpu, u64 *perf_limited); +extern int cppc_set_perf_limited(int cpu, u64 bits_to_clear); extern int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf); extern int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator); extern int amd_detect_prefcore(bool *detected); @@ -270,6 +277,14 @@ static inline int cppc_set_auto_sel(int cpu, bool enable) { return -EOPNOTSUPP; } +static inline int cppc_get_perf_limited(int cpu, u64 *perf_limited) +{ + return -EOPNOTSUPP; +} +static inline int cppc_set_perf_limited(int cpu, u64 bits_to_clear) +{ + return -EOPNOTSUPP; +} static inline int amd_get_highest_perf(unsigned int cpu, u32 *highest_perf) { return -ENODEV; From 41cee2357e8388314ed80ddf0aba7dc1a695a183 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Fri, 6 Feb 2026 19:56:58 +0530 Subject: [PATCH 07/19] cpufreq: CPPC: Add sysfs documentation for perf_limited Add ABI documentation for the Performance Limited Register sysfs interface in the cppc_cpufreq driver. Signed-off-by: Sumit Gupta Reviewed-by: Randy Dunlap Reviewed-by: Pierre Gondois Reviewed-by: Lifeng Zheng Link: https://patch.msgid.link/20260206142658.72583-8-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 856250ba2e810e772dc95b3234ebf0d6393a51d9) Signed-off-by: Jamie Nguyen --- .../ABI/testing/sysfs-devices-system-cpu | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 3a05604c21bf8..82d10d556cc89 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -327,6 +327,24 @@ Description: Energy performance preference This file is only present if the cppc-cpufreq driver is in use. +What: /sys/devices/system/cpu/cpuX/cpufreq/perf_limited +Date: February 2026 +Contact: linux-pm@vger.kernel.org +Description: Performance Limited + + Read to check if platform throttling (thermal/power/current + limits) caused delivered performance to fall below the + requested level. A non-zero value indicates throttling occurred. + + Write the bitmask of bits to clear: + + - 0x1 = clear bit 0 (desired performance excursion) + - 0x2 = clear bit 1 (minimum performance excursion) + - 0x3 = clear both bits + + The platform sets these bits; OSPM can only clear them. + + This file is only present if the cppc-cpufreq driver is in use. What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} Date: August 2008 From f3d79077ae63a1235d726dc357ed4923e586bb09 Mon Sep 17 00:00:00 2001 From: Pengjie Zhang Date: Fri, 13 Feb 2026 18:09:35 +0800 Subject: [PATCH 08/19] ACPI: CPPC: Move reference performance to capabilities Currently, the `Reference Performance` register is read every time the CPU frequency is sampled in `cppc_get_perf_ctrs()`. This function is on the hot path of the cppc_cpufreq driver. Reference Performance indicates the performance level that corresponds to the Reference Counter incrementing and is not expected to change dynamically during runtime (unlike the Delivered and Reference counters). Reading this register in the hot path incurs unnecessary overhead, particularly on platforms where CPC registers are located in the PCC (Platform Communication Channel) subspace. This patch moves `reference_perf` from the dynamic feedback counters structure (`cppc_perf_fb_ctrs`) to the static capabilities structure (`cppc_perf_caps`). Signed-off-by: Pengjie Zhang [ rjw: Changelog adjustment ] Link: https://patch.msgid.link/20260213100935.19111-1-zhangpengjie2@huawei.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 8505bfb4e4eca28ef1b20d3369435ec2d6a125c6) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 55 +++++++++++++++------------------- drivers/cpufreq/cppc_cpufreq.c | 21 +++++++------ include/acpi/cppc_acpi.h | 2 +- 3 files changed, 37 insertions(+), 41 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 53a6ffd995a1a..07bbf5b366a42 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -177,12 +177,12 @@ __ATTR(_name, 0444, show_##_name, NULL) show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, highest_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_perf); +show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, reference_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_nonlinear_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, guaranteed_perf); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_freq); show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq); -show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf); show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time); /* Check for valid access_width, otherwise, fallback to using bit_width */ @@ -1352,9 +1352,10 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *highest_reg, *lowest_reg, - *lowest_non_linear_reg, *nominal_reg, *guaranteed_reg, - *low_freq_reg = NULL, *nom_freq_reg = NULL; - u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0; + *lowest_non_linear_reg, *nominal_reg, *reference_reg, + *guaranteed_reg, *low_freq_reg = NULL, *nom_freq_reg = NULL; + u64 high, low, guaranteed, nom, ref, min_nonlinear, + low_f = 0, nom_f = 0; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; int ret = 0, regs_in_pcc = 0; @@ -1368,6 +1369,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF]; lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF]; nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; + reference_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ]; nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ]; guaranteed_reg = &cpc_desc->cpc_regs[GUARANTEED_PERF]; @@ -1375,6 +1377,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) || CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) || + (CPC_SUPPORTED(reference_reg) && CPC_IN_PCC(reference_reg)) || CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg) || CPC_IN_PCC(guaranteed_reg)) { if (pcc_ss_id < 0) { @@ -1400,6 +1403,17 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, nominal_reg, &nom); perf_caps->nominal_perf = nom; + /* + * If reference perf register is not supported then we should + * use the nominal perf value + */ + if (CPC_SUPPORTED(reference_reg)) { + cpc_read(cpunum, reference_reg, &ref); + perf_caps->reference_perf = ref; + } else { + perf_caps->reference_perf = nom; + } + if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { perf_caps->guaranteed_perf = 0; @@ -1411,7 +1425,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); perf_caps->lowest_nonlinear_perf = min_nonlinear; - if (!high || !low || !nom || !min_nonlinear) + if (!high || !low || !nom || !ref || !min_nonlinear) ret = -EFAULT; /* Read optional lowest and nominal frequencies if present */ @@ -1441,20 +1455,10 @@ EXPORT_SYMBOL_GPL(cppc_get_perf_caps); bool cppc_perf_ctrs_in_pcc_cpu(unsigned int cpu) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu); - struct cpc_register_resource *ref_perf_reg; - - /* - * If reference perf register is not supported then we should use the - * nominal perf value - */ - ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; - if (!CPC_SUPPORTED(ref_perf_reg)) - ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; return CPC_IN_PCC(&cpc_desc->cpc_regs[DELIVERED_CTR]) || CPC_IN_PCC(&cpc_desc->cpc_regs[REFERENCE_CTR]) || - CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME]) || - CPC_IN_PCC(ref_perf_reg); + CPC_IN_PCC(&cpc_desc->cpc_regs[CTR_WRAP_TIME]); } EXPORT_SYMBOL_GPL(cppc_perf_ctrs_in_pcc_cpu); @@ -1491,10 +1495,10 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum); struct cpc_register_resource *delivered_reg, *reference_reg, - *ref_perf_reg, *ctr_wrap_reg; + *ctr_wrap_reg; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum); struct cppc_pcc_data *pcc_ss_data = NULL; - u64 delivered, reference, ref_perf, ctr_wrap_time; + u64 delivered, reference, ctr_wrap_time; int ret = 0, regs_in_pcc = 0; if (!cpc_desc) { @@ -1504,19 +1508,11 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR]; reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR]; - ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF]; ctr_wrap_reg = &cpc_desc->cpc_regs[CTR_WRAP_TIME]; - /* - * If reference perf register is not supported then we should - * use the nominal perf value - */ - if (!CPC_SUPPORTED(ref_perf_reg)) - ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF]; - /* Are any of the regs PCC ?*/ if (CPC_IN_PCC(delivered_reg) || CPC_IN_PCC(reference_reg) || - CPC_IN_PCC(ctr_wrap_reg) || CPC_IN_PCC(ref_perf_reg)) { + CPC_IN_PCC(ctr_wrap_reg)) { if (pcc_ss_id < 0) { pr_debug("Invalid pcc_ss_id\n"); return -ENODEV; @@ -1533,8 +1529,6 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) cpc_read(cpunum, delivered_reg, &delivered); cpc_read(cpunum, reference_reg, &reference); - cpc_read(cpunum, ref_perf_reg, &ref_perf); - /* * Per spec, if ctr_wrap_time optional register is unsupported, then the * performance counters are assumed to never wrap during the lifetime of @@ -1544,14 +1538,13 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) if (CPC_SUPPORTED(ctr_wrap_reg)) cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); - if (!delivered || !reference || !ref_perf) { + if (!delivered || !reference) { ret = -EFAULT; goto out_err; } perf_fb_ctrs->delivered = delivered; perf_fb_ctrs->reference = reference; - perf_fb_ctrs->reference_perf = ref_perf; perf_fb_ctrs->wraparound_time = ctr_wrap_time; out_err: if (regs_in_pcc) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 94d489a4c90d1..5dfb109cf1f4e 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -50,7 +50,8 @@ struct cppc_freq_invariance { static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); static struct kthread_worker *kworker_fie; -static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(u64 reference_perf, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1); /** @@ -70,7 +71,7 @@ static void __cppc_scale_freq_tick(struct cppc_freq_invariance *cppc_fi) struct cppc_perf_fb_ctrs fb_ctrs = {0}; struct cppc_cpudata *cpu_data; unsigned long local_freq_scale; - u64 perf; + u64 perf, ref_perf; cpu_data = cppc_fi->cpu_data; @@ -79,7 +80,9 @@ static void __cppc_scale_freq_tick(struct cppc_freq_invariance *cppc_fi) return; } - perf = cppc_perf_from_fbctrs(&cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); + ref_perf = cpu_data->perf_caps.reference_perf; + perf = cppc_perf_from_fbctrs(ref_perf, + &cppc_fi->prev_perf_fb_ctrs, &fb_ctrs); if (!perf) return; @@ -747,13 +750,11 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_perf_from_fbctrs(struct cppc_perf_fb_ctrs *fb_ctrs_t0, +static int cppc_perf_from_fbctrs(u64 reference_perf, + struct cppc_perf_fb_ctrs *fb_ctrs_t0, struct cppc_perf_fb_ctrs *fb_ctrs_t1) { u64 delta_reference, delta_delivered; - u64 reference_perf; - - reference_perf = fb_ctrs_t0->reference_perf; delta_reference = get_delta(fb_ctrs_t1->reference, fb_ctrs_t0->reference); @@ -790,7 +791,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) struct cpufreq_policy *policy __free(put_cpufreq_policy) = cpufreq_cpu_get(cpu); struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0}; struct cppc_cpudata *cpu_data; - u64 delivered_perf; + u64 delivered_perf, reference_perf; int ret; if (!policy) @@ -807,7 +808,9 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu) return 0; } - delivered_perf = cppc_perf_from_fbctrs(&fb_ctrs_t0, &fb_ctrs_t1); + reference_perf = cpu_data->perf_caps.reference_perf; + delivered_perf = cppc_perf_from_fbctrs(reference_perf, + &fb_ctrs_t0, &fb_ctrs_t1); if (!delivered_perf) goto out_invalid_counters; diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index f7afa20b8ad9d..d8e405becdc31 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -121,6 +121,7 @@ struct cppc_perf_caps { u32 guaranteed_perf; u32 highest_perf; u32 nominal_perf; + u32 reference_perf; u32 lowest_perf; u32 lowest_nonlinear_perf; u32 lowest_freq; @@ -138,7 +139,6 @@ struct cppc_perf_ctrls { struct cppc_perf_fb_ctrs { u64 reference; u64 delivered; - u64 reference_perf; u64 wraparound_time; }; From 7de3943df546b72937ba1e7f35c618f56d17e823 Mon Sep 17 00:00:00 2001 From: Pengjie Zhang Date: Wed, 11 Mar 2026 15:13:34 +0800 Subject: [PATCH 09/19] ACPI: CPPC: Fix uninitialized ref variable in cppc_get_perf_caps() Commit 8505bfb4e4ec ("ACPI: CPPC: Move reference performance to capabilities") introduced a logical error when retrieving the reference performance. On platforms lacking the reference performance register, the fallback logic leaves the local 'ref' variable uninitialized (0). This causes the subsequent sanity check to incorrectly return -EFAULT, breaking amd_pstate initialization. Fix this by assigning 'ref = nom' in the fallback path. Fixes: 8505bfb4e4ec ("ACPI: CPPC: Move reference performance to capabilities") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/all/20260310003026.GA2639793@ax162/ Tested-by: Nathan Chancellor Signed-off-by: Pengjie Zhang [ rjw: Subject tweak ] Link: https://patch.msgid.link/20260311071334.1494960-1-zhangpengjie2@huawei.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit be473f0591f183990a998edee02161b319047eaa) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 07bbf5b366a42..5ad922eb937a9 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1407,12 +1407,11 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) * If reference perf register is not supported then we should * use the nominal perf value */ - if (CPC_SUPPORTED(reference_reg)) { + if (CPC_SUPPORTED(reference_reg)) cpc_read(cpunum, reference_reg, &ref); - perf_caps->reference_perf = ref; - } else { - perf_caps->reference_perf = nom; - } + else + ref = nom; + perf_caps->reference_perf = ref; if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { From c3a5e79d21f28ebc1596e3f39d67fcb2c16af946 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Wed, 18 Mar 2026 15:20:05 +0530 Subject: [PATCH 10/19] ACPI: CPPC: Check cpc_read() return values consistently Callers of cpc_read() ignore its return value, which can lead to using uninitialized or stale values when the read fails. Fix this by consistently checking cpc_read() return values in cppc_get_perf_caps(), cppc_get_perf_ctrs(), and cppc_get_perf(). Link: https://lore.kernel.org/lkml/48bdf87e-39f1-402f-a7dc-1a0e1e7a819d@nvidia.com/ Suggested-by: Rafael J. Wysocki Signed-off-by: Sumit Gupta Link: https://patch.msgid.link/20260318095005.2437960-1-sumitg@nvidia.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 0cc24977224a6c7d470860265a4990109f0a32ee) Signed-off-by: Jamie Nguyen --- drivers/acpi/cppc_acpi.c | 99 +++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 5ad922eb937a9..053fc6765a592 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1394,45 +1394,66 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) } } - cpc_read(cpunum, highest_reg, &high); + ret = cpc_read(cpunum, highest_reg, &high); + if (ret) + goto out_err; perf_caps->highest_perf = high; - cpc_read(cpunum, lowest_reg, &low); + ret = cpc_read(cpunum, lowest_reg, &low); + if (ret) + goto out_err; perf_caps->lowest_perf = low; - cpc_read(cpunum, nominal_reg, &nom); + ret = cpc_read(cpunum, nominal_reg, &nom); + if (ret) + goto out_err; perf_caps->nominal_perf = nom; /* * If reference perf register is not supported then we should * use the nominal perf value */ - if (CPC_SUPPORTED(reference_reg)) - cpc_read(cpunum, reference_reg, &ref); - else + if (CPC_SUPPORTED(reference_reg)) { + ret = cpc_read(cpunum, reference_reg, &ref); + if (ret) + goto out_err; + } else { ref = nom; + } perf_caps->reference_perf = ref; if (guaranteed_reg->type != ACPI_TYPE_BUFFER || IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { perf_caps->guaranteed_perf = 0; } else { - cpc_read(cpunum, guaranteed_reg, &guaranteed); + ret = cpc_read(cpunum, guaranteed_reg, &guaranteed); + if (ret) + goto out_err; perf_caps->guaranteed_perf = guaranteed; } - cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); + ret = cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); + if (ret) + goto out_err; perf_caps->lowest_nonlinear_perf = min_nonlinear; - if (!high || !low || !nom || !ref || !min_nonlinear) + if (!high || !low || !nom || !ref || !min_nonlinear) { ret = -EFAULT; + goto out_err; + } /* Read optional lowest and nominal frequencies if present */ - if (CPC_SUPPORTED(low_freq_reg)) - cpc_read(cpunum, low_freq_reg, &low_f); + if (CPC_SUPPORTED(low_freq_reg)) { + ret = cpc_read(cpunum, low_freq_reg, &low_f); + if (ret) + goto out_err; + } - if (CPC_SUPPORTED(nom_freq_reg)) - cpc_read(cpunum, nom_freq_reg, &nom_f); + if (CPC_SUPPORTED(nom_freq_reg)) { + ret = cpc_read(cpunum, nom_freq_reg, &nom_f); + if (ret) + goto out_err; + } perf_caps->lowest_freq = low_f; perf_caps->nominal_freq = nom_f; @@ -1526,16 +1547,25 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs) } } - cpc_read(cpunum, delivered_reg, &delivered); - cpc_read(cpunum, reference_reg, &reference); + ret = cpc_read(cpunum, delivered_reg, &delivered); + if (ret) + goto out_err; + + ret = cpc_read(cpunum, reference_reg, &reference); + if (ret) + goto out_err; + /* * Per spec, if ctr_wrap_time optional register is unsupported, then the * performance counters are assumed to never wrap during the lifetime of * platform */ ctr_wrap_time = (u64)(~((u64)0)); - if (CPC_SUPPORTED(ctr_wrap_reg)) - cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); + if (CPC_SUPPORTED(ctr_wrap_reg)) { + ret = cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time); + if (ret) + goto out_err; + } if (!delivered || !reference) { ret = -EFAULT; @@ -1811,24 +1841,39 @@ int cppc_get_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls) } /* Read optional elements if present */ - if (CPC_SUPPORTED(max_perf_reg)) - cpc_read(cpu, max_perf_reg, &max); + if (CPC_SUPPORTED(max_perf_reg)) { + ret = cpc_read(cpu, max_perf_reg, &max); + if (ret) + goto out_err; + } perf_ctrls->max_perf = max; - if (CPC_SUPPORTED(min_perf_reg)) - cpc_read(cpu, min_perf_reg, &min); + if (CPC_SUPPORTED(min_perf_reg)) { + ret = cpc_read(cpu, min_perf_reg, &min); + if (ret) + goto out_err; + } perf_ctrls->min_perf = min; - if (CPC_SUPPORTED(desired_perf_reg)) - cpc_read(cpu, desired_perf_reg, &desired_perf); + if (CPC_SUPPORTED(desired_perf_reg)) { + ret = cpc_read(cpu, desired_perf_reg, &desired_perf); + if (ret) + goto out_err; + } perf_ctrls->desired_perf = desired_perf; - if (CPC_SUPPORTED(energy_perf_reg)) - cpc_read(cpu, energy_perf_reg, &energy_perf); + if (CPC_SUPPORTED(energy_perf_reg)) { + ret = cpc_read(cpu, energy_perf_reg, &energy_perf); + if (ret) + goto out_err; + } perf_ctrls->energy_perf = energy_perf; - if (CPC_SUPPORTED(auto_sel_reg)) - cpc_read(cpu, auto_sel_reg, &auto_sel); + if (CPC_SUPPORTED(auto_sel_reg)) { + ret = cpc_read(cpu, auto_sel_reg, &auto_sel); + if (ret) + goto out_err; + } perf_ctrls->auto_sel = (bool)auto_sel; out_err: From 452912d59131950fb2c65fa19e4dff457b783c03 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Thu, 26 Mar 2026 21:44:00 +0100 Subject: [PATCH 11/19] cpufreq: Remove max_freq_req update for pre-existing policy policy->max_freq_req QoS constraint represents the maximal allowed frequency than can be requested. It is set by: - writing to policyX/scaling_max sysfs file - toggling the cpufreq/boost sysfs file Upon calling freq_qos_update_request(), a successful update of the max_freq_req value triggers cpufreq_notifier_max(), followed by cpufreq_set_policy() which update the requested frequency for the policy. If the new max_freq_req value is not different from the original value, no frequency update is triggered. In a specific sequence of toggling: - cpufreq/boost sysfs file - CPU hot-plugging a CPU could end up with boost enabled but running at the maximal non-boost frequency, cpufreq_notifier_max() not being triggered. The following fixed that: commit 1608f0230510 ("cpufreq: Fix re-boost issue after hotplugging a CPU") The following: commit dd016f379ebc ("cpufreq: Introduce a more generic way to set default per-policy boost flag") also fixed the issue by correctly setting the max_freq_req constraint of a policy that is re-activated. This makes the first fix unnecessary. As the original issue is fixed by another method, this patch reverts: commit 1608f0230510 ("cpufreq: Fix re-boost issue after hotplugging a CPU") Reviewed-by: Lifeng Zheng Signed-off-by: Pierre Gondois Acked-by: Viresh Kumar Link: https://patch.msgid.link/20260326204404.1401849-2-pierre.gondois@arm.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 04aa9d0726cc6a23b348498815a9722b42d27c91) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/cpufreq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1f794524a1d92..fbc04bff06e35 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1484,10 +1484,6 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); - } else { - ret = freq_qos_update_request(policy->max_freq_req, policy->max); - if (ret < 0) - goto out_destroy_policy; } if (cpufreq_driver->get && has_target()) { From 03d677cdc228c5d1028d49802de86265e5617e4c Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Thu, 26 Mar 2026 21:44:01 +0100 Subject: [PATCH 12/19] cpufreq: Add boost_freq_req QoS request The Power Management Quality of Service (PM QoS) allows to aggregate constraints from multiple entities. It is currently used to manage the min/max frequency of a given policy. Frequency constraints can come for instance from: - Thermal framework: acpi_thermal_cpufreq_init() - Firmware: _PPC objects: acpi_processor_ppc_init() - User: by setting policyX/scaling_[min|max]_freq The minimum of the max frequency constraints is used to compute the resulting maximum allowed frequency. When enabling boost frequencies, the same frequency request object (policy->max_freq_req) as to handle requests from users is used. As a result, when setting: - scaling_max_freq - boost The last sysfs file used overwrites the request from the other sysfs file. To avoid this, create a per-policy boost_freq_req to save the boost constraints instead of overwriting the last scaling_max_freq constraint. policy_set_boost() calls the cpufreq set_boost callback. Update the newly added boost_freq_req request from there: - whenever boost is toggled - to cover all possible paths In the existing .set_boost() callbacks: - Don't update policy->max as this is done through the qos notifier cpufreq_notifier_max() which calls cpufreq_set_policy(). - Remove freq_qos_update_request() calls as the qos request is now done in policy_set_boost() and updates the new boost_freq_req $ ## Init state scaling_max_freq:1000000 cpuinfo_max_freq:1000000 $ echo 700000 > scaling_max_freq scaling_max_freq:700000 cpuinfo_max_freq:1000000 $ echo 1 > ../boost scaling_max_freq:1200000 cpuinfo_max_freq:1200000 $ echo 800000 > scaling_max_freq scaling_max_freq:800000 cpuinfo_max_freq:1200000 $ ## Final step: $ ## Without the patches: $ echo 0 > ../boost scaling_max_freq:1000000 cpuinfo_max_freq:1000000 $ ## With the patches: $ echo 0 > ../boost scaling_max_freq:800000 cpuinfo_max_freq:1000000 Note: cpufreq_frequency_table_cpuinfo() updates policy->min and max from: A. cpufreq_boost_set_sw() \-cpufreq_frequency_table_cpuinfo() B. cpufreq_policy_online() \-cpufreq_table_validate_and_sort() \-cpufreq_frequency_table_cpuinfo() Keep these updates as some drivers expect policy->min and max to be set through B. Reviewed-by: Lifeng Zheng Signed-off-by: Pierre Gondois Acked-by: Viresh Kumar Link: https://patch.msgid.link/20260326204404.1401849-3-pierre.gondois@arm.com Signed-off-by: Rafael J. Wysocki (cherry picked from commit 6e39ba4e5a82aa5469b2ac517b74a71accb0540f) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/amd-pstate.c | 2 -- drivers/cpufreq/cppc_cpufreq.c | 10 ++------ drivers/cpufreq/cpufreq.c | 46 +++++++++++++++++++++++----------- include/linux/cpufreq.h | 1 + 4 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 5aa9fcd80cf51..d0675d6a19fe1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -769,8 +769,6 @@ static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) else if (policy->cpuinfo.max_freq > nominal_freq) policy->cpuinfo.max_freq = nominal_freq; - policy->max = policy->cpuinfo.max_freq; - if (cppc_state == AMD_PSTATE_PASSIVE) { ret = freq_qos_update_request(&cpudata->req[1], policy->cpuinfo.max_freq); if (ret < 0) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 5dfb109cf1f4e..7e7f9dfb7a24c 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -834,17 +834,11 @@ static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) { struct cppc_cpudata *cpu_data = policy->driver_data; struct cppc_perf_caps *caps = &cpu_data->perf_caps; - int ret; if (state) - policy->max = cppc_perf_to_khz(caps, caps->highest_perf); + policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->highest_perf); else - policy->max = cppc_perf_to_khz(caps, caps->nominal_perf); - policy->cpuinfo.max_freq = policy->max; - - ret = freq_qos_update_request(policy->max_freq_req, policy->max); - if (ret < 0) - return ret; + policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, caps->nominal_perf); return 0; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index fbc04bff06e35..0e4440bfff54e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -609,10 +609,19 @@ static int policy_set_boost(struct cpufreq_policy *policy, bool enable) policy->boost_enabled = enable; ret = cpufreq_driver->set_boost(policy, enable); - if (ret) + if (ret) { policy->boost_enabled = !policy->boost_enabled; + return ret; + } - return ret; + ret = freq_qos_update_request(policy->boost_freq_req, policy->cpuinfo.max_freq); + if (ret < 0) { + policy->boost_enabled = !policy->boost_enabled; + cpufreq_driver->set_boost(policy, policy->boost_enabled); + return ret; + } + + return 0; } static ssize_t store_local_boost(struct cpufreq_policy *policy, @@ -1377,6 +1386,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) } freq_qos_remove_request(policy->min_freq_req); + freq_qos_remove_request(policy->boost_freq_req); kfree(policy->min_freq_req); cpufreq_policy_put_kobj(policy); @@ -1442,26 +1452,38 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); if (new_policy) { + unsigned int count; + for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; add_cpu_dev_symlink(policy, j, get_cpu_device(j)); } - policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req), + count = policy->boost_supported ? 3 : 2; + policy->min_freq_req = kzalloc(count * sizeof(*policy->min_freq_req), GFP_KERNEL); if (!policy->min_freq_req) { ret = -ENOMEM; goto out_destroy_policy; } + if (policy->boost_supported) { + policy->boost_freq_req = policy->min_freq_req + 2; + + ret = freq_qos_add_request(&policy->constraints, + policy->boost_freq_req, + FREQ_QOS_MAX, + policy->cpuinfo.max_freq); + if (ret < 0) { + policy->boost_freq_req = NULL; + goto out_destroy_policy; + } + } + ret = freq_qos_add_request(&policy->constraints, policy->min_freq_req, FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { - /* - * So we don't call freq_qos_remove_request() for an - * uninitialized request. - */ kfree(policy->min_freq_req); policy->min_freq_req = NULL; goto out_destroy_policy; @@ -2785,16 +2807,10 @@ int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) return -ENXIO; ret = cpufreq_frequency_table_cpuinfo(policy); - if (ret) { + if (ret) pr_err("%s: Policy frequency update failed\n", __func__); - return ret; - } - - ret = freq_qos_update_request(policy->max_freq_req, policy->max); - if (ret < 0) - return ret; - return 0; + return ret; } EXPORT_SYMBOL_GPL(cpufreq_boost_set_sw); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index cc894fc389710..89157e367eefa 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -81,6 +81,7 @@ struct cpufreq_policy { struct freq_constraints constraints; struct freq_qos_request *min_freq_req; struct freq_qos_request *max_freq_req; + struct freq_qos_request *boost_freq_req; struct cpufreq_frequency_table *freq_table; enum cpufreq_table_sorting freq_table_sorted; From d59875653a6ede62c0268c5401c6165353269492 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 Mar 2026 10:33:46 +0530 Subject: [PATCH 13/19] cpufreq: Allocate QoS freq_req objects with policy A recent change exposed a bug in the error path: if freq_qos_add_request(boost_freq_req) fails, min_freq_req may remain a valid pointer even though it was never successfully added. During policy teardown, this leads to an unconditional call to freq_qos_remove_request(), triggering a WARN. The current design allocates all three freq_req objects together, making the lifetime rules unclear and error handling fragile. Simplify this by allocating the QoS freq_req objects at policy allocation time. The policy itself is dynamically allocated, and two of the three requests are always needed anyway. This ensures consistent lifetime management and eliminates the inconsistent state in failure paths. Reported-by: Zhongqiu Han Fixes: 6e39ba4e5a82 ("cpufreq: Add boost_freq_req QoS request") Signed-off-by: Viresh Kumar Reviewed-by: Lifeng Zheng Tested-by: Pierre Gondois Reviewed-by: Zhongqiu Han Link: https://patch.msgid.link/a293f29d841b86c51f34699c6e717e01858d8ada.1774933424.git.viresh.kumar@linaro.org Signed-off-by: Rafael J. Wysocki (cherry picked from commit 9266b4da051a410d9e6c5c0b0ef0c877855aa1b8) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/cpufreq.c | 53 +++++++++++---------------------------- include/linux/cpufreq.h | 6 ++--- 2 files changed, 17 insertions(+), 42 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0e4440bfff54e..5a5cb04ee12da 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -614,7 +614,7 @@ static int policy_set_boost(struct cpufreq_policy *policy, bool enable) return ret; } - ret = freq_qos_update_request(policy->boost_freq_req, policy->cpuinfo.max_freq); + ret = freq_qos_update_request(&policy->boost_freq_req, policy->cpuinfo.max_freq); if (ret < 0) { policy->boost_enabled = !policy->boost_enabled; cpufreq_driver->set_boost(policy, policy->boost_enabled); @@ -769,7 +769,7 @@ static ssize_t store_##file_name \ if (ret) \ return ret; \ \ - ret = freq_qos_update_request(policy->object##_freq_req, val);\ + ret = freq_qos_update_request(&policy->object##_freq_req, val); \ return ret >= 0 ? count : ret; \ } @@ -1374,7 +1374,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) /* Cancel any pending policy->update work before freeing the policy. */ cancel_work_sync(&policy->update); - if (policy->max_freq_req) { + if (freq_qos_request_active(&policy->max_freq_req)) { /* * Remove max_freq_req after sending CPUFREQ_REMOVE_POLICY * notification, since CPUFREQ_CREATE_POLICY notification was @@ -1382,12 +1382,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) */ blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_REMOVE_POLICY, policy); - freq_qos_remove_request(policy->max_freq_req); + freq_qos_remove_request(&policy->max_freq_req); } - freq_qos_remove_request(policy->min_freq_req); - freq_qos_remove_request(policy->boost_freq_req); - kfree(policy->min_freq_req); + if (freq_qos_request_active(&policy->min_freq_req)) + freq_qos_remove_request(&policy->min_freq_req); + if (freq_qos_request_active(&policy->boost_freq_req)) + freq_qos_remove_request(&policy->boost_freq_req); cpufreq_policy_put_kobj(policy); free_cpumask_var(policy->real_cpus); @@ -1452,57 +1453,31 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); if (new_policy) { - unsigned int count; - for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; add_cpu_dev_symlink(policy, j, get_cpu_device(j)); } - count = policy->boost_supported ? 3 : 2; - policy->min_freq_req = kzalloc(count * sizeof(*policy->min_freq_req), - GFP_KERNEL); - if (!policy->min_freq_req) { - ret = -ENOMEM; - goto out_destroy_policy; - } - if (policy->boost_supported) { - policy->boost_freq_req = policy->min_freq_req + 2; - ret = freq_qos_add_request(&policy->constraints, - policy->boost_freq_req, + &policy->boost_freq_req, FREQ_QOS_MAX, policy->cpuinfo.max_freq); - if (ret < 0) { - policy->boost_freq_req = NULL; + if (ret < 0) goto out_destroy_policy; - } } ret = freq_qos_add_request(&policy->constraints, - policy->min_freq_req, FREQ_QOS_MIN, + &policy->min_freq_req, FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); - if (ret < 0) { - kfree(policy->min_freq_req); - policy->min_freq_req = NULL; + if (ret < 0) goto out_destroy_policy; - } - - /* - * This must be initialized right here to avoid calling - * freq_qos_remove_request() on uninitialized request in case - * of errors. - */ - policy->max_freq_req = policy->min_freq_req + 1; ret = freq_qos_add_request(&policy->constraints, - policy->max_freq_req, FREQ_QOS_MAX, + &policy->max_freq_req, FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE); - if (ret < 0) { - policy->max_freq_req = NULL; + if (ret < 0) goto out_destroy_policy; - } blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 89157e367eefa..ae3f15fd4e6e2 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -79,9 +79,9 @@ struct cpufreq_policy { * called, but you're in IRQ context */ struct freq_constraints constraints; - struct freq_qos_request *min_freq_req; - struct freq_qos_request *max_freq_req; - struct freq_qos_request *boost_freq_req; + struct freq_qos_request min_freq_req; + struct freq_qos_request max_freq_req; + struct freq_qos_request boost_freq_req; struct cpufreq_frequency_table *freq_table; enum cpufreq_table_sorting freq_table_sorted; From 6c6e63d665ae6675ec84dcdc484aedd4e5d094b9 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 26 Mar 2026 14:36:20 -0500 Subject: [PATCH 14/19] cpufreq/amd-pstate: Cache the max frequency in cpudata The value of maximum frequency is fixed and never changes. Doing calculations every time based off of perf is unnecessary. Reviewed-by: Gautham R. Shenoy Link: https://lore.kernel.org/r/20260326193620.649441-1-mario.limonciello@amd.com Signed-off-by: Mario Limonciello (AMD) (backported from commit 8cdc494013dfcd48f31eafe19b18fd67c224dd8a) [jamien: minor context-line drift in amd-pstate.c hunks from 3-way auto-merge; +/- content is byte-identical to upstream.] Signed-off-by: Jamie Nguyen --- drivers/cpufreq/amd-pstate.c | 27 +++++++++------------------ drivers/cpufreq/amd-pstate.h | 2 ++ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d0675d6a19fe1..dce694d13c4e9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -757,15 +757,13 @@ static void amd_pstate_adjust_perf(unsigned int cpu, static int amd_pstate_cpu_boost_update(struct cpufreq_policy *policy, bool on) { struct amd_cpudata *cpudata = policy->driver_data; - union perf_cached perf = READ_ONCE(cpudata->perf); - u32 nominal_freq, max_freq; + u32 nominal_freq; int ret = 0; nominal_freq = READ_ONCE(cpudata->nominal_freq); - max_freq = perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf); if (on) - policy->cpuinfo.max_freq = max_freq; + policy->cpuinfo.max_freq = cpudata->max_freq; else if (policy->cpuinfo.max_freq > nominal_freq) policy->cpuinfo.max_freq = nominal_freq; @@ -950,13 +948,15 @@ static int amd_pstate_init_freq(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_freq, nominal_freq); + /* max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf */ max_freq = perf_to_freq(perf, nominal_freq, perf.highest_perf); + WRITE_ONCE(cpudata->max_freq, max_freq); + lowest_nonlinear_freq = perf_to_freq(perf, nominal_freq, perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); /** * Below values need to be initialized correctly, otherwise driver will fail to load - * max_freq is calculated according to (nominal_freq * highest_perf)/nominal_perf * lowest_nonlinear_freq is a value between [min_freq, nominal_freq] * Check _CPC in ACPI table objects if any values are incorrect */ @@ -1019,9 +1019,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_perf); - policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, - cpudata->nominal_freq, - perf.highest_perf); + policy->cpuinfo.max_freq = policy->max = cpudata->max_freq; ret = amd_pstate_cppc_enable(policy); if (ret) @@ -1088,14 +1086,9 @@ static void amd_pstate_cpu_exit(struct cpufreq_policy *policy) static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy, char *buf) { - struct amd_cpudata *cpudata; - union perf_cached perf; - - cpudata = policy->driver_data; - perf = READ_ONCE(cpudata->perf); + struct amd_cpudata *cpudata = policy->driver_data; - return sysfs_emit(buf, "%u\n", - perf_to_freq(perf, cpudata->nominal_freq, perf.highest_perf)); + return sysfs_emit(buf, "%u\n", cpudata->max_freq); } static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy, @@ -1501,9 +1494,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, cpudata->nominal_freq, perf.lowest_perf); - policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf, - cpudata->nominal_freq, - perf.highest_perf); + policy->cpuinfo.max_freq = policy->max = cpudata->max_freq; policy->driver_data = cpudata; ret = amd_pstate_cppc_enable(policy); diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h index cb45fdca27a6c..0d26e56b7938b 100644 --- a/drivers/cpufreq/amd-pstate.h +++ b/drivers/cpufreq/amd-pstate.h @@ -68,6 +68,7 @@ struct amd_aperf_mperf { * @min_limit_freq: Cached value of policy->min (in khz) * @max_limit_freq: Cached value of policy->max (in khz) * @nominal_freq: the frequency (in khz) that mapped to nominal_perf + * @max_freq: in ideal conditions the maximum frequency (in khz) possible frequency * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf * @cur: Difference of Aperf/Mperf/tsc count between last and current sample * @prev: Last Aperf/Mperf/tsc count value read from register @@ -94,6 +95,7 @@ struct amd_cpudata { u32 min_limit_freq; u32 max_limit_freq; u32 nominal_freq; + u32 max_freq; u32 lowest_nonlinear_freq; struct amd_aperf_mperf cur; From 92cb64cabb90c090b73967e9a7f6aaf2624c5753 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Mon, 11 May 2026 15:55:28 +0200 Subject: [PATCH 15/19] NVIDIA: SAUCE: cpufreq: Extract cpufreq_policy_init_qos() function Extract the QoS related logic from cpufreq_policy_online() to make the function shorter/simpler. The logic is placed in cpufreq_policy_init_qos() and is now executed right after the following calls: - cpufreq_driver->init() - cpufreq_table_validate_and_sort() This helps preparing following patches that will, in cpufreq_policy_init_qos(): - treat the policy->min/max values set by drivers as QoS requests. - set a default policy->min/max value to all policies. No functional change. Signed-off-by: Pierre Gondois (backported from https://lore.kernel.org/lkml/20260511135538.522653-1-pierre.gondois@arm.com/) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/cpufreq.c | 53 +++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5a5cb04ee12da..44eb68c80ea13 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1397,6 +1397,32 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) kfree(policy); } +static int cpufreq_policy_init_qos(struct cpufreq_policy *policy) +{ + int ret; + + if (policy->boost_supported) { + ret = freq_qos_add_request(&policy->constraints, + &policy->boost_freq_req, + FREQ_QOS_MAX, + policy->cpuinfo.max_freq); + if (ret < 0) + return ret; + } + + ret = freq_qos_add_request(&policy->constraints, &policy->min_freq_req, + FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); + if (ret < 0) + return ret; + + ret = freq_qos_add_request(&policy->constraints, &policy->max_freq_req, + FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE); + if (ret < 0) + return ret; + + return ret; +} + static int cpufreq_policy_online(struct cpufreq_policy *policy, unsigned int cpu, bool new_policy) { @@ -1442,6 +1468,12 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, if (ret) goto out_offline_policy; + if (new_policy) { + ret = cpufreq_policy_init_qos(policy); + if (ret < 0) + goto out_offline_policy; + } + /* related_cpus should at least include policy->cpus. */ cpumask_copy(policy->related_cpus, policy->cpus); } @@ -1458,27 +1490,6 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy, add_cpu_dev_symlink(policy, j, get_cpu_device(j)); } - if (policy->boost_supported) { - ret = freq_qos_add_request(&policy->constraints, - &policy->boost_freq_req, - FREQ_QOS_MAX, - policy->cpuinfo.max_freq); - if (ret < 0) - goto out_destroy_policy; - } - - ret = freq_qos_add_request(&policy->constraints, - &policy->min_freq_req, FREQ_QOS_MIN, - FREQ_QOS_MIN_DEFAULT_VALUE); - if (ret < 0) - goto out_destroy_policy; - - ret = freq_qos_add_request(&policy->constraints, - &policy->max_freq_req, FREQ_QOS_MAX, - FREQ_QOS_MAX_DEFAULT_VALUE); - if (ret < 0) - goto out_destroy_policy; - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); } From 942396841ab6f455a5728b9c16785d2a68c0ac82 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Mon, 11 May 2026 15:55:29 +0200 Subject: [PATCH 16/19] NVIDIA: SAUCE: cpufreq: Set default policy->min/max values for all drivers Some drivers set policy->min/max in their .init() callback. cpufreq_set_policy() will ultimately override them through: cpufreq_policy_online() \-cpufreq_init_policy() \-cpufreq_set_policy() \-/* Set policy->min/max */ Thus the policy min/max values provided are only temporary. There is an exception if CPUFREQ_NEED_INITIAL_FREQ_CHECK is set and: cpufreq_policy_online() \-__cpufreq_driver_target() \-cpufreq_driver->target() To prepare for a following patch that will remove all policy->min/max initialization in the driver .init() callback if the min/max value is equal to the cpuinfo.min/max_freq, set a default policy->min/max value for all drivers. Signed-off-by: Pierre Gondois (backported from https://lore.kernel.org/lkml/20260511135538.522653-1-pierre.gondois@arm.com/) Signed-off-by: Jamie Nguyen --- drivers/cpufreq/cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 44eb68c80ea13..d193f6e446008 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1401,6 +1401,13 @@ static int cpufreq_policy_init_qos(struct cpufreq_policy *policy) { int ret; + /* + * If the driver didn't set policy->min/max, set them as + * they are used to clamp frequency requests. + */ + policy->min = policy->min ? policy->min : policy->cpuinfo.min_freq; + policy->max = policy->max ? policy->max : policy->cpuinfo.max_freq; + if (policy->boost_supported) { ret = freq_qos_add_request(&policy->constraints, &policy->boost_freq_req, From 0a1365e788f222b230b47963e0f29d8aa17a4b67 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Mon, 11 May 2026 15:55:30 +0200 Subject: [PATCH 17/19] NVIDIA: SAUCE: cpufreq: Remove driver default policy->min/max init Prior to [1], drivers were setting policy->min/max and the value was used as a QoS constraint. After that change, the values were only temporarily used: cpufreq_set_policy() ultimately overriding them through: cpufreq_policy_online() \-cpufreq_init_policy() \-cpufreq_set_policy() \-/* Set policy->min/max */ This patch reinstate the initial behaviour. This will allow drivers to request min/max QoS frequencies if desired. For instance, the cppc driver advertises a lowest non-linear frequency, which should be used as a min QoS value. To avoid having drivers setting policy->min/max to default values which are considered as QoS values (i.e. the reason why [1] was introduced), remove the initialization of policy->min/max in .init() callbacks wherever the policy->min/max values are identical to the policy->cpuinfo.min/max_freq. Indeed, the previous patch ("cpufreq: Set default policy->min/max values for all drivers") makes this initialization redundant. The only drivers where these values are different are: - gx-suspmod.c (min) - cppc-cpufreq.c (min) - longrun.c [1] commit 521223d8b3ec ("cpufreq: Fix initialization of min and max frequency QoS requests") Signed-off-by: Pierre Gondois (backported from https://lore.kernel.org/lkml/20260511135538.522653-1-pierre.gondois@arm.com/) [jamien: 3-way auto-merge resolved context drift in amd-pstate.c and intel_pstate.c against this tree; +/- content is byte-identical to v2 3/4.] Signed-off-by: Jamie Nguyen --- drivers/cpufreq/amd-pstate.c | 14 ++++++-------- drivers/cpufreq/cppc_cpufreq.c | 5 ++--- drivers/cpufreq/cpufreq-nforce2.c | 4 ++-- drivers/cpufreq/freq_table.c | 7 +++---- drivers/cpufreq/gx-suspmod.c | 2 +- drivers/cpufreq/intel_pstate.c | 3 --- drivers/cpufreq/pcc-cpufreq.c | 10 ++++------ drivers/cpufreq/pxa3xx-cpufreq.c | 5 ++--- drivers/cpufreq/sh-cpufreq.c | 6 ++---- drivers/cpufreq/virtual-cpufreq.c | 5 +---- 10 files changed, 23 insertions(+), 38 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index dce694d13c4e9..c2d9d409867db 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1016,10 +1016,9 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) perf = READ_ONCE(cpudata->perf); - policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, - cpudata->nominal_freq, - perf.lowest_perf); - policy->cpuinfo.max_freq = policy->max = cpudata->max_freq; + policy->cpuinfo.min_freq = perf_to_freq(perf, cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = cpudata->max_freq; ret = amd_pstate_cppc_enable(policy); if (ret) @@ -1491,10 +1490,9 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) perf = READ_ONCE(cpudata->perf); - policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf, - cpudata->nominal_freq, - perf.lowest_perf); - policy->cpuinfo.max_freq = policy->max = cpudata->max_freq; + policy->cpuinfo.min_freq = perf_to_freq(perf, cpudata->nominal_freq, + perf.lowest_perf); + policy->cpuinfo.max_freq = cpudata->max_freq; policy->driver_data = cpudata; ret = amd_pstate_cppc_enable(policy); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 7e7f9dfb7a24c..5abac50df7508 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -660,8 +660,6 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf); - policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ? - caps->highest_perf : caps->nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -669,7 +667,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf); - policy->cpuinfo.max_freq = policy->max; + policy->cpuinfo.max_freq = cppc_perf_to_khz(caps, policy->boost_enabled ? + caps->highest_perf : caps->nominal_perf); policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu); policy->shared_type = cpu_data->shared_type; diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index fbbbe501cf2dc..831102522ad64 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -355,8 +355,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) min_fsb = NFORCE2_MIN_FSB; /* cpuinfo and default policy values */ - policy->min = policy->cpuinfo.min_freq = min_fsb * fid * 100; - policy->max = policy->cpuinfo.max_freq = max_fsb * fid * 100; + policy->cpuinfo.min_freq = min_fsb * fid * 100; + policy->cpuinfo.max_freq = max_fsb * fid * 100; return 0; } diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index 5b364d8da4f92..ea994647abc88 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -49,16 +49,15 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy) max_freq = freq; } - policy->min = policy->cpuinfo.min_freq = min_freq; - policy->max = max_freq; + policy->cpuinfo.min_freq = min_freq; /* * If the driver has set its own cpuinfo.max_freq above max_freq, leave * it as is. */ if (policy->cpuinfo.max_freq < max_freq) - policy->max = policy->cpuinfo.max_freq = max_freq; + policy->cpuinfo.max_freq = max_freq; - if (policy->min == ~0) + if (min_freq == ~0) return -EINVAL; else return 0; diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c index d269a4f26f98e..d40c9e0bbb740 100644 --- a/drivers/cpufreq/gx-suspmod.c +++ b/drivers/cpufreq/gx-suspmod.c @@ -421,7 +421,7 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) policy->min = maxfreq / max_duration; else policy->min = maxfreq / POLICY_MIN_DIV; - policy->max = maxfreq; + policy->cpuinfo.min_freq = maxfreq / max_duration; policy->cpuinfo.max_freq = maxfreq; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 11c58af419006..ed3fd134fb8b7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3049,9 +3049,6 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - intel_pstate_init_acpi_perf_limits(policy); policy->fast_switch_possible = true; diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index ac2e90a65f0c4..0f185a13577f8 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -551,13 +551,11 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) goto out; } - policy->max = policy->cpuinfo.max_freq = - ioread32(&pcch_hdr->nominal) * 1000; - policy->min = policy->cpuinfo.min_freq = - ioread32(&pcch_hdr->minimum_frequency) * 1000; + policy->cpuinfo.max_freq = ioread32(&pcch_hdr->nominal) * 1000; + policy->cpuinfo.min_freq = ioread32(&pcch_hdr->minimum_frequency) * 1000; - pr_debug("init: policy->max is %d, policy->min is %d\n", - policy->max, policy->min); + pr_debug("init: max_freq is %d, min_freq is %d\n", + policy->cpuinfo.max_freq, policy->cpuinfo.min_freq); out: return result; } diff --git a/drivers/cpufreq/pxa3xx-cpufreq.c b/drivers/cpufreq/pxa3xx-cpufreq.c index 50ff3b6a69000..06b27cbc59d6a 100644 --- a/drivers/cpufreq/pxa3xx-cpufreq.c +++ b/drivers/cpufreq/pxa3xx-cpufreq.c @@ -185,9 +185,8 @@ static int pxa3xx_cpufreq_init(struct cpufreq_policy *policy) int ret = -EINVAL; /* set default policy and cpuinfo */ - policy->min = policy->cpuinfo.min_freq = 104000; - policy->max = policy->cpuinfo.max_freq = - (cpu_is_pxa320()) ? 806000 : 624000; + policy->cpuinfo.min_freq = 104000; + policy->cpuinfo.max_freq = (cpu_is_pxa320()) ? 806000 : 624000; policy->cpuinfo.transition_latency = 1000; /* FIXME: 1 ms, assumed */ if (cpu_is_pxa300() || cpu_is_pxa310()) diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 642ddb9ea217e..3c99d7009cbe2 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -124,10 +124,8 @@ static int sh_cpufreq_cpu_init(struct cpufreq_policy *policy) dev_notice(dev, "no frequency table found, falling back " "to rate rounding.\n"); - policy->min = policy->cpuinfo.min_freq = - (clk_round_rate(cpuclk, 1) + 500) / 1000; - policy->max = policy->cpuinfo.max_freq = - (clk_round_rate(cpuclk, ~0UL) + 500) / 1000; + policy->cpuinfo.min_freq = (clk_round_rate(cpuclk, 1) + 500) / 1000; + policy->cpuinfo.max_freq = (clk_round_rate(cpuclk, ~0UL) + 500) / 1000; } return 0; diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c index 4159f31349b16..dc78b74409af4 100644 --- a/drivers/cpufreq/virtual-cpufreq.c +++ b/drivers/cpufreq/virtual-cpufreq.c @@ -164,10 +164,7 @@ static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = 1; policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0); - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - - policy->cur = policy->max; + policy->cur = policy->cpuinfo.max_freq; return 0; } From b28cacebff55c289343f5e897cd322b5a1b96cfe Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Mon, 11 May 2026 15:55:31 +0200 Subject: [PATCH 18/19] NVIDIA: SAUCE: cpufreq: Use policy->min/max init as QoS request Consider policy->min/max being set in the driver .init() callback as a QoS request. Impacted driver are: - gx-suspmod.c (min) - cppc-cpufreq.c (min) - longrun.c (min/max) Update the documentation accordingly. Signed-off-by: Pierre Gondois (backported from https://lore.kernel.org/lkml/20260511135538.522653-1-pierre.gondois@arm.com/) Signed-off-by: Jamie Nguyen --- Documentation/cpu-freq/cpu-drivers.rst | 10 ++++++++-- drivers/cpufreq/cpufreq.c | 12 ++++++++++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst index c5635ac3de547..ab4f3c0f3a89b 100644 --- a/Documentation/cpu-freq/cpu-drivers.rst +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -114,8 +114,14 @@ Then, the driver must fill in the following values: |policy->cur | The current operating frequency of | | | this CPU (if appropriate) | +-----------------------------------+--------------------------------------+ -|policy->min, | | -|policy->max, | | +|policy->min | If set by the driver in ->init(), | +| | used as initial minimum frequency | +| | QoS request. | ++-----------------------------------+--------------------------------------+ +|policy->max | If set by the driver in ->init(), | +| | used as initial maximum frequency | +| | QoS request. | ++-----------------------------------+--------------------------------------+ |policy->policy and, if necessary, | | |policy->governor | must contain the "default policy" for| | | this CPU. A few moments later, | diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d193f6e446008..a2fba4d3e5070 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1399,8 +1399,16 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy) static int cpufreq_policy_init_qos(struct cpufreq_policy *policy) { + unsigned int min_freq, max_freq; int ret; + /* Use policy->min/max set by the driver as QoS requests. */ + min_freq = max(FREQ_QOS_MIN_DEFAULT_VALUE, policy->min); + if (policy->max) + max_freq = min(FREQ_QOS_MAX_DEFAULT_VALUE, policy->max); + else + max_freq = FREQ_QOS_MAX_DEFAULT_VALUE; + /* * If the driver didn't set policy->min/max, set them as * they are used to clamp frequency requests. @@ -1418,12 +1426,12 @@ static int cpufreq_policy_init_qos(struct cpufreq_policy *policy) } ret = freq_qos_add_request(&policy->constraints, &policy->min_freq_req, - FREQ_QOS_MIN, FREQ_QOS_MIN_DEFAULT_VALUE); + FREQ_QOS_MIN, min_freq); if (ret < 0) return ret; ret = freq_qos_add_request(&policy->constraints, &policy->max_freq_req, - FREQ_QOS_MAX, FREQ_QOS_MAX_DEFAULT_VALUE); + FREQ_QOS_MAX, max_freq); if (ret < 0) return ret; From 49b3b1277bcf7eec3cea5529db00956816586ff4 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Sat, 25 Apr 2026 01:48:14 +0530 Subject: [PATCH 19/19] NVIDIA: SAUCE: cpufreq: CPPC: add autonomous mode boot parameter support Add a kernel boot parameter 'cppc_cpufreq.auto_sel_mode' to enable CPPC autonomous performance selection on all CPUs at system startup. When autonomous mode is enabled, the hardware automatically adjusts CPU performance based on workload demands using Energy Performance Preference (EPP) hints. When auto_sel_mode=1: - Configure all CPUs for autonomous operation on first init - Set EPP to performance preference (0x0) - Use HW min/max_perf when available; otherwise initialize from caps - Clamp desired_perf to bounds before enabling autonomous mode - Hardware controls frequency instead of the OS governor The boot parameter is applied only during first policy initialization. Skip applying it on CPU hotplug to preserve runtime sysfs configuration. This patch depends on patch [2] ("cpufreq: Set policy->min and max as real QoS constraints") so that the policy->min/max set in cppc_cpufreq_cpu_init() are not overridden by cpufreq_set_policy() during init. Reviewed-by: Randy Dunlap Signed-off-by: Sumit Gupta (backported from https://lore.kernel.org/lkml/20260424201814.230071-1-sumitg@nvidia.com/) [jamien: hunk #2 (cppc_set_enable() insertion in cppc_cpufreq_cpu_init) rebased onto Pierre's v2 series, which replaced the local min/max vars with direct policy->min assignment; insertion point and code are unchanged.] Signed-off-by: Jamie Nguyen --- .../admin-guide/kernel-parameters.txt | 13 +++ drivers/cpufreq/cppc_cpufreq.c | 89 +++++++++++++++++-- 2 files changed, 97 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index d3e7034bece52..f3d8bafeff2c8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1052,6 +1052,19 @@ Kernel parameters policy to use. This governor must be registered in the kernel before the cpufreq driver probes. + cppc_cpufreq.auto_sel_mode= + [CPU_FREQ] Enable ACPI CPPC autonomous performance + selection. When enabled, hardware automatically adjusts + CPU frequency on all CPUs based on workload demands. + In Autonomous mode, Energy Performance Preference (EPP) + hints guide hardware toward performance (0x0) or energy + efficiency (0xff). + Requires ACPI CPPC autonomous selection register support. + Format: + Default: 0 (disabled) + 0: use cpufreq governors + 1: enable if supported by hardware + cpu_init_udelay=N [X86,EARLY] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 5abac50df7508..be28d7e6cf63e 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -28,6 +28,9 @@ static struct cpufreq_driver cppc_cpufreq_driver; +/* Autonomous Selection boot parameter */ +static bool auto_sel_mode; + #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE static enum { FIE_UNSET = -1, @@ -655,6 +658,14 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) caps = &cpu_data->perf_caps; policy->driver_data = cpu_data; + /* + * Enable CPPC for both OS-driven and autonomous modes. + * The Enable register is optional - some platforms may not support it + */ + ret = cppc_set_enable(cpu, true); + if (ret && ret != -EOPNOTSUPP) + pr_warn("Failed to enable CPPC for CPU%d (%d)\n", cpu, ret); + /* * Set min to lowest nonlinear perf to avoid any efficiency penalty (see * Section 8.4.7.1.1.5 of ACPI 6.1 spec) @@ -707,11 +718,71 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->cur = cppc_perf_to_khz(caps, caps->highest_perf); cpu_data->perf_ctrls.desired_perf = caps->highest_perf; - ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); - if (ret) { - pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", - caps->highest_perf, cpu, ret); - goto out; + /* + * Enable autonomous mode on first init if boot param is set. + * Check last_governor to detect first init and skip if auto_sel + * is already enabled. + */ + if (auto_sel_mode && policy->last_governor[0] == '\0' && + !cpu_data->perf_ctrls.auto_sel) { + /* Init min/max_perf from caps if not already set by HW. */ + if (!cpu_data->perf_ctrls.min_perf) + cpu_data->perf_ctrls.min_perf = caps->lowest_nonlinear_perf; + if (!cpu_data->perf_ctrls.max_perf) + cpu_data->perf_ctrls.max_perf = policy->boost_enabled ? + caps->highest_perf : caps->nominal_perf; + + cpu_data->perf_ctrls.desired_perf = + clamp_t(u32, cpu_data->perf_ctrls.desired_perf, + cpu_data->perf_ctrls.min_perf, + cpu_data->perf_ctrls.max_perf); + + policy->cur = cppc_perf_to_khz(caps, + cpu_data->perf_ctrls.desired_perf); + + /* EPP is optional - some platforms may not support it */ + ret = cppc_set_epp(cpu, CPPC_EPP_PERFORMANCE_PREF); + if (ret && ret != -EOPNOTSUPP) + pr_warn("Failed to set EPP for CPU%d (%d)\n", cpu, ret); + else if (!ret) + cpu_data->perf_ctrls.energy_perf = CPPC_EPP_PERFORMANCE_PREF; + + /* Program min/max/desired into CPPC regs before enabling auto_sel. */ + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + if (ret) { + pr_debug("Err setting perf for autonomous mode CPU:%d ret:%d\n", + cpu, ret); + goto out; + } + + ret = cppc_set_auto_sel(cpu, true); + if (ret && ret != -EOPNOTSUPP) { + pr_warn("Failed autonomous config for CPU%d (%d)\n", + cpu, ret); + goto out; + } + if (!ret) + cpu_data->perf_ctrls.auto_sel = true; + } + + if (cpu_data->perf_ctrls.auto_sel) { + /* Sync policy limits from HW when autonomous mode is active */ + policy->min = cppc_perf_to_khz(caps, + cpu_data->perf_ctrls.min_perf ?: + caps->lowest_nonlinear_perf); + policy->max = cppc_perf_to_khz(caps, + cpu_data->perf_ctrls.max_perf ?: + (policy->boost_enabled ? + caps->highest_perf : + caps->nominal_perf)); + } else { + /* Normal mode: governors control frequency */ + ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); + if (ret) { + pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", + caps->highest_perf, cpu, ret); + goto out; + } } cppc_cpufreq_cpu_fie_init(policy); @@ -1031,10 +1102,18 @@ static int __init cppc_cpufreq_init(void) static void __exit cppc_cpufreq_exit(void) { + unsigned int cpu; + + for_each_present_cpu(cpu) + cppc_set_auto_sel(cpu, false); + cpufreq_unregister_driver(&cppc_cpufreq_driver); cppc_freq_invariance_exit(); } +module_param(auto_sel_mode, bool, 0444); +MODULE_PARM_DESC(auto_sel_mode, "Enable CPPC autonomous performance selection at boot"); + module_exit(cppc_cpufreq_exit); MODULE_AUTHOR("Ashwin Chaugule"); MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");