From 6bf2d09d97b7550549c1721e92d1438641d1d7b5 Mon Sep 17 00:00:00 2001 From: Suresh Thelkar Date: Mon, 20 Apr 2026 04:06:13 +0000 Subject: [PATCH 1/2] Upgrade irqbalance to 1.9.5 and add patches to fix ENOSPC handling --- ...PC-slot-aware-placement-and-fallback.patch | 592 ++++++++++++++++++ ...fine-IRQBALANCE_ARGS-as-empty-string.patch | 23 - SPECS/irqbalance/irqbalance.signatures.json | 2 +- SPECS/irqbalance/irqbalance.spec | 13 +- cgmanifest.json | 4 +- 5 files changed, 605 insertions(+), 29 deletions(-) create mode 100644 SPECS/irqbalance/0001-Backport-irqbalance-ENOSPC-slot-aware-placement-and-fallback.patch delete mode 100644 SPECS/irqbalance/0001-define-IRQBALANCE_ARGS-as-empty-string.patch diff --git a/SPECS/irqbalance/0001-Backport-irqbalance-ENOSPC-slot-aware-placement-and-fallback.patch b/SPECS/irqbalance/0001-Backport-irqbalance-ENOSPC-slot-aware-placement-and-fallback.patch new file mode 100644 index 00000000000..e43b3c6b03a --- /dev/null +++ b/SPECS/irqbalance/0001-Backport-irqbalance-ENOSPC-slot-aware-placement-and-fallback.patch @@ -0,0 +1,592 @@ +From c53bf1bd5180b71d821cb6cc99ca28cfff7a212a Mon Sep 17 00:00:00 2001 +From: Suresh Thelkar +Date: Tue, 28 Apr 2026 04:16:56 +0000 +Subject: [PATCH] irqbalance: improve ENOSPC handling, slot-aware placement, + and policy-compliant fallback + +Fix ENOSPC handling when writing IRQ affinity by introducing slot-aware +placement, a robust fallback mechanism, and multiple correctness and +policy fixes, while preserving the core load-balancing behavior. + +Enhancements: + +1. Slot-aware placement: + - Penalize CPUs with low slots_left and skip fully saturated CPUs + - Use slots_left as primary tie-breaker, with interrupt count as secondary + - Maintain original load-based behavior when slots_left is not constrained + - Restore g_list_length() tie-breaker to avoid bias toward lower-index CPUs + when slots_left remains at INT_MAX + +2. 
ENOSPC fallback mechanism: + - Implement immediate retry logic via try_fallback_cpu() + - Priority order: cache siblings -> NUMA/package siblings -> broader scope + - Up to 8 retry attempts using tried_cpus tracking + - Use migrate_irq_obj() to maintain topology consistency + +3. Policy-compliant fallback: + - Honor user-configured balance_level during fallback + - Restrict search scope to configured domain (cache/package/NUMA) + - Treat BALANCE_NONE and BALANCE_CORE as no-fallback cases + - Ensure domain-assigned IRQs follow their configured balancing scope + - Improve logging with balance_level context + +4. ENOSPC state correctness: + - Keep source CPU saturated after fallback to avoid repeated ENOSPC retries + - Prevent slots_left from rebounding incorrectly after migration + - Clamp slots_left at 0 instead of allowing arbitrary negative values + +5. Safety and robustness improvements: + - Add NULL checks in topology traversal paths + - Fix NUMA/domain traversal for child objects + - Use snprintf() instead of sprintf() for safe string handling + - Capture errno immediately after syscalls + +Behavioral guarantees: +- No change to core placement logic under normal conditions +- All new logic activates only when ENOSPC conditions are encountered +- No violation of user-defined balance policies during fallback + +Results: +- Significant reduction in ENOSPC errors with reliable fallback recovery +- Improved IRQ distribution and stability on constrained systems + +Upstream commits cherry-picked (irqbalance/irqbalance master): + a4e9b62 Fix ENOSPC handling with slot-aware placement and fallback (PR #360) + 4332ecb activate: honor balance_level policy for domain-assigned IRQs (PR #360) + 9f5a4bc placement: restore interrupt-count tie-breaker in find_best_object() (PR #361) + 52a5e95 activate: keep ENOSPC source CPU saturated after fallback migration (PR #361) + a242d32 activate: do not relocate BALANCE_NONE IRQs on ENOSPC (PR #361) + +Fixes: 
https://github.com/Irqbalance/irqbalance/issues/355 +Signed-off-by: Suresh Thelkar +--- + activate.c | 403 +++++++++++++++++++++++++++++++++++++++++++++++++++- + placement.c | 74 +++++++++- + 2 files changed, 467 insertions(+), 10 deletions(-) + +diff --git a/activate.c b/activate.c +index 59a2fce..9a0d72e 100644 +--- a/activate.c ++++ b/activate.c +@@ -34,6 +34,374 @@ + + #include "irqbalance.h" + ++/* ++ * Maximum number of fallback attempts per IRQ per cycle to prevent ++ * infinite loops when all CPUs are saturated. ++ * ++ * The value 8 is based on typical x86 CPU cache topology: ++ * - Modern CPUs have 2-8 physical cores sharing an L3 cache domain ++ * - With SMT/Hyperthreading, this becomes 4-16 logical CPUs per domain ++ * - 8 attempts is sufficient to exhaust a typical cache domain before ++ * escalating to NUMA or global scope fallback ++ * ++ * On systems with more than 8 cores: ++ * - If a CPU has slots_left <= 0, it is skipped (not counted as attempt) ++ * - The loop exits early via "no fallback CPU available" when all ++ * valid candidates are exhausted, before hitting this limit ++ * - Only worst-case gradual saturation (each CPU returns ENOSPC only ++ * when tried) would hit this limit, which is rare in practice ++ * ++ * Trade-off: Higher values could cover more CPUs but risk stalling ++ * IRQ activation for other interrupts. Deferred IRQs are retried ++ * on the next rebalance cycle (default 10 seconds). ++ */ ++#define MAX_FALLBACK_ATTEMPTS 8 ++ ++/* ++ * Minimum slots_left value to track saturation without unbounded negative drift. ++ * When a CPU is saturated, slots_left is set to this value to indicate ++ * "full, don't use" without going arbitrarily negative. ++ */ ++#define SLOTS_SATURATED 0 ++ ++/* ++ * Check if a CPU is a valid fallback candidate. ++ * Returns 1 if valid, 0 otherwise. ++ * ++ * FIX: Added NULL check for cpu parameter to prevent NULL dereference ++ * when GList element has NULL data. 
++ */ ++static int is_valid_fallback(struct topo_obj *cpu, struct topo_obj *original, ++ cpumask_t *tried_cpus) ++{ ++ /* FIX: NULL dereference protection */ ++ if (!cpu) ++ return 0; ++ if (cpu == original || cpu->obj_type != OBJ_TYPE_CPU) ++ return 0; ++ /* FIX: Use <= 0 check for slots_left saturation */ ++ if (cpu->slots_left <= SLOTS_SATURATED) ++ return 0; ++ if (!cpus_intersects(cpu->mask, unbanned_cpus)) ++ return 0; ++ if (cpus_intersects(cpu->mask, *tried_cpus)) ++ return 0; /* Already tried this CPU */ ++ return 1; ++} ++ ++/* ++ * Recursively collect all CPU objects under a given topology object. ++ * This fixes the NUMA fallback issue where numa_node->children contains ++ * packages/caches, not CPUs directly. ++ * ++ * FIX: Traverse topology tree to find actual CPU objects within NUMA node. ++ */ ++static struct topo_obj *find_best_cpu_under_obj(struct topo_obj *obj, ++ struct topo_obj *original, ++ cpumask_t *tried_cpus) ++{ ++ struct topo_obj *best = NULL; ++ GList *iter; ++ ++ if (!obj) ++ return NULL; ++ ++ /* If this is a CPU, check if it's a valid fallback */ ++ if (obj->obj_type == OBJ_TYPE_CPU) { ++ if (is_valid_fallback(obj, original, tried_cpus)) ++ return obj; ++ return NULL; ++ } ++ ++ /* Otherwise, recursively search children */ ++ for (iter = obj->children; iter; iter = iter->next) { ++ struct topo_obj *child = iter->data; ++ struct topo_obj *candidate; ++ ++ /* FIX: NULL check for child */ ++ if (!child) ++ continue; ++ ++ candidate = find_best_cpu_under_obj(child, original, tried_cpus); ++ if (candidate) { ++ /* FIX: Select CPU with most slots_left */ ++ if (!best || candidate->slots_left > best->slots_left) ++ best = candidate; ++ } ++ } ++ return best; ++} ++ ++/* ++ * Convert balance level to string for logging. 
++ */ ++static const char *balance_level_str(int level) ++{ ++ switch (level) { ++ case BALANCE_NONE: ++ return "none"; ++ case BALANCE_PACKAGE: ++ return "package"; ++ case BALANCE_CACHE: ++ return "cache"; ++ case BALANCE_CORE: ++ return "core"; ++ default: ++ return "unknown"; ++ } ++} ++ ++/* ++ * Try to find an alternative CPU when the primary target returns ENOSPC. ++ * Returns 0 on success (IRQ placed on alternative CPU), -1 on failure. ++ * ++ * Uses iteration with a tried_cpus bitmask to avoid retrying the same CPU. ++ * ++ * IMPORTANT: This function respects the IRQ's configured balance level. ++ * If the user configured an IRQ to balance at a specific level (e.g., cache), ++ * fallback will only search within that scope. If no CPU is available within ++ * the constrained scope, we warn the user and fail rather than violating the ++ * configured policy. ++ * ++ * Scope by balance level: ++ * - BALANCE_CORE: No fallback possible (single CPU, fail immediately) ++ * - BALANCE_CACHE: Only search within same cache domain ++ * - BALANCE_PACKAGE: Only search within same package ++ * - BALANCE_NONE: User opted out of balancing - decline fallback ++ * ++ * FIXES APPLIED: ++ * 1. NUMA fallback now traverses topology tree to find actual CPUs ++ * 2. NULL dereference protection throughout ++ * 3. Use snprintf() instead of sprintf() for path construction ++ * 4. Capture errno immediately after failing syscall ++ * 5. Consistent slots_left update logic (saturate at 0, not negative) ++ * 6. Guard slots_left decrement on success ++ * 7. Load-aware CPU selection (most slots_left first to avoid bias) ++ * 8. 
Respect user-configured balance level policy ++ */ ++static int try_fallback_cpu(struct irq_info *info, cpumask_t applied_mask, ++ int attempt __attribute__((unused))) ++{ ++ struct topo_obj *original = info->assigned_obj; ++ struct topo_obj *fallback = NULL; ++ struct topo_obj *search_scope = NULL; ++ cpumask_t tried_cpus; ++ char buf[PATH_MAX]; ++ FILE *file; ++ int ret; ++ int saved_errno; ++ int attempts = 0; ++ int balance_level = info->level; ++ ++ /* ++ * BALANCE_CORE means the IRQ is pinned to a specific CPU. ++ * No fallback is possible without violating the configured policy. ++ */ ++ if (balance_level == BALANCE_CORE) { ++ log(TO_ALL, LOG_WARNING, ++ "IRQ %d: cannot fallback - balance level is 'core' " ++ "(CPU %d saturated, no alternative within policy)\n", ++ info->irq, original->number); ++ return -1; ++ } ++ ++ /* ++ * BALANCE_NONE means the user opted this IRQ out of balancing. ++ * place_irq_in_node() only relocates such an IRQ when banned CPUs ++ * forced it; we should not silently widen the scope further on ++ * ENOSPC. Treat it like BALANCE_CORE: warn and decline. ++ * This also avoids a NULL search_scope on non-NUMA systems where ++ * no OBJ_TYPE_NODE ancestor exists in the topology. ++ */ ++ if (balance_level == BALANCE_NONE) { ++ log(TO_ALL, LOG_WARNING, ++ "IRQ %d: cannot fallback - balance level is 'none' " ++ "(user policy forbids relocation)\n", ++ info->irq); ++ return -1; ++ } ++ ++ cpus_clear(tried_cpus); ++ ++ /* ++ * Determine the search scope based on assigned_obj type. ++ * ++ * For CPU-assigned IRQs: search within the appropriate parent domain ++ * based on balance_level (cache → package → NUMA). ++ * ++ * For domain-assigned IRQs: the assigned domain IS the search scope. ++ * This respects the user's balance_level policy - we only search ++ * within the domain irqbalance already chose for this IRQ. 
++ */ ++ if (original->obj_type == OBJ_TYPE_CPU) { ++ /* CPU-assigned: find appropriate parent based on balance_level */ ++ switch (balance_level) { ++ case BALANCE_CACHE: ++ search_scope = original->parent; ++ while (search_scope && search_scope->obj_type != OBJ_TYPE_CACHE) ++ search_scope = search_scope->parent; ++ break; ++ case BALANCE_PACKAGE: ++ search_scope = original->parent; ++ while (search_scope && search_scope->obj_type != OBJ_TYPE_PACKAGE) ++ search_scope = search_scope->parent; ++ break; ++ default: ++ search_scope = NULL; ++ } ++ } else { ++ /* ++ * Domain-assigned IRQ: the assigned object IS the search scope. ++ * We search for individual CPUs within this domain. ++ */ ++ search_scope = original; ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: domain-assigned (obj_type=%d), searching within assigned scope\n", ++ info->irq, original->obj_type); ++ } ++ ++ if (!search_scope) { ++ log(TO_ALL, LOG_WARNING, ++ "IRQ %d: no valid search scope for fallback (balance_level=%s)\n", ++ info->irq, balance_level_str(balance_level)); ++ return -1; ++ } ++ ++ while (attempts < MAX_FALLBACK_ATTEMPTS) { ++ /* ++ * Search for fallback CPU within the determined scope. ++ * Use recursive traversal to find actual CPU objects. ++ * ++ * For CPU-assigned IRQs: 'original' is excluded from candidates. ++ * For domain-assigned IRQs: 'original' is not a CPU, so ++ * is_valid_fallback() will skip it automatically. 
++ */ ++ fallback = find_best_cpu_under_obj(search_scope, original, &tried_cpus); ++ ++ if (!fallback) { ++ log(TO_ALL, LOG_WARNING, ++ "IRQ %d: no fallback CPU available within " ++ "scope (balance_level=%s, obj_type=%d)\n", ++ info->irq, balance_level_str(balance_level), ++ search_scope->obj_type); ++ return -1; ++ } ++ ++ /* Mark this CPU as tried */ ++ cpus_or(tried_cpus, tried_cpus, fallback->mask); ++ attempts++; ++ ++ if (original->obj_type == OBJ_TYPE_CPU) { ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: ENOSPC fallback from CPU %d to CPU %d " ++ "(attempt %d, slots_left=%d)\n", ++ info->irq, original->number, fallback->number, ++ attempts, fallback->slots_left); ++ } else { ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: ENOSPC fallback to CPU %d " ++ "(attempt %d, slots_left=%d, scope_type=%d)\n", ++ info->irq, fallback->number, ++ attempts, fallback->slots_left, original->obj_type); ++ } ++ ++ /* Update assignment and compute new mask */ ++ info->assigned_obj = fallback; ++ cpus_and(applied_mask, cpu_online_map, fallback->mask); ++ ++ /* FIX: Use snprintf() to prevent buffer overflow */ ++ ret = snprintf(buf, sizeof(buf), "/proc/irq/%i/smp_affinity", ++ info->irq); ++ if (ret < 0 || ret >= (int)sizeof(buf)) { ++ log(TO_ALL, LOG_WARNING, ++ "IRQ %d: path buffer overflow\n", info->irq); ++ info->assigned_obj = original; ++ continue; ++ } ++ ++ file = fopen(buf, "w"); ++ if (!file) { ++ /* FIX: Capture errno immediately */ ++ saved_errno = errno; ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: cannot open %s: %s\n", ++ info->irq, buf, strerror(saved_errno)); ++ info->assigned_obj = original; ++ continue; /* Try next CPU */ ++ } ++ ++ cpumask_scnprintf(buf, PATH_MAX, applied_mask); ++ ret = fprintf(file, "%s", buf); ++ /* FIX: Capture errno immediately after potential failure */ ++ saved_errno = errno; ++ if (ret >= 0) { ++ if (fflush(file)) { ++ ret = -1; ++ saved_errno = errno; ++ } ++ } ++ fclose(file); ++ ++ if (ret < 0) { ++ /* ++ * FIX: Consistent slots_left update logic. 
++ * On ENOSPC, mark CPU as saturated (slots_left = 0) ++ * instead of arbitrary negative values. ++ */ ++ if (saved_errno == ENOSPC) { ++ fallback->slots_left = SLOTS_SATURATED; ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: fallback CPU %d saturated " ++ "(ENOSPC), slots_left set to %d\n", ++ info->irq, fallback->number, ++ SLOTS_SATURATED); ++ } ++ info->assigned_obj = original; ++ continue; /* Try next CPU */ ++ } ++ ++ /* ++ * Success! Properly migrate the IRQ from original to fallback. ++ * FIX: Use migrate_irq_obj() to: ++ * 1. Move IRQ from original->interrupts to fallback->interrupts ++ * 2. Update slots_left on both CPUs ++ * 3. Update load on fallback CPU ++ * 4. Set info->assigned_obj = fallback ++ * ++ */ ++ migrate_irq_obj(original, fallback, info); ++ /* ++ * migrate_irq_obj() unconditionally increments the source ++ * object's slots_left (0 -> 1), which would make the original ++ * CPU look eligible again. The kernel returned ENOSPC, so its ++ * vector table is still full; re-clamp to SATURATED so we ++ * don't immediately retry the same dead-end placement. ++ * ++ * Only re-clamp when the source is an actual CPU. For ++ * domain-assigned IRQs (cache/package/NUMA), the original ++ * is a domain object whose slots_left aggregates its CPUs ++ * and was never marked SLOTS_SATURATED by the ENOSPC handler ++ * (see activate_mapping(): the obj_type==CPU branch). ++ * Force-saturating the whole domain would penalize every CPU ++ * inside it on the next placement cycle. 
++ */ ++ if (original->obj_type == OBJ_TYPE_CPU) ++ original->slots_left = SLOTS_SATURATED; ++ info->moved = 0; ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: successfully placed on fallback CPU %d " ++ "(slots_left now %d)\n", ++ info->irq, fallback->number, fallback->slots_left); ++ return 0; ++ } ++ ++ log(TO_ALL, LOG_WARNING, ++ "IRQ %d: max fallback attempts (%d) reached, " ++ "all CPUs saturated within policy scope\n", ++ info->irq, MAX_FALLBACK_ATTEMPTS); ++ return -1; ++} ++ + static int check_affinity(struct irq_info *info, cpumask_t applied_mask) + { + cpumask_t current_mask; +@@ -109,16 +477,37 @@ error: + /* Do not blacklist the IRQ on transient errors. */ + break; + case ENOSPC: /* Specified CPU APIC is full. */ +- if (info->assigned_obj->obj_type != OBJ_TYPE_CPU) +- break; ++ /* ++ * For CPU-assigned IRQs, mark the CPU as saturated. ++ * For domain-assigned IRQs (cache/package/NUMA), we cannot ++ * determine which specific CPU failed, so skip slots_left update. ++ */ ++ if (info->assigned_obj->obj_type == OBJ_TYPE_CPU) { ++ info->assigned_obj->slots_left = SLOTS_SATURATED; ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: CPU %d saturated (ENOSPC), slots_left set to %d\n", ++ info->irq, info->assigned_obj->number, SLOTS_SATURATED); ++ } else { ++ log(TO_ALL, LOG_DEBUG, ++ "IRQ %d: ENOSPC on domain-assigned IRQ (obj_type=%d)\n", ++ info->irq, info->assigned_obj->obj_type); ++ } ++ ++ /* ++ * Try fallback CPUs before giving up. This allows IRQs to ++ * be redistributed in the same cycle rather than waiting ++ * for the next rebalance iteration. ++ */ ++ if (try_fallback_cpu(info, applied_mask, 0) == 0) ++ break; /* Success - IRQ placed on alternative CPU */ + +- if (info->assigned_obj->slots_left > 0) +- info->assigned_obj->slots_left = -1; +- else +- /* Negative slots to count how many we need to free */ +- info->assigned_obj->slots_left--; ++ /* ++ * All fallback attempts failed. Mark for reconsideration ++ * in next cycle by resetting moved flag and forcing rebalance. 
++ */ + + force_rebalance_irq(info, NULL); ++ info->moved = 0; /* Allow reconsideration in next cycle */ + break; + default: + /* Any other error is considered permanent. */ +diff --git a/placement.c b/placement.c +index 0fa4af1..799f9d6 100644 +--- a/placement.c ++++ b/placement.c +@@ -37,10 +37,41 @@ struct obj_placement { + struct irq_info *info; + }; + ++/* ++ * Threshold for slot-based penalty. CPUs with slots_left >= this value ++ * receive zero penalty (considered to have ample headroom). ++ * CPUs with fewer slots receive an increasing penalty to prefer ++ * CPUs with more capacity. ++ */ ++#define SLOTS_PENALTY_THRESHOLD 10 ++ ++/* ++ * Penalty multiplier per slot below threshold. ++ * ++ * The value 1000 is chosen because: ++ * - Typical IRQ load values range from thousands to millions ++ * - A penalty of 1000-9000 (for slots 9 down to 1) is significant ++ * enough to influence placement when loads are similar ++ * - But not so large as to override load-based decisions entirely ++ * for lightly loaded CPUs ++ * ++ * Example: CPU with load=5000 and slots=2 has adjusted_cost = 5000 + 8000 = 13000 ++ * CPU with load=8000 and slots=10 has adjusted_cost = 8000 + 0 = 8000 ++ * → Prefers the higher-load CPU with more headroom ++ * ++ * CPUs with slots_left >= SLOTS_PENALTY_THRESHOLD get zero penalty, ++ * so their placement is determined purely by load balancing. ++ */ ++#define SLOTS_PENALTY_FACTOR 1000 ++ + static void find_best_object(struct topo_obj *d, void *data) + { + struct obj_placement *best = (struct obj_placement *)data; + uint64_t newload; ++ uint64_t adjusted_cost; ++ uint64_t best_adjusted_cost; ++ uint64_t slots_penalty; ++ uint64_t best_slots_penalty; + + /* + * Don't consider the unspecified numa node here +@@ -63,12 +94,49 @@ static void find_best_object(struct topo_obj *d, void *data) + return; + + newload = d->load; +- if (newload < best->best_cost) { ++ ++ /* ++ * Factor in slots_left to prefer CPUs with more available capacity. 
++ * When loads are similar, prefer CPUs with more headroom to reduce ++ * likelihood of ENOSPC. Using a penalty system: lower slots = higher ++ * effective cost. ++ * ++ * Penalty calculation: slots < SLOTS_PENALTY_THRESHOLD adds penalty ++ * Cast to uint64_t to ensure safe arithmetic with newload. ++ */ ++ if (d->slots_left < SLOTS_PENALTY_THRESHOLD) ++ slots_penalty = (uint64_t)(SLOTS_PENALTY_THRESHOLD - d->slots_left) * SLOTS_PENALTY_FACTOR; ++ else ++ slots_penalty = 0; ++ adjusted_cost = newload + slots_penalty; ++ ++ if (best->best) { ++ if (best->best->slots_left < SLOTS_PENALTY_THRESHOLD) ++ best_slots_penalty = (uint64_t)(SLOTS_PENALTY_THRESHOLD - best->best->slots_left) * SLOTS_PENALTY_FACTOR; ++ else ++ best_slots_penalty = 0; ++ best_adjusted_cost = best->best_cost + best_slots_penalty; ++ } else { ++ best_adjusted_cost = ULLONG_MAX; ++ } ++ ++ if (adjusted_cost < best_adjusted_cost) { + best->best = d; + best->best_cost = newload; +- } else if (newload == best->best_cost) { +- if (!best->best || g_list_length(d->interrupts) < g_list_length(best->best->interrupts)) { ++ } else if (adjusted_cost == best_adjusted_cost) { ++ /* ++ * Tie-breaker: first prefer the CPU with more slots_left ++ * (more headroom). During normal operation slots_left is ++ * INT_MAX for all CPUs (see clear_slots()), so fall back to ++ * the original interrupt-count comparison to keep IRQs ++ * spread across CPUs that currently hold fewer interrupts. 
++ */ ++ if (!best->best || ++ d->slots_left > best->best->slots_left || ++ (d->slots_left == best->best->slots_left && ++ g_list_length(d->interrupts) < g_list_length(best->best->interrupts))) { + best->best = d; ++ best->best_cost = newload; + } + } + } +-- +2.43.0 + diff --git a/SPECS/irqbalance/0001-define-IRQBALANCE_ARGS-as-empty-string.patch b/SPECS/irqbalance/0001-define-IRQBALANCE_ARGS-as-empty-string.patch deleted file mode 100644 index 9fae18f95a5..00000000000 --- a/SPECS/irqbalance/0001-define-IRQBALANCE_ARGS-as-empty-string.patch +++ /dev/null @@ -1,23 +0,0 @@ -From 42115bda75d9c49156a2799bc178ea105daf5003 Mon Sep 17 00:00:00 2001 -From: Cameron Baird -Date: Wed, 10 Jul 2024 23:09:32 +0000 -Subject: [PATCH] define IRQBALANCE_ARGS as empty string to squelch systemd - warning - ---- - misc/irqbalance.env | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/misc/irqbalance.env b/misc/irqbalance.env -index 96acb39..84cb843 100644 ---- a/misc/irqbalance.env -+++ b/misc/irqbalance.env -@@ -41,4 +41,4 @@ - # Append any args here to the irqbalance daemon as documented in the man - # page. 
- # --#IRQBALANCE_ARGS= -+IRQBALANCE_ARGS="" --- -2.34.1 - diff --git a/SPECS/irqbalance/irqbalance.signatures.json b/SPECS/irqbalance/irqbalance.signatures.json index 340c3eb7053..387d75d7c44 100644 --- a/SPECS/irqbalance/irqbalance.signatures.json +++ b/SPECS/irqbalance/irqbalance.signatures.json @@ -1,5 +1,5 @@ { "Signatures": { - "irqbalance-1.9.3.tar.gz": "8d698799251ea8518f342f36be26f2f092df51189f6777db33116d40cf0dae6c" + "irqbalance-1.9.5.tar.gz": "c5fc3b1880136437d297afe9a7833781e7849939e104d0780888ffcafc37e339" } } diff --git a/SPECS/irqbalance/irqbalance.spec b/SPECS/irqbalance/irqbalance.spec index 0bf9283e4b4..589862d9e06 100644 --- a/SPECS/irqbalance/irqbalance.spec +++ b/SPECS/irqbalance/irqbalance.spec @@ -1,14 +1,14 @@ Summary: Irqbalance daemon Name: irqbalance -Version: 1.9.3 -Release: 2%{?dist} +Version: 1.9.5 +Release: 1%{?dist} License: GPLv2 URL: https://github.com/Irqbalance/irqbalance Group: System Environment/Services Vendor: Microsoft Corporation Distribution: Azure Linux Source0: https://github.com/Irqbalance/%{name}/archive/v%{version}.tar.gz#/%{name}-%{version}.tar.gz -Patch0: 0001-define-IRQBALANCE_ARGS-as-empty-string.patch +Patch0: 0001-Backport-irqbalance-ENOSPC-slot-aware-placement-and-fallback.patch BuildRequires: systemd-devel BuildRequires: glib-devel Requires: systemd @@ -33,6 +33,7 @@ make %{?_smp_mflags} %install make DESTDIR=%{buildroot} install +rm -rf %{buildroot}%{_prefix}/etc install -D -m 0644 misc/irqbalance.env %{buildroot}/etc/sysconfig/irqbalance sed -i 's#/path/to/irqbalance.env#/etc/sysconfig/irqbalance#' misc/irqbalance.service install -D -m 0644 misc/irqbalance.service %{buildroot}%{_prefix}/lib/systemd/system/irqbalance.service @@ -57,6 +58,12 @@ make -k check |& tee %{_specdir}/%{name}-check-log || %{nocheck} %{_datadir}/* %changelog +* Wed Apr 15 2026 Suresh Thelkar - 1.9.5-1 +- Upgrade to version 1.9.5 +- Remove IRQBALANCE_ARGS patch (now upstream) +- Add ENOSPC handling with slot-aware placement and 
fallback +- Honor balance_level policy for domain-assigned IRQs + * Mon Jul 01 2024 Cameron Baird - 1.9.3-2 - Define IRQBALANCE_ARGS variable in EnvironmentFile for irqbalance.service to squelch systemd warning. diff --git a/cgmanifest.json b/cgmanifest.json index 7c942bafc1e..fab927268c6 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -7621,8 +7621,8 @@ "type": "other", "other": { "name": "irqbalance", - "version": "1.9.3", - "downloadUrl": "https://github.com/Irqbalance/irqbalance/archive/v1.9.3.tar.gz" + "version": "1.9.5", + "downloadUrl": "https://github.com/Irqbalance/irqbalance/archive/v1.9.5.tar.gz" } } }, From b5b9c9f0db0fccece25344c9bf2eec3f6e7e7b16 Mon Sep 17 00:00:00 2001 From: Suresh Thelkar Date: Thu, 30 Apr 2026 04:33:31 +0000 Subject: [PATCH 2/2] irqbalance: Address review comments --- SPECS/irqbalance/irqbalance.spec | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/SPECS/irqbalance/irqbalance.spec b/SPECS/irqbalance/irqbalance.spec index 589862d9e06..3418467d373 100644 --- a/SPECS/irqbalance/irqbalance.spec +++ b/SPECS/irqbalance/irqbalance.spec @@ -24,8 +24,13 @@ interrupts across all of a systems cpus. %build sed -i 's/libsystemd-journal/libsystemd/' configure.ac ./autogen.sh +# Override pkgconfdir (defaults to $prefix/etc/default => /usr/etc/default) +# so configure substitutes @pkgconfdir@ in misc/irqbalance.service to a path +# under %{_sysconfdir}, and `make install` lays irqbalance.env there too. 
./configure \ --prefix=%{_prefix} \ + --sysconfdir=%{_sysconfdir} \ + --with-pkgconfdir=%{_sysconfdir}/sysconfig \ --disable-static \ --with-systemd @@ -33,10 +38,19 @@ make %{?_smp_mflags} %install make DESTDIR=%{buildroot} install -rm -rf %{buildroot}%{_prefix}/etc -install -D -m 0644 misc/irqbalance.env %{buildroot}/etc/sysconfig/irqbalance -sed -i 's#/path/to/irqbalance.env#/etc/sysconfig/irqbalance#' misc/irqbalance.service -install -D -m 0644 misc/irqbalance.service %{buildroot}%{_prefix}/lib/systemd/system/irqbalance.service +# Upstream ships the env file as 'irqbalance.env'; rename to the historical +# Azure Linux path '/etc/sysconfig/irqbalance' and update the unit to match. +mv %{buildroot}%{_sysconfdir}/sysconfig/irqbalance.env \ + %{buildroot}%{_sysconfdir}/sysconfig/irqbalance +# Upstream's `make install` only installs the systemd unit when pkg-config +# exposes systemd's `systemdsystemunitdir`; in our build env that lookup +# can come back empty, leaving the unit uninstalled. Install it manually +# (configure has already substituted @pkgconfdir@ -> /etc/sysconfig in it) +# and fix up the EnvironmentFile path to match the renamed env file. +sed -i 's#/etc/sysconfig/irqbalance\.env#/etc/sysconfig/irqbalance#' \ + misc/irqbalance.service +install -D -m 0644 misc/irqbalance.service \ + %{buildroot}%{_prefix}/lib/systemd/system/irqbalance.service %check make -k check |& tee %{_specdir}/%{name}-check-log || %{nocheck} @@ -63,6 +77,9 @@ make -k check |& tee %{_specdir}/%{name}-check-log || %{nocheck} - Remove IRQBALANCE_ARGS patch (now upstream) - Add ENOSPC handling with slot-aware placement and fallback - Honor balance_level policy for domain-assigned IRQs +- Pass --with-pkgconfdir=%{_sysconfdir}/sysconfig so configure substitutes + @pkgconfdir@ in the unit to /etc/sysconfig (instead of /usr/etc/default), + and drop the obsolete /path/to/irqbalance.env sed which no longer matches. 
* Mon Jul 01 2024 Cameron Baird - 1.9.3-2 - Define IRQBALANCE_ARGS variable in EnvironmentFile for irqbalance.service