Skip to content

Commit 67e4b5a

Browse files
SriMNvidia authored and kobak2026 committed
NVIDIA: VR: SAUCE: cxl: Add multi-function sibling coordination for CXL reset
BugLink: https://bugs.launchpad.net/bugs/2143032

Add sibling PCI function save/disable/restore coordination for CXL reset. Before reset, all CXL.cachemem sibling functions are locked, saved, and disabled; after reset they are restored. The Non-CXL Function Map DVSEC and per-function DVSEC capability register are consulted to skip non-CXL and CXL.io-only functions. A global mutex serializes concurrent resets to prevent deadlocks between sibling functions.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(cherry picked from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
Acked-by: Jamie Nguyen <jamien@nvidia.com>
Acked-by: Nirmoy Das <nirmoyd@nvidia.com>
Acked-by: Carol L Soto <csoto@nvidia.com>
Acked-by: Matthew R. Ochs <mochs@nvidia.com>
Signed-off-by: Brad Figg <bfigg@nvidia.com>
(backported from commit 9a08c02 nv-kernels/24.04_linux-nvidia-6.17-next)
[koba: Propagate sibling collection allocation failures after pci_walk_bus() so reset aborts instead of proceeding with a partial sibling list.]
Signed-off-by: Koba Ko <kobak@nvidia.com>
1 parent 4b8d49a commit 67e4b5a

1 file changed

Lines changed: 156 additions & 0 deletions

File tree

drivers/cxl/core/pci.c

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#include "core.h"
1717
#include "trace.h"
1818

19+
/*
 * Starting capacity of the sibling array used during CXL reset. Eight
 * entries are enough for every function a non-ARI slot can expose.
 */
#define CXL_RESET_SIBLINGS_INIT 8
21+
1922
/**
2023
* DOC: cxl core pci
2124
*
@@ -1047,3 +1050,156 @@ static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
10471050
device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache);
10481051
return 0;
10491052
}
1053+
1054+
/*
1055+
* Serialize all CXL reset operations globally.
1056+
*/
1057+
static DEFINE_MUTEX(cxl_reset_mutex);
1058+
1059+
/**
 * struct cxl_reset_context - bookkeeping for one coordinated CXL reset
 * @target: function the reset is issued against; it is never stored in
 *	@pci_functions
 * @pci_functions: dynamically sized array of sibling functions collected
 *	for this reset, each entry holding a pci_dev_get() reference
 * @pci_func_count: number of valid entries in @pci_functions
 * @pci_func_cap: number of entries @pci_functions currently has room for
 */
struct cxl_reset_context {
	struct pci_dev *target;
	struct pci_dev **pci_functions;
	int pci_func_count;
	int pci_func_cap;
};
1065+
1066+
/*
1067+
* Check if a sibling function is non-CXL using the Non-CXL Function Map
1068+
* DVSEC. Returns true if fn is listed as non-CXL, false otherwise (including
1069+
* on any read failure).
1070+
*/
1071+
static bool cxl_is_non_cxl_function(struct pci_dev *pdev,
1072+
u16 func_map_dvsec, int fn)
1073+
{
1074+
int reg, bit;
1075+
u32 map;
1076+
1077+
if (pci_ari_enabled(pdev->bus)) {
1078+
reg = fn / 32;
1079+
bit = fn % 32;
1080+
} else {
1081+
reg = 0;
1082+
bit = fn;
1083+
}
1084+
1085+
if (pci_read_config_dword(pdev,
1086+
func_map_dvsec + PCI_DVSEC_CXL_FUNCTION_MAP_REG + (reg * 4),
1087+
&map))
1088+
return false;
1089+
1090+
return map & BIT(bit);
1091+
}
1092+
1093+
struct cxl_reset_walk_ctx {
1094+
struct cxl_reset_context *ctx;
1095+
u16 func_map_dvsec;
1096+
int error;
1097+
bool ari;
1098+
};
1099+
1100+
static int cxl_reset_collect_sibling(struct pci_dev *func, void *data)
1101+
{
1102+
struct cxl_reset_walk_ctx *wctx = data;
1103+
struct cxl_reset_context *ctx = wctx->ctx;
1104+
struct pci_dev *pdev = ctx->target;
1105+
u16 dvsec, cap;
1106+
int fn;
1107+
1108+
if (func == pdev)
1109+
return 0;
1110+
1111+
if (!wctx->ari &&
1112+
PCI_SLOT(func->devfn) != PCI_SLOT(pdev->devfn))
1113+
return 0;
1114+
1115+
fn = wctx->ari ? func->devfn : PCI_FUNC(func->devfn);
1116+
if (wctx->func_map_dvsec &&
1117+
cxl_is_non_cxl_function(pdev, wctx->func_map_dvsec, fn))
1118+
return 0;
1119+
1120+
/* Only coordinate with siblings that have CXL.cachemem */
1121+
dvsec = pci_find_dvsec_capability(func, PCI_VENDOR_ID_CXL,
1122+
PCI_DVSEC_CXL_DEVICE);
1123+
if (!dvsec)
1124+
return 0;
1125+
if (pci_read_config_word(func, dvsec + PCI_DVSEC_CXL_CAP, &cap))
1126+
return 0;
1127+
if (!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE |
1128+
PCI_DVSEC_CXL_MEM_CAPABLE)))
1129+
return 0;
1130+
1131+
/* Grow sibling array; double capacity for ARI devices when running out of space */
1132+
if (ctx->pci_func_count >= ctx->pci_func_cap) {
1133+
struct pci_dev **new;
1134+
int new_cap = ctx->pci_func_cap ? ctx->pci_func_cap * 2
1135+
: CXL_RESET_SIBLINGS_INIT;
1136+
1137+
new = krealloc(ctx->pci_functions,
1138+
new_cap * sizeof(*new), GFP_KERNEL);
1139+
if (!new) {
1140+
wctx->error = -ENOMEM;
1141+
return 1;
1142+
}
1143+
ctx->pci_functions = new;
1144+
ctx->pci_func_cap = new_cap;
1145+
}
1146+
1147+
pci_dev_get(func);
1148+
ctx->pci_functions[ctx->pci_func_count++] = func;
1149+
return 0;
1150+
}
1151+
1152+
/*
 * Drop the reference held on every collected sibling, free the sibling
 * array and return @ctx to its empty state (no array, zero count, zero
 * capacity). The target pointer in @ctx is left untouched.
 */
static void __maybe_unused cxl_pci_functions_reset_release(struct cxl_reset_context *ctx)
{
	struct pci_dev **sibling = ctx->pci_functions;
	int remaining = ctx->pci_func_count;

	/* Walk the array front to back, one put per stored reference */
	while (remaining-- > 0)
		pci_dev_put(*sibling++);

	kfree(ctx->pci_functions);
	ctx->pci_functions = NULL;
	ctx->pci_func_count = 0;
	ctx->pci_func_cap = 0;
}
1163+
1164+
/*
 * Collect the CXL.cachemem siblings of ctx->target, then lock each one and
 * save/disable it ahead of the reset.
 *
 * On success the siblings stay locked and referenced in @ctx until
 * cxl_pci_functions_reset_done() is called.
 *
 * Returns 0 on success, or the error recorded by the bus walk, in which
 * case everything collected so far has already been released.
 */
static int __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
{
	struct pci_dev *pdev = ctx->target;
	struct cxl_reset_walk_ctx wctx = {
		.ctx = ctx,
		.ari = pci_ari_enabled(pdev->bus),
		.error = 0,
		.func_map_dvsec = pci_find_dvsec_capability(pdev,
				PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_FUNCTION_MAP),
	};
	int i;

	/* Start from an empty sibling list */
	ctx->pci_functions = NULL;
	ctx->pci_func_cap = 0;
	ctx->pci_func_count = 0;

	/* Collect CXL.cachemem siblings under pci_bus_sem */
	pci_walk_bus(pdev->bus, cxl_reset_collect_sibling, &wctx);
	if (wctx.error) {
		/* Abort rather than proceed with a partial sibling list */
		cxl_pci_functions_reset_release(ctx);
		return wctx.error;
	}

	/* Lock and save/disable siblings outside pci_bus_sem */
	for (i = 0; i < ctx->pci_func_count; i++) {
		struct pci_dev *sibling = ctx->pci_functions[i];

		pci_dev_lock(sibling);
		pci_dev_save_and_disable(sibling);
	}

	return 0;
}
1195+
1196+
/*
 * Counterpart of cxl_pci_functions_reset_prepare(): restore and unlock
 * every collected sibling in collection order, then release the sibling
 * list held in @ctx.
 */
static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
{
	int idx = 0;

	while (idx < ctx->pci_func_count) {
		struct pci_dev *sibling = ctx->pci_functions[idx++];

		pci_dev_restore(sibling);
		pci_dev_unlock(sibling);
	}

	cxl_pci_functions_reset_release(ctx);
}

0 commit comments

Comments
 (0)