@@ -66,6 +66,8 @@ struct nvgrace_gpu_pci_core_device {
6666 /* GPU has just been reset */
6767 bool reset_done ;
6868 int egm_node ;
69+ /* CXL Device DVSEC offset; 0 if not present (legacy GB path) */
70+ int cxl_dvsec ;
6971};
7072
7173static bool egm_enabled ;
@@ -246,7 +248,7 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
246248 vfio_pci_core_close_device (core_vdev );
247249}
248250
249- static int nvgrace_gpu_wait_device_ready (void __iomem * io )
251+ static int nvgrace_gpu_wait_device_ready_legacy (void __iomem * io )
250252{
251253 unsigned long timeout = jiffies + msecs_to_jiffies (POLL_TIMEOUT_MS );
252254
@@ -260,6 +262,76 @@ static int nvgrace_gpu_wait_device_ready(void __iomem *io)
260262 return - ETIME ;
261263}
262264
265+ /*
266+ * Decode the 3-bit Memory_Active_Timeout field from CXL DVSEC Range 1 Low
267+ * (bits 15:13) into milliseconds. Encoding per CXL spec r4.0 sec 8.1.3.8.2:
268+ * 000b = 1s, 001b = 4s, 010b = 16s, 011b = 64s, 100b = 256s,
269+ * 101b-111b = reserved (clamped to 256s).
270+ */
271+ static inline unsigned long cxl_mem_active_timeout_ms (u8 timeout )
272+ {
273+ return 1000UL << (2 * min_t (u8 , timeout , 4 ));
274+ }
275+
276+ /*
277+ * Check if CXL DVSEC reports memory as valid and active.
278+ */
279+ static inline bool cxl_dvsec_mem_is_active (u32 status )
280+ {
281+ return (status & PCI_DVSEC_CXL_MEM_INFO_VALID ) &&
282+ (status & PCI_DVSEC_CXL_MEM_ACTIVE );
283+ }
284+
285+ static int nvgrace_gpu_wait_device_ready_cxl (struct nvgrace_gpu_pci_core_device * nvdev )
286+ {
287+ struct pci_dev * pdev = nvdev -> core_device .pdev ;
288+ int cxl_dvsec = nvdev -> cxl_dvsec ;
289+ unsigned long mem_info_valid_deadline ;
290+ unsigned long timeout ;
291+ u32 dvsec_memory_status ;
292+ u8 mem_active_timeout ;
293+
294+ pci_read_config_dword (pdev , cxl_dvsec + PCI_DVSEC_CXL_RANGE_SIZE_LOW (0 ),
295+ & dvsec_memory_status );
296+
297+ if (cxl_dvsec_mem_is_active (dvsec_memory_status ))
298+ return 0 ;
299+
300+ mem_active_timeout = FIELD_GET (PCI_DVSEC_CXL_MEM_ACTIVE_TIMEOUT ,
301+ dvsec_memory_status );
302+
303+ timeout = jiffies +
304+ msecs_to_jiffies (cxl_mem_active_timeout_ms (mem_active_timeout ));
305+
306+ mem_info_valid_deadline = jiffies + msecs_to_jiffies (POLL_QUANTUM_MS );
307+
308+ do {
309+ pci_read_config_dword (pdev ,
310+ cxl_dvsec + PCI_DVSEC_CXL_RANGE_SIZE_LOW (0 ),
311+ & dvsec_memory_status );
312+
313+ if (cxl_dvsec_mem_is_active (dvsec_memory_status ))
314+ return 0 ;
315+
316+ /* Bail early if MEM_INFO_VALID is not set within 1 second */
317+ if (!(dvsec_memory_status & PCI_DVSEC_CXL_MEM_INFO_VALID ) &&
318+ time_after (jiffies , mem_info_valid_deadline ))
319+ return - ETIME ;
320+
321+ msleep (POLL_QUANTUM_MS );
322+ } while (!time_after (jiffies , timeout ));
323+
324+ return - ETIME ;
325+ }
326+
327+ static inline int nvgrace_gpu_wait_device_ready (struct nvgrace_gpu_pci_core_device * nvdev ,
328+ void __iomem * io )
329+ {
330+ return nvdev -> cxl_dvsec ?
331+ nvgrace_gpu_wait_device_ready_cxl (nvdev ) :
332+ nvgrace_gpu_wait_device_ready_legacy (io );
333+ }
334+
263335/*
264336 * If the GPU memory is accessed by the CPU while the GPU is not ready
265337 * after reset, it can cause harmless corrected RAS events to be logged.
@@ -279,7 +351,7 @@ nvgrace_gpu_check_device_ready(struct nvgrace_gpu_pci_core_device *nvdev)
279351 if (!__vfio_pci_memory_enabled (vdev ))
280352 return - EIO ;
281353
282- ret = nvgrace_gpu_wait_device_ready (vdev -> barmap [0 ]);
354+ ret = nvgrace_gpu_wait_device_ready (nvdev , vdev -> barmap [0 ]);
283355 if (ret )
284356 return ret ;
285357
@@ -1157,11 +1229,16 @@ static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev)
11571229 * Ensure that the BAR0 region is enabled before accessing the
11581230 * registers.
11591231 */
1160- static int nvgrace_gpu_probe_check_device_ready (struct pci_dev * pdev )
1232+ static int nvgrace_gpu_probe_check_device_ready (struct nvgrace_gpu_pci_core_device * nvdev )
11611233{
1234+ struct pci_dev * pdev = nvdev -> core_device .pdev ;
11621235 void __iomem * io ;
11631236 int ret ;
11641237
1238+ /* CXL path only reads PCI config space; no need to map BAR0. */
1239+ if (nvdev -> cxl_dvsec )
1240+ return nvgrace_gpu_wait_device_ready_cxl (nvdev );
1241+
11651242 ret = pci_enable_device (pdev );
11661243 if (ret )
11671244 return ret ;
@@ -1176,7 +1253,7 @@ static int nvgrace_gpu_probe_check_device_ready(struct pci_dev *pdev)
11761253 goto iomap_exit ;
11771254 }
11781255
1179- ret = nvgrace_gpu_wait_device_ready (io );
1256+ ret = nvgrace_gpu_wait_device_ready_legacy (io );
11801257
11811258 pci_iounmap (pdev , io );
11821259iomap_exit :
@@ -1195,10 +1272,6 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
11951272 u64 egmpxm ;
11961273 int ret ;
11971274
1198- ret = nvgrace_gpu_probe_check_device_ready (pdev );
1199- if (ret )
1200- return ret ;
1201-
12021275 ret = nvgrace_gpu_fetch_memory_property (pdev , & memphys , & memlength );
12031276 if (!ret ) {
12041277 ops = & nvgrace_gpu_pci_ops ;
@@ -1215,6 +1288,13 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
12151288
12161289 dev_set_drvdata (& pdev -> dev , & nvdev -> core_device );
12171290
1291+ nvdev -> cxl_dvsec = pci_find_dvsec_capability (pdev , PCI_VENDOR_ID_CXL ,
1292+ PCI_DVSEC_CXL_DEVICE );
1293+
1294+ ret = nvgrace_gpu_probe_check_device_ready (nvdev );
1295+ if (ret )
1296+ goto out_put_vdev ;
1297+
12181298 if (ops == & nvgrace_gpu_pci_ops ) {
12191299 nvdev -> has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug (pdev );
12201300
0 commit comments