Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 88 additions & 8 deletions drivers/vfio/pci/nvgrace-gpu/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ struct nvgrace_gpu_pci_core_device {
/* GPU has just been reset */
bool reset_done;
int egm_node;
/* CXL Device DVSEC offset; 0 if not present (legacy GB path) */
int cxl_dvsec;
};

static bool egm_enabled;
Expand Down Expand Up @@ -246,7 +248,7 @@ static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
vfio_pci_core_close_device(core_vdev);
}

static int nvgrace_gpu_wait_device_ready(void __iomem *io)
static int nvgrace_gpu_wait_device_ready_legacy(void __iomem *io)
{
unsigned long timeout = jiffies + msecs_to_jiffies(POLL_TIMEOUT_MS);

Expand All @@ -260,6 +262,76 @@ static int nvgrace_gpu_wait_device_ready(void __iomem *io)
return -ETIME;
}

/*
* Decode the 3-bit Memory_Active_Timeout field from CXL DVSEC Range 1 Low
* (bits 15:13) into milliseconds. Encoding per CXL spec r4.0 sec 8.1.3.8.2:
* 000b = 1s, 001b = 4s, 010b = 16s, 011b = 64s, 100b = 256s,
* 101b-111b = reserved (clamped to 256s).
*/
static inline unsigned long cxl_mem_active_timeout_ms(u8 timeout)
{
return 1000UL << (2 * min_t(u8, timeout, 4));
}

/*
* Check if CXL DVSEC reports memory as valid and active.
*/
static inline bool cxl_dvsec_mem_is_active(u32 status)
{
return (status & PCI_DVSEC_CXL_MEM_INFO_VALID) &&
(status & PCI_DVSEC_CXL_MEM_ACTIVE);
}

static int nvgrace_gpu_wait_device_ready_cxl(struct nvgrace_gpu_pci_core_device *nvdev)
{
struct pci_dev *pdev = nvdev->core_device.pdev;
int cxl_dvsec = nvdev->cxl_dvsec;
unsigned long mem_info_valid_deadline;
unsigned long timeout;
u32 dvsec_memory_status;
u8 mem_active_timeout;

pci_read_config_dword(pdev, cxl_dvsec + PCI_DVSEC_CXL_RANGE_SIZE_LOW(0),
&dvsec_memory_status);

if (cxl_dvsec_mem_is_active(dvsec_memory_status))
return 0;

mem_active_timeout = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE_TIMEOUT,
dvsec_memory_status);

timeout = jiffies +
msecs_to_jiffies(cxl_mem_active_timeout_ms(mem_active_timeout));

mem_info_valid_deadline = jiffies + msecs_to_jiffies(POLL_QUANTUM_MS);

do {
pci_read_config_dword(pdev,
cxl_dvsec + PCI_DVSEC_CXL_RANGE_SIZE_LOW(0),
&dvsec_memory_status);

if (cxl_dvsec_mem_is_active(dvsec_memory_status))
return 0;

/* Bail early if MEM_INFO_VALID is not set within 1 second */
if (!(dvsec_memory_status & PCI_DVSEC_CXL_MEM_INFO_VALID) &&
time_after(jiffies, mem_info_valid_deadline))
return -ETIME;

msleep(POLL_QUANTUM_MS);
} while (!time_after(jiffies, timeout));

return -ETIME;
}

static inline int nvgrace_gpu_wait_device_ready(struct nvgrace_gpu_pci_core_device *nvdev,
void __iomem *io)
{
return nvdev->cxl_dvsec ?
nvgrace_gpu_wait_device_ready_cxl(nvdev) :
nvgrace_gpu_wait_device_ready_legacy(io);
}

/*
* If the GPU memory is accessed by the CPU while the GPU is not ready
* after reset, it can cause harmless corrected RAS events to be logged.
Expand All @@ -279,7 +351,7 @@ nvgrace_gpu_check_device_ready(struct nvgrace_gpu_pci_core_device *nvdev)
if (!__vfio_pci_memory_enabled(vdev))
return -EIO;

ret = nvgrace_gpu_wait_device_ready(vdev->barmap[0]);
ret = nvgrace_gpu_wait_device_ready(nvdev, vdev->barmap[0]);
if (ret)
return ret;

Expand Down Expand Up @@ -1157,11 +1229,16 @@ static bool nvgrace_gpu_has_mig_hw_bug(struct pci_dev *pdev)
* Ensure that the BAR0 region is enabled before accessing the
* registers.
*/
static int nvgrace_gpu_probe_check_device_ready(struct pci_dev *pdev)
static int nvgrace_gpu_probe_check_device_ready(struct nvgrace_gpu_pci_core_device *nvdev)
{
struct pci_dev *pdev = nvdev->core_device.pdev;
void __iomem *io;
int ret;

/* CXL path only reads PCI config space; no need to map BAR0. */
if (nvdev->cxl_dvsec)
return nvgrace_gpu_wait_device_ready_cxl(nvdev);

ret = pci_enable_device(pdev);
if (ret)
return ret;
Expand All @@ -1176,7 +1253,7 @@ static int nvgrace_gpu_probe_check_device_ready(struct pci_dev *pdev)
goto iomap_exit;
}

ret = nvgrace_gpu_wait_device_ready(io);
ret = nvgrace_gpu_wait_device_ready_legacy(io);

pci_iounmap(pdev, io);
iomap_exit:
Expand All @@ -1195,10 +1272,6 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
u64 egmpxm;
int ret;

ret = nvgrace_gpu_probe_check_device_ready(pdev);
if (ret)
return ret;

ret = nvgrace_gpu_fetch_memory_property(pdev, &memphys, &memlength);
if (!ret) {
ops = &nvgrace_gpu_pci_ops;
Expand All @@ -1215,6 +1288,13 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,

dev_set_drvdata(&pdev->dev, &nvdev->core_device);

nvdev->cxl_dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
PCI_DVSEC_CXL_DEVICE);

ret = nvgrace_gpu_probe_check_device_ready(nvdev);
if (ret)
goto out_put_vdev;

if (ops == &nvgrace_gpu_pci_ops) {
nvdev->has_mig_hw_bug = nvgrace_gpu_has_mig_hw_bug(pdev);

Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/pci_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,7 @@
#define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10))
#define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0)
#define PCI_DVSEC_CXL_MEM_ACTIVE _BITUL(1)
#define PCI_DVSEC_CXL_MEM_ACTIVE_TIMEOUT __GENMASK(15, 13)
#define PCI_DVSEC_CXL_MEM_SIZE_LOW __GENMASK(31, 28)
#define PCI_DVSEC_CXL_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10))
#define PCI_DVSEC_CXL_RANGE_BASE_LOW(i) (0x24 + (i * 0x10))
Expand Down
Loading