Skip to content

Commit 6c0a588

Browse files
ankita-nvnirmoy
authored and committed
NVIDIA: SAUCE: vfio/nvgrace-egm: list gpus through sysfs
To replicate the host EGM topology in the VM in terms of the GPU affinity, userspace needs to be aware of which GPUs belong to the same socket as the EGM region. Expose the list of GPUs associated with an EGM region through sysfs. The list can be queried from the location /sys/devices/virtual/egm/egmX/gpu_devices. Signed-off-by: Ankit Agrawal <ankita@nvidia.com> Ref: sj24: /home/nvidia/ankita/kernel_patches/0002_vfio_nvgrace-egm_list_gpus_through_sysfs.patch (koba: Enhance error handling for sysfs_create_group) Signed-off-by: Koba Ko <kobak@nvidia.com> Acked-by: Matthew R. Ochs <mochs@nvidia.com> Acked-by: Carol L. Soto <csoto@nvidia.com> Signed-off-by: Matthew R. Ochs <mochs@nvidia.com> (cherry picked from commit fec2356 https://github.com/NVIDIA/NV-Kernels/tree/24.04_linux-nvidia-adv-6.11-next) Signed-off-by: Nirmoy Das <nirmoyd@nvidia.com>
1 parent 2be8d45 commit 6c0a588

1 file changed

Lines changed: 40 additions & 1 deletion

File tree

  • drivers/vfio/pci/nvgrace-gpu

drivers/vfio/pci/nvgrace-gpu/egm.c

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,38 @@ static void nvgrace_egm_fetch_bad_pages(struct pci_dev *pdev,
357357
memunmap(memaddr);
358358
}
359359

360+
static ssize_t gpu_devices_show(struct device *dev, struct device_attribute *attr,
361+
char *buf)
362+
{
363+
struct egm_region *region =
364+
container_of(dev, struct egm_region, device);
365+
struct gpu_node *node, *temp_node;
366+
int len = 0;
367+
368+
list_for_each_entry_safe(node, temp_node, &region->gpus, list) {
369+
struct pci_dev *pdev = node->pdev;
370+
371+
len += sysfs_emit_at(buf, len, "%04x:%02x:%02x.%x\n",
372+
pci_domain_nr(pdev->bus),
373+
pdev->bus->number,
374+
PCI_SLOT(pdev->devfn),
375+
PCI_FUNC(pdev->devfn));
376+
}
377+
378+
return len;
379+
}
380+
381+
static DEVICE_ATTR_RO(gpu_devices);
382+
383+
static struct attribute *attrs[] = {
384+
&dev_attr_gpu_devices.attr,
385+
NULL,
386+
};
387+
388+
static struct attribute_group attr_group = {
389+
.attrs = attrs,
390+
};
391+
360392
static int add_gpu(struct egm_region *region, struct pci_dev *pdev)
361393
{
362394
struct gpu_node *node;
@@ -423,12 +455,18 @@ int register_egm_node(struct pci_dev *pdev)
423455

424456
list_add_tail(&region->list, &egm_list);
425457

426-
ret = add_gpu(region, pdev);
458+
ret = sysfs_create_group(&region->device.kobj, &attr_group);
427459
if (ret)
428460
goto err_remove_from_list;
429461

462+
ret = add_gpu(region, pdev);
463+
if (ret)
464+
goto err_remove_sysfs;
465+
430466
return 0;
431467

468+
err_remove_sysfs:
469+
sysfs_remove_group(&region->device.kobj, &attr_group);
432470
err_remove_from_list:
433471
list_del(&region->list);
434472
destroy_egm_chardev(region);
@@ -462,6 +500,7 @@ void unregister_egm_node(struct pci_dev *pdev)
462500
vfree(cur_page);
463501
}
464502

503+
sysfs_remove_group(&region->device.kobj, &attr_group);
465504
destroy_egm_chardev(region);
466505
list_del(&region->list);
467506
kfree(region);

0 commit comments

Comments
 (0)