Skip to content

Commit 2be8d45

Browse files
ankita-nv and nirmoy
authored and committed
NVIDIA: SAUCE: vfio/nvgrace-egm: track GPUs associated with the EGM regions
GB200 systems can have multiple GPUs associated with an EGM region. For proper EGM functionality, the host topology in terms of GPU affinity has to be replicated in the VM. Hence, the EGM region structure must track the GPU devices belonging to the same socket. On device probe, the GPU's pci_dev struct is added to a linked list in the appropriate EGM region. Similarly, on device remove, the pci_dev struct for the GPU is removed from the EGM region. Signed-off-by: Ankit Agrawal <ankita@nvidia.com> Ref: sj24: /home/nvidia/ankita/kernel_patches/0001_vfio_nvgrace-egm_track_GPUs_associated_with_the_EGM_regions.patch (koba: Enhance error handling, remove egm_node from unregister_egm_node, and move destroy_egm_chardev a little forward) Signed-off-by: Koba Ko <kobak@nvidia.com> Acked-by: Matthew R. Ochs <mochs@nvidia.com> Acked-by: Carol L. Soto <csoto@nvidia.com> Signed-off-by: Matthew R. Ochs <mochs@nvidia.com> (cherry picked from commit 0222c35 https://github.com/NVIDIA/NV-Kernels/tree/24.04_linux-nvidia-adv-6.11-next) Signed-off-by: Nirmoy Das <nirmoyd@nvidia.com>
1 parent 522bd1c commit 2be8d45

1 file changed

Lines changed: 59 additions & 9 deletions

File tree

  • drivers/vfio/pci/nvgrace-gpu

drivers/vfio/pci/nvgrace-gpu/egm.c

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616

1717
#define MAX_EGM_NODES 256
1818

19+
/*
 * One GPU tracked by an EGM region. Instances are allocated in add_gpu(),
 * linked into egm_region.gpus, and freed in remove_gpu().
 */
struct gpu_node {
20+
/* Linkage into the owning region's gpus list (appended with list_add_tail()). */
struct list_head list;
21+
/*
 * PCI device of the GPU. Stored as passed in; add_gpu() takes no extra
 * reference on it, and remove_gpu() matches entries by pointer equality.
 */
struct pci_dev *pdev;
22+
};
23+
1924
struct egm_region {
2025
struct list_head list;
2126
int egmpxm;
@@ -24,6 +29,7 @@ struct egm_region {
2429
size_t egmlength;
2530
struct device device;
2631
struct cdev cdev;
32+
struct list_head gpus;
2733
DECLARE_HASHTABLE(htbl, 0x10);
2834
#ifdef CONFIG_MEMORY_FAILURE
2935
struct pfn_address_space pfn_address_space;
@@ -268,6 +274,11 @@ static int setup_egm_chardev(struct egm_region *region)
268274
return ret;
269275
}
270276

277+
/*
 * Tear down the character device of @region by handing its embedded cdev and
 * device to cdev_device_del(). Does not free @region itself; the caller owns
 * that. register_egm_node()'s error path calls this once setup_egm_chardev()
 * has already succeeded.
 */
static void destroy_egm_chardev(struct egm_region *region)
278+
{
279+
cdev_device_del(&region->cdev, &region->device);
280+
}
281+
271282
static int
272283
nvgrace_gpu_fetch_egm_property(struct pci_dev *pdev, u64 *pegmphys,
273284
u64 *pegmlength, u64 *pegmpxm)
@@ -346,6 +357,32 @@ static void nvgrace_egm_fetch_bad_pages(struct pci_dev *pdev,
346357
memunmap(memaddr);
347358
}
348359

360+
/*
 * Record @pdev as a GPU belonging to @region.
 *
 * Allocates a zeroed gpu_node, stores @pdev in it and appends it to the tail
 * of region->gpus. @region->gpus must already be initialised as a list head.
 *
 * Returns 0 on success or -ENOMEM if the node cannot be allocated.
 *
 * No check is made for @pdev already being on the list, and no reference is
 * taken on @pdev here.
 * NOTE(review): the list is modified without taking any lock in this
 * function - presumably callers serialise probe/remove; confirm.
 */
static int add_gpu(struct egm_region *region, struct pci_dev *pdev)
361+
{
362+
struct gpu_node *node;
363+
364+
/* Zeroed allocation; released with kvfree() in remove_gpu(). */
node = kvzalloc(sizeof(*node), GFP_KERNEL);
365+
if (!node)
366+
return -ENOMEM;
367+
368+
node->pdev = pdev;
369+
370+
/* Append, so the list keeps GPUs in the order they were added. */
list_add_tail(&node->list, &region->gpus);
371+
return 0;
372+
}
373+
374+
/*
 * Drop @pdev from the GPU list of @region.
 *
 * Walks region->gpus and unlinks and frees every gpu_node whose pdev pointer
 * equals @pdev. It is a no-op when @pdev is not on the list. Callers can then
 * test list_empty(&region->gpus) to see whether any GPU is still attached, as
 * unregister_egm_node() does.
 */
static void remove_gpu(struct egm_region *region, struct pci_dev *pdev)
375+
{
376+
struct gpu_node *node, *tmp;
377+
378+
/* _safe iterator: the current entry is deleted and freed inside the loop. */
list_for_each_entry_safe(node, tmp, &region->gpus, list) {
379+
if (node->pdev == pdev) {
380+
list_del(&node->list);
381+
/* Matches the kvzalloc() in add_gpu(). */
kvfree(node);
382+
/* No break: the walk continues, so any further matches are removed too. */
}
383+
}
384+
}
385+
349386
int register_egm_node(struct pci_dev *pdev)
350387
{
351388
struct egm_region *region = NULL;
@@ -356,11 +393,15 @@ int register_egm_node(struct pci_dev *pdev)
356393
if (ret)
357394
return ret;
358395

396+
/* Check if region already exists */
359397
list_for_each_entry(region, &egm_list, list) {
360-
if (region->egmphys == egmphys)
361-
return 0;
398+
if (region->egmphys == egmphys) {
399+
/* Add GPU to existing region */
400+
return add_gpu(region, pdev);
401+
}
362402
}
363403

404+
/* Create new region */
364405
region = kvzalloc(sizeof(*region), GFP_KERNEL);
365406
if (!region)
366407
return -ENOMEM;
@@ -370,28 +411,33 @@ int register_egm_node(struct pci_dev *pdev)
370411
region->egmpxm = egmpxm;
371412

372413
hash_init(region->htbl);
414+
INIT_LIST_HEAD(&region->gpus);
415+
373416
atomic_set(&region->open_count, 0);
374417

375418
nvgrace_egm_fetch_bad_pages(pdev, region);
376419

377420
ret = setup_egm_chardev(region);
378421
if (ret)
379-
goto err;
422+
goto err_free_region;
380423

381424
list_add_tail(&region->list, &egm_list);
382425

426+
ret = add_gpu(region, pdev);
427+
if (ret)
428+
goto err_remove_from_list;
429+
383430
return 0;
384-
err:
431+
432+
err_remove_from_list:
433+
list_del(&region->list);
434+
destroy_egm_chardev(region);
435+
err_free_region:
385436
kfree(region);
386437
return ret;
387438
}
388439
EXPORT_SYMBOL_GPL(register_egm_node);
389440

390-
static void destroy_egm_chardev(struct egm_region *region)
391-
{
392-
cdev_device_del(&region->cdev, &region->device);
393-
}
394-
395441
void unregister_egm_node(struct pci_dev *pdev)
396442
{
397443
struct egm_region *region, *temp_region;
@@ -407,6 +453,10 @@ void unregister_egm_node(struct pci_dev *pdev)
407453

408454
list_for_each_entry_safe(region, temp_region, &egm_list, list) {
409455
if (egmpxm == region->egmpxm) {
456+
remove_gpu(region, pdev);
457+
if (!list_empty(&region->gpus))
458+
break;
459+
410460
hash_for_each_safe(region->htbl, bkt, temp_node, cur_page, node) {
411461
hash_del(&cur_page->node);
412462
vfree(cur_page);

0 commit comments

Comments
 (0)