Skip to content

Commit bc93285

Browse files
committed
nv-pci: add timeout to usage_count wait for eGPU hotplug
1 parent c44fe64 commit bc93285

File tree

1 file changed

+6 additions, -14 deletions (lines changed)

1 file changed

+6
-14
lines changed

kernel-open/nvidia/nv-pci.c

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@
2727
#include "nv-msi.h"
2828
#include "nv-hypervisor.h"
2929
#include "nv-reg.h"
30-
#include "nv-rsync.h"
3130

3231
#if defined(NV_VGPU_KVM_BUILD)
3332
#include "nv-vgpu-vfio-interface.h"
@@ -2143,13 +2142,6 @@ nv_pci_remove(struct pci_dev *pci_dev)
21432142

21442143
nv = NV_STATE_PTR(nvl);
21452144

2146-
/*
2147-
* Note: For external GPUs (eGPU via Thunderbolt), the NV_FLAG_IN_SURPRISE_REMOVAL
2148-
* flag is set later in the removal process - either when waiting for usage count
2149-
* times out, or when actual surprise removal is detected. Setting it too early
2150-
* can interfere with normal cleanup operations that need to acquire GPU locks.
2151-
*/
2152-
21532145
#if NV_IS_EXPORT_SYMBOL_GPL_iommu_dev_disable_feature
21542146
#if defined(CONFIG_IOMMU_SVA) && \
21552147
(defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
@@ -2190,7 +2182,7 @@ nv_pci_remove(struct pci_dev *pci_dev)
21902182
* We still wait for a short time to allow in-progress close operations
21912183
* to complete, but with a timeout to prevent hangs.
21922184
*/
2193-
if ((atomic64_read(&nvl->usage_count) != 0) && !(nv->is_external_gpu))
2185+
if (atomic64_read(&nvl->usage_count) != 0)
21942186
{
21952187
/*
21962188
* For external GPU: wait up to 5 seconds (10 iterations * 500ms)
@@ -2202,10 +2194,10 @@ nv_pci_remove(struct pci_dev *pci_dev)
22022194
int wait_iterations = 0;
22032195

22042196
nv_printf(NV_DBG_ERRORS,
2205-
"NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count (%d)%s\n",
2197+
"NVRM: Attempting to remove device %04x:%02x:%02x.%x with non-zero usage count (%lld)%s\n",
22062198
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
22072199
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn),
2208-
NV_ATOMIC_READ(nvl->usage_count),
2200+
atomic64_read(&nvl->usage_count),
22092201
nv->is_external_gpu ? " (external GPU)" : "");
22102202

22112203
/*
@@ -2242,13 +2234,13 @@ nv_pci_remove(struct pci_dev *pci_dev)
22422234
down(&nvl->ldata_lock);
22432235
}
22442236

2245-
if (NV_ATOMIC_READ(nvl->usage_count) != 0)
2237+
if (atomic64_read(&nvl->usage_count) != 0)
22462238
{
22472239
nv_printf(NV_DBG_ERRORS,
2248-
"NVRM: Timeout waiting for usage count on device %04x:%02x:%02x.%x (remaining: %d). Forcing removal.\n",
2240+
"NVRM: Timeout waiting for usage count on device %04x:%02x:%02x.%x (remaining: %lld). Forcing removal.\n",
22492241
NV_PCI_DOMAIN_NUMBER(pci_dev), NV_PCI_BUS_NUMBER(pci_dev),
22502242
NV_PCI_SLOT_NUMBER(pci_dev), PCI_FUNC(pci_dev->devfn),
2251-
NV_ATOMIC_READ(nvl->usage_count));
2243+
atomic64_read(&nvl->usage_count));
22522244
/*
22532245
* Force the surprise removal flag so that any remaining
22542246
* close operations will take the fast-path.

0 commit comments

Comments
 (0)