Skip to content

Commit ab1e51e

Browse files
committed
Fix sleep/resume and improve reliability
- Wait for T2 PCIe link re-training after S3 before MMIO access - Fix wake mailbox command (0x18 -> 0x1b to match macOS) - Reorder endpoint pause: drain pending output DMA before pausing - Handle T2 port connect/resume/suspend event notifications - Add command queue timeout (5s) to prevent infinite hangs - Add device link to NVMe function 0 for runtime PM ordering - Fix atomic counter underflow in transfer queue completions - Add NULL checks in URB enqueue/dequeue to prevent crashes
1 parent 2dfd47e commit ab1e51e

9 files changed

Lines changed: 178 additions & 49 deletions

File tree

apple_bce.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id)
103103

104104
bce_vhci_create(bce, &bce->vhci);
105105

106+
/* The T2 chip requires function 0 (NVMe) to be a bus master for DMA
107+
* on our function. Create a device link for runtime PM ordering.
108+
* (System S3 ordering is already handled by PCI function numbering.) */
109+
bce->pci0_link = device_link_add(&dev->dev, &bce->pci0->dev,
110+
DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
111+
if (!bce->pci0_link)
112+
dev_warn(&dev->dev, "apple-bce: failed to create device link to function 0\n");
113+
106114
return 0;
107115

108116
fail_ts:
@@ -243,6 +251,9 @@ static void apple_bce_remove(struct pci_dev *dev)
243251

244252
bce_vhci_destroy(&bce->vhci);
245253

254+
if (bce->pci0_link)
255+
device_link_del(bce->pci0_link);
256+
246257
bce_timestamp_stop(&bce->timestamp);
247258
#ifndef WITHOUT_NVME_PATCH
248259
pci_disable_device(bce->pci0);
@@ -356,6 +367,24 @@ static int apple_bce_resume(struct device *dev)
356367
{
357368
struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev));
358369
int status;
370+
int i;
371+
u16 vid;
372+
373+
/* Wait for T2 PCIe link to re-train after S3.
374+
* MMIO to the T2 BARs will hang the CPU if the link is down.
375+
* Config space reads go through the root port and return 0xFFFF safely.
376+
* Poll aggressively first (link usually retrains in ~100-200ms),
377+
* then back off to 50ms intervals. */
378+
for (i = 0; i < 120; i++) {
379+
pci_read_config_word(bce->pci, PCI_VENDOR_ID, &vid);
380+
if (vid == PCI_VENDOR_ID_APPLE)
381+
break;
382+
msleep(i < 40 ? 5 : 50);
383+
}
384+
if (vid != PCI_VENDOR_ID_APPLE) {
385+
pr_err("apple-bce: resume: T2 not accessible after timeout (vid=0x%04x)\n", vid);
386+
return -ENODEV;
387+
}
359388

360389
pci_set_master(bce->pci);
361390
pci_set_master(bce->pci0);

apple_bce.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
struct apple_bce_device {
1717
struct pci_dev *pci, *pci0;
18+
struct device_link *pci0_link;
1819
dev_t devt;
1920
struct device *dev;
2021
void __iomem *reg_mem_mb;

mailbox.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ enum bce_message_type {
2121
BCE_MB_SLEEP_NO_STATE = 0x14, // to-device
2222
BCE_MB_RESTORE_NO_STATE = 0x15, // to-device
2323
BCE_MB_SAVE_STATE_AND_SLEEP = 0x17, // to-device
24-
BCE_MB_RESTORE_STATE_AND_WAKE = 0x18, // to-device
24+
BCE_MB_RESTORE_STATE_AND_WAKE = 0x1b, // to-device (macOS: 0x6c00000000000000)
2525
BCE_MB_SAVE_STATE_AND_SLEEP_FAILURE = 0x19, // from-device
2626
BCE_MB_SAVE_RESTORE_STATE_COMPLETE = 0x1A, // from-device
2727
};

queue.c

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -250,18 +250,28 @@ static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct b
250250
return NULL;
251251

252252
spin_lock(&cmdq->lck);
253+
res->slot = cmdq->sq->tail;
253254
cmdq->tres[cmdq->sq->tail] = res;
254255
ret = bce_next_submission(cmdq->sq);
255256
return ret;
256257
}
257258

258-
static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
259+
static __always_inline int bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res)
259260
{
260261
bce_submit_to_device(cmdq->sq);
261262
spin_unlock(&cmdq->lck);
262263

263-
wait_for_completion(&res->cmpl);
264+
if (!wait_for_completion_timeout(&res->cmpl, msecs_to_jiffies(5000))) {
265+
pr_err("apple-bce: command queue timeout\n");
266+
spin_lock(&cmdq->lck);
267+
cmdq->tres[res->slot] = NULL;
268+
spin_unlock(&cmdq->lck);
269+
/* Reclaim the slot: advance head and wake any waiters */
270+
bce_notify_submission_complete(cmdq->sq);
271+
return -ETIMEDOUT;
272+
}
264273
mb();
274+
return 0;
265275
}
266276

267277
u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout)
@@ -285,7 +295,8 @@ u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg
285295
cmd->addr = cfg->addr;
286296
cmd->length = cfg->length;
287297

288-
bce_cmd_finish(cmdq, &res);
298+
if (bce_cmd_finish(cmdq, &res))
299+
return (u32) -1;
289300
return res.status;
290301
}
291302

@@ -298,7 +309,8 @@ u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
298309
cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE;
299310
cmd->flags = 0;
300311
cmd->qid = qid;
301-
bce_cmd_finish(cmdq, &res);
312+
if (bce_cmd_finish(cmdq, &res))
313+
return (u32) -1;
302314
return res.status;
303315
}
304316

@@ -311,7 +323,8 @@ u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid)
311323
cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE;
312324
cmd->flags = 0;
313325
cmd->qid = qid;
314-
bce_cmd_finish(cmdq, &res);
326+
if (bce_cmd_finish(cmdq, &res))
327+
return (u32) -1;
315328
return res.status;
316329
}
317330

queue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ struct bce_queue_cmdq_result_el {
5656
struct completion cmpl;
5757
u32 status;
5858
u64 result;
59+
u32 slot; /* queue slot index for O(1) timeout cleanup */
5960
};
6061
struct bce_queue_cmdq {
6162
struct bce_queue_sq *sq;

vhci/command.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ enum bce_vhci_command {
2727
BCE_VHCI_CMD_PORT_RESET = 0x14,
2828
BCE_VHCI_CMD_PORT_DISABLE = 0x15,
2929
BCE_VHCI_CMD_PORT_STATUS = 0x16,
30+
BCE_VHCI_CMD_PORT_CONNECT = 0x18, /* Undocumented - port connection notification from T2 */
3031

3132
BCE_VHCI_CMD_DEVICE_CREATE = 0x30,
3233
BCE_VHCI_CMD_DEVICE_DESTROY = 0x31,

vhci/transfer.c

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -180,21 +180,21 @@ static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq)
180180
*/
181181
if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */
182182
pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr);
183-
if (is_sq_out && atomic_dec_and_test(&q->sq_out_pending))
183+
if (is_sq_out && atomic_dec_if_positive(&q->sq_out_pending) == 0)
184184
wake_up(&q->sq_out_wait_queue);
185185
bce_notify_submission_complete(sq);
186186
continue;
187187
}
188188
if (list_empty(&q->endp->urb_list)) {
189189
pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr);
190-
if (is_sq_out && atomic_dec_and_test(&q->sq_out_pending))
190+
if (is_sq_out && atomic_dec_if_positive(&q->sq_out_pending) == 0)
191191
wake_up(&q->sq_out_wait_queue);
192192
continue;
193193
}
194194
pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr);
195195
urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list);
196196
bce_vhci_urb_transfer_completion(urb->hcpriv, c);
197-
if (is_sq_out && atomic_dec_and_test(&q->sq_out_pending))
197+
if (is_sq_out && atomic_dec_if_positive(&q->sq_out_pending) == 0)
198198
wake_up(&q->sq_out_wait_queue);
199199
bce_notify_submission_complete(sq);
200200
}
@@ -204,50 +204,66 @@ static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq)
204204
}
205205

206206
/* Timeout for waiting on pending output requests during pause */
207-
#define BCE_VHCI_PAUSE_TIMEOUT_MS 5000
207+
#define BCE_VHCI_PAUSE_TIMEOUT_MS 2000
208208

209209
int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q)
210210
{
211211
unsigned long flags;
212212
int status;
213213
int pending;
214214
long timeout;
215-
u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
215+
216+
pr_info("bce-vhci: [%02x] pause: starting (dev=%d)\n", q->endp_addr, q->dev_addr);
216217

217218
spin_lock_irqsave(&q->urb_lock, flags);
218219
q->active = false;
219220
spin_unlock_irqrestore(&q->urb_lock, flags);
220221
bce_vhci_transfer_queue_remove_pending(q);
221222

222-
if ((status = bce_vhci_cmd_endpoint_set_state(
223-
&q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state)))
224-
return status;
225-
if (q->state != BCE_VHCI_ENDPOINT_PAUSED)
226-
return -EINVAL;
227-
228-
if (q->sq_in)
229-
bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
230-
223+
/*
224+
* Wait for pending output transfers to COMPLETE before pausing/flushing.
225+
* This ensures commands like keyboard backlight off actually reach the T2
226+
* before we abort remaining transfers. Without this, the backlight command
227+
* gets aborted by flush and the keyboard stays lit during suspend.
228+
*/
231229
if (q->sq_out) {
232-
bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
233-
/*
234-
* Suspend/resume fix: Wait for all pending output DMA transfers to
235-
* complete before returning. This prevents use-after-free when the
236-
* queue is destroyed while transfers are still in flight.
237-
*/
238230
pending = atomic_read(&q->sq_out_pending);
231+
pr_info("bce-vhci: [%02x] pause: %d pending outputs, waiting for completion\n",
232+
q->endp_addr, pending);
239233
if (pending > 0) {
240234
timeout = wait_event_timeout(q->sq_out_wait_queue,
241235
atomic_read(&q->sq_out_pending) == 0,
242236
msecs_to_jiffies(BCE_VHCI_PAUSE_TIMEOUT_MS));
243237
if (timeout == 0) {
244238
pending = atomic_read(&q->sq_out_pending);
245239
if (pending > 0)
246-
pr_warn("bce-vhci: [%02x] Timeout waiting for %d pending output requests\n",
240+
pr_warn("bce-vhci: [%02x] pause: TIMEOUT waiting for %d pending outputs\n",
247241
q->endp_addr, pending);
242+
} else {
243+
pr_info("bce-vhci: [%02x] pause: pending outputs completed\n", q->endp_addr);
248244
}
249245
}
250246
}
247+
248+
pr_info("bce-vhci: [%02x] pause: setting endpoint state to PAUSED\n", q->endp_addr);
249+
if ((status = bce_vhci_cmd_endpoint_set_state(
250+
&q->vhci->cq, q->dev_addr, q->endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state))) {
251+
pr_err("bce-vhci: [%02x] pause: set_state failed with %d\n", q->endp_addr, status);
252+
return status;
253+
}
254+
if (q->state != BCE_VHCI_ENDPOINT_PAUSED) {
255+
pr_err("bce-vhci: [%02x] pause: unexpected state %d\n", q->endp_addr, q->state);
256+
return -EINVAL;
257+
}
258+
259+
pr_info("bce-vhci: [%02x] pause: flushing queues\n", q->endp_addr);
260+
if (q->sq_in)
261+
bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid);
262+
263+
if (q->sq_out)
264+
bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid);
265+
266+
pr_info("bce-vhci: [%02x] pause: done\n", q->endp_addr);
251267
return 0;
252268
}
253269

@@ -259,10 +275,9 @@ int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q)
259275
int status;
260276
struct urb *urb, *urbt;
261277
struct bce_vhci_urb *vurb;
262-
u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F);
263278

264279
if ((status = bce_vhci_cmd_endpoint_set_state(
265-
&q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state)))
280+
&q->vhci->cq, q->dev_addr, q->endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state)))
266281
return status;
267282
if (q->state != BCE_VHCI_ENDPOINT_ACTIVE)
268283
return -EINVAL;

0 commit comments

Comments
 (0)