diff --git a/Makefile b/Makefile index a6a656f..ca48dc7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,6 @@ -modname := apple-bce -obj-m += $(modname).o +obj-m += apple-bce.o -apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o +apple-bce-objs := apple_bce.o mailbox.o queue.o queue_dma.o vhci/vhci.o vhci/queue.o vhci/transfer.o audio/audio.o audio/protocol.o audio/protocol_bce.o audio/pcm.o video/video.o video/protocol.o video/encoder.o MY_CFLAGS += -DWITHOUT_NVME_PATCH #MY_CFLAGS += -g -DDEBUG diff --git a/apple_bce.c b/apple_bce.c index 4fd2415..19e3eb3 100644 --- a/apple_bce.c +++ b/apple_bce.c @@ -49,7 +49,7 @@ static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id) bce->devt = bce_chrdev; bce->dev = device_create(bce_class, &dev->dev, bce->devt, NULL, "apple-bce"); if (IS_ERR_OR_NULL(bce->dev)) { - status = PTR_ERR(bce_class); + status = PTR_ERR(bce->dev); goto fail; } @@ -101,10 +101,26 @@ static int apple_bce_probe(struct pci_dev *dev, const struct pci_device_id *id) global_bce = bce; - bce_vhci_create(bce, &bce->vhci); + if ((status = bce_vhci_create(bce, &bce->vhci))) { + pr_err("apple-bce: VHCI creation failed\n"); + goto fail_vhci; + } + + if ((status = bce_ave_create(bce))) + pr_warn("apple-bce: AVE encoder init failed (%d), continuing without video\n", status); + + /* The T2 chip requires function 0 (NVMe) to be a bus master for DMA + * on our function. Create a device link for runtime PM ordering. + * (System S3 ordering is already handled by PCI function numbering.) 
*/ + bce->pci0_link = device_link_add(&dev->dev, &bce->pci0->dev, + DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME); + if (!bce->pci0_link) + dev_warn(&dev->dev, "apple-bce: failed to create device link to function 0\n"); return 0; +fail_vhci: + bce_free_command_queues(bce); fail_ts: bce_timestamp_stop(&bce->timestamp); #ifndef WITHOUT_NVME_PATCH @@ -158,14 +174,16 @@ static int bce_create_command_queues(struct apple_bce_device *bce) } bce_get_cq_memcfg(bce->cmd_cq, cfg); if ((status = bce_register_command_queue(bce, cfg, false))) - goto err; + goto err_cfg; bce_get_sq_memcfg(bce->cmd_cmdq->sq, bce->cmd_cq, cfg); if ((status = bce_register_command_queue(bce, cfg, true))) - goto err; + goto err_cfg; kfree(cfg); return 0; +err_cfg: + kfree(cfg); err: if (bce->cmd_cq) bce_free_cq(bce, bce->cmd_cq); @@ -180,6 +198,7 @@ static void bce_free_command_queues(struct apple_bce_device *bce) bce_free_cmdq(bce, bce->cmd_cmdq); bce->cmd_cq = NULL; bce->queues[0] = NULL; + bce->queues[1] = NULL; } static irqreturn_t bce_handle_mb_irq(int irq, void *dev) @@ -241,8 +260,12 @@ static void apple_bce_remove(struct pci_dev *dev) struct apple_bce_device *bce = pci_get_drvdata(dev); bce->is_being_removed = true; + bce_ave_destroy(); bce_vhci_destroy(&bce->vhci); + if (bce->pci0_link) + device_link_del(bce->pci0_link); + bce_timestamp_stop(&bce->timestamp); #ifndef WITHOUT_NVME_PATCH pci_disable_device(bce->pci0); @@ -349,6 +372,11 @@ static int apple_bce_suspend(struct device *dev) if ((status = bce_save_state_and_sleep(bce))) return status; + /* Disable DMA IRQ after T2 is asleep. On resume, PCI core powers the + * device back on before apple_bce_resume() runs — the disabled IRQ + * prevents stale completion processing during that transition window. 
*/ + disable_irq(pci_irq_vector(bce->pci, 4)); + return 0; } @@ -356,15 +384,42 @@ static int apple_bce_resume(struct device *dev) { struct apple_bce_device *bce = pci_get_drvdata(to_pci_dev(dev)); int status; + int i; + u16 vid; + + /* Wait for T2 PCIe link to re-train after S3. + * MMIO to the T2 BARs will hang the CPU if the link is down. + * Config space reads go through the root port and return 0xFFFF safely. + * Poll aggressively first (link usually retrains in ~100-200ms), + * then back off to 50ms intervals. */ + for (i = 0; i < 120; i++) { + pci_read_config_word(bce->pci, PCI_VENDOR_ID, &vid); + if (vid == PCI_VENDOR_ID_APPLE) + break; + if (i < 40) + usleep_range(2000, 3000); + else + msleep(50); + } + if (vid != PCI_VENDOR_ID_APPLE) { + pr_err("apple-bce: resume: T2 not accessible after timeout (vid=0x%04x)\n", vid); + enable_irq(pci_irq_vector(bce->pci, 4)); + return -ENODEV; + } pci_set_master(bce->pci); pci_set_master(bce->pci0); - if ((status = bce_restore_state_and_wake(bce))) + if ((status = bce_restore_state_and_wake(bce))) { + enable_irq(pci_irq_vector(bce->pci, 4)); return status; + } bce_timestamp_start(&bce->timestamp, false); + /* Re-enable DMA IRQ now that T2 state is restored and bus mastering is on. 
*/ + enable_irq(pci_irq_vector(bce->pci, 4)); + return 0; } @@ -418,7 +473,6 @@ static int __init apple_bce_module_init(void) return 0; fail_drv: - pci_unregister_driver(&apple_bce_pci_driver); fail_class: class_destroy(bce_class); fail_chrdev: diff --git a/apple_bce.h b/apple_bce.h index 58dbeff..d66020c 100644 --- a/apple_bce.h +++ b/apple_bce.h @@ -15,6 +15,7 @@ struct apple_bce_device { struct pci_dev *pci, *pci0; + struct device_link *pci0_link; dev_t devt; struct device *dev; void __iomem *reg_mem_mb; @@ -28,7 +29,6 @@ struct apple_bce_device { struct bce_queue_cmdq *cmd_cmdq; struct bce_queue_sq *int_sq_list[BCE_MAX_QUEUE_COUNT]; bool is_being_removed; - dma_addr_t saved_data_dma_addr; void *saved_data_dma_ptr; size_t saved_data_dma_size; @@ -38,4 +38,7 @@ struct apple_bce_device { extern struct apple_bce_device *global_bce; +int bce_ave_create(struct apple_bce_device *bce); +void bce_ave_destroy(void); + #endif //APPLE_BCE_H diff --git a/audio/audio.c b/audio/audio.c index 16bd50f..85492ab 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -57,7 +57,7 @@ static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id) aaudio->devt = aaudio_chrdev; aaudio->dev = device_create(aaudio_class, &dev->dev, aaudio->devt, NULL, "aaudio"); if (IS_ERR_OR_NULL(aaudio->dev)) { - status = PTR_ERR(aaudio_class); + status = PTR_ERR(aaudio->dev); goto fail; } device_link_add(aaudio->dev, aaudio->bce->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER); @@ -73,7 +73,7 @@ static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_info(aaudio->dev, "aaudio: bs len = %llx\n", pci_resource_len(dev, 0)); aaudio->reg_mem_bs_dma = pci_resource_start(dev, 0); - aaudio->reg_mem_bs = pci_iomap(dev, 0, 0); + aaudio->reg_mem_bs = pci_iomap_wc(dev, 0, 0); aaudio->reg_mem_cfg = pci_iomap(dev, 4, 0); aaudio->reg_mem_gpr = (u32 __iomem *) ((u8 __iomem *) aaudio->reg_mem_cfg + 0xC000); @@ -90,7 +90,7 @@ static int aaudio_probe(struct pci_dev *dev, const 
struct pci_device_id *id) if (snd_card_new(aaudio->dev, aaudio_alsa_index, aaudio_alsa_id, THIS_MODULE, 0, &aaudio->card)) { dev_err(&dev->dev, "aaudio: Failed to create ALSA card\n"); - goto fail; + goto fail_bce; } strcpy(aaudio->card->shortname, "Apple T2 Audio"); @@ -111,7 +111,7 @@ static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id) if ((status = aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_ON))) { dev_err(&dev->dev, "Failed to set remote access\n"); - return status; + goto fail_snd; } if (snd_card_register(aaudio->card)) { @@ -133,15 +133,18 @@ static int aaudio_probe(struct pci_dev *dev, const struct pci_device_id *id) fail_snd: snd_card_free(aaudio->card); +fail_bce: + aaudio_bce_free(aaudio); fail: - if (aaudio && aaudio->dev) - device_destroy(aaudio_class, aaudio->devt); - kfree(aaudio); - - if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs)) - pci_iounmap(dev, aaudio->reg_mem_bs); - if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) - pci_iounmap(dev, aaudio->reg_mem_cfg); + if (aaudio) { + if (!IS_ERR_OR_NULL(aaudio->reg_mem_bs)) + pci_iounmap(dev, aaudio->reg_mem_bs); + if (!IS_ERR_OR_NULL(aaudio->reg_mem_cfg)) + pci_iounmap(dev, aaudio->reg_mem_cfg); + if (aaudio->dev) + device_destroy(aaudio_class, aaudio->devt); + kfree(aaudio); + } pci_release_regions(dev); pci_disable_device(dev); @@ -164,6 +167,7 @@ static void aaudio_remove(struct pci_dev *dev) list_del(&sdev->list); aaudio_free_dev(sdev); } + aaudio_bce_free(aaudio); pci_iounmap(dev, aaudio->reg_mem_bs); pci_iounmap(dev, aaudio->reg_mem_cfg); device_destroy(aaudio_class, aaudio->devt); @@ -176,6 +180,32 @@ static void aaudio_remove(struct pci_dev *dev) static int aaudio_suspend(struct device *dev) { struct aaudio_device *aaudio = pci_get_drvdata(to_pci_dev(dev)); + struct aaudio_subdevice *sdev; + size_t i; + + /* Stop running streams and tell ALSA they are suspended. 
Without this, + * ALSA keeps polling aaudio_pcm_pointer() after resume while + * remote_timestamp is stale, producing wildly wrong position values + * ("invalid position" errors). */ + list_for_each_entry(sdev, &aaudio->subdevice_list, list) { + bool stopped_io = false; + for (i = 0; i < sdev->out_stream_cnt; i++) { + if (sdev->out_streams[i].started) { + stopped_io = true; + sdev->out_streams[i].started = 0; + } + } + for (i = 0; i < sdev->in_stream_cnt; i++) { + if (sdev->in_streams[i].started) { + stopped_io = true; + sdev->in_streams[i].started = 0; + } + } + if (stopped_io) + aaudio_cmd_stop_io(sdev->a, sdev->dev_id); + if (sdev->pcm) + snd_pcm_suspend_all(sdev->pcm); + } if (aaudio_cmd_set_remote_access(aaudio, AAUDIO_REMOTE_ACCESS_OFF)) dev_warn(aaudio->dev, "Failed to reset remote access\n"); @@ -259,7 +289,7 @@ static void aaudio_init_dev(struct aaudio_device *a, aaudio_device_id_t dev_id) sdev->dev_id = dev_id; sdev->buf_id = AAUDIO_BUFFER_ID_NONE; strncpy(sdev->uid, uid, uid_len); - sdev->uid[uid_len + 1] = '\0'; + sdev->uid[uid_len] = '\0'; if (aaudio_cmd_get_primitive_property(a, dev_id, dev_id, AAUDIO_PROP(AAUDIO_PROP_SCOPE_INPUT, AAUDIO_PROP_LATENCY, 0), NULL, 0, &sdev->in_latency, sizeof(u32))) @@ -342,8 +372,14 @@ static void aaudio_free_dev(struct aaudio_subdevice *sdev) for (i = 0; i < sdev->in_stream_cnt; i++) { if (sdev->in_streams[i].alsa_hw_desc) kfree(sdev->in_streams[i].alsa_hw_desc); - if (sdev->in_streams[i].buffers) + if (sdev->in_streams[i].buffers) { + if (sdev->in_streams[i].host_allocated) + dma_free_coherent(&sdev->a->pci->dev, + sdev->in_streams[i].buffers[0].size, + sdev->in_streams[i].buffers[0].ptr, + sdev->in_streams[i].buffers[0].dma_addr); kfree(sdev->in_streams[i].buffers); + } } for (i = 0; i < sdev->out_stream_cnt; i++) { if (sdev->out_streams[i].alsa_hw_desc) @@ -426,9 +462,13 @@ static int aaudio_init_bs(struct aaudio_device *a) list_for_each_entry(sdev, &a->subdevice_list, list) { if (sdev->buf_id != 
AAUDIO_BUFFER_ID_NONE) continue; + if (i >= ARRAY_SIZE(a->bs->devices)) { + dev_warn(a->dev, "aaudio: Too many devices, skipping %s\n", sdev->uid); + break; + } sdev->buf_id = i; dev_info(a->dev, "aaudio: Created device %i %s\n", i, sdev->uid); - strcpy(a->bs->devices[i].name, sdev->uid); + strscpy(a->bs->devices[i].name, sdev->uid, sizeof(a->bs->devices[i].name)); a->bs->devices[i].num_input_streams = 0; a->bs->devices[i].num_output_streams = 0; a->bs->num_devices = ++i; @@ -487,7 +527,7 @@ static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_st size_t size; dma_addr_t dma_addr; void *dma_ptr; - size = strm->desc.bytes_per_packet * 16640; + size = strm->desc.bytes_per_packet * 4096; dma_ptr = dma_alloc_coherent(&a->pci->dev, size, &dma_addr, GFP_KERNEL); if (!dma_ptr) { dev_err(a->dev, "dma_alloc_coherent failed\n"); @@ -508,6 +548,7 @@ static void aaudio_init_bs_stream_host(struct aaudio_device *a, struct aaudio_st strm->buffers[0].dma_addr = dma_addr; strm->buffers[0].ptr = dma_ptr; strm->buffers[0].size = size; + strm->host_allocated = true; strm->alsa_hw_desc = kmalloc(sizeof(struct snd_pcm_hardware), GFP_KERNEL); if (aaudio_create_hw_info(&strm->desc, strm->alsa_hw_desc, strm->buffers[0].size)) { @@ -597,6 +638,8 @@ void aaudio_handle_prop_change(struct aaudio_device *a, struct aaudio_msg *msg) * is not possible when we are in the reply parsing code's context. 
*/ struct aaudio_prop_change_work_struct *work; work = kmalloc(sizeof(struct aaudio_prop_change_work_struct), GFP_KERNEL); + if (!work) + return; work->a = a; INIT_WORK(&work->ws, aaudio_handle_prop_change_work); aaudio_msg_read_property_changed(msg, &work->dev, &work->obj, &work->prop); @@ -617,7 +660,8 @@ void aaudio_handle_cmd_timestamp(struct aaudio_device *a, struct aaudio_msg *msg dev_dbg(a->dev, "Received timestamp update for dev=%llx ts=%llx seed=%llx\n", devid, timestamp, update_seed); sdev = aaudio_find_dev_by_dev_id(a, devid); - aaudio_handle_timestamp(sdev, time_os, timestamp); + if (sdev) + aaudio_handle_timestamp(sdev, time_os, timestamp); aaudio_send_cmd_response(a, &sctx, msg, aaudio_msg_write_update_timestamp_response); @@ -679,7 +723,6 @@ int aaudio_module_init(void) return 0; fail_drv: - pci_unregister_driver(&aaudio_pci_driver); fail_class: class_destroy(aaudio_class); fail_chrdev: diff --git a/audio/audio.h b/audio/audio.h index ebbfaa1..11aefba 100644 --- a/audio/audio.h +++ b/audio/audio.h @@ -61,12 +61,14 @@ struct aaudio_stream { struct aaudio_apple_description desc; struct snd_pcm_hardware *alsa_hw_desc; u32 latency; + bool host_allocated; bool waiting_for_first_ts; ktime_t remote_timestamp; snd_pcm_sframes_t frame_min; int started; + unsigned int elapsed_count; }; struct aaudio_subdevice { struct aaudio_device *a; diff --git a/audio/pcm.c b/audio/pcm.c index 1026e10..af5f04c 100644 --- a/audio/pcm.c +++ b/audio/pcm.c @@ -89,8 +89,8 @@ int aaudio_create_hw_info(struct aaudio_apple_description *desc, struct snd_pcm_ alsa_hw->channels_max = desc->channels_per_frame; alsa_hw->buffer_bytes_max = buf_size; alsa_hw->period_bytes_min = desc->bytes_per_packet; - alsa_hw->period_bytes_max = desc->bytes_per_packet; - alsa_hw->periods_min = (uint) (buf_size / desc->bytes_per_packet); + alsa_hw->period_bytes_max = buf_size / 2; + alsa_hw->periods_min = 2; alsa_hw->periods_max = (uint) (buf_size / desc->bytes_per_packet); 
pr_debug("aaudio_create_hw_info: format = %llu, rate = %u/%u. channels = %u, periods = %u, period size = %lu\n", alsa_hw->formats, alsa_hw->rate_min, alsa_hw->rates, alsa_hw->channels_min, alsa_hw->periods_min, @@ -109,8 +109,14 @@ static struct aaudio_stream *aaudio_pcm_stream(struct snd_pcm_substream *substre static int aaudio_pcm_open(struct snd_pcm_substream *substream) { + struct aaudio_stream *stream = aaudio_pcm_stream(substream); pr_debug("aaudio_pcm_open\n"); - substream->runtime->hw = *aaudio_pcm_stream(substream)->alsa_hw_desc; + substream->runtime->hw = *stream->alsa_hw_desc; + + /* Period size must be a multiple of the hardware packet size */ + snd_pcm_hw_constraint_step(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + stream->desc.frames_per_packet); return 0; } @@ -150,29 +156,31 @@ static void aaudio_pcm_start(struct snd_pcm_substream *substream) { struct aaudio_subdevice *sdev = snd_pcm_substream_chip(substream); struct aaudio_stream *stream = aaudio_pcm_stream(substream); - void *buf; + void *buf = NULL; size_t s; ktime_t time_start, time_end; - bool back_buffer; time_start = ktime_get(); - back_buffer = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK); - - if (back_buffer) { - s = frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr); - buf = kmalloc(s, GFP_KERNEL); - memcpy_fromio(buf, substream->runtime->dma_area, s); - time_end = ktime_get(); - pr_debug("aaudio: Backed up the buffer in %lluns [%li]\n", ktime_to_ns(time_end - time_start), - substream->runtime->control->appl_ptr); - } - stream->waiting_for_first_ts = true; stream->frame_min = stream->latency; + stream->elapsed_count = 0; - aaudio_cmd_start_io(sdev->a, sdev->dev_id); - if (back_buffer) - memcpy_toio(substream->runtime->dma_area, buf, s); + s = frames_to_bytes(substream->runtime, substream->runtime->control->appl_ptr); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + /* Backup MMIO buffer before start_io (which may clear it), + * then 
restore the pre-filled audio data afterwards. */ + buf = kmalloc(s, GFP_KERNEL); + if (buf) + memcpy_fromio(buf, substream->runtime->dma_area, s); + aaudio_cmd_start_io(sdev->a, sdev->dev_id); + if (buf) { + memcpy_toio(substream->runtime->dma_area, buf, s); + kfree(buf); + } + } else { + aaudio_cmd_start_io(sdev->a, sdev->dev_id); + } time_end = ktime_get(); pr_debug("aaudio: Started the audio device in %lluns\n", ktime_to_ns(time_end - time_start)); @@ -209,15 +217,15 @@ static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream) snd_pcm_sframes_t frames; snd_pcm_sframes_t buffer_time_length; - if (!stream->started || stream->waiting_for_first_ts) { - pr_warn("aaudio_pcm_pointer while not started\n"); + if (!stream->started || stream->waiting_for_first_ts) return 0; - } /* Approximate the pointer based on the last received timestamp */ time_from_start = ktime_get_boottime() - stream->remote_timestamp; + if (ktime_to_ns(time_from_start) < 0) + return 0; buffer_time_length = NSEC_PER_SEC * substream->runtime->buffer_size / substream->runtime->rate; - frames = (ktime_to_ns(time_from_start) % buffer_time_length) * substream->runtime->buffer_size / buffer_time_length; + frames = (ktime_to_ns(time_from_start) % buffer_time_length) * (snd_pcm_sframes_t)substream->runtime->buffer_size / buffer_time_length; if (ktime_to_ns(time_from_start) < buffer_time_length) { if (frames < stream->frame_min) frames = stream->frame_min; @@ -235,6 +243,28 @@ static snd_pcm_uframes_t aaudio_pcm_pointer(struct snd_pcm_substream *substream) return (snd_pcm_uframes_t) frames; } +static int aaudio_pcm_mmap(struct snd_pcm_substream *substream, + struct vm_area_struct *vma) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + + /* Use write-combining for playback: PipeWire's stores are batched into + * efficient PCI transactions instead of individual uncached writes. 
*/ + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + return vm_iomap_memory(vma, runtime->dma_addr, runtime->dma_bytes); +} + +static int aaudio_pcm_ack(struct snd_pcm_substream *substream) +{ + /* Flush write-combine buffers so the T2 sees fresh audio data. */ + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + wmb(); + return 0; +} + static struct snd_pcm_ops aaudio_pcm_ops = { .open = aaudio_pcm_open, .close = aaudio_pcm_close, @@ -244,7 +274,8 @@ static struct snd_pcm_ops aaudio_pcm_ops = { .prepare = aaudio_pcm_prepare, .trigger = aaudio_pcm_trigger, .pointer = aaudio_pcm_pointer, - .mmap = snd_pcm_lib_mmap_iomem + .mmap = aaudio_pcm_mmap, + .ack = aaudio_pcm_ack }; int aaudio_create_pcm(struct aaudio_subdevice *sdev) @@ -292,6 +323,19 @@ static void aaudio_handle_stream_timestamp(struct snd_pcm_substream *substream, return; } snd_pcm_stream_unlock_irqrestore(substream, flags); + + /* Only fire period_elapsed once per period's worth of hardware packets. + * Count timestamps rather than using wall-clock time so that bursty + * message delivery (e.g. after a scheduling delay) doesn't swallow + * period notifications. 
*/ + if (substream->runtime->period_size) { + unsigned int packets_per_period = substream->runtime->period_size / + stream->desc.frames_per_packet; + stream->elapsed_count++; + if (packets_per_period > 1 && stream->elapsed_count < packets_per_period) + return; + stream->elapsed_count = 0; + } snd_pcm_period_elapsed(substream); } diff --git a/audio/protocol_bce.c b/audio/protocol_bce.c index 28f2dfd..2dda59b 100644 --- a/audio/protocol_bce.c +++ b/audio/protocol_bce.c @@ -28,6 +28,23 @@ int aaudio_bce_init(struct aaudio_device *dev) return 0; } +void aaudio_bce_free(struct aaudio_device *dev) +{ + struct aaudio_bce *bce = &dev->bcem; + if (bce->qin.data) + dma_free_coherent(&dev->bce->pci->dev, bce->qin.el_size * bce->qin.el_count, + bce->qin.data, bce->qin.dma_addr); + if (bce->qin.sq) + bce_destroy_sq(dev->bce, bce->qin.sq); + if (bce->qout.data) + dma_free_coherent(&dev->bce->pci->dev, bce->qout.el_size * bce->qout.el_count, + bce->qout.data, bce->qout.dma_addr); + if (bce->qout.sq) + bce_destroy_sq(dev->bce, bce->qout.sq); + if (bce->cq) + bce_destroy_cq(dev->bce, bce->cq); +} + int aaudio_bce_queue_init(struct aaudio_device *dev, struct aaudio_bce_queue *q, const char *name, int direction, bce_sq_completion cfn) { @@ -129,6 +146,10 @@ static void aaudio_handle_reply(struct aaudio_bce *b, struct aaudio_msg *reply) pr_err("aaudio_handle_reply: Tag parse failed: %.4s\n", tag); return; } + if (tagn < 0 || tagn >= AAUDIO_BCE_QUEUE_TAG_COUNT) { + pr_err("aaudio_handle_reply: Tag out of range: %d\n", tagn); + return; + } spin_lock_irqsave(&b->spinlock, irq_flags); entry = b->pending_entries[tagn]; diff --git a/audio/protocol_bce.h b/audio/protocol_bce.h index 14d26c0..27487b8 100644 --- a/audio/protocol_bce.h +++ b/audio/protocol_bce.h @@ -41,6 +41,7 @@ struct aaudio_send_ctx { }; int aaudio_bce_init(struct aaudio_device *dev); +void aaudio_bce_free(struct aaudio_device *dev); int __aaudio_send_prepare(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, char *tag); 
void __aaudio_send(struct aaudio_bce *b, struct aaudio_send_ctx *ctx); int __aaudio_send_cmd_sync(struct aaudio_bce *b, struct aaudio_send_ctx *ctx, struct aaudio_msg *reply); diff --git a/queue.c b/queue.c index 9482861..11f3ff2 100644 --- a/queue.c +++ b/queue.c @@ -8,6 +8,8 @@ struct bce_queue_cq *bce_alloc_cq(struct apple_bce_device *dev, int qid, u32 el_ { struct bce_queue_cq *q; q = kzalloc(sizeof(struct bce_queue_cq), GFP_KERNEL); + if (!q) + return NULL; q->qid = qid; q->type = BCE_QUEUE_CQ; q->el_count = el_count; @@ -102,6 +104,8 @@ struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_ { struct bce_queue_sq *q; q = kzalloc(sizeof(struct bce_queue_sq), GFP_KERNEL); + if (!q) + return NULL; q->qid = qid; q->type = BCE_QUEUE_SQ; q->el_size = el_size; @@ -115,8 +119,11 @@ struct bce_queue_sq *bce_alloc_sq(struct apple_bce_device *dev, int qid, u32 el_ atomic_set(&q->available_commands, el_count - 1); init_completion(&q->available_command_completion); atomic_set(&q->available_command_completion_waiting_count, 0); - if (!q->data) { + if (!q->data || !q->completion_data) { pr_err("DMA queue memory alloc failed\n"); + if (q->data) + dma_free_coherent(&dev->pci->dev, el_count * el_size, q->data, q->dma_handle); + kfree(q->completion_data); kfree(q); return NULL; } @@ -136,6 +143,7 @@ void bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, struct void bce_free_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq) { dma_free_coherent(&dev->pci->dev, sq->el_count * sq->el_size, sq->data, sq->dma_handle); + kfree(sq->completion_data); kfree(sq); } @@ -154,6 +162,8 @@ int bce_reserve_submission(struct bce_queue_sq *sq, unsigned long *timeout) return 0; } +EXPORT_SYMBOL_GPL(bce_reserve_submission); + void bce_cancel_submission_reservation(struct bce_queue_sq *sq) { atomic_inc(&sq->available_commands); @@ -165,12 +175,14 @@ void *bce_next_submission(struct bce_queue_sq *sq) sq->tail = (sq->tail + 1) % sq->el_count; return 
ret; } +EXPORT_SYMBOL_GPL(bce_next_submission); void bce_submit_to_device(struct bce_queue_sq *sq) { mb(); iowrite32(sq->tail, (u32 *) ((u8 *) sq->reg_mem_dma + REG_DOORBELL_BASE) + sq->qid); } +EXPORT_SYMBOL_GPL(bce_submit_to_device); void bce_notify_submission_complete(struct bce_queue_sq *sq) { @@ -180,6 +192,7 @@ void bce_notify_submission_complete(struct bce_queue_sq *sq) complete(&sq->available_command_completion); } } +EXPORT_SYMBOL_GPL(bce_notify_submission_complete); void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t addr, size_t size) { @@ -187,6 +200,7 @@ void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t add element->length = size; element->segl_addr = element->segl_length = 0; } +EXPORT_SYMBOL_GPL(bce_set_submission_single); static void bce_cmdq_completion(struct bce_queue_sq *q); @@ -194,6 +208,8 @@ struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 { struct bce_queue_cmdq *q; q = kzalloc(sizeof(struct bce_queue_cmdq), GFP_KERNEL); + if (!q) + return NULL; q->sq = bce_alloc_sq(dev, qid, BCE_CMD_SIZE, el_count, bce_cmdq_completion, q); if (!q->sq) { kfree(q); @@ -202,6 +218,14 @@ struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 spin_lock_init(&q->lck); q->tres = kzalloc(sizeof(struct bce_queue_cmdq_result_el*) * el_count, GFP_KERNEL); if (!q->tres) { + bce_free_sq(dev, q->sq); + kfree(q); + return NULL; + } + q->slot_gen = kzalloc(sizeof(u32) * el_count, GFP_KERNEL); + if (!q->slot_gen) { + kfree(q->tres); + bce_free_sq(dev, q->sq); kfree(q); return NULL; } @@ -211,6 +235,7 @@ struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq) { bce_free_sq(dev, cmdq->sq); + kfree(cmdq->slot_gen); kfree(cmdq->tres); kfree(cmdq); } @@ -224,13 +249,14 @@ void bce_cmdq_completion(struct bce_queue_sq *q) spin_lock(&cmdq->lck); while ((result = 
bce_next_completion(q))) { el = cmdq->tres[cmdq->sq->head]; - if (el) { + if (el && el->generation == cmdq->slot_gen[cmdq->sq->head]) { el->result = result->result; el->status = result->status; mb(); complete(&el->cmpl); } else { - pr_err("apple-bce: Unexpected command queue completion\n"); + /* Slot was NULLed by a timeout or generation mismatch — discard */ + pr_debug("apple-bce: discarding late command completion\n"); } cmdq->tres[cmdq->sq->head] = NULL; bce_notify_submission_complete(q); @@ -250,28 +276,42 @@ static __always_inline void *bce_cmd_start(struct bce_queue_cmdq *cmdq, struct b return NULL; spin_lock(&cmdq->lck); + res->slot = cmdq->sq->tail; + res->generation = cmdq->slot_gen[cmdq->sq->tail]; cmdq->tres[cmdq->sq->tail] = res; ret = bce_next_submission(cmdq->sq); return ret; } -static __always_inline void bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res) +static __always_inline int bce_cmd_finish(struct bce_queue_cmdq *cmdq, struct bce_queue_cmdq_result_el *res) { bce_submit_to_device(cmdq->sq); spin_unlock(&cmdq->lck); - wait_for_completion(&res->cmpl); + if (!wait_for_completion_timeout(&res->cmpl, msecs_to_jiffies(5000))) { + pr_err("apple-bce: command queue timeout (slot %u)\n", res->slot); + spin_lock(&cmdq->lck); + cmdq->tres[res->slot] = NULL; + cmdq->slot_gen[res->slot]++; + spin_unlock(&cmdq->lck); + /* Reclaim the slot so the queue doesn't deadlock. If T2 sends + * a late completion, bce_cmdq_completion will find tres[head]==NULL + * and discard it. 
*/ + bce_notify_submission_complete(cmdq->sq); + return -ETIMEDOUT; + } mb(); + return 0; } -u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, bool isdirout) +u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, u16 flags) { struct bce_queue_cmdq_result_el res; struct bce_cmdq_register_memory_queue_cmd *cmd = bce_cmd_start(cmdq, &res); if (!cmd) return (u32) -1; cmd->cmd = BCE_CMD_REGISTER_MEMORY_QUEUE; - cmd->flags = (u16) ((name ? 2 : 0) | (isdirout ? 1 : 0)); + cmd->flags = flags; cmd->qid = cfg->qid; cmd->el_count = cfg->el_count; cmd->vector_or_cq = cfg->vector_or_cq; @@ -285,7 +325,8 @@ u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg cmd->addr = cfg->addr; cmd->length = cfg->length; - bce_cmd_finish(cmdq, &res); + if (bce_cmd_finish(cmdq, &res)) + return (u32) -1; return res.status; } @@ -298,7 +339,8 @@ u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid) cmd->cmd = BCE_CMD_UNREGISTER_MEMORY_QUEUE; cmd->flags = 0; cmd->qid = qid; - bce_cmd_finish(cmdq, &res); + if (bce_cmd_finish(cmdq, &res)) + return (u32) -1; return res.status; } @@ -311,9 +353,11 @@ u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid) cmd->cmd = BCE_CMD_FLUSH_MEMORY_QUEUE; cmd->flags = 0; cmd->qid = qid; - bce_cmd_finish(cmdq, &res); + if (bce_cmd_finish(cmdq, &res)) + return (u32) -1; return res.status; } +EXPORT_SYMBOL_GPL(bce_cmd_flush_memory_queue); struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count) @@ -331,7 +375,7 @@ struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count) if (!cq) return NULL; bce_get_cq_memcfg(cq, &cfg); - if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, false) != 0) { + if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, NULL, 0) != 0) { pr_err("apple-bce: CQ registration failed (%i)", qid); bce_free_cq(dev, cq); #if LINUX_VERSION_CODE < 
KERNEL_VERSION(6,18,0) @@ -345,12 +389,15 @@ struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count) return cq; } +EXPORT_SYMBOL_GPL(bce_create_cq); + struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count, int direction, bce_sq_completion compl, void *userdata) { struct bce_queue_sq *sq; struct bce_queue_memcfg cfg; int qid; + u16 flags; if (cq == NULL) return NULL; /* cq can not be null */ if (name == NULL) @@ -368,7 +415,8 @@ struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queu if (!sq) return NULL; bce_get_sq_memcfg(sq, cq, &cfg); - if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, direction != DMA_FROM_DEVICE) != 0) { + flags = (u16) ((name ? 2 : 0) | ((direction != DMA_FROM_DEVICE) ? 1 : 0)); + if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, flags) != 0) { pr_err("apple-bce: SQ registration failed (%i)", qid); bce_free_sq(dev, sq); #if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0) @@ -384,6 +432,45 @@ struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queu return sq; } +struct bce_queue_sq *bce_create_sq_with_flags(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, + u32 el_count, u16 flags, bce_sq_completion compl, void *userdata) +{ + struct bce_queue_sq *sq; + struct bce_queue_memcfg cfg; + int qid; + if (cq == NULL) + return NULL; + if (name == NULL) + return NULL; +#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0) + qid = ida_simple_get(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX, GFP_KERNEL); +#else + qid = ida_alloc_range(&dev->queue_ida, BCE_QUEUE_USER_MIN, BCE_QUEUE_USER_MAX - 1, GFP_KERNEL); +#endif + if (qid < 0) + return NULL; + sq = bce_alloc_sq(dev, qid, sizeof(struct bce_qe_submission), el_count, compl, userdata); + if (!sq) + return NULL; + bce_get_sq_memcfg(sq, cq, &cfg); + if (bce_cmd_register_queue(dev->cmd_cmdq, &cfg, name, flags) != 0) { + pr_err("apple-bce: 
SQ registration failed (%i)", qid); + bce_free_sq(dev, sq); +#if LINUX_VERSION_CODE < KERNEL_VERSION(6,18,0) + ida_simple_remove(&dev->queue_ida, (uint) qid); +#else + ida_free(&dev->queue_ida, (uint) qid); +#endif + return NULL; + } + spin_lock(&dev->queues_lock); + dev->queues[qid] = (struct bce_queue *) sq; + spin_unlock(&dev->queues_lock); + return sq; +} + +EXPORT_SYMBOL_GPL(bce_create_sq_with_flags); + void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq) { if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) cq->qid)) @@ -399,6 +486,8 @@ void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq) bce_free_cq(dev, cq); } +EXPORT_SYMBOL_GPL(bce_destroy_cq); + void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq) { if (!dev->is_being_removed && bce_cmd_unregister_memory_queue(dev->cmd_cmdq, (u16) sq->qid)) @@ -412,4 +501,5 @@ void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq) ida_free(&dev->queue_ida, (uint) sq->qid); #endif bce_free_sq(dev, sq); -} \ No newline at end of file +} +EXPORT_SYMBOL_GPL(bce_destroy_sq); diff --git a/queue.h b/queue.h index 8368ac5..517c7ec 100644 --- a/queue.h +++ b/queue.h @@ -56,11 +56,14 @@ struct bce_queue_cmdq_result_el { struct completion cmpl; u32 status; u64 result; + u32 slot; /* queue slot index for O(1) timeout cleanup */ + u32 generation; /* generation counter to detect stale completions */ }; struct bce_queue_cmdq { struct bce_queue_sq *sq; struct spinlock lck; struct bce_queue_cmdq_result_el **tres; + u32 *slot_gen; /* per-slot generation counter */ }; struct bce_queue_memcfg { @@ -161,7 +164,7 @@ void bce_set_submission_single(struct bce_qe_submission *element, dma_addr_t add struct bce_queue_cmdq *bce_alloc_cmdq(struct apple_bce_device *dev, int qid, u32 el_count); void bce_free_cmdq(struct apple_bce_device *dev, struct bce_queue_cmdq *cmdq); -u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct 
bce_queue_memcfg *cfg, const char *name, bool isdirout); +u32 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, struct bce_queue_memcfg *cfg, const char *name, u16 flags); u32 bce_cmd_unregister_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid); u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid); @@ -171,6 +174,8 @@ u32 bce_cmd_flush_memory_queue(struct bce_queue_cmdq *cmdq, u16 qid); struct bce_queue_cq *bce_create_cq(struct apple_bce_device *dev, u32 el_count); struct bce_queue_sq *bce_create_sq(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, u32 el_count, int direction, bce_sq_completion compl, void *userdata); +struct bce_queue_sq *bce_create_sq_with_flags(struct apple_bce_device *dev, struct bce_queue_cq *cq, const char *name, + u32 el_count, u16 flags, bce_sq_completion compl, void *userdata); void bce_destroy_cq(struct apple_bce_device *dev, struct bce_queue_cq *cq); void bce_destroy_sq(struct apple_bce_device *dev, struct bce_queue_sq *sq); diff --git a/queue_dma.c b/queue_dma.c index b236613..5b3020e 100644 --- a/queue_dma.c +++ b/queue_dma.c @@ -49,6 +49,8 @@ int bce_map_dma_buffer_vm(struct device *dev, struct bce_dma_buffer *buf, void * return 0; } +EXPORT_SYMBOL_GPL(bce_map_dma_buffer_vm); + int bce_map_dma_buffer_km(struct device *dev, struct bce_dma_buffer *buf, void *data, size_t len, enum dma_data_direction dir) { @@ -71,7 +73,9 @@ void bce_unmap_dma_buffer(struct device *dev, struct bce_dma_buffer *buf) { dma_unmap_sg(dev, buf->scatterlist.sgl, buf->scatterlist.nents, buf->direction); bce_unmap_segement_list(dev, buf->seglist_hostinfo); + sg_free_table(&buf->scatterlist); } +EXPORT_SYMBOL_GPL(bce_unmap_dma_buffer); static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_t len) @@ -88,6 +92,8 @@ static int bce_alloc_scatterlist_from_vm(struct sg_table *tbl, void *data, size_ pages = vmalloc(page_count * sizeof(struct page *)); else pages = kmalloc(page_count * sizeof(struct page 
*), GFP_KERNEL); + if (!pages) + return -ENOMEM; for (i = 0; i < page_count; i++) pages[i] = vmalloc_to_page((void *) ((start_page + i) * PAGE_SIZE)); @@ -153,6 +159,13 @@ static struct bce_segment_list_element_hostinfo *bce_map_segment_list( el->addr = sg->dma_address; el->length = sg->length; header->data_size += el->length; + el++; + } + + /* Fix up element_count in the last segment page to reflect actual usage */ + if (header != &theader) { + struct bce_segment_list_element *first_el = (void *) (header + 1); + header->element_count = el - first_el; } /* DMA map */ @@ -166,6 +179,8 @@ static struct bce_segment_list_element_hostinfo *bce_map_segment_list( header = pout->page_start; header->next_segl_addr = out->dma_start; header->next_segl_length = out->page_count * PAGE_SIZE; + dma_sync_single_for_device(dev, pout->dma_start, + pout->page_count * PAGE_SIZE, DMA_TO_DEVICE); } pout = out; out = out->next; @@ -181,8 +196,11 @@ static void bce_unmap_segement_list(struct device *dev, struct bce_segment_list_ { struct bce_segment_list_element_hostinfo *next; while (list) { + size_t i; if (list->dma_start != DMA_MAPPING_ERROR) dma_unmap_single(dev, list->dma_start, list->page_count * PAGE_SIZE, DMA_TO_DEVICE); + for (i = 0; i < list->page_count; i++) + free_page((unsigned long)list->page_start + i * PAGE_SIZE); next = list->next; kfree(list); list = next; @@ -213,8 +231,9 @@ int bce_set_submission_buf(struct bce_qe_submission *element, struct bce_dma_buf if (!seg) return -EINVAL; element->addr = offset; - element->length = buf->scatterlist.sgl->dma_length; + element->length = length; element->segl_addr = seg->dma_start; element->segl_length = seg->page_count * PAGE_SIZE; return 0; -} \ No newline at end of file +} +EXPORT_SYMBOL_GPL(bce_set_submission_buf); diff --git a/vhci/command.h b/vhci/command.h index 5cba515..392d153 100644 --- a/vhci/command.h +++ b/vhci/command.h @@ -6,11 +6,15 @@ #include #define BCE_VHCI_CMD_TIMEOUT_SHORT msecs_to_jiffies(2000) +#define 
BCE_VHCI_CMD_TIMEOUT_MEDIUM msecs_to_jiffies(5000) #define BCE_VHCI_CMD_TIMEOUT_LONG msecs_to_jiffies(30000) #define BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2 2 #define BCE_VHCI_BULK_MAX_ACTIVE_URBS (1 << BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2) +#define BCE_VHCI_ISOC_MAX_ACTIVE_URBS_POW2 2 +#define BCE_VHCI_ISOC_MAX_ACTIVE_URBS (1 << BCE_VHCI_ISOC_MAX_ACTIVE_URBS_POW2) + typedef u8 bce_vhci_port_t; typedef u8 bce_vhci_device_t; @@ -37,6 +41,9 @@ enum bce_vhci_command { BCE_VHCI_CMD_ENDPOINT_SET_STATE = 0x42, BCE_VHCI_CMD_ENDPOINT_RESET = 0x44, + /* System events (device to host only) */ + BCE_VHCI_CMD_CONTROLLER_RESET_NOTIFY = 0x50, + /* Device to host only */ BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE = 0x43, BCE_VHCI_CMD_TRANSFER_REQUEST = 0x1000, @@ -153,7 +160,7 @@ static inline int bce_vhci_cmd_device_destroy(struct bce_vhci_command_queue *q, struct bce_vhci_message cmd, res; cmd.cmd = BCE_VHCI_CMD_DEVICE_DESTROY; cmd.param1 = dev; - return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_LONG); + return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_MEDIUM); } static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, bce_vhci_device_t dev, @@ -168,8 +175,10 @@ static inline int bce_vhci_cmd_endpoint_create(struct bce_vhci_command_queue *q, cmd.param1 = dev | ((desc->bEndpointAddress & 0x8Fu) << 8); if (endpoint_type == USB_ENDPOINT_XFER_BULK) max_active_requests_pow2 = BCE_VHCI_BULK_MAX_ACTIVE_URBS_POW2; + else if (endpoint_type == USB_ENDPOINT_XFER_ISOC) + max_active_requests_pow2 = BCE_VHCI_ISOC_MAX_ACTIVE_URBS_POW2; cmd.param2 = endpoint_type | ((max_active_requests_pow2 & 0xf) << 4) | (maxp << 16) | ((u64) maxp_burst << 32); - if (endpoint_type == USB_ENDPOINT_XFER_INT) + if (endpoint_type == USB_ENDPOINT_XFER_INT || endpoint_type == USB_ENDPOINT_XFER_ISOC) cmd.param2 |= (desc->bInterval - 1) << 8; return bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); } @@ -189,7 +198,7 @@ static 
inline int bce_vhci_cmd_endpoint_set_state(struct bce_vhci_command_queue cmd.param1 = dev | (endpoint << 8); cmd.param2 = (u64) newState; status = bce_vhci_command_queue_execute(q, &cmd, &res, BCE_VHCI_CMD_TIMEOUT_SHORT); - if (status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER) + if (status >= 0 && status != BCE_VHCI_INTERNAL_ERROR && status != BCE_VHCI_NO_POWER) *retState = (enum bce_vhci_endpoint_state) res.param2; return status; } diff --git a/vhci/queue.c b/vhci/queue.c index 7b0b502..74d8263 100644 --- a/vhci/queue.c +++ b/vhci/queue.c @@ -1,6 +1,8 @@ #include "queue.h" #include "vhci.h" #include "../apple_bce.h" +#include +#include static void bce_vhci_message_queue_completion(struct bce_queue_sq *sq); @@ -83,6 +85,7 @@ int __bce_vhci_event_queue_create(struct bce_vhci *vhci, struct bce_vhci_event_q return -EINVAL; } + ret->draining = false; init_completion(&ret->queue_empty_completion); bce_vhci_event_queue_submit_pending(ret, VHCI_EVENT_PENDING_COUNT); return 0; @@ -123,7 +126,8 @@ static void bce_vhci_event_queue_completion(struct bce_queue_sq *sq) bce_notify_submission_complete(sq); ++cnt; } - bce_vhci_event_queue_submit_pending(ev, cnt); + if (!READ_ONCE(ev->draining)) + bce_vhci_event_queue_submit_pending(ev, cnt); if (atomic_read(&sq->available_commands) == sq->el_count - 1) complete(&ev->queue_empty_completion); } @@ -149,6 +153,7 @@ void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q) { unsigned long timeout; reinit_completion(&q->queue_empty_completion); + WRITE_ONCE(q->draining, true); if (bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid)) pr_warn("bce-vhci: failed to flush event queue\n"); timeout = msecs_to_jiffies(5000); @@ -163,9 +168,17 @@ void bce_vhci_event_queue_pause(struct bce_vhci_event_queue *q) void bce_vhci_event_queue_resume(struct bce_vhci_event_queue *q) { + WRITE_ONCE(q->draining, false); if (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { - pr_err("bce-vhci: resume of a 
queue with pending submissions\n"); - return; + pr_warn("bce-vhci: resume: event queue not fully drained, flushing\n"); + bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, q->sq->qid); + /* Wait briefly for stale completions to arrive */ + msleep(50); + if (atomic_read(&q->sq->available_commands) != q->sq->el_count - 1) { + pr_err("bce-vhci: resume: event queue still has %d pending after flush\n", + q->sq->el_count - 1 - atomic_read(&q->sq->available_commands)); + return; + } } bce_vhci_event_queue_submit_pending(q, VHCI_EVENT_PENDING_COUNT); } @@ -213,8 +226,13 @@ static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, s int status; struct bce_vhci_command_queue_completion *c; struct bce_vhci_message creq; + struct bce_vhci *vhci = container_of(cq, struct bce_vhci, cq); c = &cq->completion; + /* Fail fast if T2 VHCI controller is known-dead */ + if (READ_ONCE(vhci->controller_dead)) + return -ENODEV; + if ((status = bce_reserve_submission(cq->mq->sq, &timeout))) return status; @@ -236,11 +254,17 @@ static int __bce_vhci_command_queue_execute(struct bce_vhci_command_queue *cq, s bce_vhci_message_queue_write(cq->mq, &creq); if (!wait_for_completion_timeout(&c->completion, 1000)) { - pr_err("bce-vhci: Possible desync, cmd cancel timed out\n"); - spin_lock(&cq->completion_lock); c->result = NULL; spin_unlock(&cq->completion_lock); + + /* Only schedule recovery if not already recovering — prevents + * the recovery worker's own timed-out commands from spawning + * an infinite chain of recovery attempts. 
*/ + if (!atomic_read(&vhci->recovering)) { + pr_err("bce-vhci: Possible desync, cmd cancel timed out — scheduling recovery\n"); + schedule_work(&vhci->w_recovery); + } return -ETIMEDOUT; } if ((res->cmd & ~0x8000) == creq.cmd) diff --git a/vhci/queue.h b/vhci/queue.h index adb705b..57bb9d8 100644 --- a/vhci/queue.h +++ b/vhci/queue.h @@ -42,6 +42,7 @@ struct bce_vhci_event_queue { dma_addr_t dma_addr; bce_vhci_event_queue_callback cb; struct completion queue_empty_completion; + bool draining; }; struct bce_vhci_command_queue_completion { struct bce_vhci_message *result; diff --git a/vhci/transfer.c b/vhci/transfer.c index d772207..6d233d4 100644 --- a/vhci/transfer.c +++ b/vhci/transfer.c @@ -6,13 +6,15 @@ static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq); static void bce_vhci_transfer_queue_giveback(struct bce_vhci_transfer_queue *q); -static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q); +void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q); static int bce_vhci_urb_init(struct bce_vhci_urb *vurb); +static void bce_vhci_urb_complete(struct bce_vhci_urb *urb, int status); static int bce_vhci_urb_update(struct bce_vhci_urb *urb, struct bce_vhci_message *msg); static int bce_vhci_urb_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c); static void bce_vhci_transfer_queue_reset_w(struct work_struct *work); +static void bce_vhci_transfer_queue_deferred_resume_w(struct work_struct *work); void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q, struct usb_host_endpoint *endp, bce_vhci_device_t dev_addr, enum dma_data_direction dir) @@ -22,19 +24,27 @@ void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_trans INIT_LIST_HEAD(&q->giveback_urb_list); spin_lock_init(&q->urb_lock); mutex_init(&q->pause_lock); + /* Initialize pending output tracking for safe suspend/resume */ + 
init_waitqueue_head(&q->sq_out_wait_queue); + atomic_set(&q->sq_out_pending, 0); q->vhci = vhci; q->endp = endp; q->dev_addr = dev_addr; q->endp_addr = (u8) (endp->desc.bEndpointAddress & 0x8F); q->state = BCE_VHCI_ENDPOINT_ACTIVE; q->active = true; + q->paused_by = 0; q->stalled = false; q->max_active_requests = 1; if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_BULK) q->max_active_requests = BCE_VHCI_BULK_MAX_ACTIVE_URBS; + else if (usb_endpoint_type(&endp->desc) == USB_ENDPOINT_XFER_ISOC) + q->max_active_requests = BCE_VHCI_ISOC_MAX_ACTIVE_URBS; q->remaining_active_requests = q->max_active_requests; q->cq = bce_create_cq(vhci->dev, 0x100); INIT_WORK(&q->w_reset, bce_vhci_transfer_queue_reset_w); + INIT_WORK(&q->w_resume, bce_vhci_transfer_queue_deferred_resume_w); + q->ghost_in_count = 0; q->sq_in = NULL; if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) { snprintf(name, sizeof(name), "VHC1-%i-%02x", dev_addr, 0x80 | usb_endpoint_num(&endp->desc)); @@ -51,6 +61,8 @@ void bce_vhci_create_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_trans void bce_vhci_destroy_transfer_queue(struct bce_vhci *vhci, struct bce_vhci_transfer_queue *q) { + cancel_work_sync(&q->w_resume); + cancel_work_sync(&q->w_reset); bce_vhci_transfer_queue_giveback(q); bce_vhci_transfer_queue_remove_pending(q); if (q->sq_in) @@ -68,7 +80,11 @@ static inline bool bce_vhci_transfer_queue_can_init_urb(struct bce_vhci_transfer static void bce_vhci_transfer_queue_defer_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg) { struct bce_vhci_list_message *lm; - lm = kmalloc(sizeof(struct bce_vhci_list_message), GFP_KERNEL); + lm = kmalloc(sizeof(struct bce_vhci_list_message), GFP_ATOMIC); + if (!lm) { + pr_err("bce-vhci: [%02x] failed to allocate deferred event, dropping\n", q->endp_addr); + return; + } INIT_LIST_HEAD(&lm->list); lm->msg = *msg; list_add_tail(&lm->list, &q->evq); @@ -111,7 +127,7 @@ static void bce_vhci_transfer_queue_deliver_pending(struct 
bce_vhci_transfer_que bce_vhci_transfer_queue_init_pending_urbs(q); } -static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q) +void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q) { unsigned long flags; struct bce_vhci_list_message *lm; @@ -124,12 +140,39 @@ static void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queu spin_unlock_irqrestore(&q->urb_lock, flags); } +void bce_vhci_transfer_queue_cancel_all(struct bce_vhci_transfer_queue *q) +{ + unsigned long flags; + struct urb *urb, *urbt; + struct bce_vhci_urb *vurb; + + spin_lock_irqsave(&q->urb_lock, flags); + q->active = false; + list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) { + vurb = urb->hcpriv; + bce_vhci_urb_complete(vurb, -ECONNRESET); + } + spin_unlock_irqrestore(&q->urb_lock, flags); + /* Clear hcpriv before giveback so completion handlers can't + * resubmit to a queue that's about to be destroyed */ + q->endp->hcpriv = NULL; + bce_vhci_transfer_queue_giveback(q); +} + void bce_vhci_transfer_queue_event(struct bce_vhci_transfer_queue *q, struct bce_vhci_message *msg) { unsigned long flags; struct bce_vhci_urb *turb; struct urb *urb; spin_lock_irqsave(&q->urb_lock, flags); + + /* + * Suspend/resume fix: Skip events on inactive queues. During pause/unbind, + * the T2 chip may still send events for endpoints being torn down. + */ + if (!q->active) + goto complete; + bce_vhci_transfer_queue_deliver_pending(q); if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST && @@ -160,20 +203,45 @@ static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq) struct bce_sq_completion_data *c; struct urb *urb; struct bce_vhci_transfer_queue *q = sq->userdata; + bool is_sq_out = (sq == q->sq_out); spin_lock_irqsave(&q->urb_lock, flags); while ((c = bce_next_completion(sq))) { + /* + * Suspend/resume fix: Track output completion to allow pause to wait + * for all pending DMA transfers before destroying the queue. 
+ */ if (c->status == BCE_COMPLETION_ABORTED) { /* We flushed the queue */ pr_debug("bce-vhci: [%02x] Got an abort completion\n", q->endp_addr); + if (is_sq_out && atomic_dec_if_positive(&q->sq_out_pending) == 0) + wake_up(&q->sq_out_wait_queue); + bce_notify_submission_complete(sq); + continue; + } + /* Absorb ghost sq_in completions from cancelled IN URBs. + * T2 completed the DMA but the URB is already gone — just + * free the active slot and kick any INIT_PENDING URBs. */ + if (!is_sq_out && q->ghost_in_count > 0) { + q->ghost_in_count--; + ++q->remaining_active_requests; + bce_vhci_transfer_queue_init_pending_urbs(q); bce_notify_submission_complete(sq); continue; } if (list_empty(&q->endp->urb_list)) { - pr_err("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr); + /* Expected during teardown: URB was cancelled and unlinked from + * endp->urb_list, but T2's in-flight DMA completion arrived after. + * The completion slot is properly consumed below. */ + pr_debug("bce-vhci: [%02x] Got a completion while no requests are pending\n", q->endp_addr); + if (is_sq_out && atomic_dec_if_positive(&q->sq_out_pending) == 0) + wake_up(&q->sq_out_wait_queue); + bce_notify_submission_complete(sq); continue; } pr_debug("bce-vhci: [%02x] Got a transfer queue completion\n", q->endp_addr); urb = list_first_entry(&q->endp->urb_list, struct urb, urb_list); bce_vhci_urb_transfer_completion(urb->hcpriv, c); + if (is_sq_out && atomic_dec_if_positive(&q->sq_out_pending) == 0) + wake_up(&q->sq_out_wait_queue); bce_notify_submission_complete(sq); } bce_vhci_transfer_queue_deliver_pending(q); @@ -181,27 +249,68 @@ static void bce_vhci_transfer_queue_completion(struct bce_queue_sq *sq) bce_vhci_transfer_queue_giveback(q); } +/* Timeout for waiting on pending output requests during pause. + * Keep short — multiple endpoints timeout sequentially behind + * cq->mutex, so per-endpoint timeouts compound into apparent freezes. 
*/ +#define BCE_VHCI_PAUSE_TIMEOUT_MS 1000 + int bce_vhci_transfer_queue_do_pause(struct bce_vhci_transfer_queue *q) { unsigned long flags; int status; - u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F); + int pending; + long timeout; + + pr_debug("bce-vhci: [%02x] pause (dev=%d)\n", q->endp_addr, q->dev_addr); + spin_lock_irqsave(&q->urb_lock, flags); q->active = false; spin_unlock_irqrestore(&q->urb_lock, flags); + bce_vhci_transfer_queue_remove_pending(q); + + /* During recovery, skip T2 commands and queue flushes entirely. + * Recovery resets the controller and re-enumerates all devices, + * so per-endpoint pauses are pointless and would just burn + * timeouts against an unresponsive T2. */ + if (atomic_read(&q->vhci->recovering)) + return 0; + + /* Wait for pending output transfers to complete before pausing/flushing. + * Ensures commands like keyboard backlight off reach T2 before flush + * aborts remaining transfers. */ if (q->sq_out) { - pr_err("bce-vhci: Not implemented: wait for pending output requests\n"); + pending = atomic_read(&q->sq_out_pending); + if (pending > 0) { + timeout = wait_event_timeout(q->sq_out_wait_queue, + atomic_read(&q->sq_out_pending) == 0, + msecs_to_jiffies(BCE_VHCI_PAUSE_TIMEOUT_MS)); + if (timeout == 0) { + pending = atomic_read(&q->sq_out_pending); + if (pending > 0) + pr_warn("bce-vhci: [%02x] pause: timeout waiting for %d pending outputs\n", + q->endp_addr, pending); + } + } } - bce_vhci_transfer_queue_remove_pending(q); - if ((status = bce_vhci_cmd_endpoint_set_state( - &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state))) - return status; - if (q->state != BCE_VHCI_ENDPOINT_PAUSED) + + status = bce_vhci_cmd_endpoint_set_state( + &q->vhci->cq, q->dev_addr, q->endp_addr, BCE_VHCI_ENDPOINT_PAUSED, &q->state); + if (status) { + /* Fall through to flush. 
Returning error here prevents paused_by + * from being set, causing the deferred pause worker to retry in a + * loop where each attempt burns a full T2 command timeout (~3s). */ + pr_warn("bce-vhci: [%02x] pause: set_state failed (%d), continuing with local cleanup\n", + q->endp_addr, status); + } else if (q->state != BCE_VHCI_ENDPOINT_PAUSED) { + pr_err("bce-vhci: [%02x] pause: unexpected state %d\n", q->endp_addr, q->state); return -EINVAL; + } + if (q->sq_in) bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_in->qid); if (q->sq_out) bce_cmd_flush_memory_queue(q->vhci->dev->cmd_cmdq, (u16) q->sq_out->qid); + return 0; } @@ -213,12 +322,13 @@ int bce_vhci_transfer_queue_do_resume(struct bce_vhci_transfer_queue *q) int status; struct urb *urb, *urbt; struct bce_vhci_urb *vurb; - u8 endp_addr = (u8) (q->endp->desc.bEndpointAddress & 0x8F); + if ((status = bce_vhci_cmd_endpoint_set_state( - &q->vhci->cq, q->dev_addr, endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state))) + &q->vhci->cq, q->dev_addr, q->endp_addr, BCE_VHCI_ENDPOINT_ACTIVE, &q->state))) return status; if (q->state != BCE_VHCI_ENDPOINT_ACTIVE) return -EINVAL; + spin_lock_irqsave(&q->urb_lock, flags); q->active = true; list_for_each_entry_safe(urb, urbt, &q->endp->urb_list, urb_list) { @@ -297,6 +407,14 @@ void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q) queue_work(q->vhci->tq_state_wq, &q->w_reset); } +static void bce_vhci_transfer_queue_deferred_resume_w(struct work_struct *work) +{ + struct bce_vhci_transfer_queue *q = + container_of(work, struct bce_vhci_transfer_queue, w_resume); + bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); +} + + static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_queue *q) { struct urb *urb, *urbt; @@ -314,12 +432,15 @@ static void bce_vhci_transfer_queue_init_pending_urbs(struct bce_vhci_transfer_q static int bce_vhci_urb_data_start(struct bce_vhci_urb *urb, unsigned long *timeout); -int 
bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb) +int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb, gfp_t mem_flags) { unsigned long flags; int status = 0; struct bce_vhci_urb *vurb; - vurb = kzalloc(sizeof(struct bce_vhci_urb), GFP_KERNEL); + + vurb = kzalloc(sizeof(struct bce_vhci_urb), mem_flags); + if (!vurb) + return -ENOMEM; urb->hcpriv = vurb; vurb->q = q; @@ -342,9 +463,11 @@ int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb) else vurb->state = BCE_VHCI_URB_INIT_PENDING; } else { + vurb->state = BCE_VHCI_URB_INIT_PENDING; if (q->stalled) bce_vhci_transfer_queue_request_reset(q); - vurb->state = BCE_VHCI_URB_INIT_PENDING; + else if (READ_ONCE(q->paused_by) & BCE_VHCI_PAUSE_INTERNAL_WQ) + queue_work(q->vhci->tq_state_wq, &q->w_resume); } if (status) { usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); @@ -420,49 +543,93 @@ int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *u vurb->state = BCE_VHCI_URB_CANCELLED; - /* If the URB wasn't posted to the device yet, we can still remove it on the host without pausing the queue. */ + /* Free the active request slot so new URBs can use it. */ if (old_state != BCE_VHCI_URB_INIT_PENDING) { - pr_debug("bce-vhci: [%02x] Cancelling URB\n", q->endp_addr); - - spin_unlock_irqrestore(&q->urb_lock, flags); - bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ); - spin_lock_irqsave(&q->urb_lock, flags); - - ++q->remaining_active_requests; + if (old_state == BCE_VHCI_URB_WAITING_FOR_COMPLETION && + vurb->dir == DMA_FROM_DEVICE && !vurb->is_control) { + /* Active IN data transfer: T2 has a pending + * TRANSFER_REQUEST + sq_in DMA in its pipeline. + * Don't free the slot yet — let the ghost completion + * handler absorb it. This avoids pausing the endpoint + * (which disrupts the UVC bulk stream) or exceeding + * T2's max_active limit. 
*/ + q->ghost_in_count++; + } else { + ++q->remaining_active_requests; + } } usb_hcd_unlink_urb_from_ep(q->vhci->hcd, urb); + urb->hcpriv = NULL; + + /* Check if this was the last URB and ghosts are still in flight. + * If so, we must flush before returning so the caller (UVC) can + * safely free DMA buffers. This only fires during teardown + * (STREAMOFF) — during normal streaming, new URBs keep the list + * non-empty so we never hit this path. */ + { + bool need_flush = (q->ghost_in_count > 0 && + list_empty(&q->endp->urb_list)); + spin_unlock_irqrestore(&q->urb_lock, flags); - spin_unlock_irqrestore(&q->urb_lock, flags); - - usb_hcd_giveback_urb(q->vhci->hcd, urb, status); - - if (old_state != BCE_VHCI_URB_INIT_PENDING) - bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); + usb_hcd_giveback_urb(q->vhci->hcd, urb, status); + kfree(vurb); - kfree(vurb); + if (need_flush) { + bce_vhci_transfer_queue_pause(q, BCE_VHCI_PAUSE_INTERNAL_WQ); + spin_lock_irqsave(&q->urb_lock, flags); + q->ghost_in_count = 0; + q->remaining_active_requests = q->max_active_requests; + spin_unlock_irqrestore(&q->urb_lock, flags); + bce_vhci_transfer_queue_resume(q, BCE_VHCI_PAUSE_INTERNAL_WQ); + } + } return 0; } +/* Select the correct host-to-T2 message queue based on endpoint type. + * T2 expects isochronous transfer requests on VHC1HostIsochronousEvents. + * Interrupt, bulk, and control all go through VHC1HostAsynchronousEvents + * (T2 does NOT accept TRANSFER_REQUEST on VHC1HostInterruptEvents). 
*/ +static void bce_vhci_get_msg_queue(struct bce_vhci_transfer_queue *q, + struct bce_vhci_message_queue **mq, struct spinlock **lock) +{ + struct bce_vhci *vhci = q->vhci; + switch (usb_endpoint_type(&q->endp->desc)) { + case USB_ENDPOINT_XFER_ISOC: + *mq = &vhci->msg_isochronous; + *lock = &vhci->msg_isochronous_lock; + break; + default: + *mq = &vhci->msg_asynchronous; + *lock = &vhci->msg_asynchronous_lock; + break; + } +} + static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long *timeout) { struct bce_vhci_message msg; struct bce_qe_submission *s; + struct bce_vhci_message_queue *mq; + struct spinlock *mq_lock; u32 tr_len; int reservation1, reservation2 = -EFAULT; pr_debug("bce-vhci: [%02x] DMA from device %llx %x\n", urb->q->endp_addr, (u64) urb->urb->transfer_dma, urb->urb->transfer_buffer_length); + bce_vhci_get_msg_queue(urb->q, &mq, &mq_lock); + /* Reserve both a message and a submission, so we don't run into issues later. */ - reservation1 = bce_reserve_submission(urb->q->vhci->msg_asynchronous.sq, timeout); + reservation1 = bce_reserve_submission(mq->sq, timeout); if (!reservation1) reservation2 = bce_reserve_submission(urb->q->sq_in, timeout); if (reservation1 || reservation2) { pr_err("bce-vhci: Failed to reserve a submission for URB data transfer\n"); if (!reservation1) - bce_cancel_submission_reservation(urb->q->vhci->msg_asynchronous.sq); + bce_cancel_submission_reservation(mq->sq); return -ENOMEM; } @@ -470,13 +637,13 @@ static int bce_vhci_urb_data_transfer_in(struct bce_vhci_urb *urb, unsigned long tr_len = urb->urb->transfer_buffer_length - urb->send_offset; - spin_lock(&urb->q->vhci->msg_asynchronous_lock); + spin_lock(mq_lock); msg.cmd = BCE_VHCI_CMD_TRANSFER_REQUEST; msg.status = 0; msg.param1 = ((urb->urb->ep->desc.bEndpointAddress & 0x8Fu) << 8) | urb->q->dev_addr; msg.param2 = tr_len; - bce_vhci_message_queue_write(&urb->q->vhci->msg_asynchronous, &msg); - spin_unlock(&urb->q->vhci->msg_asynchronous_lock); + 
bce_vhci_message_queue_write(mq, &msg); + spin_unlock(mq_lock); s = bce_next_submission(urb->q->sq_in); bce_set_submission_single(s, urb->urb->transfer_dma + urb->send_offset, tr_len); @@ -512,6 +679,8 @@ static int bce_vhci_urb_send_out_data(struct bce_vhci_urb *urb, dma_addr_t addr, s = bce_next_submission(urb->q->sq_out); bce_set_submission_single(s, addr, size); + /* Track pending output for safe pause - decremented in completion handler */ + atomic_inc(&urb->q->sq_out_pending); bce_submit_to_device(urb->q->sq_out); return 0; } @@ -540,6 +709,24 @@ static int bce_vhci_urb_data_update(struct bce_vhci_urb *urb, struct bce_vhci_me return -EAGAIN; } +static void bce_vhci_urb_iso_complete(struct bce_vhci_urb *vurb) +{ + struct urb *urb = vurb->urb; + int i; + u32 remaining = urb->actual_length; + u32 maxp = usb_endpoint_maxp(&urb->ep->desc); + + urb->error_count = 0; + for (i = 0; i < urb->number_of_packets; i++) { + u32 pkt_len = min(maxp, remaining); + urb->iso_frame_desc[i].actual_length = pkt_len; + urb->iso_frame_desc[i].status = 0; + remaining -= pkt_len; + } + + bce_vhci_urb_complete(vurb, 0); +} + static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struct bce_sq_completion_data *c) { if (urb->state == BCE_VHCI_URB_WAITING_FOR_COMPLETION) { @@ -548,7 +735,10 @@ static int bce_vhci_urb_data_transfer_completion(struct bce_vhci_urb *urb, struc urb->urb->actual_length = (u32) urb->receive_offset; urb->state = BCE_VHCI_URB_DATA_TRANSFER_COMPLETE; if (!urb->is_control) { - bce_vhci_urb_complete(urb, 0); + if (urb->urb->number_of_packets > 0) + bce_vhci_urb_iso_complete(urb); + else + bce_vhci_urb_complete(urb, 0); return -ENOENT; } } @@ -569,12 +759,22 @@ static int bce_vhci_urb_control_check_status(struct bce_vhci_urb *urb) urb->state != BCE_VHCI_URB_CONTROL_WAITING_FOR_SETUP_COMPLETION)) { urb->state = BCE_VHCI_URB_CONTROL_COMPLETE; if (urb->received_status != BCE_VHCI_SUCCESS) { - pr_err("bce-vhci: [%02x] URB failed: %x\n", 
urb->q->endp_addr, urb->received_status); + if (urb->is_control && urb->urb->setup_packet) { + struct usb_ctrlrequest *setup = (struct usb_ctrlrequest *)urb->urb->setup_packet; + pr_err("bce-vhci: [%02x] URB failed: %x (dev=%d) setup=%02x/%02x val=%04x idx=%04x len=%04x\n", + urb->q->endp_addr, urb->received_status, urb->q->dev_addr, + setup->bRequestType, setup->bRequest, + le16_to_cpu(setup->wValue), le16_to_cpu(setup->wIndex), + le16_to_cpu(setup->wLength)); + } else { + pr_err("bce-vhci: [%02x] URB failed: %x (dev=%d)\n", + urb->q->endp_addr, urb->received_status, urb->q->dev_addr); + } urb->q->active = false; urb->q->stalled = true; bce_vhci_urb_complete(urb, -EPIPE); - if (!list_empty(&q->endp->urb_list)) - bce_vhci_transfer_queue_request_reset(q); + bce_vhci_transfer_queue_request_reset(q); + return -ENOENT; } bce_vhci_urb_complete(urb, 0); diff --git a/vhci/transfer.h b/vhci/transfer.h index 2f4df78..a7b99f8 100644 --- a/vhci/transfer.h +++ b/vhci/transfer.h @@ -33,7 +33,24 @@ struct bce_vhci_transfer_queue { struct mutex pause_lock; struct list_head giveback_urb_list; + /* + * Suspend/resume fix: Track pending output submissions to avoid destroying + * queues while DMA transfers are still in flight. The wait queue allows + * bce_vhci_transfer_queue_do_pause() to block until all pending output + * completions have been processed, preventing use-after-free during suspend. + */ + wait_queue_head_t sq_out_wait_queue; + atomic_t sq_out_pending; + struct work_struct w_reset; + struct work_struct w_resume; + + /* Count of ghost sq_in completions to absorb from cancelled IN URBs. + * When an active IN URB is cancelled, its TRANSFER_REQUEST + DMA are + * still in T2's pipeline. Instead of pausing the endpoint (which + * disrupts the UVC bulk stream), we let T2 complete the DMA and + * silently skip the result. Protected by urb_lock. 
*/ + int ghost_in_count; }; enum bce_vhci_urb_state { BCE_VHCI_URB_INIT_PENDING, @@ -66,7 +83,10 @@ int bce_vhci_transfer_queue_pause(struct bce_vhci_transfer_queue *q, enum bce_vh int bce_vhci_transfer_queue_resume(struct bce_vhci_transfer_queue *q, enum bce_vhci_pause_source src); void bce_vhci_transfer_queue_request_reset(struct bce_vhci_transfer_queue *q); -int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb); +void bce_vhci_transfer_queue_remove_pending(struct bce_vhci_transfer_queue *q); +void bce_vhci_transfer_queue_cancel_all(struct bce_vhci_transfer_queue *q); + +int bce_vhci_urb_create(struct bce_vhci_transfer_queue *q, struct urb *urb, gfp_t mem_flags); int bce_vhci_urb_request_cancel(struct bce_vhci_transfer_queue *q, struct urb *urb, int status); #endif //BCEDRIVER_TRANSFER_H diff --git a/vhci/vhci.c b/vhci/vhci.c index 675d477..0a57b88 100644 --- a/vhci/vhci.c +++ b/vhci/vhci.c @@ -17,6 +17,12 @@ static int bce_vhci_create_message_queues(struct bce_vhci *vhci); static void bce_vhci_destroy_message_queues(struct bce_vhci *vhci); static void bce_vhci_handle_firmware_events_w(struct work_struct *ws); static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq); +static void bce_vhci_recovery_w(struct work_struct *ws); +static void bce_vhci_watchdog_w(struct work_struct *ws); + +#define BCE_VHCI_RECOVERY_COOLDOWN_SECS 10 +#define BCE_VHCI_WATCHDOG_INTERVAL_SECS 30 +#define BCE_VHCI_MAX_RECOVERY_FAILURES 3 int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci) { @@ -40,6 +46,12 @@ int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci) vhci->tq_state_wq = alloc_ordered_workqueue("bce-vhci-tq-state", 0); INIT_WORK(&vhci->w_fw_events, bce_vhci_handle_firmware_events_w); + INIT_WORK(&vhci->w_recovery, bce_vhci_recovery_w); + INIT_DELAYED_WORK(&vhci->recovery_watchdog, bce_vhci_watchdog_w); + atomic_set(&vhci->recovering, 0); + vhci->last_recovery_jiffies = 0; + vhci->recovery_fail_count = 0; 
+ vhci->controller_dead = false; vhci->hcd = usb_create_hcd(&bce_vhci_driver, vhci->vdev, "bce-vhci"); if (!vhci->hcd) { @@ -54,10 +66,12 @@ int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci) vhci->hcd->speed = HCD_USB2; if ((status = usb_add_hcd(vhci->hcd, 0, 0))) - goto fail_hcd; + goto fail_add_hcd; return 0; +fail_add_hcd: + usb_put_hcd(vhci->hcd); fail_hcd: bce_vhci_destroy_event_queues(vhci); fail_eq: @@ -73,6 +87,11 @@ int bce_vhci_create(struct apple_bce_device *dev, struct bce_vhci *vhci) void bce_vhci_destroy(struct bce_vhci *vhci) { usb_remove_hcd(vhci->hcd); + cancel_delayed_work_sync(&vhci->recovery_watchdog); + cancel_work_sync(&vhci->w_recovery); + cancel_work_sync(&vhci->w_fw_events); + flush_workqueue(vhci->tq_state_wq); + destroy_workqueue(vhci->tq_state_wq); bce_vhci_destroy_event_queues(vhci); bce_vhci_destroy_message_queues(vhci); device_destroy(bce_vhci_class, vhci->vdevt); @@ -101,35 +120,34 @@ int bce_vhci_start(struct usb_hcd *hcd) port_mask >>= 1; } vhci->port_count = port_no; + schedule_delayed_work(&vhci->recovery_watchdog, + msecs_to_jiffies(BCE_VHCI_WATCHDOG_INTERVAL_SECS * 1000)); return 0; } void bce_vhci_stop(struct usb_hcd *hcd) { struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + cancel_delayed_work_sync(&vhci->recovery_watchdog); bce_vhci_cmd_controller_disable(&vhci->cq); } static int bce_vhci_hub_status_data(struct usb_hcd *hcd, char *buf) { struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); - unsigned long changed; - int i, bytes; + unsigned long mask = READ_ONCE(vhci->port_resume_mask); + int i, ret_len; - changed = xchg(&vhci->port_change_pending, 0); - if (!changed) + if (!mask) return 0; - /* USB hub status bitmap: bit 0 = hub, bits 1..N = ports 1..N. - * Since we set_bit(port_number, &pending), port N is already - * in bit N — matching the USB spec layout directly. 
*/ - - bytes = DIV_ROUND_UP(vhci->port_count + 1, 8); - - for (i = 0; i < bytes; i++) - buf[i] = (changed >> (i * 8)) & 0xff; - - return bytes; + ret_len = (vhci->port_count + 8) / 8; + memset(buf, 0, ret_len); + for (i = 1; i <= vhci->port_count; i++) { + if (test_bit(i, &vhci->port_resume_mask)) + buf[i / 8] |= BIT(i % 8); + } + return ret_len; } static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout); @@ -170,6 +188,19 @@ static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u1 if (!(bce_vhci_port_mask & BIT(wIndex))) return 0; + /* Controller dead — report no connection so USB core stops polling */ + if (READ_ONCE(vhci->controller_dead)) + return 0; + + /* If port needs forced re-enumeration, hide the connection so USB + * core sees the device as disconnected without querying T2. + * Avoids hundreds of wasted T2 PCIe round-trips during PM resume + * when USB core polls all ports repeatedly. */ + if (test_bit(wIndex, &vhci->port_reenumerate_mask)) { + ps->wPortChange |= USB_PORT_STAT_C_CONNECTION; + return 0; + } + if ((status = bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0, &port_status))) return status; @@ -186,8 +217,11 @@ static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u1 if (port_status & 0x40000) ps->wPortChange |= USB_PORT_STAT_C_CONNECTION; + if (test_bit(wIndex, &vhci->port_resume_mask)) + ps->wPortChange |= USB_PORT_STAT_C_CONNECTION; - pr_debug("bce-vhci: Translated status %x to %x:%x\n", port_status, ps->wPortStatus, ps->wPortChange); + pr_debug("bce-vhci: GetPortStatus port %d: raw=0x%x usb_status=0x%x usb_change=0x%x\n", + wIndex, port_status, ps->wPortStatus, ps->wPortChange); return 0; } else if (typeReq == SetPortFeature) { if (wValue == USB_PORT_FEAT_POWER) { @@ -214,8 +248,10 @@ static int bce_vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u1 vhci->port_power_mask &= ~BIT(wIndex); return status; } - if (wValue == USB_PORT_FEAT_C_CONNECTION) + if 
(wValue == USB_PORT_FEAT_C_CONNECTION) { + clear_bit(wIndex, &vhci->port_resume_mask); return bce_vhci_cmd_port_status(&vhci->cq, (u8) wIndex, 0x40000, &port_status); + } if (wValue == USB_PORT_FEAT_C_RESET) { /* I don't think I can transfer it in any way */ return 0; } @@ -234,7 +270,7 @@ static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); struct bce_vhci_device *vdev; bce_vhci_device_t devid; - pr_info("bce_vhci_enable_device\n"); + pr_debug("bce_vhci_enable_device\n"); if (vhci->port_to_device[udev->portnum]) return 0; @@ -243,9 +279,13 @@ static int bce_vhci_enable_device(struct usb_hcd *hcd, struct usb_device *udev) if (bce_vhci_cmd_device_create(&vhci->cq, udev->portnum, &devid)) return -EIO; - pr_info("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid); + pr_debug("bce_vhci_cmd_device_create %i -> %i\n", udev->portnum, devid); vdev = kzalloc(sizeof(struct bce_vhci_device), GFP_KERNEL); + if (devid >= 16) { + kfree(vdev); + return -EINVAL; + } vhci->port_to_device[udev->portnum] = devid; vhci->devices[devid] = vdev; @@ -273,22 +313,55 @@ static void bce_vhci_free_device(struct usb_hcd *hcd, struct usb_device *udev) int i; bce_vhci_device_t devid; struct bce_vhci_device *dev; - pr_info("bce_vhci_free_device %i\n", udev->portnum); - if (!vhci->port_to_device[udev->portnum]) + pr_debug("bce_vhci_free_device %i\n", udev->portnum); + if (!vhci->port_to_device[udev->portnum]) { + /* Defensive: T2 device mapping already cleared. + * If re-enumeration was requested, signal reconnect. 
*/ + if (test_and_clear_bit(udev->portnum, &vhci->port_reenumerate_mask)) { + pr_debug("bce_vhci_free_device: port %d (already cleared) signaling reconnect\n", udev->portnum); + set_bit(udev->portnum, &vhci->port_resume_mask); + usb_hcd_poll_rh_status(vhci->hcd); + } return; + } devid = vhci->port_to_device[udev->portnum]; dev = vhci->devices[devid]; + + /* Clear device mappings first to prevent concurrent access from + * event handlers during teardown. */ + vhci->devices[devid] = NULL; + vhci->port_to_device[udev->portnum] = 0; + + /* Host-side cleanup only: clear hcpriv, destroy transfer queues. + * Skip per-endpoint T2 pause+destroy commands -- device_destroy + * handles T2-side cleanup implicitly (same pattern as + * bce_vhci_reset_device). USB core has already dequeued all URBs + * before free_dev, so no DMA is in flight. */ for (i = 0; i < 32; i++) { if (dev->tq_mask & BIT(i)) { - bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); - bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); + if (dev->tq[i].endp) + dev->tq[i].endp->hcpriv = NULL; bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); } } - vhci->devices[devid] = NULL; - vhci->port_to_device[udev->portnum] = 0; - bce_vhci_cmd_device_destroy(&vhci->cq, devid); + dev->tq_mask = 0; + /* Skip device_destroy when reenumerate_mask is set — bus_resume + * already handled T2-side teardown (either T2 reconnected and + * destroyed the old device itself, or we called device_destroy + * explicitly to force re-enumeration). */ + if (!test_bit(udev->portnum, &vhci->port_reenumerate_mask)) + bce_vhci_cmd_device_destroy(&vhci->cq, devid); kfree(dev); + + /* If this port was marked for forced re-enumeration, the device is now + * torn down. Clear the flag so the next GetPortStatus shows the real + * connection, and set port_resume_mask so hub_event picks up the + * "new" connection and re-enumerates. 
*/ + if (test_and_clear_bit(udev->portnum, &vhci->port_reenumerate_mask)) { + pr_info("bce_vhci_free_device: port %d cleared reenumerate, signaling reconnect\n", udev->portnum); + set_bit(udev->portnum, &vhci->port_resume_mask); + usb_hcd_poll_rh_status(vhci->hcd); + } } static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout) @@ -298,22 +371,41 @@ static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout) int i; int status; enum dma_data_direction dir; - pr_info("bce_vhci_reset_device %i\n", index); + + /* After bus_resume refresh, skip reset_device calls from USB core's + * reset_resume path — the T2 device was already refreshed in bus_resume. + * USB core calls reset_device twice (hub_port_reset + hub_port_init), + * so we skip both. The port is already enabled from the refresh. */ + if (vhci->port_resume_skip_reset[index] > 0) { + pr_info("bce_vhci_reset_device: port %d skipped (resume, remaining=%d)\n", + index, vhci->port_resume_skip_reset[index] - 1); + vhci->port_resume_skip_reset[index]--; + return 0; + } devid = vhci->port_to_device[index]; if (devid) { dev = vhci->devices[devid]; + pr_info("bce_vhci_reset_device: port %d devid=%d tq_mask=0x%x\n", + index, devid, dev ? dev->tq_mask : 0); for (i = 0; i < 32; i++) { - if (dev->tq_mask & BIT(i)) { - bce_vhci_transfer_queue_pause(&dev->tq[i], BCE_VHCI_PAUSE_SHUTDOWN); - bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) i); - bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); - } + if (!(dev->tq_mask & BIT(i))) + continue; + /* + * Suspend/resume fix: Clear hcpriv BEFORE destroying the queue + * to prevent use-after-free if URB operations occur during reset. 
+ */ + if (dev->tq[i].endp) + dev->tq[i].endp->hcpriv = NULL; + bce_vhci_destroy_transfer_queue(vhci, &dev->tq[i]); } vhci->devices[devid] = NULL; vhci->port_to_device[index] = 0; + /* T2 implicitly destroys endpoints with device_destroy */ bce_vhci_cmd_device_destroy(&vhci->cq, devid); + } else { + pr_info("bce_vhci_reset_device: port %d (no device)\n", index); } status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) index, timeout); @@ -329,6 +421,8 @@ static int bce_vhci_reset_device(struct bce_vhci *vhci, int index, u16 timeout) if (i == 0) dir = DMA_BIDIRECTIONAL; bce_vhci_create_transfer_queue(vhci, &dev->tq[i], dev->tq[i].endp, devid, dir); + /* Restore hcpriv after recreating the queue */ + dev->tq[i].endp->hcpriv = &dev->tq[i]; bce_vhci_cmd_endpoint_create(&vhci->cq, devid, &dev->tq[i].endp->desc); } } @@ -344,38 +438,90 @@ static int bce_vhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev static int bce_vhci_get_frame_number(struct usb_hcd *hcd) { - return 0; + return (int)(jiffies & 0x7FF); } static int bce_vhci_bus_suspend(struct usb_hcd *hcd) { int i, j; int status; + unsigned long flags; + struct bce_vhci_transfer_queue *tq; struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); + cancel_delayed_work_sync(&vhci->recovery_watchdog); pr_info("bce_vhci: suspend started\n"); - - pr_info("bce_vhci: suspend endpoints\n"); + pr_info("bce_vhci: suspend: msg queue slots: cmd=%d/%d sys=%d/%d async=%d/%d int=%d/%d iso=%d/%d\n", + atomic_read(&vhci->msg_commands.sq->available_commands), vhci->msg_commands.sq->el_count - 1, + atomic_read(&vhci->msg_system.sq->available_commands), vhci->msg_system.sq->el_count - 1, + atomic_read(&vhci->msg_asynchronous.sq->available_commands), vhci->msg_asynchronous.sq->el_count - 1, + atomic_read(&vhci->msg_interrupt.sq->available_commands), vhci->msg_interrupt.sq->el_count - 1, + atomic_read(&vhci->msg_isochronous.sq->available_commands), vhci->msg_isochronous.sq->el_count - 1); + memset(vhci->port_resume_skip_reset, 0, 
sizeof(vhci->port_resume_skip_reset)); + WRITE_ONCE(vhci->port_resume_mask, 0); + /* Clear any leftover re-enumeration state from a previous resume cycle + * that didn't fully complete before this suspend. Stale bits would + * confuse the next resume cycle. */ + WRITE_ONCE(vhci->port_reenumerate_mask, 0); + + /* Pause endpoints on T2 BEFORE flushing the workqueue. + * Without this ordering, flush_workqueue blocks on N × do_pause() + * (3 T2 round-trips per endpoint), causing progressive suspend + * slowdown. */ for (i = 0; i < 16; i++) { + struct bce_vhci_device *vdev; if (!vhci->port_to_device[i]) continue; + vdev = vhci->devices[vhci->port_to_device[i]]; for (j = 0; j < 32; j++) { - if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) + if (!(vdev->tq_mask & BIT(j))) continue; - bce_vhci_transfer_queue_pause(&vhci->devices[vhci->port_to_device[i]]->tq[j], - BCE_VHCI_PAUSE_SUSPEND); + tq = &vdev->tq[j]; + mutex_lock(&tq->pause_lock); + if (!tq->paused_by) { + spin_lock_irqsave(&tq->urb_lock, flags); + tq->active = false; + spin_unlock_irqrestore(&tq->urb_lock, flags); + status = bce_vhci_cmd_endpoint_set_state( + &vhci->cq, tq->dev_addr, tq->endp_addr, + BCE_VHCI_ENDPOINT_PAUSED, &tq->state); + if (status) + pr_warn("bce_vhci: suspend: endpoint_set_state failed %d:%d (err=%d)\n", + i, j, status); + } + tq->paused_by |= BCE_VHCI_PAUSE_SUSPEND; + mutex_unlock(&tq->pause_lock); } } - pr_info("bce_vhci: suspend ports\n"); + flush_workqueue(vhci->tq_state_wq); + + pr_debug("bce_vhci: suspend: suspending ports\n"); for (i = 0; i < 16; i++) { if (!vhci->port_to_device[i]) continue; bce_vhci_cmd_port_suspend(&vhci->cq, i); } - pr_info("bce_vhci: suspend controller\n"); - if ((status = bce_vhci_cmd_controller_pause(&vhci->cq))) + + pr_debug("bce_vhci: suspend: pausing controller\n"); + if ((status = bce_vhci_cmd_controller_pause(&vhci->cq))) { + pr_err("bce_vhci: suspend: controller_pause failed (err=%d)\n", status); return status; + } + /* Set 
PORT_STATUS_CHANGE suppress mask BEFORE pausing event queues, so it's + * in RAM through S3. When PCIe restores before bus_resume runs, T2 may + * deliver PORT_STATUS_CHANGE events via interrupt — the mask must already be + * set or those events will trigger USB core re-enumeration. */ + { + unsigned long suppress = 0; + for (i = 0; i < 16; i++) { + if (vhci->port_to_device[i]) + suppress |= BIT(i); + } + WRITE_ONCE(vhci->port_suppress_connect_mask, suppress); + } + + pr_debug("bce_vhci: suspend: pausing event queues\n"); bce_vhci_event_queue_pause(&vhci->ev_commands); bce_vhci_event_queue_pause(&vhci->ev_system); bce_vhci_event_queue_pause(&vhci->ev_isochronous); @@ -387,55 +533,192 @@ static int bce_vhci_bus_suspend(struct usb_hcd *hcd) static int bce_vhci_bus_resume(struct usb_hcd *hcd) { + static unsigned int resume_cycle; int i, j; int status; + int need_poll = 0; + u32 port_status; + struct bce_vhci_device *vdev; + bce_vhci_device_t devid, new_devid; + struct usb_host_endpoint *ep0_endp; + struct usb_device *udev; struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); - pr_info("bce_vhci: resume started\n"); + ++resume_cycle; + pr_info("bce_vhci: resume started (cycle %u)\n", resume_cycle); + + /* If recovery is already in progress (e.g. watchdog fired during + * suspend), defer to it. Return 0 so USB core doesn't call + * usb_hc_died() — recovery will restart the controller and + * re-enumerate devices once it completes. 
*/ + if (atomic_read(&vhci->recovering)) { + pr_info("bce_vhci: resume: recovery in progress, deferring\n"); + return 0; + } + pr_debug("bce_vhci: resume: resuming event queues\n"); + bce_vhci_event_queue_resume(&vhci->ev_commands); bce_vhci_event_queue_resume(&vhci->ev_system); + bce_vhci_event_queue_resume(&vhci->ev_asynchronous); bce_vhci_event_queue_resume(&vhci->ev_isochronous); bce_vhci_event_queue_resume(&vhci->ev_interrupt); - bce_vhci_event_queue_resume(&vhci->ev_asynchronous); - bce_vhci_event_queue_resume(&vhci->ev_commands); - pr_info("bce_vhci: resume controller\n"); - if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) - return status; + pr_debug("bce_vhci: resume: starting controller\n"); + if ((status = bce_vhci_cmd_controller_start(&vhci->cq))) { + pr_err("bce_vhci: resume: controller_start failed (err=%d), scheduling recovery\n", status); + WRITE_ONCE(vhci->port_suppress_connect_mask, 0); + WRITE_ONCE(vhci->port_reenumerate_mask, 0); + schedule_work(&vhci->w_recovery); + return 0; + } + + /* Flush stale DMA transfer submissions from before S3. + * All three transfer message queues may have pending submissions. */ + bce_cmd_flush_memory_queue(vhci->dev->cmd_cmdq, (u16) vhci->msg_asynchronous.sq->qid); + bce_cmd_flush_memory_queue(vhci->dev->cmd_cmdq, (u16) vhci->msg_isochronous.sq->qid); + bce_cmd_flush_memory_queue(vhci->dev->cmd_cmdq, (u16) vhci->msg_interrupt.sq->qid); - pr_info("bce_vhci: resume ports\n"); + pr_debug("bce_vhci: resume: resuming ports\n"); for (i = 0; i < 16; i++) { if (!vhci->port_to_device[i]) continue; bce_vhci_cmd_port_resume(&vhci->cq, i); } - pr_info("bce_vhci: resume endpoints\n"); + + /* Per-port classification: silent refresh + reset_resume for surviving + * devices, re-enumeration for reconnected/error ports. 
*/ for (i = 0; i < 16; i++) { if (!vhci->port_to_device[i]) continue; + + devid = vhci->port_to_device[i]; + vdev = vhci->devices[devid]; + + bool connection_changed = false; + status = bce_vhci_cmd_port_status(&vhci->cq, (u8) i, 0, &port_status); + if (!status && (port_status & 0x40000)) { + /* T2 reconnected — old device already destroyed on T2 side. + * Clear the change bit and fall through to the refresh path + * (same as surviving devices, but skip device_destroy). */ + pr_info("bce_vhci: resume: port %d connection changed, refreshing\n", i); + bce_vhci_cmd_port_status(&vhci->cq, (u8) i, 0x40000, &port_status); + connection_changed = true; + } + if (status || !(port_status & 0x4)) { + /* Error querying port or device disconnected */ + pr_info("bce_vhci: resume: port %d error/disconnected (status=%d, port_status=0x%x)\n", + i, status, port_status); + bce_vhci_cmd_device_destroy(&vhci->cq, devid); + goto reenumerate; + } + + /* Inline refresh for surviving devices (not reconnected). + * Creating only EP0 matches boot-time state and prevents T2 + * from stalling on GET_DESCRIPTOR. USB core's reset_resume + * sends SET_CONFIGURATION, then add_endpoint recreates + * non-EP0 endpoints. 
*/ + pr_info("bce_vhci: resume: port %d refreshing (devid=%d, tq_mask=0x%x)\n", + i, devid, vdev->tq_mask); + + /* Save EP0 endpoint pointer before destroying queues */ + ep0_endp = vdev->tq[0].endp; + + /* Cancel all in-flight URBs and destroy ALL transfer queues */ for (j = 0; j < 32; j++) { - if (!(vhci->devices[vhci->port_to_device[i]]->tq_mask & BIT(j))) + if (!(vdev->tq_mask & BIT(j))) continue; - bce_vhci_transfer_queue_resume(&vhci->devices[vhci->port_to_device[i]]->tq[j], - BCE_VHCI_PAUSE_SUSPEND); + bce_vhci_transfer_queue_cancel_all(&vdev->tq[j]); + if (vdev->tq[j].endp) + vdev->tq[j].endp->hcpriv = NULL; + bce_vhci_destroy_transfer_queue(vhci, &vdev->tq[j]); + } + vdev->tq_mask = 0; + + /* Clear mappings before T2 commands */ + vhci->devices[devid] = NULL; + vhci->port_to_device[i] = 0; + + /* Destroy and recreate T2 device — port_reset required by T2 + * before device_create will succeed. + * Skip destroy for reconnected devices: T2 already destroyed them. */ + if (!connection_changed) { + status = bce_vhci_cmd_device_destroy(&vhci->cq, devid); + if (status) { + pr_err("bce_vhci: resume: port %d device_destroy failed (err=%d), falling back to re-enum\n", i, status); + kfree(vdev); + goto reenumerate; + } + } + status = bce_vhci_cmd_port_reset(&vhci->cq, (u8) i, 0); + if (status) { + pr_err("bce_vhci: resume: port %d port_reset failed (err=%d), falling back to re-enum\n", i, status); + kfree(vdev); + goto reenumerate; } + if (bce_vhci_cmd_device_create(&vhci->cq, i, &new_devid)) { + pr_err("bce_vhci: resume: port %d device_create failed, falling back to re-enum\n", i); + kfree(vdev); + goto reenumerate; + } + + /* Update mappings with new device ID */ + vhci->devices[new_devid] = vdev; + vhci->port_to_device[i] = new_devid; + + /* Create ONLY EP0 — matches boot-time device state. + * Non-EP0 endpoints will be recreated by add_endpoint + * when USB core restores configuration during reset_resume. 
*/ + bce_vhci_create_transfer_queue(vhci, &vdev->tq[0], ep0_endp, new_devid, DMA_BIDIRECTIONAL); + ep0_endp->hcpriv = &vdev->tq[0]; + vdev->tq_mask = BIT(0); + bce_vhci_cmd_endpoint_create(&vhci->cq, new_devid, &ep0_endp->desc); + + /* Tell USB core to do reset_resume: sends SET_CONFIGURATION + * so devices are properly configured after port_reset. + * Skip the 2 reset_device calls USB core will make — + * T2 was already refreshed above. */ + vhci->port_resume_skip_reset[i] = 2; + udev = usb_hub_find_child(hcd->self.root_hub, i); + if (udev) + udev->reset_resume = 1; + + pr_info("bce_vhci: resume: port %d refresh done (new devid=%d, EP0 only)\n", i, new_devid); + continue; + + reenumerate: + /* Leave port_to_device/devices intact for free_device cleanup. + * free_device will skip device_destroy (reenumerate_mask set). */ + set_bit(i, &vhci->port_reenumerate_mask); + set_bit(i, &vhci->port_resume_mask); + need_poll = 1; } - pr_info("bce_vhci: resume done\n"); + WRITE_ONCE(vhci->port_suppress_connect_mask, 0); + + if (need_poll) + usb_hcd_poll_rh_status(vhci->hcd); + + schedule_delayed_work(&vhci->recovery_watchdog, + msecs_to_jiffies(BCE_VHCI_WATCHDOG_INTERVAL_SECS * 1000)); + + pr_info("bce_vhci: resume done (cycle %u)\n", resume_cycle); return 0; } static int bce_vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) { struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; - pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress); if (!q) return -ENOENT; - return bce_vhci_urb_create(q, urb); + + pr_debug("bce_vhci_urb_enqueue %i:%x\n", q->dev_addr, urb->ep->desc.bEndpointAddress); + return bce_vhci_urb_create(q, urb, mem_flags); } static int bce_vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) { struct bce_vhci_transfer_queue *q = urb->ep->hcpriv; + if (!q) + return -ENOENT; pr_debug("bce_vhci_urb_dequeue %x\n", urb->ep->desc.bEndpointAddress); return bce_vhci_urb_request_cancel(q, urb, status); } @@ 
-462,7 +745,6 @@ static int bce_vhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, s bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; struct bce_vhci_device *vdev = vhci->devices[devid]; pr_debug("bce_vhci_add_endpoint %x/%x:%x\n", udev->portnum, devid, endp_index); - if (udev->bus->root_hub == udev) /* The USB hub */ return 0; if (vdev == NULL) @@ -487,20 +769,38 @@ static int bce_vhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct bce_vhci *vhci = bce_vhci_from_hcd(hcd); bce_vhci_device_t devid = vhci->port_to_device[udev->portnum]; struct bce_vhci_transfer_queue *q = endp->hcpriv; - struct bce_vhci_device *vdev = vhci->devices[devid]; - pr_info("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index); + struct bce_vhci_device *vdev; + pr_debug("bce_vhci_drop_endpoint %x:%x\n", udev->portnum, endp_index); + + /* + * Suspend/resume fix: Device may have been freed during reset. + * Check validity before accessing device structures. + */ + if (!devid || !vhci->devices[devid]) { + endp->hcpriv = NULL; + return 0; + } + vdev = vhci->devices[devid]; + if (!q) { - if (vdev && vdev->tq_mask & BIT(endp_index)) { - pr_err("something deleted the hcpriv?\n"); + if (vdev->tq_mask & BIT(endp_index)) { + pr_debug("bce-vhci: [%02x] drop_endpoint: hcpriv cleared (cancel_all teardown)\n", + endp_index); q = &vdev->tq[endp_index]; } else { return 0; } } - bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu)); - vhci->devices[devid]->tq_mask &= ~BIT(endp_index); + /* During device disconnect, skip T2 endpoint_destroy command. + * device_destroy in free_device handles T2-side cleanup implicitly + * (same pattern as bce_vhci_reset_device). Avoids one T2 round-trip + * per endpoint on the shared command queue mutex. 
*/ + if (udev->state != USB_STATE_NOTATTACHED) + bce_vhci_cmd_endpoint_destroy(&vhci->cq, devid, (u8) (endp->desc.bEndpointAddress & 0x8Fu)); + vdev->tq_mask &= ~BIT(endp_index); bce_vhci_destroy_transfer_queue(vhci, q); + endp->hcpriv = NULL; return 0; } @@ -515,6 +815,7 @@ static int bce_vhci_create_message_queues(struct bce_vhci *vhci) return -EINVAL; } spin_lock_init(&vhci->msg_asynchronous_lock); + spin_lock_init(&vhci->msg_isochronous_lock); bce_vhci_command_queue_create(&vhci->cq, &vhci->msg_commands); return 0; } @@ -587,6 +888,8 @@ static int bce_vhci_handle_firmware_event(struct bce_vhci *vhci, struct bce_vhci struct bce_vhci_transfer_queue *tq; if (msg->cmd == BCE_VHCI_CMD_ENDPOINT_REQUEST_STATE || msg->cmd == BCE_VHCI_CMD_ENDPOINT_SET_STATE) { devid = (bce_vhci_device_t) (msg->param1 & 0xff); + if (devid >= 16) + return BCE_VHCI_BAD_ARGUMENT; endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); dev = vhci->devices[devid]; if (!dev || !(dev->tq_mask & BIT(endp))) @@ -685,26 +988,61 @@ static void bce_vhci_firmware_event_completion(struct bce_queue_sq *sq) static void bce_vhci_handle_system_event(struct bce_vhci_event_queue *q, struct bce_vhci_message *msg) { struct usb_hcd *hcd = q->vhci->hcd; + u8 port; + if (msg->cmd & 0x8000) { bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); - } else if (msg->cmd == BCE_VHCI_CMD_PORT_STATUS_CHANGE && - msg->param1 > 0 && - msg->param1 <= q->vhci->port_count) { - /* Port status change notification from T2 — flag the port and - * tell the USB framework to re-scan so late-initializing devices - * (camera, Touch Bar, iBridge) are discovered. 
*/ - if (hcd) { - pr_warn("bce-vhci: Port %u status change event, requesting hub rescan\n", - msg->param1); - set_bit(msg->param1, &q->vhci->port_change_pending); - usb_hcd_poll_rh_status(hcd); - } else { - pr_warn("bce-vhci: port %u change received but HCD is NULL\n", - msg->param1); + return; + } + + switch (msg->cmd) { + case BCE_VHCI_CMD_PORT_STATUS_CHANGE: + /* + * T2 notifies us that a port connection state changed. + * This happens during boot and resume. Mark the port as changed + * so hub_status_data reports it, then notify USB core to poll. + * + * During session refresh (resume), suppress these events for + * ports being refreshed — we don't want USB core to see a + * connection change and trigger re-enumeration. + */ + port = (u8)(msg->param1 & 0xff); + if (port == 0 || port > q->vhci->port_count) { + pr_warn("bce-vhci: port status change with invalid port %u\n", port); + break; } - } else { + if (test_bit(port, &q->vhci->port_suppress_connect_mask)) { + pr_debug("bce-vhci: port %d status change suppressed (session refresh)\n", port); + break; + } + pr_info("bce-vhci: port %d status change (status=0x%llx)\n", port, msg->param2); + set_bit(port, &q->vhci->port_resume_mask); + if (hcd) + usb_hcd_poll_rh_status(hcd); + break; + + case BCE_VHCI_CMD_PORT_RESUME: + port = (u8)(msg->param1 & 0xff); + pr_info("bce-vhci: T2 initiated port %d resume (status=0x%llx)\n", port, msg->param2); + if (hcd) + usb_hcd_poll_rh_status(hcd); + break; + + case BCE_VHCI_CMD_PORT_SUSPEND: + port = (u8)(msg->param1 & 0xff); + pr_info("bce-vhci: T2 initiated port %d suspend (status=0x%llx)\n", port, msg->param2); + break; + + case BCE_VHCI_CMD_CONTROLLER_RESET_NOTIFY: + pr_warn("bce-vhci: T2 controller reset notification (p1=0x%x p2=0x%llx), scheduling recovery\n", + msg->param1, msg->param2); + schedule_work(&q->vhci->w_recovery); + break; + + default: pr_warn("bce-vhci: Unhandled system event: %x s=%x p1=%x p2=%llx\n", msg->cmd, msg->status, msg->param1, msg->param2); + break; 
} } @@ -717,6 +1055,10 @@ static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce bce_vhci_command_queue_deliver_completion(&q->vhci->cq, msg); } else if (msg->cmd == BCE_VHCI_CMD_TRANSFER_REQUEST || msg->cmd == BCE_VHCI_CMD_CONTROL_TRANSFER_STATUS) { devid = (bce_vhci_device_t) (msg->param1 & 0xff); + if (devid >= 16) { + pr_err("bce-vhci: USB event devid %u out of range\n", devid); + return; + } endp = bce_vhci_endpoint_index((u8) ((msg->param1 >> 8) & 0xff)); dev = q->vhci->devices[devid]; if (!dev || (dev->tq_mask & BIT(endp)) == 0) { @@ -732,6 +1074,161 @@ static void bce_vhci_handle_usb_event(struct bce_vhci_event_queue *q, struct bce +static void bce_vhci_recovery_w(struct work_struct *ws) +{ + struct bce_vhci *vhci = container_of(ws, struct bce_vhci, w_recovery); + struct usb_hcd *hcd = vhci->hcd; + int i, j, status; + struct bce_vhci_device *vdev; + unsigned long flags; + + /* Don't attempt recovery if controller is permanently dead */ + if (READ_ONCE(vhci->controller_dead)) + return; + + /* Rate limit: at most one recovery per cooldown period */ + if (vhci->last_recovery_jiffies && + time_before(jiffies, vhci->last_recovery_jiffies + + msecs_to_jiffies(BCE_VHCI_RECOVERY_COOLDOWN_SECS * 1000))) { + pr_info("bce-vhci: recovery skipped (cooldown)\n"); + return; + } + + /* Prevent concurrent recovery */ + if (atomic_cmpxchg(&vhci->recovering, 0, 1) != 0) { + pr_info("bce-vhci: recovery already in progress\n"); + return; + } + + vhci->last_recovery_jiffies = jiffies; + pr_warn("bce-vhci: === VHCI desync recovery starting ===\n"); + + /* Phase 1: Mark all transfer queues inactive + cancel in-flight URBs */ + for (i = 0; i < 16; i++) { + if (!vhci->port_to_device[i]) + continue; + vdev = vhci->devices[vhci->port_to_device[i]]; + if (!vdev) + continue; + for (j = 0; j < 32; j++) { + if (!(vdev->tq_mask & BIT(j))) + continue; + spin_lock_irqsave(&vdev->tq[j].urb_lock, flags); + vdev->tq[j].active = false; + 
spin_unlock_irqrestore(&vdev->tq[j].urb_lock, flags); + bce_vhci_transfer_queue_cancel_all(&vdev->tq[j]); + } + } + + flush_workqueue(vhci->tq_state_wq); + + /* Phase 2: Attempt controller pause (may fail — that's expected). + * Use SHORT timeout: T2 is likely desynced so this will timeout anyway. */ + { + struct bce_vhci_message cmd = { .cmd = BCE_VHCI_CMD_CONTROLLER_PAUSE }, res; + status = bce_vhci_command_queue_execute(&vhci->cq, &cmd, &res, + BCE_VHCI_CMD_TIMEOUT_SHORT); + } + if (status) + pr_info("bce-vhci: recovery: controller_pause returned %d (expected if desynced)\n", status); + + /* Phase 3: Flush event queues to drain stale completions */ + bce_vhci_event_queue_pause(&vhci->ev_commands); + bce_vhci_event_queue_pause(&vhci->ev_system); + bce_vhci_event_queue_pause(&vhci->ev_isochronous); + bce_vhci_event_queue_pause(&vhci->ev_interrupt); + bce_vhci_event_queue_pause(&vhci->ev_asynchronous); + + /* Flush stale DMA submissions on all three transfer message queues */ + bce_cmd_flush_memory_queue(vhci->dev->cmd_cmdq, (u16) vhci->msg_asynchronous.sq->qid); + bce_cmd_flush_memory_queue(vhci->dev->cmd_cmdq, (u16) vhci->msg_isochronous.sq->qid); + bce_cmd_flush_memory_queue(vhci->dev->cmd_cmdq, (u16) vhci->msg_interrupt.sq->qid); + + /* Resume event queues */ + bce_vhci_event_queue_resume(&vhci->ev_commands); + bce_vhci_event_queue_resume(&vhci->ev_system); + bce_vhci_event_queue_resume(&vhci->ev_asynchronous); + bce_vhci_event_queue_resume(&vhci->ev_isochronous); + bce_vhci_event_queue_resume(&vhci->ev_interrupt); + + /* Phase 4: Restart controller. + * Use MEDIUM timeout: after flushing, T2 should respond quickly. 
*/ + { + struct bce_vhci_message cmd = { .cmd = BCE_VHCI_CMD_CONTROLLER_START }, res; + status = bce_vhci_command_queue_execute(&vhci->cq, &cmd, &res, + BCE_VHCI_CMD_TIMEOUT_MEDIUM); + } + if (status) { + vhci->recovery_fail_count++; + if (vhci->recovery_fail_count >= BCE_VHCI_MAX_RECOVERY_FAILURES) { + pr_err("bce-vhci: recovery: controller_start failed (%d) — %u consecutive failures, marking controller DEAD\n", + status, vhci->recovery_fail_count); + pr_err("bce-vhci: T2 VHCI is stuck. Reload apple-bce module or reboot to recover.\n"); + WRITE_ONCE(vhci->controller_dead, true); + } else { + pr_err("bce-vhci: recovery: controller_start failed (%d), attempt %u/%u\n", + status, vhci->recovery_fail_count, BCE_VHCI_MAX_RECOVERY_FAILURES); + } + goto out; + } + + /* Recovery succeeded — reset failure counter */ + vhci->recovery_fail_count = 0; + + /* Phase 5: Signal USB core to re-enumerate all active ports. + * Phase 1 already marked queues inactive and cancelled URBs. + * Phase 4 restarted the controller so T2 can process commands. 
+ * Let USB core's normal path handle the rest: + * free_device -> destroy TQs + device_destroy + * reset_device -> port_reset + * enable_device -> device_create + EP0 */ + for (i = 0; i < 16; i++) { + if (!vhci->port_to_device[i]) + continue; + pr_info("bce-vhci: recovery: port %d signaling re-enumeration\n", i); + set_bit(i, &vhci->port_reenumerate_mask); + set_bit(i, &vhci->port_resume_mask); + } + + /* Phase 6: Notify USB core to re-enumerate */ + usb_hcd_poll_rh_status(hcd); + + pr_warn("bce-vhci: === VHCI desync recovery complete ===\n"); + +out: + atomic_set(&vhci->recovering, 0); + + /* Don't restart watchdog if controller is dead */ + if (!READ_ONCE(vhci->controller_dead)) + schedule_delayed_work(&vhci->recovery_watchdog, + msecs_to_jiffies(BCE_VHCI_WATCHDOG_INTERVAL_SECS * 1000)); +} + +static void bce_vhci_watchdog_w(struct work_struct *ws) +{ + struct bce_vhci *vhci = container_of(ws, struct bce_vhci, recovery_watchdog.work); + u32 port_status; + int status; + + /* Don't probe during recovery or if controller is dead */ + if (atomic_read(&vhci->recovering)) + goto reschedule; + if (READ_ONCE(vhci->controller_dead)) + return; + + /* Probe port 0 — if the command times out, the VHCI is likely desynced */ + status = bce_vhci_cmd_port_status(&vhci->cq, 0, 0, &port_status); + if (status == -ETIMEDOUT) { + pr_warn("bce-vhci: watchdog: port_status probe timed out, scheduling recovery\n"); + schedule_work(&vhci->w_recovery); + return; /* Recovery worker will restart watchdog */ + } + +reschedule: + schedule_delayed_work(&vhci->recovery_watchdog, + msecs_to_jiffies(BCE_VHCI_WATCHDOG_INTERVAL_SECS * 1000)); +} + static const struct hc_driver bce_vhci_driver = { .description = "bce-vhci", .product_desc = "BCE VHCI Host Controller", @@ -774,12 +1271,10 @@ int __init bce_vhci_module_init(void) #endif if (IS_ERR(bce_vhci_class)) { result = PTR_ERR(bce_vhci_class); - goto fail_class; + goto fail_chrdev; } return 0; -fail_class: - class_destroy(bce_vhci_class); 
fail_chrdev: unregister_chrdev_region(bce_vhci_chrdev, 1); if (!result) diff --git a/vhci/vhci.h b/vhci/vhci.h index d1e80d8..48608b7 100644 --- a/vhci/vhci.h +++ b/vhci/vhci.h @@ -1,6 +1,7 @@ #ifndef BCE_VHCI_H #define BCE_VHCI_H +#include #include "queue.h" #include "transfer.h" @@ -23,6 +24,7 @@ struct bce_vhci { struct bce_vhci_message_queue msg_interrupt; struct bce_vhci_message_queue msg_asynchronous; struct spinlock msg_asynchronous_lock; + struct spinlock msg_isochronous_lock; struct bce_vhci_command_queue cq; struct bce_queue_cq *ev_cq; struct bce_vhci_event_queue ev_commands; @@ -37,7 +39,16 @@ struct bce_vhci { struct bce_vhci_device *devices[16]; struct workqueue_struct *tq_state_wq; struct work_struct w_fw_events; - unsigned long port_change_pending; + struct work_struct w_recovery; + struct delayed_work recovery_watchdog; + atomic_t recovering; + unsigned long last_recovery_jiffies; + unsigned int recovery_fail_count; + bool controller_dead; /* Set after repeated recovery failures — all commands return -ENODEV */ + unsigned long port_resume_mask; /* Ports with connection change (accessed atomically) */ + unsigned long port_reenumerate_mask; /* Ports where GetPortStatus hides CONNECTION to force disconnect+re-enum */ + unsigned long port_suppress_connect_mask; /* Ports where PORT_STATUS_CHANGE events are suppressed during session refresh */ + u8 port_resume_skip_reset[16]; /* Skip count for reset_device after bus_resume refresh */ }; int __init bce_vhci_module_init(void); diff --git a/video/encoder.c b/video/encoder.c new file mode 100644 index 0000000..de809a4 --- /dev/null +++ b/video/encoder.c @@ -0,0 +1,1070 @@ +#include "encoder.h" +#include "../apple_bce.h" + +#include +#include +#include +#include + +/* + * Stamp session token at +0x08 in the command buffer. + * The T2 returns this token in the CodecID response[+0x18], and all subsequent + * commands must carry it at offset +0x08. 
(Discovered via disassembly of + * AppleAVEEncoder.bundle — previously thought to be hardcoded 0xFEEDBEEF.) + */ +static inline void ave_stamp_session_token(struct ave_session *session) +{ + *(u64 *)(session->cmd_buf + 0x08) = session->session_token; +} + +static int ave_send_copy_property(struct ave_session *session, const char *name) +{ + pr_debug("apple-ave: [setup] CopyProperty (0x08): \"%s\"\n", name); + ave_build_cmd_copy_property(session->cmd_buf, name); + ave_stamp_session_token(session); + return ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); +} + +static int ave_send_set_property_bool(struct ave_session *session, const char *name, bool value) +{ + pr_debug("apple-ave: [setup] SetProperty (0x09): \"%s\" = %s\n", + name, value ? "true" : "false"); + ave_build_cmd_set_property_bool(session->cmd_buf, name, value); + ave_stamp_session_token(session); + return ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); +} + +static int ave_send_set_property_s32(struct ave_session *session, const char *name, s32 value) +{ + pr_debug("apple-ave: [setup] SetProperty (0x09): \"%s\" = %d (0x%x)\n", + name, value, value); + ave_build_cmd_set_property_s32(session->cmd_buf, name, value); + ave_stamp_session_token(session); + return ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); +} + +static int ave_send_set_property_float32(struct ave_session *session, const char *name, + u32 ieee754_bits) +{ + pr_debug("apple-ave: [setup] SetProperty (0x09): \"%s\" = float32(0x%08x)\n", + name, ieee754_bits); + ave_build_cmd_set_property_float32(session->cmd_buf, name, ieee754_bits); + ave_stamp_session_token(session); + return ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); +} + +static int ave_send_set_property_string(struct ave_session *session, const char *name, + const char *value) +{ + pr_debug("apple-ave: [setup] SetProperty (0x09): \"%s\" = \"%s\"\n", name, value); + 
ave_build_cmd_set_property_string(session->cmd_buf, name, value); + ave_stamp_session_token(session); + return ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); +} + +/* + * IEEE 754 float32 lookup table for Quality percentages 0-100. + * Maps integer percentage to float32 bit pattern: table[n] = float32(n/100.0). + * Avoids kernel FPU — all values precomputed at compile time. + */ +static const u32 ave_quality_to_float32[101] = { + 0x00000000, /* 0 = 0.00 */ + 0x3C23D70A, /* 1 = 0.01 */ + 0x3CA3D70A, /* 2 = 0.02 */ + 0x3CF5C28F, /* 3 = 0.03 */ + 0x3D23D70A, /* 4 = 0.04 */ + 0x3D4CCCCD, /* 5 = 0.05 */ + 0x3D75C28F, /* 6 = 0.06 */ + 0x3D8F5C29, /* 7 = 0.07 */ + 0x3DA3D70A, /* 8 = 0.08 */ + 0x3DB851EC, /* 9 = 0.09 */ + 0x3DCCCCCD, /* 10 = 0.10 */ + 0x3DE147AE, /* 11 = 0.11 */ + 0x3DF5C28F, /* 12 = 0.12 */ + 0x3E051EB8, /* 13 = 0.13 */ + 0x3E0F5C29, /* 14 = 0.14 */ + 0x3E19999A, /* 15 = 0.15 */ + 0x3E23D70A, /* 16 = 0.16 */ + 0x3E2E147B, /* 17 = 0.17 */ + 0x3E3851EC, /* 18 = 0.18 */ + 0x3E428F5C, /* 19 = 0.19 */ + 0x3E4CCCCD, /* 20 = 0.20 */ + 0x3E570A3D, /* 21 = 0.21 */ + 0x3E6147AE, /* 22 = 0.22 */ + 0x3E6B851F, /* 23 = 0.23 */ + 0x3E75C28F, /* 24 = 0.24 */ + 0x3E800000, /* 25 = 0.25 */ + 0x3E851EB8, /* 26 = 0.26 */ + 0x3E8A3D71, /* 27 = 0.27 */ + 0x3E8F5C29, /* 28 = 0.28 */ + 0x3E947AE1, /* 29 = 0.29 */ + 0x3E99999A, /* 30 = 0.30 */ + 0x3E9EB852, /* 31 = 0.31 */ + 0x3EA3D70A, /* 32 = 0.32 */ + 0x3EA8F5C3, /* 33 = 0.33 */ + 0x3EAE147B, /* 34 = 0.34 */ + 0x3EB33333, /* 35 = 0.35 */ + 0x3EB851EC, /* 36 = 0.36 */ + 0x3EBD70A4, /* 37 = 0.37 */ + 0x3EC28F5C, /* 38 = 0.38 */ + 0x3EC7AE14, /* 39 = 0.39 */ + 0x3ECCCCCD, /* 40 = 0.40 */ + 0x3ED1EB85, /* 41 = 0.41 */ + 0x3ED70A3D, /* 42 = 0.42 */ + 0x3EDC28F6, /* 43 = 0.43 */ + 0x3EE147AE, /* 44 = 0.44 */ + 0x3EE66666, /* 45 = 0.45 */ + 0x3EEB851F, /* 46 = 0.46 */ + 0x3EF0A3D7, /* 47 = 0.47 */ + 0x3EF5C28F, /* 48 = 0.48 */ + 0x3EFAE148, /* 49 = 0.49 */ + 0x3F000000, /* 50 = 0.50 */ + 0x3F028F5C, /* 51 = 
0.51 */ + 0x3F051EB8, /* 52 = 0.52 */ + 0x3F07AE14, /* 53 = 0.53 */ + 0x3F0A3D71, /* 54 = 0.54 */ + 0x3F0CCCCD, /* 55 = 0.55 */ + 0x3F0F5C29, /* 56 = 0.56 */ + 0x3F11EB85, /* 57 = 0.57 */ + 0x3F147AE1, /* 58 = 0.58 */ + 0x3F170A3D, /* 59 = 0.59 */ + 0x3F19999A, /* 60 = 0.60 */ + 0x3F1C28F6, /* 61 = 0.61 */ + 0x3F1EB852, /* 62 = 0.62 */ + 0x3F2147AE, /* 63 = 0.63 */ + 0x3F23D70A, /* 64 = 0.64 */ + 0x3F266666, /* 65 = 0.65 */ + 0x3F28F5C3, /* 66 = 0.66 */ + 0x3F2B851F, /* 67 = 0.67 */ + 0x3F2E147B, /* 68 = 0.68 */ + 0x3F30A3D7, /* 69 = 0.69 */ + 0x3F333333, /* 70 = 0.70 */ + 0x3F35C28F, /* 71 = 0.71 */ + 0x3F3851EC, /* 72 = 0.72 */ + 0x3F3AE148, /* 73 = 0.73 */ + 0x3F3D70A4, /* 74 = 0.74 */ + 0x3F400000, /* 75 = 0.75 */ + 0x3F428F5C, /* 76 = 0.76 */ + 0x3F451EB8, /* 77 = 0.77 */ + 0x3F47AE14, /* 78 = 0.78 */ + 0x3F4A3D71, /* 79 = 0.79 */ + 0x3F4CCCCD, /* 80 = 0.80 */ + 0x3F4F5C29, /* 81 = 0.81 */ + 0x3F51EB85, /* 82 = 0.82 */ + 0x3F547AE1, /* 83 = 0.83 */ + 0x3F570A3D, /* 84 = 0.84 */ + 0x3F59999A, /* 85 = 0.85 */ + 0x3F5C28F6, /* 86 = 0.86 */ + 0x3F5EB852, /* 87 = 0.87 */ + 0x3F6147AE, /* 88 = 0.88 */ + 0x3F63D70A, /* 89 = 0.89 */ + 0x3F666666, /* 90 = 0.90 */ + 0x3F68F5C3, /* 91 = 0.91 */ + 0x3F6B851F, /* 92 = 0.92 */ + 0x3F6E147B, /* 93 = 0.93 */ + 0x3F70A3D7, /* 94 = 0.94 */ + 0x3F733333, /* 95 = 0.95 */ + 0x3F75C28F, /* 96 = 0.96 */ + 0x3F7851EC, /* 97 = 0.97 */ + 0x3F7AE148, /* 98 = 0.98 */ + 0x3F7D70A4, /* 99 = 0.99 */ + 0x3F800000, /* 100 = 1.00 */ +}; + +/* + * Build a T2 ProfileLevel string from V4L2 HEVC profile and level enums. 
+ * Format: "HEVC_{profile}_{level}" or "HEVC_{profile}_AutoLevel" + * + * The T2 encoder plugin accepts strings like: + * "HEVC_Main_AutoLevel", "HEVC_Main_5.1", "HEVC_Main10_4.0" + */ +static void ave_build_profile_level_string(char *buf, size_t size, s32 profile, s32 level) +{ + const char *prof_str; + const char *lvl_str; + + switch (profile) { + case V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10: + prof_str = "Main10"; + break; + case V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE: + prof_str = "MainStill"; + break; + default: + prof_str = "Main"; + break; + } + + switch (level) { + case V4L2_MPEG_VIDEO_HEVC_LEVEL_1: lvl_str = "1.0"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_2: lvl_str = "2.0"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_2_1: lvl_str = "2.1"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_3: lvl_str = "3.0"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_3_1: lvl_str = "3.1"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_4: lvl_str = "4.0"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_4_1: lvl_str = "4.1"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_5: lvl_str = "5.0"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_5_1: lvl_str = "5.1"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_5_2: lvl_str = "5.2"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_6: lvl_str = "6.0"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_6_1: lvl_str = "6.1"; break; + case V4L2_MPEG_VIDEO_HEVC_LEVEL_6_2: lvl_str = "6.2"; break; + default: lvl_str = NULL; break; + } + + if (lvl_str) + snprintf(buf, size, "HEVC_%s_%s", prof_str, lvl_str); + else + snprintf(buf, size, "HEVC_%s_AutoLevel", prof_str); +} + +int ave_session_setup(struct ave_session *session, struct apple_bce_device *bce, + u32 width, u32 height, const struct ave_enc_params *params) +{ + int status; + char profile_buf[64]; + + pr_debug("apple-ave: === SESSION SETUP START ===\n"); + pr_debug("apple-ave: params: %ux%u @ %u bps, %u/%u fps\n", + width, height, params->bitrate, params->fps_num, params->fps_den); + + memset(session, 0, sizeof(*session)); + session->bce 
= bce; + session->width = width; + session->height = height; + session->bitrate = params->bitrate; + session->fps_num = params->fps_num; + session->fps_den = params->fps_den; + session->state = AVE_STATE_IDLE; + + session->cmd_buf = kmalloc(AVE_CMD_BUF_SIZE, GFP_KERNEL); + if (!session->cmd_buf) + return -ENOMEM; + + /* Step 1: Create BCE queues */ + pr_debug("apple-ave: [setup] Step 1: Creating BCE queues\n"); + status = ave_queues_create(bce, &session->queues); + if (status) { + pr_err("apple-ave: [setup] queue creation failed (%d)\n", status); + goto fail_cmd; + } + + /* Step 2: Q1 recv buffers are submitted per-command in ave_cmd_send_sync(). + * Q2/Q3 recv buffers are pre-submitted before frame encoding, not here. + * macOS does NOT submit any Q2/Q3 buffers during the setup phase. */ + + /* Step 3: Cmd 0x00 — Codec ID "hvc1" */ + pr_debug("apple-ave: [setup] Step 3: CodecID (hvc1)\n"); + ave_build_cmd_codec_id(session->cmd_buf); + status = ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); + if (status) { + pr_err("apple-ave: [setup] CodecID FAILED (%d)\n", status); + goto fail_queues; + } + pr_debug("apple-ave: [setup] Step 3: CodecID OK\n"); + + /* Extract session token from T2's CodecID response at offset +0x18. + * Disassembly of AppleAVEEncoder.bundle shows: storage[0x08] = response[0x18]. + * This token (observed as 0xFEEDBEEF) must be placed at +0x08 in all + * subsequent commands. 
*/ + session->session_token = *(u64 *)(session->queues.cmd_resp_buf + 0x18); + pr_debug("apple-ave: [setup] session token = 0x%llx\n", session->session_token); + + /* Step 4: Cmd 0x01 — Session config */ + pr_debug("apple-ave: [setup] Step 4: SessionConfig (%ux%u)\n", width, height); + ave_build_cmd_session_config(session->cmd_buf, width, height); + ave_stamp_session_token(session); + status = ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); + if (status) { + pr_err("apple-ave: [setup] SessionConfig FAILED (%d)\n", status); + goto fail_queues; + } + pr_debug("apple-ave: [setup] Step 4: SessionConfig OK\n"); + + /* Step 5: Cmd 0x08 — CopyProperty "UsingHardwareAcceleratedVideoEncoder" */ + pr_debug("apple-ave: [setup] Step 5: CopyProperty UsingHardwareAcceleratedVideoEncoder\n"); + status = ave_send_copy_property(session, "UsingHardwareAcceleratedVideoEncoder"); + if (status) { + pr_err("apple-ave: [setup] UsingHWAccel #1 FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 6: Cmd 0x08 — CopyProperty "UsingHardwareAcceleratedVideoEncoder" */ + pr_debug("apple-ave: [setup] Step 6: CopyProperty UsingHardwareAcceleratedVideoEncoder\n"); + status = ave_send_copy_property(session, "UsingHardwareAcceleratedVideoEncoder"); + if (status) { + pr_err("apple-ave: [setup] UsingHWAccel #2 FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 7: SetProperty "RealTime" = true */ + status = ave_send_set_property_bool(session, "RealTime", true); + if (status) { + pr_err("apple-ave: [setup] RealTime FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 8: SetProperty "AllowFrameReordering" = false + * B-frames require frame reordering, but our synchronous pipeline + * (submit 1 frame, wait for output) cannot handle buffered/reordered + * output. Keep disabled unconditionally. 
*/ + status = ave_send_set_property_bool(session, "AllowFrameReordering", false); + if (status) { + pr_err("apple-ave: [setup] AllowFrameReordering FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 8b: SetProperty "ExpectedFrameRate" (integer fps only) */ + if (params->fps_den == 1 && params->fps_num > 0) { + status = ave_send_set_property_s32(session, "ExpectedFrameRate", + params->fps_num); + if (status) + pr_warn("apple-ave: [setup] ExpectedFrameRate rejected (%d)\n", status); + } + + /* Step 8c: SetProperty "ProfileLevel" (string) */ + ave_build_profile_level_string(profile_buf, sizeof(profile_buf), + params->profile, params->level); + status = ave_send_set_property_string(session, "ProfileLevel", profile_buf); + if (status) + pr_warn("apple-ave: [setup] ProfileLevel \"%s\" rejected (%d), using default\n", + profile_buf, status); + + /* Step 9: SetProperty "AverageBitRate" */ + status = ave_send_set_property_s32(session, "AverageBitRate", params->bitrate); + if (status) { + pr_err("apple-ave: [setup] AverageBitRate FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 9b: SetProperty "ConstantBitRate" (CBR mode only) */ + if (params->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) { + status = ave_send_set_property_bool(session, "ConstantBitRate", true); + if (status) + pr_warn("apple-ave: [setup] ConstantBitRate rejected (%d)\n", status); + } + + /* Step 9c: SetProperty "Quality" (CQ mode only, float32) */ + if (params->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CQ) { + s32 q = clamp(params->quality, 1, 100); + + status = ave_send_set_property_float32(session, "Quality", + ave_quality_to_float32[q]); + if (status) + pr_warn("apple-ave: [setup] Quality rejected (%d)\n", status); + } + + /* Step 9d: SetProperty "MaxKeyFrameInterval" (GOP size) */ + if (params->gop_size > 0) { + status = ave_send_set_property_s32(session, "MaxKeyFrameInterval", + params->gop_size); + if (status) + pr_warn("apple-ave: [setup] MaxKeyFrameInterval rejected 
(%d)\n", status); + } + + /* Step 9e: SetProperty "MinAllowedFrameQP" */ + if (params->min_qp > 0) { + status = ave_send_set_property_s32(session, "MinAllowedFrameQP", + params->min_qp); + if (status) + pr_warn("apple-ave: [setup] MinAllowedFrameQP rejected (%d)\n", status); + } + + /* Step 9f: SetProperty "MaxAllowedFrameQP" */ + if (params->max_qp > 0) { + status = ave_send_set_property_s32(session, "MaxAllowedFrameQP", + params->max_qp); + if (status) + pr_warn("apple-ave: [setup] MaxAllowedFrameQP rejected (%d)\n", status); + } + + /* Step 10: CopyProperty "MVHEVCVideoLayerIDs" */ + status = ave_send_copy_property(session, "MVHEVCVideoLayerIDs"); + if (status) { + pr_err("apple-ave: [setup] MVHEVCVideoLayerIDs FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 11: PrepareToEncodeFrames */ + ave_build_cmd_prepare(session->cmd_buf); + ave_stamp_session_token(session); + status = ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); + if (status) { + pr_err("apple-ave: [setup] Prepare FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 11b: SetProperty "BPictures" = 0 + * Our synchronous pipeline cannot handle B-frame reordering. + * T2's processPrepareToEncodeFrames may internally set BPictures=1 + * for resolutions >1080p, so always override to 0 after Prepare. 
*/ + status = ave_send_set_property_s32(session, "BPictures", 0); + if (status) { + pr_err("apple-ave: [setup] BPictures FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 12: SetProperty "ColorPrimaries" */ + status = ave_send_set_property_s32(session, "ColorPrimaries", + params->color_primaries); + if (status) { + pr_err("apple-ave: [setup] ColorPrimaries FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 13: SetProperty "YCbCrMatrix" */ + status = ave_send_set_property_s32(session, "YCbCrMatrix", + params->ycbcr_matrix); + if (status) { + pr_err("apple-ave: [setup] YCbCrMatrix FAILED (%d)\n", status); + goto fail_queues; + } + + /* Step 13b: SetProperty "TransferFunction" */ + status = ave_send_set_property_s32(session, "TransferFunction", + params->transfer_func); + if (status) + pr_warn("apple-ave: [setup] TransferFunction rejected (%d)\n", status); + + session->state = AVE_STATE_CONFIGURED; + session->frame_counter = 0; + pr_debug("apple-ave: === SESSION SETUP COMPLETE (%ux%u @ %u bps) ===\n", + width, height, params->bitrate); + return 0; + +fail_queues: + pr_debug("apple-ave: [setup] cleaning up after failure...\n"); + ave_queues_destroy(&session->queues); +fail_cmd: + kfree(session->cmd_buf); + session->cmd_buf = NULL; + session->state = AVE_STATE_ERROR; + pr_err("apple-ave: === SESSION SETUP FAILED ===\n"); + return status; +} + +void ave_session_teardown(struct ave_session *session) +{ + if (!session->cmd_buf) + return; + + pr_debug("apple-ave: === SESSION TEARDOWN START ===\n"); + + if (session->state == AVE_STATE_CONFIGURED || + session->state == AVE_STATE_ENCODING || + session->state == AVE_STATE_ERROR) { + /* Re-enable auto-resubmit so teardown commands work normally */ + session->queues.q3_auto_resubmit = true; + + /* Cmd 0x03 — CompleteFrames: drain in-flight frames */ + pr_debug("apple-ave: [teardown] sending CompleteFrames (0x03)\n"); + ave_build_cmd_complete_frames(session->cmd_buf); + ave_stamp_session_token(session); + 
ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); + + /* Cmd 0x06 — EndSession: tell firmware to release session */ + pr_debug("apple-ave: [teardown] sending EndSession (0x06)\n"); + ave_build_cmd_end_session(session->cmd_buf); + ave_stamp_session_token(session); + ave_cmd_send_sync(&session->queues, session->cmd_buf, AVE_CMD_BUF_SIZE); + } + + ave_queues_destroy(&session->queues); + kfree(session->cmd_buf); + session->cmd_buf = NULL; + kfree(session->hvcc_data); + session->hvcc_data = NULL; + kfree(session->annex_b_header); + session->annex_b_header = NULL; + session->state = AVE_STATE_IDLE; + pr_debug("apple-ave: === SESSION TEARDOWN COMPLETE ===\n"); +} + +/* + * Extract hvcC from Frame 1's first Q3 callback and build an Annex B header + * containing VPS + SPS + PPS (prepended to IDR frames). + * + * The hvcC structure starts at callback buffer offset +0x68 and follows + * ISO/IEC 14496-15: 23-byte fixed header, then arrays of parameter sets. + */ +static int ave_extract_hvcc(struct ave_session *session, const void *q3_data) +{ + const u8 *hvc = (const u8 *)q3_data + AVE_HVCC_OFFSET; + const u8 *hvc_end = hvc + AVE_CMD_BUF_SIZE - AVE_HVCC_OFFSET; + u8 num_arrays, nal_type; + u16 num_nalus, nal_len; + const u8 *p; + u8 *out; + size_t out_pos = 0; + int i, j; + static const u8 start_code[4] = {0x00, 0x00, 0x00, 0x01}; + + if (hvc[0] != 1) { + pr_warn("apple-ave: [hvcc] unexpected version %d, expected 1\n", hvc[0]); + return -EINVAL; + } + + num_arrays = hvc[AVE_HVCC_HEADER_SIZE - 1]; + pr_debug("apple-ave: [hvcc] version=%d profile=%d level=%d numArrays=%d\n", + hvc[0], hvc[1] & 0x1F, hvc[12], num_arrays); + + /* First pass: calculate total size needed */ + p = hvc + AVE_HVCC_HEADER_SIZE; + for (i = 0; i < num_arrays && i < 8; i++) { + if (p + 3 > hvc_end) + break; + num_nalus = ((u16)p[1] << 8) | p[2]; + p += 3; + for (j = 0; j < num_nalus && j < 16; j++) { + if (p + 2 > hvc_end) + goto size_done; + nal_len = ((u16)p[0] << 8) | p[1]; + p += 
2; + if (p + nal_len > hvc_end) + goto size_done; + out_pos += 4 + nal_len; /* start code + NAL data */ + p += nal_len; + } + } +size_done: + if (out_pos == 0) { + pr_warn("apple-ave: [hvcc] no parameter sets found\n"); + return -EINVAL; + } + + /* Allocate and build Annex B header */ + session->annex_b_header = kmalloc(out_pos, GFP_KERNEL); + if (!session->annex_b_header) + return -ENOMEM; + session->annex_b_header_size = out_pos; + + out = session->annex_b_header; + out_pos = 0; + p = hvc + AVE_HVCC_HEADER_SIZE; + for (i = 0; i < num_arrays && i < 8; i++) { + if (p + 3 > hvc_end) + break; + nal_type = p[0] & 0x3F; + num_nalus = ((u16)p[1] << 8) | p[2]; + p += 3; + for (j = 0; j < num_nalus && j < 16; j++) { + if (p + 2 > hvc_end) + goto build_done; + nal_len = ((u16)p[0] << 8) | p[1]; + p += 2; + if (p + nal_len > hvc_end) + goto build_done; + + pr_debug("apple-ave: [hvcc] array[%d]: type=%d (%s) len=%d\n", + i, nal_type, + nal_type == 32 ? "VPS" : + nal_type == 33 ? "SPS" : + nal_type == 34 ? "PPS" : "?", + nal_len); + + memcpy(out + out_pos, start_code, 4); + out_pos += 4; + memcpy(out + out_pos, p, nal_len); + out_pos += nal_len; + p += nal_len; + } + } +build_done: + session->annex_b_header_size = out_pos; + + /* Also save a copy of the raw hvcC for potential future use */ + session->hvcc_data = kmalloc(AVE_CMD_BUF_SIZE, GFP_KERNEL); + if (session->hvcc_data) { + memcpy(session->hvcc_data, q3_data, AVE_CMD_BUF_SIZE); + session->hvcc_size = AVE_CMD_BUF_SIZE; + } + + pr_debug("apple-ave: [hvcc] Annex B header built: %zu bytes (VPS+SPS+PPS)\n", + session->annex_b_header_size); + return 0; +} + +/* + * Resubmit consumed Q3 buffers to the ring so T2 has buffers for the next frame. 
+ */
+static void ave_resubmit_q3_bufs(struct ave_queues *queues, size_t count)
+{
+	size_t idx = 0;
+
+	/* Stop at the first buffer that cannot be queued; the rest are not tried */
+	while (idx < count) {
+		if (ave_submit_q3_buf(queues) != 0) {
+			pr_err("apple-ave: [encode] failed to resubmit Q3 buf %zu\n", idx);
+			return;
+		}
+		idx++;
+	}
+}
+
+/*
+ * Frame 1 encoding pipeline (verified on macOS at ~50ms):
+ *
+ * 1. Submit: Q0(EncodeFrame) → Q0(Y) → Q0(UV) → Q1(recv)
+ * 2. Wait Q3 #1: hvcC config (~40ms) — extract VPS/SPS/PPS
+ * 3. Echo Q3 #1 on Q2 (hvcC acknowledgment)
+ * 4. Wait Q3 #2: metadata (~1ms)
+ * 5. Wait Q3 #3: encoded NAL data (~0.2ms) — result scalar = size
+ * 6. Echo Q3 #2 on Q2 (metadata acknowledgment)
+ * 7. Wait Q1: frame complete (near-instant after Q3/Q2 done)
+ * 8. Resubmit consumed Q3 buffers
+ *
+ * CRITICAL: Service Q3 events and echo on Q2 BEFORE waiting for Q1.
+ * The T2 waits for Q2 callback echoes before signalling Q1.
+ *
+ * The EncodeFrame command must already be in session->cmd_buf; it is
+ * submitted on Q0 as-is.
+ *
+ * Returns 0 on success with *encoded_size set to the number of Annex B bytes
+ * written to @out_buf (parameter-set header included when one was extracted),
+ * or to 0 when the T2 reported an empty frame. On any failure the Y/UV DMA
+ * mappings that are still active are released, session->state becomes
+ * AVE_STATE_ERROR and a non-zero status is returned (-EIO if none was set).
+ */
+static int ave_encode_first_frame(struct ave_session *session,
+				  void *y_data, size_t y_size,
+				  void *uv_data, size_t uv_size,
+				  void *out_buf, size_t out_buf_size,
+				  size_t *encoded_size)
+{
+	struct ave_queues *q = &session->queues;
+	struct device *dev = &q->bce->pci->dev;
+	struct bce_dma_buffer y_dma, uv_dma;
+	bool y_mapped = false, uv_mapped = false;
+	void *q3_data;
+	size_t raw_size, out_pos = 0;
+	int status;
+
+	/* Disable Q3 auto-resubmit — we need to read the completed data */
+	q->q3_auto_resubmit = false;
+	ave_q3_reset(q);
+
+	/* Submit EncodeFrame command on Q0 */
+	status = ave_submit_q0_cmd(q, session->cmd_buf, AVE_CMD_BUF_SIZE);
+	if (status) {
+		pr_err("apple-ave: [frame1] Q0 EncodeFrame submit failed (%d)\n", status);
+		goto fail;
+	}
+
+	/* Submit Y and UV plane data on Q0 (async — no Q0 ack wait) */
+	status = ave_submit_frame_data_async(q, y_data, y_size, &y_dma);
+	if (status) {
+		pr_err("apple-ave: [frame1] Y plane submit failed (%d)\n", status);
+		goto fail;
+	}
+	y_mapped = true;
+
+	status = ave_submit_frame_data_async(q, uv_data, uv_size, &uv_dma);
+	if (status) {
+
pr_err("apple-ave: [frame1] UV plane submit failed (%d)\n", status); + goto fail; + } + uv_mapped = true; + + /* Submit Q1 receive buffer (arms cmd_completion) */ + status = ave_submit_q1_recv(q); + if (status) { + pr_err("apple-ave: [frame1] Q1 recv submit failed (%d)\n", status); + goto fail; + } + + /* === Service Q3 FIRST (3 events for Frame 1) === */ + + /* Q3 #1: hvcC config (~40ms after submit) */ + pr_debug("apple-ave: [frame1] waiting for Q3 #1 (hvcC)...\n"); + status = ave_wait_q3(q, 10000); + if (status) { + pr_err("apple-ave: [frame1] Q3 #1 (hvcC) TIMEOUT\n"); + goto fail; + } + pr_debug("apple-ave: [frame1] Q3 #1 (hvcC) received, result=0x%zx\n", + ave_q3_completed_size(q, 0)); + + /* Extract hvcC and build Annex B header (VPS+SPS+PPS) */ + q3_data = ave_q3_completed_data(q, 0); + status = ave_extract_hvcc(session, q3_data); + if (status) + pr_warn("apple-ave: [frame1] hvcC extraction failed (%d), continuing without header\n", + status); + + /* Echo hvcC callback on Q2 — T2 waits for this before proceeding */ + status = ave_submit_q2_echo(q, q3_data); + if (status) + pr_warn("apple-ave: [frame1] Q2 echo (hvcC) failed (%d)\n", status); + + /* Q3 #2: metadata callback (~1ms) */ + pr_debug("apple-ave: [frame1] waiting for Q3 #2 (metadata)...\n"); + status = ave_wait_q3(q, 10000); + if (status) { + pr_err("apple-ave: [frame1] Q3 #2 (metadata) TIMEOUT\n"); + goto fail; + } + pr_debug("apple-ave: [frame1] Q3 #2 (metadata) received\n"); + + /* Q3 #3: encoded NAL data (~0.2ms) — result scalar = encoded size */ + pr_debug("apple-ave: [frame1] waiting for Q3 #3 (output)...\n"); + status = ave_wait_q3(q, 10000); + if (status) { + pr_err("apple-ave: [frame1] Q3 #3 (output) TIMEOUT\n"); + goto fail; + } + + raw_size = ave_q3_completed_size(q, 2); + pr_debug("apple-ave: [frame1] Q3 #3 (output) received, encoded_size=%zu\n", raw_size); + + if (raw_size > AVE_MAX_ENCODED_SIZE) { + pr_err("apple-ave: [frame1] encoded size %zu exceeds buffer size %d\n", + raw_size, 
AVE_MAX_ENCODED_SIZE); + status = -ENOSPC; + goto fail; + } + + /* Echo metadata callback on Q2 — echoes Q3 event #1 (metadata), + * NOT the output data. Matches reference: cb3_buf still holds metadata + * because the output went into a separate buffer (Q3 ring slot). */ + q3_data = ave_q3_completed_data(q, 1); + status = ave_submit_q2_echo(q, q3_data); + if (status) + pr_warn("apple-ave: [frame1] Q2 echo (metadata) failed (%d)\n", status); + + /* === Wait Q1 last (near-instant, T2 already done) === */ + pr_debug("apple-ave: [frame1] waiting for Q1...\n"); + status = ave_wait_q1(q, 10000); + if (status) { + pr_err("apple-ave: [frame1] Q1 TIMEOUT or error (%d)\n", status); + goto fail; + } + pr_debug("apple-ave: [frame1] Q1 done\n"); + + /* Unmap DMA buffers now that T2 is done */ + bce_unmap_dma_buffer(dev, &y_dma); + y_mapped = false; + bce_unmap_dma_buffer(dev, &uv_dma); + uv_mapped = false; + + /* === Build output: Annex B header + converted NAL data === */ + if (raw_size == 0) { + pr_warn("apple-ave: [frame1] T2 returned 0 bytes\n"); + *encoded_size = 0; + goto resubmit; + } + + /* Prepend Annex B header (VPS+SPS+PPS) for IDR frame */ + if (session->annex_b_header && session->annex_b_header_size > 0) { + if (session->annex_b_header_size > out_buf_size) { + status = -ENOSPC; + goto fail; + } + memcpy(out_buf, session->annex_b_header, session->annex_b_header_size); + out_pos = session->annex_b_header_size; + } + + /* Convert length-prefixed NALs to Annex B and append */ + q3_data = ave_q3_completed_data(q, 2); + status = ave_convert_to_annex_b(q3_data, raw_size, + (u8 *)out_buf + out_pos, + out_buf_size - out_pos, encoded_size); + if (status) { + pr_err("apple-ave: [frame1] Annex B conversion failed (%d)\n", status); + goto fail; + } + *encoded_size += out_pos; + +resubmit: + /* Resubmit consumed Q3 buffers (3 for Frame 1) */ + ave_resubmit_q3_bufs(q, q->q3_event_count); + return 0; + +fail: + if (y_mapped) + bce_unmap_dma_buffer(dev, &y_dma); + if (uv_mapped) + 
bce_unmap_dma_buffer(dev, &uv_dma);
+	session->state = AVE_STATE_ERROR;
+	return status ? status : -EIO; /* never report success from the failure path */
+}
+
+/*
+ * Frame 2+ encoding pipeline (verified on macOS at ~18ms):
+ *
+ * 1. Submit: Q0(EncodeFrame) → Q0(Y) → Q0(UV) → Q1(recv)
+ * 2. Wait Q1: EncodeFrame ACCEPT (~3ms — just queues the frame)
+ * 3. Submit: Q0(CompleteFrames) → Q1(recv)
+ * 4. Wait Q3 #1: metadata (~8ms)
+ * 5. Wait Q3 #2: encoded NAL data (~0.1ms) — result scalar = size
+ * 6. Echo Q3 #1 on Q2 (metadata acknowledgment)
+ * 7. Wait Q1: CompleteFrames done (near-instant after Q3/Q2)
+ * 8. Resubmit consumed Q3 buffers
+ *
+ * The EncodeFrame command must already be in session->cmd_buf on entry; the
+ * buffer is reused for the CompleteFrames command in step 3.
+ *
+ * @is_keyframe: when true and an Annex B header was stored earlier, that
+ *               header is written to @out_buf ahead of the converted NALs.
+ *
+ * Returns 0 on success with *encoded_size set to the number of bytes written
+ * to @out_buf (0 when the T2 reported an empty frame). On failure any Y/UV
+ * DMA mapping still active is released, session->state becomes
+ * AVE_STATE_ERROR and a non-zero status is returned (-EIO if none was set).
+ */
+static int ave_encode_next_frame(struct ave_session *session,
+				 void *y_data, size_t y_size,
+				 void *uv_data, size_t uv_size,
+				 void *out_buf, size_t out_buf_size,
+				 size_t *encoded_size, bool is_keyframe)
+{
+	struct ave_queues *q = &session->queues;
+	struct device *dev = &q->bce->pci->dev;
+	struct bce_dma_buffer y_dma, uv_dma;
+	bool y_mapped = false, uv_mapped = false; /* tell the fail path what to unmap */
+	void *q3_data;
+	size_t raw_size, out_pos = 0;
+	int status;
+
+	/* Disable Q3 auto-resubmit, reset tracking */
+	q->q3_auto_resubmit = false;
+	ave_q3_reset(q);
+
+	/* Submit EncodeFrame command on Q0 */
+	status = ave_submit_q0_cmd(q, session->cmd_buf, AVE_CMD_BUF_SIZE);
+	if (status) {
+		pr_err("apple-ave: [frameN] Q0 EncodeFrame submit failed (%d)\n", status);
+		goto fail;
+	}
+
+	/* Submit Y and UV plane data on Q0 (async) */
+	status = ave_submit_frame_data_async(q, y_data, y_size, &y_dma);
+	if (status) {
+		pr_err("apple-ave: [frameN] Y plane submit failed (%d)\n", status);
+		goto fail;
+	}
+	y_mapped = true;
+
+	status = ave_submit_frame_data_async(q, uv_data, uv_size, &uv_dma);
+	if (status) {
+		pr_err("apple-ave: [frameN] UV plane submit failed (%d)\n", status);
+		goto fail;
+	}
+	uv_mapped = true;
+
+	/* Submit Q1 recv for EncodeFrame accept */
+	status = ave_submit_q1_recv(q);
+	if (status) {
+		pr_err("apple-ave: [frameN] Q1 recv submit failed (%d)\n", status);
+		goto fail;
+	}
+
+	/*
=== Wait Q1: EncodeFrame ACCEPT (~3ms, just queues the frame) === */ + pr_debug("apple-ave: [frameN] waiting for Q1 EncodeFrame accept...\n"); + status = ave_wait_q1(q, 10000); + if (status) { + pr_err("apple-ave: [frameN] Q1 EncodeFrame accept TIMEOUT or error (%d)\n", status); + goto fail; + } + pr_debug("apple-ave: [frameN] EncodeFrame accepted\n"); + + /* Unmap DMA now — T2 has accepted the frame data */ + bce_unmap_dma_buffer(dev, &y_dma); + y_mapped = false; + bce_unmap_dma_buffer(dev, &uv_dma); + uv_mapped = false; + + /* === Send CompleteFrames (triggers actual encoding) === */ + ave_build_cmd_complete_frames(session->cmd_buf); + ave_stamp_session_token(session); + + status = ave_submit_q0_cmd(q, session->cmd_buf, AVE_CMD_BUF_SIZE); + if (status) { + pr_err("apple-ave: [frameN] Q0 CompleteFrames submit failed (%d)\n", status); + goto fail; + } + + status = ave_submit_q1_recv(q); + if (status) { + pr_err("apple-ave: [frameN] Q1 recv for CompleteFrames failed (%d)\n", status); + goto fail; + } + + /* === Service Q3 FIRST (2 events for Frame 2+) === */ + + /* Q3 #1: metadata (~8ms) */ + pr_debug("apple-ave: [frameN] waiting for Q3 #1 (metadata)...\n"); + status = ave_wait_q3(q, 10000); + if (status) { + pr_err("apple-ave: [frameN] Q3 #1 (metadata) TIMEOUT\n"); + goto fail; + } + pr_debug("apple-ave: [frameN] Q3 #1 (metadata) received\n"); + + /* Q3 #2: encoded NAL data — result scalar = encoded size */ + pr_debug("apple-ave: [frameN] waiting for Q3 #2 (output)...\n"); + status = ave_wait_q3(q, 10000); + if (status) { + pr_err("apple-ave: [frameN] Q3 #2 (output) TIMEOUT\n"); + goto fail; + } + + raw_size = ave_q3_completed_size(q, 1); + pr_debug("apple-ave: [frameN] Q3 #2 (output) received, encoded_size=%zu\n", raw_size); + + if (raw_size > AVE_MAX_ENCODED_SIZE) { + pr_err("apple-ave: [frameN] encoded size %zu exceeds buffer size %d\n", + raw_size, AVE_MAX_ENCODED_SIZE); + status = -ENOSPC; + goto fail; + } + + /* Echo metadata callback on Q2 — echoes Q3 event 
#0 (metadata) */ + q3_data = ave_q3_completed_data(q, 0); + status = ave_submit_q2_echo(q, q3_data); + if (status) + pr_warn("apple-ave: [frameN] Q2 echo (metadata) failed (%d)\n", status); + + /* === Wait Q1: CompleteFrames done (near-instant) === */ + pr_debug("apple-ave: [frameN] waiting for Q1 CompleteFrames...\n"); + status = ave_wait_q1(q, 10000); + if (status) { + pr_err("apple-ave: [frameN] Q1 CompleteFrames TIMEOUT or error (%d)\n", status); + goto fail; + } + pr_debug("apple-ave: [frameN] CompleteFrames done\n"); + + /* === Build output === */ + if (raw_size == 0) { + pr_warn("apple-ave: [frameN] T2 returned 0 bytes\n"); + *encoded_size = 0; + goto resubmit; + } + + /* Prepend Annex B header for keyframes */ + if (is_keyframe && session->annex_b_header && session->annex_b_header_size > 0) { + if (session->annex_b_header_size > out_buf_size) { + status = -ENOSPC; + goto fail; + } + memcpy(out_buf, session->annex_b_header, session->annex_b_header_size); + out_pos = session->annex_b_header_size; + } + + /* Convert length-prefixed NALs to Annex B */ + q3_data = ave_q3_completed_data(q, 1); + status = ave_convert_to_annex_b(q3_data, raw_size, + (u8 *)out_buf + out_pos, + out_buf_size - out_pos, encoded_size); + if (status) { + pr_err("apple-ave: [frameN] Annex B conversion failed (%d)\n", status); + goto fail; + } + *encoded_size += out_pos; + +resubmit: + ave_resubmit_q3_bufs(q, q->q3_event_count); + return 0; + +fail: + if (y_mapped) + bce_unmap_dma_buffer(dev, &y_dma); + if (uv_mapped) + bce_unmap_dma_buffer(dev, &uv_dma); + session->state = AVE_STATE_ERROR; + return status ? 
status : -EIO; +} + +int ave_session_encode_frame(struct ave_session *session, + void *y_data, size_t y_size, + void *uv_data, size_t uv_size, + void *out_buf, size_t out_buf_size, + size_t *encoded_size, bool force_keyframe) +{ + int status; + bool is_first, is_keyframe; + + if (session->state != AVE_STATE_CONFIGURED && + session->state != AVE_STATE_ENCODING) + return -EINVAL; + + session->state = AVE_STATE_ENCODING; + session->frame_counter++; + is_first = (session->frame_counter == 1); + is_keyframe = force_keyframe || is_first; + + /* Pre-submit Q2/Q3 receive buffers on first frame (macOS pattern) */ + if (is_first) { + pr_debug("apple-ave: [encode] first frame — pre-submitting Q2/Q3 buffers\n"); + ave_presubmit_recv_bufs(&session->queues); + } + + pr_debug("apple-ave: === ENCODE FRAME %llu (%s) ===\n", + session->frame_counter, is_first ? "Frame1" : "FrameN"); + + /* Build EncodeFrame command */ + ave_build_cmd_encode_frame(session->cmd_buf, session->frame_counter, + session->width, session->height, + session->fps_num, session->fps_den, + is_keyframe, + (u64)(uintptr_t)y_data); + ave_stamp_session_token(session); + + if (is_first) + status = ave_encode_first_frame(session, y_data, y_size, + uv_data, uv_size, + out_buf, out_buf_size, encoded_size); + else + status = ave_encode_next_frame(session, y_data, y_size, + uv_data, uv_size, + out_buf, out_buf_size, encoded_size, + is_keyframe); + + if (status) { + pr_err("apple-ave: === FRAME %llu FAILED (%d) ===\n", + session->frame_counter, status); + return status; + } + + pr_debug("apple-ave: === FRAME %llu ENCODED: %zu bytes ===\n", + session->frame_counter, *encoded_size); + return 0; +} + +/* + * Convert HEVC length-prefixed NAL units to Annex B format. + * Input: [4-byte length][NAL data][4-byte length][NAL data]... + * Output: [00 00 00 01][NAL data][00 00 00 01][NAL data]... 
/*
 * Convert HEVC length-prefixed NAL units to Annex B format.
 * Input:  [4-byte big-endian length][NAL data][4-byte length][NAL data]...
 * Output: [00 00 00 01][NAL data][00 00 00 01][NAL data]...
 *
 * @src/@src_size: length-prefixed input stream.
 * @dst/@dst_size: output buffer and its capacity in bytes.
 * @out_size:      receives the number of bytes written to @dst on success.
 *
 * Conversion stops at the first NAL whose length is zero or runs past the
 * end of the input; the units converted before that point are kept and the
 * call still returns 0. Fewer than four trailing input bytes are ignored.
 *
 * Returns 0 on success, or -ENOSPC if @dst cannot hold the next unit (in
 * which case *out_size is not written).
 *
 * All bounds checks are written as "needed > remaining" so that a bogus
 * 32-bit NAL length can never wrap the arithmetic, regardless of the width
 * of size_t.
 */
int ave_convert_to_annex_b(const void *src, size_t src_size,
			   void *dst, size_t dst_size, size_t *out_size)
{
	const u8 *in = src;
	u8 *out = dst;
	size_t in_pos = 0, out_pos = 0;
	size_t avail;
	u32 nal_len;
	int nal_count = 0;
	static const u8 start_code[4] = {0x00, 0x00, 0x00, 0x01};

	pr_debug("apple-ave: [annexb] converting %zu bytes\n", src_size);

	/* Invariants: in_pos <= src_size and out_pos <= dst_size. */
	while (src_size - in_pos >= 4) {
		/* Read 4-byte big-endian NAL length */
		nal_len = ((u32)in[in_pos] << 24) |
			  ((u32)in[in_pos + 1] << 16) |
			  ((u32)in[in_pos + 2] << 8) |
			  ((u32)in[in_pos + 3]);
		in_pos += 4;

		if (nal_len == 0 || nal_len > src_size - in_pos) {
			pr_warn("apple-ave: [annexb] invalid NAL length %u at offset %zu (remaining %zu)\n",
				nal_len, in_pos - 4, src_size - in_pos);
			break;
		}

		/* Need room for the start code plus the payload. */
		avail = dst_size - out_pos;
		if (avail < sizeof(start_code) ||
		    nal_len > avail - sizeof(start_code)) {
			pr_err("apple-ave: [annexb] output buffer too small\n");
			return -ENOSPC;
		}

		/* Log NAL type for HEVC: type is bits 1-6 of first byte */
		if (nal_len >= 2) {
			u8 nal_type = (in[in_pos] >> 1) & 0x3f;
			pr_debug("apple-ave: [annexb] NAL #%d: type=%u len=%u\n",
				 nal_count, nal_type, nal_len);
		}

		/* Write Annex B start code */
		memcpy(out + out_pos, start_code, sizeof(start_code));
		out_pos += sizeof(start_code);

		/* Copy NAL data */
		memcpy(out + out_pos, in + in_pos, nal_len);
		out_pos += nal_len;

		in_pos += nal_len;
		nal_count++;
	}

	*out_size = out_pos;
	pr_debug("apple-ave: [annexb] converted %d NALs, %zu -> %zu bytes\n",
		 nal_count, src_size, out_pos);
	return 0;
}
/*
 * State for one encoder session. It is initialised by ave_session_setup(),
 * used by ave_session_encode_frame() and released by ave_session_teardown().
 */
struct ave_session {
	struct apple_bce_device *bce;
	struct ave_queues queues;	/* queue set used to talk to the T2 for this session */

	/* Format parameters */
	u32 width;
	u32 height;
	u32 bitrate;
	u32 fps_num;
	u32 fps_den;

	/* Session state */
	enum ave_session_state state;	/* encode_frame requires CONFIGURED or ENCODING */
	u64 frame_counter;		/* incremented per encode call; 1 marks the first frame */

	/* Session token returned by T2 in CodecID response[+0x18] */
	u64 session_token;

	/* Scratch buffer for building commands (4KB, kmalloc'd) */
	void *cmd_buf;

	/* hvcC decoder config extracted from Frame 1's first Q3 callback */
	void *hvcc_data; /* ISO 14496-15 hvcC box content */
	size_t hvcc_size;

	/* Annex B header (VPS+SPS+PPS) derived from hvcC — prepended to IDR frames */
	void *annex_b_header;		/* may be NULL; the encode path checks before use */
	size_t annex_b_header_size;
};
@@ +#include "protocol.h" +#include "../apple_bce.h" + +#include +#include +#include +#include + +static void ave_q0_completion(struct bce_queue_sq *sq); +static void ave_q1_completion(struct bce_queue_sq *sq); +static void ave_q2_completion(struct bce_queue_sq *sq); +static void ave_q3_completion(struct bce_queue_sq *sq); + +static int ave_alloc_queue_buf(struct apple_bce_device *bce, struct ave_queue_buf *qb, + size_t el_size, size_t el_count) +{ + size_t i; + + qb->el_size = el_size; + qb->el_count = el_count; + qb->head = 0; + qb->tail = 0; + + qb->data = kcalloc(el_count, sizeof(*qb->data), GFP_KERNEL); + if (!qb->data) + return -ENOMEM; + + qb->dma_addrs = kcalloc(el_count, sizeof(*qb->dma_addrs), GFP_KERNEL); + if (!qb->dma_addrs) { + kfree(qb->data); + qb->data = NULL; + return -ENOMEM; + } + + for (i = 0; i < el_count; i++) { + qb->data[i] = dma_alloc_coherent(&bce->pci->dev, el_size, + &qb->dma_addrs[i], GFP_KERNEL); + if (!qb->data[i]) { + pr_err("apple-ave: DMA alloc failed: element %zu (%zu bytes)\n", + i, el_size); + goto fail; + } + } + pr_debug("apple-ave: DMA buf alloc: %zu x %zu bytes (%zu elements)\n", + el_size, el_count, el_count); + return 0; + +fail: + while (i--) + dma_free_coherent(&bce->pci->dev, el_size, qb->data[i], qb->dma_addrs[i]); + kfree(qb->dma_addrs); + qb->dma_addrs = NULL; + kfree(qb->data); + qb->data = NULL; + return -ENOMEM; +} + +static void ave_free_queue_buf(struct apple_bce_device *bce, struct ave_queue_buf *qb) +{ + size_t i; + + if (!qb->data) + return; + + for (i = 0; i < qb->el_count; i++) { + if (qb->data[i]) + dma_free_coherent(&bce->pci->dev, qb->el_size, + qb->data[i], qb->dma_addrs[i]); + } + kfree(qb->dma_addrs); + qb->dma_addrs = NULL; + kfree(qb->data); + qb->data = NULL; +} + +int ave_queues_create(struct apple_bce_device *bce, struct ave_queues *queues) +{ + int status; + + pr_debug("apple-ave: creating BCE queues...\n"); + + memset(queues, 0, sizeof(*queues)); + queues->bce = bce; + 
init_completion(&queues->cmd_completion); + init_completion(&queues->q3_completion); + + /* CQ0 + Q0: AVEParameterSubmitQueue — Host→T2 (flags=3) */ + queues->cq[0] = bce_create_cq(bce, AVE_CQ_DEPTH); + if (!queues->cq[0]) { + pr_err("apple-ave: failed to create CQ0\n"); + return -ENOMEM; + } + queues->sq_submit = bce_create_sq_with_flags(bce, queues->cq[0], + "AVEParameterSubmitQueue", + AVE_SQ_DEPTH, 3, + ave_q0_completion, queues); + if (!queues->sq_submit) { + pr_err("apple-ave: FAILED to create Q0 (ParameterSubmit)\n"); + status = -EINVAL; + goto fail_cq0; + } + pr_debug("apple-ave: Q0 created OK (cq=%d sq=%d)\n", + queues->cq[0]->qid, queues->sq_submit->qid); + + /* Q0 DMA ring buffer for commands */ + status = ave_alloc_queue_buf(bce, &queues->q0_buf, + AVE_CMD_BUF_SIZE, AVE_RECV_BUF_COUNT); + if (status) + goto fail_sq0; + + /* CQ1 + Q1: AVEParameterReturnQueue — T2→Host (flags=2) */ + queues->cq[1] = bce_create_cq(bce, AVE_CQ_DEPTH); + if (!queues->cq[1]) { + pr_err("apple-ave: failed to create CQ1\n"); + status = -ENOMEM; + goto fail_q0_buf; + } + queues->sq_return = bce_create_sq_with_flags(bce, queues->cq[1], + "AVEParameterReturnQueue", + AVE_SQ_DEPTH, 2, + ave_q1_completion, queues); + if (!queues->sq_return) { + pr_err("apple-ave: FAILED to create Q1 (ParameterReturn)\n"); + status = -EINVAL; + goto fail_cq1; + } + pr_debug("apple-ave: Q1 created OK (cq=%d sq=%d)\n", + queues->cq[1]->qid, queues->sq_return->qid); + + /* CQ2 + Q2: AVECallbackReturnQueue — Host→T2 callback echo (flags=3) */ + queues->cq[2] = bce_create_cq(bce, AVE_CQ_DEPTH); + if (!queues->cq[2]) { + pr_err("apple-ave: failed to create CQ2\n"); + status = -ENOMEM; + goto fail_sq1; + } + queues->sq_cb_return = bce_create_sq_with_flags(bce, queues->cq[2], + "AVECallbackReturnQueue", + AVE_SQ_DEPTH, 3, + ave_q2_completion, queues); + if (!queues->sq_cb_return) { + pr_err("apple-ave: FAILED to create Q2 (CallbackReturn)\n"); + status = -EINVAL; + goto fail_cq2; + } + pr_debug("apple-ave: 
Q2 created OK (cq=%d sq=%d)\n", + queues->cq[2]->qid, queues->sq_cb_return->qid); + + /* CQ3 + Q3: AVECallbackSubmitQueue — T2→Host callback data (flags=2) */ + queues->cq[3] = bce_create_cq(bce, AVE_CQ_DEPTH); + if (!queues->cq[3]) { + pr_err("apple-ave: failed to create CQ3\n"); + status = -ENOMEM; + goto fail_sq2; + } + queues->sq_cb_submit = bce_create_sq_with_flags(bce, queues->cq[3], + "AVECallbackSubmitQueue", + AVE_SQ_DEPTH, 2, + ave_q3_completion, queues); + if (!queues->sq_cb_submit) { + pr_err("apple-ave: FAILED to create Q3 (CallbackSubmit)\n"); + status = -EINVAL; + goto fail_cq3; + } + pr_debug("apple-ave: Q3 created OK (cq=%d sq=%d)\n", + queues->cq[3]->qid, queues->sq_cb_submit->qid); + + /* Allocate DMA ring buffers for receive queues */ + pr_debug("apple-ave: allocating Q1 receive ring (%d x %d bytes)...\n", + AVE_RECV_BUF_COUNT, AVE_CMD_BUF_SIZE); + status = ave_alloc_queue_buf(bce, &queues->q1_buf, + AVE_CMD_BUF_SIZE, AVE_RECV_BUF_COUNT); + if (status) + goto fail_sq3; + + pr_debug("apple-ave: allocating Q2 receive ring (%d x %d bytes)...\n", + AVE_RECV_BUF_COUNT, AVE_CMD_BUF_SIZE); + status = ave_alloc_queue_buf(bce, &queues->q2_buf, + AVE_CMD_BUF_SIZE, AVE_RECV_BUF_COUNT); + if (status) + goto fail_q1_buf; + + pr_debug("apple-ave: allocating Q3 output ring (%d x %d bytes)...\n", + AVE_OUTPUT_BUF_COUNT, AVE_MAX_ENCODED_SIZE); + status = ave_alloc_queue_buf(bce, &queues->q3_buf, + AVE_MAX_ENCODED_SIZE, AVE_OUTPUT_BUF_COUNT); + if (status) + goto fail_q2_buf; + + pr_debug("apple-ave: all queues + buffers created successfully\n"); + return 0; + +fail_q2_buf: + ave_free_queue_buf(bce, &queues->q2_buf); +fail_q1_buf: + ave_free_queue_buf(bce, &queues->q1_buf); +fail_sq3: + bce_destroy_sq(bce, queues->sq_cb_submit); +fail_cq3: + bce_destroy_cq(bce, queues->cq[3]); +fail_sq2: + bce_destroy_sq(bce, queues->sq_cb_return); +fail_cq2: + bce_destroy_cq(bce, queues->cq[2]); +fail_sq1: + bce_destroy_sq(bce, queues->sq_return); +fail_cq1: + 
bce_destroy_cq(bce, queues->cq[1]); +fail_q0_buf: + ave_free_queue_buf(bce, &queues->q0_buf); +fail_sq0: + bce_destroy_sq(bce, queues->sq_submit); +fail_cq0: + bce_destroy_cq(bce, queues->cq[0]); + return status; +} + +void ave_queues_destroy(struct ave_queues *queues) +{ + struct apple_bce_device *bce = queues->bce; + + if (!bce) + return; + + pr_debug("apple-ave: destroying queues...\n"); + + /* Flush all queues before freeing DMA buffers. This tells the T2 + * to drain in-flight operations, preventing the firmware from + * DMA-ing to freed addresses. Without this, the AVE firmware may + * be stuck waiting for data that will never arrive. The VHCI + * subsystem does the same (bce_vhci_event_queue_pause). */ + if (queues->sq_cb_submit) + bce_cmd_flush_memory_queue(bce->cmd_cmdq, queues->sq_cb_submit->qid); + if (queues->sq_cb_return) + bce_cmd_flush_memory_queue(bce->cmd_cmdq, queues->sq_cb_return->qid); + if (queues->sq_return) + bce_cmd_flush_memory_queue(bce->cmd_cmdq, queues->sq_return->qid); + if (queues->sq_submit) + bce_cmd_flush_memory_queue(bce->cmd_cmdq, queues->sq_submit->qid); + + ave_free_queue_buf(bce, &queues->q3_buf); + ave_free_queue_buf(bce, &queues->q2_buf); + ave_free_queue_buf(bce, &queues->q1_buf); + ave_free_queue_buf(bce, &queues->q0_buf); + + if (queues->sq_cb_submit) + bce_destroy_sq(bce, queues->sq_cb_submit); + if (queues->cq[3]) + bce_destroy_cq(bce, queues->cq[3]); + if (queues->sq_cb_return) + bce_destroy_sq(bce, queues->sq_cb_return); + if (queues->cq[2]) + bce_destroy_cq(bce, queues->cq[2]); + if (queues->sq_return) + bce_destroy_sq(bce, queues->sq_return); + if (queues->cq[1]) + bce_destroy_cq(bce, queues->cq[1]); + if (queues->sq_submit) + bce_destroy_sq(bce, queues->sq_submit); + if (queues->cq[0]) + bce_destroy_cq(bce, queues->cq[0]); + + queues->bce = NULL; + pr_debug("apple-ave: queues destroyed\n"); +} + +/* + * Pre-submit empty receive buffers on Q1, Q2, Q3. + * Similar to aaudio_bce_in_queue_submit_pending(). 
+ */ +static void ave_presubmit_queue(struct bce_queue_sq *sq, struct ave_queue_buf *qb, size_t count) +{ + struct bce_qe_submission *s; + size_t i; + + for (i = 0; i < count; i++) { + if (bce_reserve_submission(sq, NULL)) { + pr_err("apple-ave: failed to reserve submission for pre-submit (i=%zu)\n", i); + break; + } + /* Fill with 0xFF sentinel (as observed in macOS traces) */ + memset(qb->data[qb->tail], 0xFF, qb->el_size); + + s = bce_next_submission(sq); + bce_set_submission_single(s, + qb->dma_addrs[qb->tail], + qb->el_size); + qb->tail = (qb->tail + 1) % qb->el_count; + } + bce_submit_to_device(sq); +} + +/* + * Pre-submit receive buffers on Q3 only. + * Q1 is NOT pre-submitted here — it is submitted one buffer at a time + * alongside each Q0 command in ave_cmd_send_sync() (macOS pairing pattern). + * Q2 is NOT pre-submitted — it is echoed per-event via ave_submit_q2_echo(). + * Q3 buffers are only active during frame encoding, not during setup. + */ +void ave_presubmit_recv_bufs(struct ave_queues *queues) +{ + pr_debug("apple-ave: pre-submitting receive buffers: Q3=%d (Q1 paired with Q0, Q2 echoed per-event)\n", + AVE_OUTPUT_BUF_COUNT); + queues->q3_auto_resubmit = true; + /* Q2 is NOT pre-submitted — it is only submitted as explicit callback echoes + * via ave_submit_q2_echo() during frame encoding. The T2 expects Q2 data to + * contain the echoed Q3 callback content (handshake/flow control). */ + ave_presubmit_queue(queues->sq_cb_submit, &queues->q3_buf, AVE_OUTPUT_BUF_COUNT); + pr_debug("apple-ave: receive buffers pre-submitted\n"); +} + +/* + * Submit a command buffer on Q0 and wait for the Q1 response. + * Uses pre-allocated DMA ring (like audio driver) instead of per-call DMA mapping. 
/*
 * Submit a command buffer on Q0, pair it with one Q1 receive buffer and
 * block until the Q1 response arrives.
 *
 * @cmd_buf must hold at least 4 bytes: the leading u32 (command type) is
 * read for logging before any size validation.
 *
 * Returns:
 *   -EINVAL     if @cmd_size exceeds AVE_CMD_BUF_SIZE;
 *   the error from bce_reserve_submission() if a Q0 or Q1 slot could not be
 *               reserved;
 *   -ETIMEDOUT  if no Q1 response arrives within AVE_RESPONSE_TIMEOUT_MS;
 *   otherwise queues->cmd_status as recorded by the Q1 completion callback.
 *
 * NOTE: if the Q1 reservation fails, the command has already been submitted
 * on Q0 and no receive buffer was posted for its response.
 */
int ave_cmd_send_sync(struct ave_queues *queues, void *cmd_buf, size_t cmd_size)
{
	struct bce_qe_submission *s;
	struct ave_queue_buf *q0 = &queues->q0_buf;
	struct ave_queue_buf *q1 = &queues->q1_buf;
	unsigned long timeout;
	int status;
	u32 cmd_type = *(u32 *)cmd_buf;

	pr_debug("apple-ave: cmd_send_sync: cmd=0x%02x size=%zu\n", cmd_type, cmd_size);

	if (cmd_size > AVE_CMD_BUF_SIZE)
		return -EINVAL;

	/* Copy command into pre-allocated DMA ring slot */
	memcpy(q0->data[q0->tail], cmd_buf, cmd_size);

	/* Arm the completion before anything is submitted, so a response
	 * cannot be signalled before we start waiting for it. */
	reinit_completion(&queues->cmd_completion);

	/* Reserve and submit on Q0 */
	timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS);
	status = bce_reserve_submission(queues->sq_submit, &timeout);
	if (status) {
		pr_err("apple-ave: Q0 reservation timeout\n");
		return status;
	}

	s = bce_next_submission(queues->sq_submit);
	bce_set_submission_single(s, q0->dma_addrs[q0->tail], cmd_size);
	/* The tail only moves once the slot has actually been queued. */
	q0->tail = (q0->tail + 1) % q0->el_count;
	bce_submit_to_device(queues->sq_submit);
	pr_debug("apple-ave: cmd 0x%02x submitted on Q0\n", cmd_type);

	/*
	 * Submit one Q1 receive buffer immediately after Q0 command.
	 * macOS pairs each Q0 command with exactly one Q1 receive buffer
	 * (finding 26, investigation 2). The T2 firmware expects this pairing.
	 */
	timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS);
	status = bce_reserve_submission(queues->sq_return, &timeout);
	if (status) {
		pr_err("apple-ave: Q1 reservation timeout\n");
		return status;
	}
	/* 0xFF-fill the receive slot before handing it to the device. */
	memset(q1->data[q1->tail], 0xFF, q1->el_size);
	s = bce_next_submission(queues->sq_return);
	bce_set_submission_single(s, q1->dma_addrs[q1->tail], q1->el_size);
	q1->tail = (q1->tail + 1) % q1->el_count;
	bce_submit_to_device(queues->sq_return);
	pr_debug("apple-ave: Q1 recv buf submitted, waiting for response...\n");

	/* Wait for Q1 response */
	if (!wait_for_completion_timeout(&queues->cmd_completion,
					 msecs_to_jiffies(AVE_RESPONSE_TIMEOUT_MS))) {
		pr_err("apple-ave: TIMEOUT waiting for Q1 response (cmd=0x%02x)\n", cmd_type);
		return -ETIMEDOUT;
	}

	pr_debug("apple-ave: cmd 0x%02x completed, status=%d resp_size=%zu\n",
		 cmd_type, queues->cmd_status, queues->cmd_resp_size);
	return queues->cmd_status;
}
0 : -EIO; + queues->cmd_resp_size = c->data_size; + queues->cmd_resp_buf = resp; + + qb->head = (qb->head + 1) % qb->el_count; + bce_notify_submission_complete(sq); + cnt++; + } + + /* + * Do NOT auto-resubmit Q1 buffers here. macOS pairs each Q1 buffer + * with a Q0 command in ave_cmd_send_sync(). Auto-resubmitting would + * break the strict pairing the T2 firmware expects. + */ + + complete(&queues->cmd_completion); +} + +static void ave_q2_completion(struct bce_queue_sq *sq) +{ + struct ave_queues *queues = sq->userdata; + struct bce_sq_completion_data *c; + struct ave_queue_buf *qb = &queues->q2_buf; + size_t cnt = 0; + + while ((c = bce_next_completion(sq))) { + pr_debug("apple-ave: Q2 completion: status=%u data_size=%llu result=0x%llx\n", + c->status, c->data_size, c->result); + + qb->head = (qb->head + 1) % qb->el_count; + bce_notify_submission_complete(sq); + cnt++; + } + + /* Q2 buffers are NOT auto-resubmitted. During encoding, Q2 is + * explicitly submitted as callback echoes via ave_submit_q2_echo(). + * The T2 expects echoed Q3 callback data on Q2, not sentinel fills. 
*/ +} + +static void ave_q3_completion(struct bce_queue_sq *sq) +{ + struct ave_queues *queues = sq->userdata; + struct bce_sq_completion_data *c; + struct ave_queue_buf *qb = &queues->q3_buf; + size_t cnt = 0; + + while ((c = bce_next_completion(sq))) { + size_t idx = queues->q3_event_count; + + pr_debug("apple-ave: Q3 event[%zu]: status=%u data_size=%llu result=0x%llx\n", + idx, c->status, c->data_size, c->result); + + if (idx < AVE_OUTPUT_BUF_COUNT) + queues->q3_result[idx] = c->data_size; + queues->q3_event_count++; + + qb->head = (qb->head + 1) % qb->el_count; + bce_notify_submission_complete(sq); + cnt++; + + /* Signal per-element — encoder waits on this N times */ + complete(&queues->q3_completion); + } + + /* Only auto-resubmit during setup, not during encoding */ + if (queues->q3_auto_resubmit && cnt) { + pr_debug("apple-ave: Q3 auto-resubmitting %zu output buffers\n", cnt); + ave_presubmit_queue(sq, qb, cnt); + } +} + +/* === Async submit/wait primitives for encoding pipeline === */ + +/* + * Submit a command on Q0 without pairing Q1 (for async EncodeFrame pipeline). + */ +int ave_submit_q0_cmd(struct ave_queues *queues, void *cmd_buf, size_t cmd_size) +{ + struct bce_qe_submission *s; + struct ave_queue_buf *q0 = &queues->q0_buf; + unsigned long timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS); + + if (cmd_size > AVE_CMD_BUF_SIZE) + return -EINVAL; + + memcpy(q0->data[q0->tail], cmd_buf, cmd_size); + + if (bce_reserve_submission(queues->sq_submit, &timeout)) + return -ETIMEDOUT; + + s = bce_next_submission(queues->sq_submit); + bce_set_submission_single(s, q0->dma_addrs[q0->tail], cmd_size); + q0->tail = (q0->tail + 1) % q0->el_count; + bce_submit_to_device(queues->sq_submit); + return 0; +} + +/* + * Submit one receive buffer on Q1 and arm cmd_completion. + * The reinit happens BEFORE submission to avoid racing with the callback. 
+ */ +int ave_submit_q1_recv(struct ave_queues *queues) +{ + struct bce_qe_submission *s; + struct ave_queue_buf *q1 = &queues->q1_buf; + unsigned long timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS); + + memset(q1->data[q1->tail], 0xFF, q1->el_size); + + reinit_completion(&queues->cmd_completion); + + if (bce_reserve_submission(queues->sq_return, &timeout)) + return -ETIMEDOUT; + + s = bce_next_submission(queues->sq_return); + bce_set_submission_single(s, q1->dma_addrs[q1->tail], q1->el_size); + q1->tail = (q1->tail + 1) % q1->el_count; + bce_submit_to_device(queues->sq_return); + return 0; +} + +/* + * Submit one empty buffer on Q3 for T2 to fill with callback data. + */ +int ave_submit_q3_buf(struct ave_queues *queues) +{ + struct bce_qe_submission *s; + struct ave_queue_buf *qb = &queues->q3_buf; + unsigned long timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS); + + memset(qb->data[qb->tail], 0xFF, qb->el_size); + + if (bce_reserve_submission(queues->sq_cb_submit, &timeout)) + return -ETIMEDOUT; + + s = bce_next_submission(queues->sq_cb_submit); + bce_set_submission_single(s, qb->dma_addrs[qb->tail], qb->el_size); + qb->tail = (qb->tail + 1) % qb->el_count; + bce_submit_to_device(queues->sq_cb_submit); + return 0; +} + +/* + * Echo Q3 callback data back on Q2 (callback acknowledgment). + * The T2 expects the host to copy Q3 callback content to Q2 as a handshake. + * Without this echo, the T2 stalls and never signals Q1 completion. + * Copies the first 4KB (Q2 buffer size) of q3_data into the next Q2 ring slot. 
+ */ +int ave_submit_q2_echo(struct ave_queues *queues, const void *q3_data) +{ + struct bce_qe_submission *s; + struct ave_queue_buf *qb = &queues->q2_buf; + unsigned long timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS); + + memcpy(qb->data[qb->tail], q3_data, qb->el_size); + + if (bce_reserve_submission(queues->sq_cb_return, &timeout)) + return -ETIMEDOUT; + + s = bce_next_submission(queues->sq_cb_return); + bce_set_submission_single(s, qb->dma_addrs[qb->tail], qb->el_size); + qb->tail = (qb->tail + 1) % qb->el_count; + bce_submit_to_device(queues->sq_cb_return); + + pr_debug("apple-ave: Q2 echo submitted\n"); + return 0; +} + +/* + * Wait for Q1 response (cmd_completion). Must call ave_submit_q1_recv() first. + */ +int ave_wait_q1(struct ave_queues *queues, unsigned long timeout_ms) +{ + if (!wait_for_completion_timeout(&queues->cmd_completion, + msecs_to_jiffies(timeout_ms))) + return -ETIMEDOUT; + return queues->cmd_status; +} + +/* + * Wait for one Q3 event. Can be called N times to wait for N events. + * Each call consumes one complete() signal from ave_q3_completion(). + */ +int ave_wait_q3(struct ave_queues *queues, unsigned long timeout_ms) +{ + if (!wait_for_completion_timeout(&queues->q3_completion, + msecs_to_jiffies(timeout_ms))) + return -ETIMEDOUT; + return 0; +} + +/* + * Access completed Q3 data for a given event index. + * The buffer index corresponds to the ring slot that was completed. + */ +void *ave_q3_completed_data(struct ave_queues *queues, size_t event_index) +{ + struct ave_queue_buf *qb = &queues->q3_buf; + size_t buf_idx; + + if (event_index >= queues->q3_event_count) { + pr_err("apple-ave: Q3 event_index %zu >= event_count %zu\n", + event_index, queues->q3_event_count); + return NULL; + } + + /* Events are consumed in head order; the first event after reset + * was at head position when reset was called. We track the starting + * head in q3_reset_head for this purpose. 
But simpler: the ring + * head has already been advanced by the callback, so the data for + * event N is at (current_head - total_events + event_index). */ + buf_idx = (qb->head + qb->el_count - queues->q3_event_count + event_index) % qb->el_count; + return qb->data[buf_idx]; +} + +size_t ave_q3_completed_size(struct ave_queues *queues, size_t event_index) +{ + if (event_index < AVE_OUTPUT_BUF_COUNT) + return queues->q3_result[event_index]; + return 0; +} + +/* + * Reset Q3 tracking for a new frame. Must be called before submitting + * Q3 buffers for a new encoding round. + */ +void ave_q3_reset(struct ave_queues *queues) +{ + queues->q3_event_count = 0; + memset(queues->q3_result, 0, sizeof(queues->q3_result)); + reinit_completion(&queues->q3_completion); +} + +/* + * Submit frame data (Y/UV plane) on Q0 without waiting for Q0 ack. + * DMA mapping is returned in dma_out; caller MUST call + * bce_unmap_dma_buffer() after the T2 has finished reading (i.e., after + * Q1 response). This avoids the race condition in ave_submit_frame_data() + * where a Q0 command ack could prematurely signal the frame data ack. 
+ */ +int ave_submit_frame_data_async(struct ave_queues *queues, void *data, size_t size, + struct bce_dma_buffer *dma_out) +{ + struct bce_qe_submission *s; + unsigned long timeout; + int status; + + status = bce_map_dma_buffer_vm(&queues->bce->pci->dev, dma_out, + data, size, DMA_TO_DEVICE); + if (status) { + pr_err("apple-ave: failed to map frame data DMA (%zu bytes, err=%d)\n", + size, status); + return status; + } + + timeout = msecs_to_jiffies(AVE_SUBMIT_TIMEOUT_MS); + status = bce_reserve_submission(queues->sq_submit, &timeout); + if (status) { + pr_err("apple-ave: Q0 reservation timeout for async frame data\n"); + bce_unmap_dma_buffer(&queues->bce->pci->dev, dma_out); + return -ETIMEDOUT; + } + + s = bce_next_submission(queues->sq_submit); + bce_set_submission_buf(s, dma_out, 0, size); + bce_submit_to_device(queues->sq_submit); + return 0; +} + +/* === Command buffer builders === */ + +void ave_build_cmd_codec_id(void *buf) +{ + memset(buf, 0xBB, AVE_CMD_BUF_SIZE); + /* cmd_type = 0 at +0x00 */ + *(u32 *)(buf + 0x00) = 0x00000000; + /* "1cvh" = FourCC "hvc1" little-endian at +0x08 */ + *(u32 *)(buf + 0x08) = 0x68766331; /* "hvc1" as LE bytes: 31 63 76 68 */ +} + +void ave_build_cmd_session_config(void *buf, u32 width, u32 height) +{ + memset(buf, 0x00, AVE_CMD_BUF_SIZE); + + /* Header — session token at +0x08 is stamped by caller */ + *(u32 *)(buf + 0x00) = AVE_CMD_SESSION_CONFIG; + + /* Resolution — only fields the T2 firmware actually reads */ + *(u32 *)(buf + 0x18) = width; + *(u32 *)(buf + 0x1C) = height; +} + +void ave_build_cmd_encode_frame(void *buf, u64 frame_num, u32 width, u32 height, + u32 fps_num, u32 fps_den, bool keyframe, + u64 cookie) +{ + memset(buf, 0x00, AVE_CMD_BUF_SIZE); + *(u32 *)(buf + 0x00) = AVE_CMD_ENCODE_FRAME; + /* +0x08: session token stamped by caller */ + *(u64 *)(buf + 0x10) = frame_num; + *(u32 *)(buf + 0x18) = keyframe ? 
1 : 0; + *(u32 *)(buf + 0x20) = fps_num; + *(u32 *)(buf + 0x24) = fps_den; + *(u32 *)(buf + 0x30) = 1; + *(u32 *)(buf + 0x38) = fps_num; + *(u32 *)(buf + 0x3C) = fps_den; + *(u32 *)(buf + 0x48) = width; + *(u32 *)(buf + 0x50) = height; + *(u32 *)(buf + 0x60) = 8; /* bit depth */ + *(u32 *)(buf + 0x68) = width; /* stride */ + /* "v024" = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange ('420v'). + * aveserverd also accepts 'x420' (10-bit video range) and + * 'xf20' (10-bit full range). */ + memcpy(buf + 0x70, "v024", 4); + /* +0x74: dead data — aveserverd never reads this field. + * Previously hardcoded to 0x7000 from a packet capture. */ + *(u64 *)(buf + 0x80) = cookie; +} + +void ave_build_cmd_copy_property(void *buf, const char *name) +{ + memset(buf, 0x00, AVE_CMD_BUF_SIZE); + *(u32 *)(buf + 0x00) = AVE_CMD_COPY_PROPERTY; + /* +0x08: session token stamped by caller */ + strscpy(buf + 0x10, name, AVE_CMD_BUF_SIZE - 0x10); +} + +/* + * SetProperty command layout (from T2 readCFKeyValuePair): + * +0x00: cmd_type (0x09) + * +0x08: session token (stamped by caller) + * +0x10: property name string + * +0x30: buffer size (0x1000) + * +0x64: value-present flag (1) + * +0x74: type code (1=bool, 2=SInt32) + * +0x78: value length in bytes + * +0x80: value data + * + * T2 firmware rejects all-zero buffers — must use 0xBB fill for + * don't-care bytes, then zero only the header (finding 27/28). 
+ */ +static void ave_build_cmd_set_property_common(void *buf, const char *name) +{ + memset(buf, 0xBB, AVE_CMD_BUF_SIZE); + memset(buf, 0x00, 0x20); + + *(u32 *)(buf + 0x00) = AVE_CMD_SET_PROPERTY; + strscpy(buf + 0x10, name, 0x20); + + *(u32 *)(buf + 0x30) = 0x1000; + *(u32 *)(buf + 0x34) = 0x0000; + *(u32 *)(buf + 0x64) = 0x00000001; +} + +void ave_build_cmd_set_property_bool(void *buf, const char *name, bool value) +{ + ave_build_cmd_set_property_common(buf, name); + *(u32 *)(buf + 0x74) = 0x00000001; /* type = bool */ + *(u32 *)(buf + 0x78) = 0x00000001; /* length = 1 byte */ + *(u8 *)(buf + 0x80) = value ? 1 : 0; +} + +void ave_build_cmd_set_property_s32(void *buf, const char *name, s32 value) +{ + ave_build_cmd_set_property_common(buf, name); + *(u32 *)(buf + 0x74) = 0x00000002; /* type = SInt32 */ + *(u32 *)(buf + 0x78) = 0x00000004; /* length = 4 bytes */ + *(s32 *)(buf + 0x80) = value; +} + +void ave_build_cmd_set_property_float32(void *buf, const char *name, u32 ieee754_bits) +{ + ave_build_cmd_set_property_common(buf, name); + *(u32 *)(buf + 0x74) = 0x00000004; /* type = Float32 */ + *(u32 *)(buf + 0x78) = 0x00000004; /* length = 4 bytes */ + *(u32 *)(buf + 0x80) = ieee754_bits; +} + +void ave_build_cmd_set_property_string(void *buf, const char *name, const char *value) +{ + size_t len = strlen(value); + + ave_build_cmd_set_property_common(buf, name); + *(u32 *)(buf + 0x74) = 0x00000006; /* type = CFString */ + *(u32 *)(buf + 0x78) = len; + memcpy(buf + 0x80, value, min_t(size_t, len + 1, AVE_CMD_BUF_SIZE - 0x80)); +} + +void ave_build_cmd_prepare(void *buf) +{ + memset(buf, 0x00, AVE_CMD_BUF_SIZE); + *(u32 *)(buf + 0x00) = AVE_CMD_PREPARE; + /* +0x08: session token stamped by caller */ +} + +void ave_build_cmd_complete_frames(void *buf) +{ + memset(buf, 0x00, AVE_CMD_BUF_SIZE); + *(u32 *)(buf + 0x00) = AVE_CMD_COMPLETE_FRAMES; + /* +0x08: session token stamped by caller */ +} + +void ave_build_cmd_end_session(void *buf) +{ + memset(buf, 0xFF, 
AVE_CMD_BUF_SIZE); + *(u32 *)(buf + 0x00) = AVE_CMD_END_SESSION; + *(u32 *)(buf + 0x04) = 0x00000000; + /* +0x08: session token stamped by caller */ + *(u32 *)(buf + 0x0C) = 0x00000000; +} diff --git a/video/protocol.h b/video/protocol.h new file mode 100644 index 0000000..0a6d05b --- /dev/null +++ b/video/protocol.h @@ -0,0 +1,137 @@ +#ifndef AVE_PROTOCOL_H +#define AVE_PROTOCOL_H + +#include "../queue.h" +#include "../queue_dma.h" + +#define AVE_CMD_BUF_SIZE 4096 +#define AVE_CQ_DEPTH 256 +#define AVE_SQ_DEPTH 128 +/* Session token — returned by T2 in CodecID response[+0x18], written at +0x08 in all + * subsequent commands. Previously hardcoded as 0xFEEDBEEF, but it is a dynamic value. */ + +/* Maximum encoded output size per frame (2MB covers 4K keyframes at high bitrates). + * 8 of these are allocated as contiguous DMA buffers, so keep this reasonable. + * Even at 100 Mbps / 30 fps, a worst-case IDR is well under 2 MB. */ +#define AVE_MAX_ENCODED_SIZE (2 * 1024 * 1024) + +/* hvcC parsing constants (ISO/IEC 14496-15) */ +#define AVE_HVCC_OFFSET 0x68 +#define AVE_HVCC_HEADER_SIZE 23 + +/* Timeout constants (milliseconds) */ +#define AVE_SUBMIT_TIMEOUT_MS 5000 +#define AVE_RESPONSE_TIMEOUT_MS 10000 + +/* Command types */ +#define AVE_CMD_CODEC_ID 0x00 +#define AVE_CMD_SESSION_CONFIG 0x01 +#define AVE_CMD_ENCODE_FRAME 0x02 +#define AVE_CMD_COMPLETE_FRAMES 0x03 +#define AVE_CMD_PREPARE 0x05 +#define AVE_CMD_END_SESSION 0x06 +#define AVE_CMD_COPY_PROPERTY 0x08 +#define AVE_CMD_SET_PROPERTY 0x09 +#define AVE_CMD_DATA_NOTIFY 0x0a /* T2→Host: encoder finished notification */ +#define AVE_CMD_ENCODED_FRAME 0x0b /* T2→Host: encoded frame callback */ + +/* Number of pre-submitted receive buffers per queue */ +#define AVE_RECV_BUF_COUNT 8 + +/* Number of encoded output buffers on Q3 */ +#define AVE_OUTPUT_BUF_COUNT 8 + +struct apple_bce_device; + +struct ave_queue_buf { + void **data; + dma_addr_t *dma_addrs; + size_t el_size; + size_t el_count; + size_t head, tail; +}; + 
+struct ave_queues { + struct apple_bce_device *bce; + + struct bce_queue_cq *cq[4]; /* one CQ per SQ, matching macOS topology */ + + /* Q0: AVEParameterSubmitQueue — Host→T2 (commands + frame data) */ + struct bce_queue_sq *sq_submit; + /* Q1: AVEParameterReturnQueue — T2→Host (responses) */ + struct bce_queue_sq *sq_return; + /* Q2: AVECallbackReturnQueue — Host→T2 (callback echo/ack, flags=3) */ + struct bce_queue_sq *sq_cb_return; + /* Q3: AVECallbackSubmitQueue — T2→Host (callback data + encoded bitstream, flags=2) */ + struct bce_queue_sq *sq_cb_submit; + + /* Pre-allocated DMA ring buffer for Q0 commands (4KB each) */ + struct ave_queue_buf q0_buf; + /* Pre-allocated DMA ring buffers for Q1/Q2 responses (4KB each) */ + struct ave_queue_buf q1_buf; + struct ave_queue_buf q2_buf; + /* Larger ring for Q3 encoded output */ + struct ave_queue_buf q3_buf; + + /* Synchronous command completion (signalled by Q1 only) */ + struct completion cmd_completion; + int cmd_status; + size_t cmd_resp_size; + void *cmd_resp_buf; /* points into q1_buf ring — valid until next command */ + + /* Frame completion tracking */ + struct completion q3_completion; + + /* Per-event Q3 tracking for async encoding pipeline */ + size_t q3_result[AVE_OUTPUT_BUF_COUNT]; /* result scalar per completed element */ + size_t q3_event_count; /* number of Q3 events since last reset */ + bool q3_auto_resubmit; /* false during encoding to preserve data */ +}; + +int ave_queues_create(struct apple_bce_device *bce, struct ave_queues *queues); +void ave_queues_destroy(struct ave_queues *queues); + +int ave_cmd_send_sync(struct ave_queues *queues, void *cmd_buf, size_t cmd_size); + +/* Command buffer builders — all operate on a 4096-byte buffer */ +void ave_build_cmd_codec_id(void *buf); +void ave_build_cmd_session_config(void *buf, u32 width, u32 height); +void ave_build_cmd_encode_frame(void *buf, u64 frame_num, u32 width, u32 height, + u32 fps_num, u32 fps_den, bool keyframe, + u64 cookie); +void 
ave_build_cmd_copy_property(void *buf, const char *name); +void ave_build_cmd_set_property_bool(void *buf, const char *name, bool value); +void ave_build_cmd_set_property_s32(void *buf, const char *name, s32 value); +void ave_build_cmd_set_property_float32(void *buf, const char *name, u32 ieee754_bits); +void ave_build_cmd_set_property_string(void *buf, const char *name, const char *value); +void ave_build_cmd_prepare(void *buf); +void ave_build_cmd_complete_frames(void *buf); +void ave_build_cmd_end_session(void *buf); + +/* Pre-submit empty receive buffers on Q2/Q3 (Q1 is paired with Q0 per-command) */ +void ave_presubmit_recv_bufs(struct ave_queues *queues); + +/* Low-level Q0/Q1/Q3 submission without blocking (for async encoding pipeline) */ +int ave_submit_q0_cmd(struct ave_queues *queues, void *cmd_buf, size_t cmd_size); +int ave_submit_q1_recv(struct ave_queues *queues); +int ave_submit_q3_buf(struct ave_queues *queues); + +/* Echo Q3 callback data back on Q2 (acknowledgment to T2) */ +int ave_submit_q2_echo(struct ave_queues *queues, const void *q3_data); + +/* Submit frame data (Y/UV) on Q0 without waiting — caller must unmap later */ +int ave_submit_frame_data_async(struct ave_queues *queues, void *data, size_t size, + struct bce_dma_buffer *dma_out); + +/* Waiting primitives */ +int ave_wait_q1(struct ave_queues *queues, unsigned long timeout_ms); +int ave_wait_q3(struct ave_queues *queues, unsigned long timeout_ms); + +/* Q3 buffer access after completion */ +void *ave_q3_completed_data(struct ave_queues *queues, size_t event_index); +size_t ave_q3_completed_size(struct ave_queues *queues, size_t event_index); + +/* Reset Q3 tracking for new frame */ +void ave_q3_reset(struct ave_queues *queues); + +#endif /* AVE_PROTOCOL_H */ diff --git a/video/video.c b/video/video.c new file mode 100644 index 0000000..13bb9db --- /dev/null +++ b/video/video.c @@ -0,0 +1,1112 @@ +#include "encoder.h" +#include "../apple_bce.h" + +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AVE_NAME "apple-ave" +#define AVE_DEFAULT_WIDTH 1920 +#define AVE_DEFAULT_HEIGHT 1080 +#define AVE_MIN_WIDTH 128 +#define AVE_MIN_HEIGHT 128 +#define AVE_MAX_WIDTH 4096 +#define AVE_MAX_HEIGHT 2304 +#define AVE_DEFAULT_BITRATE 4000000 +#define AVE_MIN_BITRATE 100000 +#define AVE_MAX_BITRATE 100000000 +#define AVE_DEFAULT_FPS_NUM 30 +#define AVE_DEFAULT_FPS_DEN 1 + +/* NV12 frame size: Y + UV = w*h + w*h/2 = w*h*3/2 */ +#define AVE_NV12_SIZE(w, h) ((w) * (h) * 3 / 2) +#define AVE_NV12_Y_SIZE(w, h) ((w) * (h)) +#define AVE_NV12_UV_SIZE(w, h) ((w) * (h) / 2) + +static char *sock_path = "/run/aveserverd.sock"; +module_param(sock_path, charp, 0644); +MODULE_PARM_DESC(sock_path, "Unix socket path for aveserverd daemon"); + +struct ave_device { + struct apple_bce_device *bce; + struct v4l2_device v4l2_dev; + struct video_device vdev; + struct v4l2_m2m_dev *m2m_dev; + struct mutex dev_mutex; + struct mutex session_mutex; + struct ave_session *session; +}; + +struct ave_ctx { + struct v4l2_fh fh; + struct ave_device *dev; + struct v4l2_pix_format_mplane src_fmt; + struct v4l2_pix_format_mplane dst_fmt; + struct v4l2_ctrl_handler ctrl_handler; + u32 bitrate; + bool force_keyframe; + u32 fps_num; + u32 fps_den; + s32 gop_size; + s32 bitrate_mode; + s32 quality; + s32 min_qp; + s32 max_qp; + s32 profile; + s32 level; +}; + +static struct ave_device *ave_global_dev; + +static int ave_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq); + +/* === Format helpers === */ + +static void ave_set_default_src_fmt(struct v4l2_pix_format_mplane *f) +{ + memset(f, 0, sizeof(*f)); + f->width = AVE_DEFAULT_WIDTH; + f->height = AVE_DEFAULT_HEIGHT; + f->pixelformat = V4L2_PIX_FMT_NV12; + f->field = V4L2_FIELD_NONE; + f->colorspace = V4L2_COLORSPACE_REC709; + f->num_planes = 1; + f->plane_fmt[0].sizeimage = AVE_NV12_SIZE(AVE_DEFAULT_WIDTH, AVE_DEFAULT_HEIGHT); + 
f->plane_fmt[0].bytesperline = AVE_DEFAULT_WIDTH; +} + +static void ave_set_default_dst_fmt(struct v4l2_pix_format_mplane *f) +{ + memset(f, 0, sizeof(*f)); + f->width = AVE_DEFAULT_WIDTH; + f->height = AVE_DEFAULT_HEIGHT; + f->pixelformat = V4L2_PIX_FMT_HEVC; + f->field = V4L2_FIELD_NONE; + f->colorspace = V4L2_COLORSPACE_REC709; + f->num_planes = 1; + /* Allocate enough for worst-case encoded output */ + f->plane_fmt[0].sizeimage = AVE_NV12_SIZE(AVE_DEFAULT_WIDTH, AVE_DEFAULT_HEIGHT); + f->plane_fmt[0].bytesperline = 0; +} + +/* === V4L2 IOCTL ops === */ + +static int ave_querycap(struct file *file, void *priv, struct v4l2_capability *cap) +{ + strscpy(cap->driver, AVE_NAME, sizeof(cap->driver)); + strscpy(cap->card, "Apple T2 HEVC Encoder", sizeof(cap->card)); + snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", + pci_name(ave_global_dev->bce->pci)); + return 0; +} + +static int ave_enum_fmt(struct file *file, void *priv, struct v4l2_fmtdesc *f) +{ + if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + switch (f->index) { + case 0: + f->pixelformat = V4L2_PIX_FMT_NV12; + return 0; + case 1: + f->pixelformat = V4L2_PIX_FMT_NV12M; + return 0; + default: + return -EINVAL; + } + } else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) { + if (f->index != 0) + return -EINVAL; + f->pixelformat = V4L2_PIX_FMT_HEVC; + f->flags = V4L2_FMT_FLAG_COMPRESSED; + return 0; + } + return -EINVAL; +} + +static int ave_g_fmt(struct file *file, void *priv, struct v4l2_format *f) +{ + struct ave_ctx *ctx = container_of(file->private_data, struct ave_ctx, fh); + + if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + f->fmt.pix_mp = ctx->src_fmt; + else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) + f->fmt.pix_mp = ctx->dst_fmt; + else + return -EINVAL; + return 0; +} + +static void ave_fill_src_fmt(struct v4l2_pix_format_mplane *pix, u32 w, u32 h, + bool multiplanar) +{ + pix->width = w; + pix->height = h; + pix->field = V4L2_FIELD_NONE; + + if (multiplanar) { + 
pix->pixelformat = V4L2_PIX_FMT_NV12M; + pix->num_planes = 2; + pix->plane_fmt[0].sizeimage = AVE_NV12_Y_SIZE(w, h); + pix->plane_fmt[0].bytesperline = w; + pix->plane_fmt[1].sizeimage = AVE_NV12_UV_SIZE(w, h); + pix->plane_fmt[1].bytesperline = w; + } else { + pix->pixelformat = V4L2_PIX_FMT_NV12; + pix->num_planes = 1; + pix->plane_fmt[0].sizeimage = AVE_NV12_SIZE(w, h); + pix->plane_fmt[0].bytesperline = w; + } +} + +static int ave_s_fmt_out(struct file *file, void *priv, struct v4l2_format *f) +{ + struct ave_ctx *ctx = container_of(file->private_data, struct ave_ctx, fh); + struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp; + bool multi = (pix->pixelformat == V4L2_PIX_FMT_NV12M); + u32 w, h; + + w = clamp(pix->width, (u32)AVE_MIN_WIDTH, (u32)AVE_MAX_WIDTH); + h = clamp(pix->height, (u32)AVE_MIN_HEIGHT, (u32)AVE_MAX_HEIGHT); + /* Align to 2 for NV12 chroma subsampling */ + w = ALIGN(w, 2); + h = ALIGN(h, 2); + + ave_fill_src_fmt(pix, w, h, multi); + /* Accept user-provided colorspace, default to REC709 */ + if (!pix->colorspace) + pix->colorspace = V4L2_COLORSPACE_REC709; + + ctx->src_fmt = *pix; + + /* Update capture format to match */ + ctx->dst_fmt.width = w; + ctx->dst_fmt.height = h; + ctx->dst_fmt.colorspace = pix->colorspace; + ctx->dst_fmt.ycbcr_enc = pix->ycbcr_enc; + ctx->dst_fmt.xfer_func = pix->xfer_func; + ctx->dst_fmt.plane_fmt[0].sizeimage = AVE_NV12_SIZE(w, h); + + return 0; +} + +static int ave_s_fmt_cap(struct file *file, void *priv, struct v4l2_format *f) +{ + struct ave_ctx *ctx = container_of(file->private_data, struct ave_ctx, fh); + struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp; + + pix->pixelformat = V4L2_PIX_FMT_HEVC; + pix->width = ctx->src_fmt.width; + pix->height = ctx->src_fmt.height; + pix->field = V4L2_FIELD_NONE; + pix->colorspace = V4L2_COLORSPACE_REC709; + pix->num_planes = 1; + pix->plane_fmt[0].sizeimage = AVE_NV12_SIZE(pix->width, pix->height); + pix->plane_fmt[0].bytesperline = 0; + + ctx->dst_fmt = *pix; + return 
0; +} + +static int ave_try_fmt_out(struct file *file, void *priv, struct v4l2_format *f) +{ + struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp; + bool multi = (pix->pixelformat == V4L2_PIX_FMT_NV12M); + u32 w, h; + + w = clamp(pix->width, (u32)AVE_MIN_WIDTH, (u32)AVE_MAX_WIDTH); + h = clamp(pix->height, (u32)AVE_MIN_HEIGHT, (u32)AVE_MAX_HEIGHT); + w = ALIGN(w, 2); + h = ALIGN(h, 2); + + ave_fill_src_fmt(pix, w, h, multi); + + return 0; +} + +static int ave_try_fmt_cap(struct file *file, void *priv, struct v4l2_format *f) +{ + struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp; + u32 w, h; + + w = clamp(pix->width, (u32)AVE_MIN_WIDTH, (u32)AVE_MAX_WIDTH); + h = clamp(pix->height, (u32)AVE_MIN_HEIGHT, (u32)AVE_MAX_HEIGHT); + w = ALIGN(w, 2); + h = ALIGN(h, 2); + + pix->width = w; + pix->height = h; + pix->pixelformat = V4L2_PIX_FMT_HEVC; + pix->field = V4L2_FIELD_NONE; + pix->num_planes = 1; + pix->plane_fmt[0].sizeimage = AVE_NV12_SIZE(w, h); + pix->plane_fmt[0].bytesperline = 0; + + return 0; +} + +static int ave_enum_framesizes(struct file *file, void *priv, + struct v4l2_frmsizeenum *fsize) +{ + if (fsize->index != 0) + return -EINVAL; + if (fsize->pixel_format != V4L2_PIX_FMT_NV12 && + fsize->pixel_format != V4L2_PIX_FMT_NV12M && + fsize->pixel_format != V4L2_PIX_FMT_HEVC) + return -EINVAL; + + fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; + fsize->stepwise.min_width = AVE_MIN_WIDTH; + fsize->stepwise.max_width = AVE_MAX_WIDTH; + fsize->stepwise.step_width = 2; + fsize->stepwise.min_height = AVE_MIN_HEIGHT; + fsize->stepwise.max_height = AVE_MAX_HEIGHT; + fsize->stepwise.step_height = 2; + + return 0; +} + +/* === Colorspace mapping: V4L2 → ISO 23001-8 for T2 encoder === */ + +static s32 ave_v4l2_to_t2_primaries(enum v4l2_colorspace cs) +{ + switch (cs) { + case V4L2_COLORSPACE_SMPTE170M: + case V4L2_COLORSPACE_470_SYSTEM_BG: + return 6; /* BT.601 */ + case V4L2_COLORSPACE_BT2020: + return 9; + case V4L2_COLORSPACE_SMPTE240M: + return 7; + case 
V4L2_COLORSPACE_REC709: + default: + return 1; /* BT.709 */ + } +} + +static s32 ave_v4l2_to_t2_matrix(enum v4l2_colorspace cs, + enum v4l2_ycbcr_encoding enc) +{ + if (enc == V4L2_YCBCR_ENC_BT2020) + return 9; + if (enc == V4L2_YCBCR_ENC_601) + return 5; + if (enc == V4L2_YCBCR_ENC_709 || enc == V4L2_YCBCR_ENC_DEFAULT) { + switch (cs) { + case V4L2_COLORSPACE_SMPTE170M: + case V4L2_COLORSPACE_470_SYSTEM_BG: + return 5; /* BT.601 */ + case V4L2_COLORSPACE_BT2020: + return 9; + default: + return 1; /* BT.709 */ + } + } + return 1; +} + +static s32 ave_v4l2_to_t2_xfer(enum v4l2_colorspace cs, + enum v4l2_xfer_func xfer) +{ + if (xfer == V4L2_XFER_FUNC_SMPTE2084) + return 16; /* PQ / HDR10 */ + if (xfer == V4L2_XFER_FUNC_709 || xfer == V4L2_XFER_FUNC_DEFAULT) { + if (cs == V4L2_COLORSPACE_BT2020) + return 14; /* BT.2020-10 */ + return 1; /* BT.709 */ + } + return 1; +} + +/* === Frame rate (S_PARM / G_PARM) === */ + +static int ave_g_parm(struct file *file, void *priv, struct v4l2_streamparm *sp) +{ + struct ave_ctx *ctx = container_of(file->private_data, struct ave_ctx, fh); + + if (sp->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE || + sp->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { + memset(&sp->parm, 0, sizeof(sp->parm)); + sp->parm.output.capability = V4L2_CAP_TIMEPERFRAME; + sp->parm.output.timeperframe.numerator = ctx->fps_den; + sp->parm.output.timeperframe.denominator = ctx->fps_num; + return 0; + } + if (sp->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE || + sp->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { + memset(&sp->parm, 0, sizeof(sp->parm)); + sp->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; + sp->parm.capture.timeperframe.numerator = ctx->fps_den; + sp->parm.capture.timeperframe.denominator = ctx->fps_num; + return 0; + } + return -EINVAL; +} + +static int ave_s_parm(struct file *file, void *priv, struct v4l2_streamparm *sp) +{ + struct ave_ctx *ctx = container_of(file->private_data, struct ave_ctx, fh); + u32 num = 0, den = 0; + + if (sp->type == 
V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE || + sp->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) { + num = sp->parm.output.timeperframe.numerator; + den = sp->parm.output.timeperframe.denominator; + } else if (sp->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE || + sp->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { + num = sp->parm.capture.timeperframe.numerator; + den = sp->parm.capture.timeperframe.denominator; + } else { + return -EINVAL; + } + + if (num && den) { + ctx->fps_den = num; + ctx->fps_num = den; + } else { + ctx->fps_num = AVE_DEFAULT_FPS_NUM; + ctx->fps_den = AVE_DEFAULT_FPS_DEN; + } + + return ave_g_parm(file, priv, sp); +} + +static const struct v4l2_ioctl_ops ave_ioctl_ops = { + .vidioc_querycap = ave_querycap, + + .vidioc_enum_fmt_vid_cap = ave_enum_fmt, + .vidioc_enum_fmt_vid_out = ave_enum_fmt, + + .vidioc_g_fmt_vid_cap_mplane = ave_g_fmt, + .vidioc_g_fmt_vid_out_mplane = ave_g_fmt, + + .vidioc_s_fmt_vid_cap_mplane = ave_s_fmt_cap, + .vidioc_s_fmt_vid_out_mplane = ave_s_fmt_out, + + .vidioc_try_fmt_vid_cap_mplane = ave_try_fmt_cap, + .vidioc_try_fmt_vid_out_mplane = ave_try_fmt_out, + + .vidioc_enum_framesizes = ave_enum_framesizes, + + .vidioc_g_parm = ave_g_parm, + .vidioc_s_parm = ave_s_parm, + + .vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs, + .vidioc_querybuf = v4l2_m2m_ioctl_querybuf, + .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, + .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, + .vidioc_expbuf = v4l2_m2m_ioctl_expbuf, + + .vidioc_streamon = v4l2_m2m_ioctl_streamon, + .vidioc_streamoff = v4l2_m2m_ioctl_streamoff, + + .vidioc_encoder_cmd = v4l2_m2m_ioctl_encoder_cmd, + .vidioc_try_encoder_cmd = v4l2_m2m_ioctl_try_encoder_cmd, + + .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, + .vidioc_unsubscribe_event = v4l2_event_unsubscribe, +}; + +/* === vb2 queue ops === */ + +static int ave_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, + unsigned int *nplanes, unsigned int sizes[], + struct device *alloc_devs[]) +{ + struct 
ave_ctx *ctx = vb2_get_drv_priv(vq); + struct v4l2_pix_format_mplane *fmt; + int i; + + if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + fmt = &ctx->src_fmt; + else + fmt = &ctx->dst_fmt; + + if (*nplanes) { + /* Verify existing configuration */ + if (*nplanes != fmt->num_planes) + return -EINVAL; + for (i = 0; i < fmt->num_planes; i++) + if (sizes[i] < fmt->plane_fmt[i].sizeimage) + return -EINVAL; + return 0; + } + + *nplanes = fmt->num_planes; + for (i = 0; i < fmt->num_planes; i++) + sizes[i] = fmt->plane_fmt[i].sizeimage; + + return 0; +} + +static int ave_buf_prepare(struct vb2_buffer *vb) +{ + struct ave_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + struct v4l2_pix_format_mplane *fmt; + int i; + + if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + fmt = &ctx->src_fmt; + else + fmt = &ctx->dst_fmt; + + for (i = 0; i < fmt->num_planes; i++) { + if (vb2_plane_size(vb, i) < fmt->plane_fmt[i].sizeimage) + return -EINVAL; + } + + /* For OUTPUT buffers, userspace sets bytesused; for CAPTURE, we set it later */ + if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + for (i = 0; i < fmt->num_planes; i++) + vb2_set_plane_payload(vb, i, fmt->plane_fmt[i].sizeimage); + } + + return 0; +} + +static void ave_buf_queue(struct vb2_buffer *vb) +{ + struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); + struct ave_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); + + v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf); +} + +/* === Daemon socket IPC for T2 XPC lifecycle === */ + +static int ave_daemon_cmd(const char *cmd) +{ + struct socket *sock; + struct sockaddr_un addr; + struct kvec send_vec, recv_vec; + struct msghdr msg = {}; + char send_buf[16]; + char recv_buf[64]; + int ret, len; + + /* During process teardown, current->fs may be NULL. Unix socket + * connect needs path lookup which dereferences fs_struct. 
*/ + if (!current->fs) { + pr_debug("apple-ave: skipping XPC %s (no fs context)\n", cmd); + return -ENOENT; + } + + ret = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &sock); + if (ret) { + pr_warn("apple-ave: sock_create failed (%d)\n", ret); + return ret; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strscpy(addr.sun_path, sock_path, sizeof(addr.sun_path)); + + ret = kernel_connect(sock, (struct sockaddr_unsized *)&addr, + offsetof(struct sockaddr_un, sun_path) + + strlen(addr.sun_path) + 1, 0); + if (ret) { + pr_warn("apple-ave: connect to %s failed (%d)\n", + sock_path, ret); + goto out; + } + + /* Send "start\n" or "stop\n" */ + len = snprintf(send_buf, sizeof(send_buf), "%s\n", cmd); + send_vec.iov_base = send_buf; + send_vec.iov_len = len; + + ret = kernel_sendmsg(sock, &msg, &send_vec, 1, len); + if (ret < 0) { + pr_warn("apple-ave: sendmsg failed (%d)\n", ret); + goto out; + } + + /* Read response */ + memset(recv_buf, 0, sizeof(recv_buf)); + recv_vec.iov_base = recv_buf; + recv_vec.iov_len = sizeof(recv_buf) - 1; + + ret = kernel_recvmsg(sock, &msg, &recv_vec, 1, + sizeof(recv_buf) - 1, 0); + if (ret < 0) { + pr_warn("apple-ave: recvmsg failed (%d)\n", ret); + goto out; + } + + recv_buf[ret] = '\0'; + + if (strncmp(recv_buf, "OK", 2) == 0) { + pr_info("apple-ave: XPC %s OK\n", cmd); + ret = 0; + } else { + pr_warn("apple-ave: XPC %s failed: %s", cmd, recv_buf); + ret = -EIO; + } + +out: + sock_release(sock); + return ret; +} + +static int ave_xpc_start(void) +{ + return ave_daemon_cmd("start"); +} + +static int ave_xpc_stop(void) +{ + return ave_daemon_cmd("stop"); +} + +static int ave_xpc_recover(void) +{ + int ret; + + /* Recovery: send start, then stop to reset aveservice state */ + ret = ave_xpc_start(); + if (ret) + return ret; + + /* Small delay between start and stop */ + msleep(100); + + return ave_xpc_stop(); +} + +static void ave_build_params(struct ave_ctx *ctx, struct ave_enc_params *p) +{ + enum 
v4l2_colorspace cs = ctx->src_fmt.colorspace; + + p->bitrate = ctx->bitrate; + p->fps_num = ctx->fps_num; + p->fps_den = ctx->fps_den; + p->gop_size = ctx->gop_size; + p->bitrate_mode = ctx->bitrate_mode; + p->quality = ctx->quality; + p->min_qp = ctx->min_qp; + p->max_qp = ctx->max_qp; + p->profile = ctx->profile; + p->level = ctx->level; + p->color_primaries = ave_v4l2_to_t2_primaries(cs); + p->ycbcr_matrix = ave_v4l2_to_t2_matrix(cs, ctx->src_fmt.ycbcr_enc); + p->transfer_func = ave_v4l2_to_t2_xfer(cs, ctx->src_fmt.xfer_func); +} + +static int ave_start_streaming(struct vb2_queue *vq, unsigned int count) +{ + struct ave_ctx *ctx = vb2_get_drv_priv(vq); + struct ave_device *adev = ctx->dev; + struct ave_enc_params params; + struct vb2_v4l2_buffer *vbuf; + int status = 0; + + if (vq->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) + return 0; + + ave_build_params(ctx, ¶ms); + ave_xpc_start(); + + mutex_lock(&adev->session_mutex); + if (adev->session) { + mutex_unlock(&adev->session_mutex); + pr_err("apple-ave: encoder session already active\n"); + status = -EBUSY; + goto return_bufs; + } + + adev->session = kzalloc(sizeof(struct ave_session), GFP_KERNEL); + if (!adev->session) { + mutex_unlock(&adev->session_mutex); + status = -ENOMEM; + goto return_bufs; + } + + status = ave_session_setup(adev->session, adev->bce, + ctx->src_fmt.width, ctx->src_fmt.height, + ¶ms); + if (status) { + /* Session setup failed — T2 may be wedged. Recover and retry once. 
*/ + pr_warn("apple-ave: session setup failed (%d), attempting XPC recovery\n", status); + kfree(adev->session); + adev->session = NULL; + + ave_xpc_recover(); + + adev->session = kzalloc(sizeof(struct ave_session), GFP_KERNEL); + if (!adev->session) { + mutex_unlock(&adev->session_mutex); + status = -ENOMEM; + goto return_bufs; + } + + status = ave_session_setup(adev->session, adev->bce, + ctx->src_fmt.width, ctx->src_fmt.height, + ¶ms); + if (status) { + /* Still failing after recovery — give up */ + kfree(adev->session); + adev->session = NULL; + mutex_unlock(&adev->session_mutex); + goto return_bufs; + } + } + mutex_unlock(&adev->session_mutex); + + return 0; + +return_bufs: + while ((vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx))) + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_QUEUED); + return status; +} + +static void ave_stop_streaming(struct vb2_queue *vq) +{ + struct ave_ctx *ctx = vb2_get_drv_priv(vq); + struct ave_device *adev = ctx->dev; + struct vb2_v4l2_buffer *vbuf; + + if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { + mutex_lock(&adev->session_mutex); + if (adev->session) { + ave_session_teardown(adev->session); + kfree(adev->session); + adev->session = NULL; + } + mutex_unlock(&adev->session_mutex); + + ave_xpc_stop(); + + while ((vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx))) + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); + } else { + while ((vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx))) + v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); + } +} + +static const struct vb2_ops ave_vb2_ops = { + .queue_setup = ave_queue_setup, + .buf_prepare = ave_buf_prepare, + .buf_queue = ave_buf_queue, + .start_streaming = ave_start_streaming, + .stop_streaming = ave_stop_streaming, + .wait_prepare = vb2_ops_wait_prepare, + .wait_finish = vb2_ops_wait_finish, +}; + +/* === M2M ops === */ + +static void ave_device_run(void *priv) +{ + struct ave_ctx *ctx = priv; + struct ave_device *adev = ctx->dev; + struct vb2_v4l2_buffer *src_buf, *dst_buf; + struct 
vb2_buffer *src_vb, *dst_vb; + void *y_data, *uv_data, *out_data; + size_t y_size, uv_size, encoded_size = 0; + int status; + + src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); + dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); + + if (!src_buf || !dst_buf) { + pr_err("apple-ave: device_run called with missing buffers\n"); + v4l2_m2m_job_finish(adev->m2m_dev, ctx->fh.m2m_ctx); + return; + } + + src_vb = &src_buf->vb2_buf; + dst_vb = &dst_buf->vb2_buf; + + if (ctx->src_fmt.num_planes == 1) { + /* Single-plane NV12: Y and UV are contiguous in one buffer */ + u32 w = ctx->src_fmt.width; + u32 h = ctx->src_fmt.height; + + y_data = vb2_plane_vaddr(src_vb, 0); + y_size = AVE_NV12_Y_SIZE(w, h); + uv_data = y_data + y_size; + uv_size = AVE_NV12_UV_SIZE(w, h); + } else { + /* Multi-plane NV12M: separate Y and UV planes */ + y_data = vb2_plane_vaddr(src_vb, 0); + y_size = vb2_get_plane_payload(src_vb, 0); + uv_data = vb2_plane_vaddr(src_vb, 1); + uv_size = vb2_get_plane_payload(src_vb, 1); + } + out_data = vb2_plane_vaddr(dst_vb, 0); + + if (!y_data || !uv_data || !out_data) { + pr_err("apple-ave: failed to get buffer vaddrs\n"); + goto done_error; + } + + mutex_lock(&adev->session_mutex); + if (!adev->session || adev->session->state == AVE_STATE_ERROR) { + mutex_unlock(&adev->session_mutex); + pr_err("apple-ave: no active session\n"); + goto done_error; + } + + status = ave_session_encode_frame(adev->session, + y_data, y_size, uv_data, uv_size, + out_data, vb2_plane_size(dst_vb, 0), + &encoded_size, ctx->force_keyframe); + mutex_unlock(&adev->session_mutex); + + ctx->force_keyframe = false; + + if (status) { + pr_err("apple-ave: encode failed (%d), triggering XPC recovery\n", status); + ave_xpc_recover(); + goto done_error; + } + + vb2_set_plane_payload(dst_vb, 0, encoded_size); + + src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + + dst_buf->sequence = src_buf->sequence; + dst_buf->vb2_buf.timestamp = 
src_buf->vb2_buf.timestamp; + + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE); + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE); + v4l2_m2m_job_finish(adev->m2m_dev, ctx->fh.m2m_ctx); + return; + +done_error: + src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); + dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); + if (src_buf) + v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); + if (dst_buf) + v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR); + v4l2_m2m_job_finish(adev->m2m_dev, ctx->fh.m2m_ctx); +} + +static int ave_job_ready(void *priv) +{ + struct ave_ctx *ctx = priv; + struct ave_device *adev = ctx->dev; + + return adev->session && + (adev->session->state == AVE_STATE_CONFIGURED || + adev->session->state == AVE_STATE_ENCODING); +} + +static void ave_job_abort(void *priv) +{ + /* M2M framework handles cancellation; just let current job finish */ +} + +static const struct v4l2_m2m_ops ave_m2m_ops = { + .device_run = ave_device_run, + .job_ready = ave_job_ready, + .job_abort = ave_job_abort, +}; + +/* === V4L2 controls === */ + +static int ave_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct ave_ctx *ctx = container_of(ctrl->handler, struct ave_ctx, ctrl_handler); + + switch (ctrl->id) { + case V4L2_CID_MPEG_VIDEO_BITRATE: + ctx->bitrate = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME: + ctx->force_keyframe = true; + return 0; + case V4L2_CID_MPEG_VIDEO_GOP_SIZE: + ctx->gop_size = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_BITRATE_MODE: + ctx->bitrate_mode = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY: + ctx->quality = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP: + ctx->min_qp = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP: + ctx->max_qp = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE: + ctx->profile = ctrl->val; + return 0; + case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL: + ctx->level = ctrl->val; + return 0; + default: + return -EINVAL; + } +} + +static const struct 
v4l2_ctrl_ops ave_ctrl_ops = {
+	.s_ctrl = ave_s_ctrl,
+};
+
+/*
+ * Set up the per-context control handler and register the encoder controls.
+ *
+ * Registers bitrate, force-key-frame, GOP size, bitrate mode, constant
+ * quality, HEVC min/max QP, HEVC profile and HEVC level. The defaults passed
+ * here are the same values ave_open() stores in the context.
+ *
+ * Returns 0 on success, after publishing the handler through ctx->fh.
+ * On failure returns the error recorded by the handler; the handler has
+ * already been freed, so the caller has nothing to unwind for controls.
+ */
+static int ave_init_ctrls(struct ave_ctx *ctx)
+{
+	struct v4l2_ctrl_handler *hdl = &ctx->ctrl_handler;
+	int err;
+
+	v4l2_ctrl_handler_init(hdl, 10);
+
+	v4l2_ctrl_new_std(hdl, &ave_ctrl_ops,
+			  V4L2_CID_MPEG_VIDEO_BITRATE,
+			  AVE_MIN_BITRATE, AVE_MAX_BITRATE, 1,
+			  AVE_DEFAULT_BITRATE);
+
+	v4l2_ctrl_new_std(hdl, &ave_ctrl_ops,
+			  V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME,
+			  0, 0, 0, 0);
+
+	v4l2_ctrl_new_std(hdl, &ave_ctrl_ops,
+			  V4L2_CID_MPEG_VIDEO_GOP_SIZE,
+			  0, 600, 1, 0);
+
+	v4l2_ctrl_new_std_menu(hdl, &ave_ctrl_ops,
+			       V4L2_CID_MPEG_VIDEO_BITRATE_MODE,
+			       V4L2_MPEG_VIDEO_BITRATE_MODE_CQ, 0,
+			       V4L2_MPEG_VIDEO_BITRATE_MODE_VBR);
+
+	v4l2_ctrl_new_std(hdl, &ave_ctrl_ops,
+			  V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY,
+			  1, 100, 1, 65);
+
+	v4l2_ctrl_new_std(hdl, &ave_ctrl_ops,
+			  V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP,
+			  0, 51, 1, 0);
+
+	v4l2_ctrl_new_std(hdl, &ave_ctrl_ops,
+			  V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP,
+			  0, 51, 1, 0);
+
+	v4l2_ctrl_new_std_menu(hdl, &ave_ctrl_ops,
+			       V4L2_CID_MPEG_VIDEO_HEVC_PROFILE,
+			       V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10, 0,
+			       V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN);
+
+	v4l2_ctrl_new_std_menu(hdl, &ave_ctrl_ops,
+			       V4L2_CID_MPEG_VIDEO_HEVC_LEVEL,
+			       V4L2_MPEG_VIDEO_HEVC_LEVEL_6_2, 0,
+			       V4L2_MPEG_VIDEO_HEVC_LEVEL_5_1);
+
+	/*
+	 * The handler latches the first registration error. Free it here:
+	 * ave_open() jumps past its own v4l2_ctrl_handler_free() when this
+	 * function fails, so returning without freeing would leak whatever
+	 * the handler allocated.
+	 */
+	err = hdl->error;
+	if (err) {
+		v4l2_ctrl_handler_free(hdl);
+		return err;
+	}
+
+	ctx->fh.ctrl_handler = hdl;
+	return 0;
+}
+
+/* === File operations === */
+
+/*
+ * open() handler: allocate and initialise one encoder context per file handle.
+ *
+ * Fills the context with default encoding parameters, initialises the V4L2
+ * file handle, registers controls, creates the mem2mem context and installs
+ * default source/destination formats before adding the handle to the device.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
+ * error from control or mem2mem context initialisation.
+ */
+static int ave_open(struct file *file)
+{
+	struct ave_device *adev = video_drvdata(file);
+	struct ave_ctx *ctx;
+	int status;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* Keep these in step with the control defaults in ave_init_ctrls(). */
+	ctx->dev = adev;
+	ctx->bitrate = AVE_DEFAULT_BITRATE;
+	ctx->fps_num = AVE_DEFAULT_FPS_NUM;
+	ctx->fps_den = AVE_DEFAULT_FPS_DEN;
+	ctx->gop_size = 0;
+	ctx->bitrate_mode = V4L2_MPEG_VIDEO_BITRATE_MODE_VBR;
+	ctx->quality = 65;
+	ctx->min_qp = 0;
+	ctx->max_qp = 0;
+	ctx->profile = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN;
+	ctx->level = V4L2_MPEG_VIDEO_HEVC_LEVEL_5_1;
+
+	v4l2_fh_init(&ctx->fh, &adev->vdev);
+
+	/* On failure ave_init_ctrls() has already released the handler. */
+	status = ave_init_ctrls(ctx);
+	if (status)
+		goto err_fh;
+
+	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(adev->m2m_dev, ctx, ave_queue_init);
+	if (IS_ERR(ctx->fh.m2m_ctx)) {
+		status = PTR_ERR(ctx->fh.m2m_ctx);
+		goto err_ctrl;
+	}
+
+	ave_set_default_src_fmt(&ctx->src_fmt);
+	ave_set_default_dst_fmt(&ctx->dst_fmt);
+
+	file->private_data = &ctx->fh;
+	v4l2_fh_add(&ctx->fh, file);
+
+	return 0;
+
+err_ctrl:
+	v4l2_ctrl_handler_free(&ctx->ctrl_handler);
+err_fh:
+	v4l2_fh_exit(&ctx->fh);
+	kfree(ctx);
+	return status;
+}
+
+/*
+ * release() handler: undo everything ave_open() set up for this file handle.
+ *
+ * Releases the mem2mem context, removes and exits the V4L2 file handle,
+ * frees the control handler and finally the context itself. Always returns 0.
+ */
+static int ave_release(struct file *file)
+{
+	struct ave_ctx *ctx = container_of(file->private_data, struct ave_ctx, fh);
+
+	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+	v4l2_fh_del(&ctx->fh, file);
+	v4l2_fh_exit(&ctx->fh);
+	v4l2_ctrl_handler_free(&ctx->ctrl_handler);
+	kfree(ctx);
+
+	return 0;
+}
+
+static const struct v4l2_file_operations ave_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ave_open,
+	.release	= ave_release,
+	.poll		= v4l2_m2m_fop_poll,
+	.unlocked_ioctl	= video_ioctl2,
+	.mmap		= v4l2_m2m_fop_mmap,
+};
+
+/* === M2M queue init callback === */
+
+/*
+ * Configure the source and destination vb2 queues of a mem2mem context.
+ *
+ * @priv is the struct ave_ctx handed to v4l2_m2m_ctx_init(). The source queue
+ * is the multi-planar OUTPUT queue and the destination queue is the
+ * multi-planar CAPTURE queue. Both accept MMAP and DMABUF buffers, use the
+ * vmalloc memory ops, copy timestamps, and share the device-wide dev_mutex.
+ *
+ * Returns 0 on success or the error from vb2_queue_init().
+ */
+static int ave_queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)
+{
+	struct ave_ctx *ctx = priv;
+	int status;
+
+	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
+	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	src_vq->drv_priv = ctx;
+	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	src_vq->ops = &ave_vb2_ops;
+	src_vq->mem_ops = &vb2_vmalloc_memops;
+	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	src_vq->lock = &ctx->dev->dev_mutex;
+	src_vq->dev = &ctx->dev->bce->pci->dev;
+
+	status = vb2_queue_init(src_vq);
+	if (status)
+		return status;
+
+	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	dst_vq->drv_priv = ctx;
+	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	dst_vq->ops = &ave_vb2_ops;
+	dst_vq->mem_ops = &vb2_vmalloc_memops;
+	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	dst_vq->lock = &ctx->dev->dev_mutex;
+	dst_vq->dev = &ctx->dev->bce->pci->dev;
+
+	return vb2_queue_init(dst_vq);
+}
+
+/* === Init/exit called from apple_bce.c === */
+
+/*
+ * Create and register the AVE HEVC encoder video device.
+ *
+ * Starts the XPC service (best effort), allocates the device state, registers
+ * the V4L2 device, creates the mem2mem device and registers the video node.
+ * On success the device is published through ave_global_dev.
+ *
+ * Returns 0 on success or a negative errno. Every failure path unwinds what
+ * was set up so far, including the XPC start, so a failed create leaves
+ * nothing behind for bce_ave_destroy() to clean up.
+ */
+int bce_ave_create(struct apple_bce_device *bce)
+{
+	struct ave_device *adev;
+	int status;
+
+	/* Best-effort: start aveservice on T2 */
+	ave_xpc_start();
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev) {
+		status = -ENOMEM;
+		goto err_xpc;
+	}
+
+	adev->bce = bce;
+	mutex_init(&adev->dev_mutex);
+	mutex_init(&adev->session_mutex);
+
+	/* Register V4L2 device */
+	status = v4l2_device_register(&adev->bce->pci->dev, &adev->v4l2_dev);
+	if (status) {
+		pr_err("apple-ave: v4l2_device_register failed (%d)\n", status);
+		goto err_free;
+	}
+
+	/* Create M2M device */
+	adev->m2m_dev = v4l2_m2m_init(&ave_m2m_ops);
+	if (IS_ERR(adev->m2m_dev)) {
+		status = PTR_ERR(adev->m2m_dev);
+		pr_err("apple-ave: v4l2_m2m_init failed (%d)\n", status);
+		goto err_v4l2;
+	}
+
+	/* Initialize video device */
+	adev->vdev.fops = &ave_fops;
+	adev->vdev.ioctl_ops = &ave_ioctl_ops;
+	adev->vdev.device_caps = V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_STREAMING;
+	adev->vdev.v4l2_dev = &adev->v4l2_dev;
+	adev->vdev.release = video_device_release_empty;
+	adev->vdev.vfl_dir = VFL_DIR_M2M;
+	adev->vdev.lock = &adev->dev_mutex;
+	strscpy(adev->vdev.name, "apple-ave-enc", sizeof(adev->vdev.name));
+
+	video_set_drvdata(&adev->vdev, adev);
+
+	status = video_register_device(&adev->vdev, VFL_TYPE_VIDEO, -1);
+	if (status) {
+		pr_err("apple-ave: video_register_device failed (%d)\n", status);
+		goto err_m2m;
+	}
+
+	ave_global_dev = adev;
+	pr_info("apple-ave: HEVC encoder registered as /dev/video%d\n",
+		adev->vdev.num);
+	return 0;
+
+err_m2m:
+	v4l2_m2m_release(adev->m2m_dev);
+err_v4l2:
+	v4l2_device_unregister(&adev->v4l2_dev);
+err_free:
+	kfree(adev);
+err_xpc:
+	/*
+	 * Balance ave_xpc_start(). bce_ave_destroy() calls ave_xpc_stop()
+	 * without knowing whether the start succeeded, so the same call is
+	 * used here. NOTE(review): confirm ave_xpc_stop() is safe after a
+	 * failed start.
+	 */
+	ave_xpc_stop();
+	return status;
+}
+
+/*
+ * Unregister the encoder and free everything bce_ave_create() set up.
+ *
+ * Does nothing if no device was registered. Otherwise unregisters the video
+ * node, releases the mem2mem and V4L2 devices, tears down any active session
+ * under session_mutex, stops the XPC service and frees the device state.
+ *
+ * NOTE(review): vdev is embedded in adev and uses
+ * video_device_release_empty, so kfree(adev) below would free memory that a
+ * still-open file handle may reference - confirm no handle can outlive this.
+ */
+void bce_ave_destroy(void)
+{
+	struct ave_device *adev = ave_global_dev;
+
+	if (!adev)
+		return;
+
+	video_unregister_device(&adev->vdev);
+	v4l2_m2m_release(adev->m2m_dev);
+	v4l2_device_unregister(&adev->v4l2_dev);
+
+	mutex_lock(&adev->session_mutex);
+	if (adev->session) {
+		ave_session_teardown(adev->session);
+		kfree(adev->session);
+		adev->session = NULL;
+	}
+	mutex_unlock(&adev->session_mutex);
+
+	ave_xpc_stop();
+
+	kfree(adev);
+	ave_global_dev = NULL;
+	pr_info("apple-ave: encoder unregistered\n");
+}