diff --git a/arch/loongarch/configs/deepin_loongarch_desktop_defconfig b/arch/loongarch/configs/deepin_loongarch_desktop_defconfig index b926abedee7d6..06ba3a1c1d622 100644 --- a/arch/loongarch/configs/deepin_loongarch_desktop_defconfig +++ b/arch/loongarch/configs/deepin_loongarch_desktop_defconfig @@ -21,6 +21,7 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y +CONFIG_PSI_DEFAULT_DISABLED=y CONFIG_RCU_EXPERT=y CONFIG_RCU_BOOST=y CONFIG_RCU_NOCB_CPU=y @@ -3681,6 +3682,7 @@ CONFIG_LOGO=y CONFIG_DRM_ACCEL=y CONFIG_SOUND=m CONFIG_SND=m +CONFIG_SND_UMP=m CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m @@ -3689,6 +3691,8 @@ CONFIG_SND_CTL_INPUT_VALIDATION=y CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m CONFIG_SND_SEQUENCER_OSS=m +CONFIG_SND_SEQ_UMP=y +CONFIG_SND_SEQ_UMP_CLIENT=m CONFIG_SND_DUMMY=m CONFIG_SND_ALOOP=m CONFIG_SND_VIRMIDI=m @@ -5838,3 +5842,4 @@ CONFIG_RV=y CONFIG_RV_MON_WWNR=y # CONFIG_STRICT_DEVMEM is not set # CONFIG_RUNTIME_TESTING_MENU is not set +# CONFIG_LEGACY_PTYS is not set diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 0aa6f478cc7c4..40fceabbd136c 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -16,6 +16,7 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y +CONFIG_PSI_DEFAULT_DISABLED=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_IKHEADERS=y @@ -832,8 +833,11 @@ CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y +CONFIG_SND_UMP=m CONFIG_SND_SEQUENCER=m CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_SEQ_UMP=y +CONFIG_SND_SEQ_UMP_CLIENT=m CONFIG_SND_BT87X=m CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_HDA_INTEL=y @@ -860,7 +864,7 @@ CONFIG_SND_HDA_CODEC_HDMI_ATI=y CONFIG_SND_HDA_CODEC_HDMI_NVIDIA=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_AUDIO_MIDI_V2=y +CONFIG_SND_USB_AUDIO_MIDI_V2=m 
CONFIG_SND_SOC=m CONFIG_SND_SOC_LOONGSON_CARD=m CONFIG_SND_SOC_ES7134=m @@ -1152,3 +1156,4 @@ CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set CONFIG_UNWINDER_ORC=y +# CONFIG_LEGACY_PTYS is not set diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2f6a96af7fb12..e3f997843ae23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1289,6 +1289,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, uint64_t seq; int r; +#ifdef CONFIG_MACH_LOONGSON64 + while (amdgpu_ih_fix_is_busy(p->adev)) + msleep(20); +#endif + for (i = 0; i < p->gang_size; ++i) drm_sched_job_arm(&p->jobs[i]->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index a6419246e9c24..69838e6489715 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -27,6 +27,10 @@ #include "amdgpu_ih.h" #include "amdgpu_reset.h" +#ifdef CONFIG_MACH_LOONGSON64 +static void amdgpu_ih_handle_fix_work(struct work_struct *work); +#endif + /** * amdgpu_ih_ring_init - initialize the IH state * @@ -72,6 +76,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, ih->wptr_cpu = &ih->ring[ih->ring_size / 4]; ih->rptr_addr = dma_addr + ih->ring_size + 4; ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1]; + + } else { unsigned wptr_offs, rptr_offs; @@ -99,8 +105,18 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, ih->wptr_cpu = &adev->wb.wb[wptr_offs]; ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4; ih->rptr_cpu = &adev->wb.wb[rptr_offs]; + } +#ifdef CONFIG_MACH_LOONGSON64 + INIT_WORK(&ih->fix_work, amdgpu_ih_handle_fix_work); + ih->adev = adev; + atomic_set(&ih->lock, 0); + for (r = 0; r < (ih->ring_size >> 2); r++) + ih->ring[r] = 0xDEADBEFF; + /* ensure data active */ + mb(); +#endif init_waitqueue_head(&ih->wait_process); return 0; } @@ 
-120,6 +136,10 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	if (!ih->ring)
 		return;
 
+#ifdef CONFIG_MACH_LOONGSON64
+	cancel_work_sync(&ih->fix_work);
+#endif
+
 	if (ih->use_bus_addr) {
 
 		/* add 8 bytes for the rptr/wptr shadows and
@@ -136,6 +156,196 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	}
 }
 
+#ifdef CONFIG_MACH_LOONGSON64
+
+/* Poison pattern kept in IH ring dwords that hold no unprocessed IV data. */
+#define AMDGPU_IH_FIX_POISON		0xDEADBEFF
+/* Delay between two scans of the ring while waiting for late IV data. */
+#define AMDGPU_IH_FIX_POLL_MS		20
+/* Cap on the number of scans (about 5 s) so the worker cannot stall forever. */
+#define AMDGPU_IH_FIX_MAX_POLLS		250
+
+/**
+ * amdgpu_ih_fix_is_busy - report whether a deferred IH fix-up is pending
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns the value of adev->irq.cs_lock, which amdgpu_ih_process() sets
+ * before scheduling the fix-up work and the work clears when it finishes.
+ */
+int amdgpu_ih_fix_is_busy(struct amdgpu_device *adev)
+{
+	return atomic_read(&adev->irq.cs_lock);
+}
+
+/*
+ * Tell whether the ring dwords in [start, end) still hold two neighbouring
+ * poison values; the neighbour of the last dword is the one at @start.
+ */
+static bool amdgpu_ih_fix_range_unwritten(struct amdgpu_ih_ring *ih,
+					  u32 start, u32 end)
+{
+	u32 cur, next;
+
+	for (cur = start; cur < end; cur++) {
+		next = (cur + 1 < end) ? cur + 1 : start;
+		if (le32_to_cpu(ih->ring[cur]) == AMDGPU_IH_FIX_POISON &&
+		    le32_to_cpu(ih->ring[next]) == AMDGPU_IH_FIX_POISON)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * amdgpu_ih_fix_loongarch_pcie_order_start - wait for IV data to become visible
+ *
+ * @ih: IH ring to inspect
+ * @rptr: read pointer; shifted right by two to index ring dwords
+ * @wptr: write pointer; shifted right by two to index ring dwords
+ * @forever: true to sleep and rescan (must be able to sleep), false to scan once
+ *
+ * Scans the dwords from @rptr up to @wptr, following the wrap at the end of
+ * the ring, for slots that still hold the poison pattern although the write
+ * pointer already covers them.
+ *
+ * Returns 0 when no such slot is found, -EAGAIN when @forever is false and
+ * the single scan found one, or -ETIMEDOUT when @forever is true and slots
+ * are still poisoned after AMDGPU_IH_FIX_MAX_POLLS scans.
+ */
+static int amdgpu_ih_fix_loongarch_pcie_order_start(struct amdgpu_ih_ring *ih,
+						    u32 rptr, u32 wptr,
+						    bool forever)
+{
+	u32 ring_end = ih->ring_size >> 2;
+	unsigned int polls = 0;
+	u32 first_end;
+	bool wrapped;
+
+	if (rptr == wptr)
+		return 0;
+
+	rptr >>= 2;
+	wptr >>= 2;
+	wrapped = rptr > wptr;
+	first_end = wrapped ? ring_end : wptr;
+
+	for (;;) {
+		if (forever)
+			msleep(AMDGPU_IH_FIX_POLL_MS);
+
+		if (!amdgpu_ih_fix_range_unwritten(ih, rptr, first_end) &&
+		    !(wrapped && amdgpu_ih_fix_range_unwritten(ih, 0, wptr)))
+			return 0;
+
+		if (!forever)
+			return -EAGAIN;
+
+		/* Give up eventually: the GPU may have stopped writing for good. */
+		if (++polls >= AMDGPU_IH_FIX_MAX_POLLS)
+			return -ETIMEDOUT;
+	}
+}
+
+/**
+ * amdgpu_ih_fix_loongarch_pcie_order_end - re-poison processed ring slots
+ *
+ * @ih: IH ring to update
+ * @rptr: read pointer before processing; shifted right by two to index dwords
+ * @wptr: read pointer after processing; shifted right by two to index dwords
+ *
+ * Overwrites every dword from @rptr up to @wptr, following the wrap at the
+ * end of the ring, with the poison pattern so that the next scan can tell
+ * fresh IV data from stale data. Always returns 0.
+ */
+static int amdgpu_ih_fix_loongarch_pcie_order_end(struct amdgpu_ih_ring *ih,
+						  u32 rptr, u32 wptr)
+{
+	u32 ring_end = ih->ring_size >> 2;
+	u32 first_end, i;
+	bool wrapped;
+
+	if (rptr == wptr)
+		return 0;
+
+	rptr >>= 2;
+	wptr >>= 2;
+	wrapped = rptr > wptr;
+	first_end = wrapped ? ring_end : wptr;
+
+	/* Stored little-endian to match the le32_to_cpu() reads of the ring. */
+	for (i = rptr; i < first_end; i++)
+		ih->ring[i] = cpu_to_le32(AMDGPU_IH_FIX_POISON);
+
+	if (wrapped) {
+		for (i = 0; i < wptr; i++)
+			ih->ring[i] = cpu_to_le32(AMDGPU_IH_FIX_POISON);
+	}
+
+	/* memory barrier for writing into ih ring */
+	mb();
+	return 0;
+}
+
+/**
+ * amdgpu_ih_handle_fix_work - process IH entries whose data arrived late
+ *
+ * @work: the fix_work member of the IH ring to process
+ *
+ * Waits for the pending entries to be written, dispatches them, re-poisons
+ * the consumed slots and updates the read pointer. Repeats while the write
+ * pointer keeps moving, then clears adev->irq.cs_lock.
+ */
+static void amdgpu_ih_handle_fix_work(struct work_struct *work)
+{
+	struct amdgpu_ih_ring *ih =
+		container_of(work, struct amdgpu_ih_ring, fix_work);
+	struct amdgpu_device *adev = ih->adev;
+	bool relock = false;
+	u32 old_rptr;
+	u32 wptr;
+
+	for (;;) {
+		/*
+		 * The first pass runs with ih->lock still held by the
+		 * amdgpu_ih_process() call that scheduled this work; later
+		 * passes must take it again and back off if it is contended.
+		 */
+		if (relock && atomic_xchg(&ih->lock, 1))
+			break;
+
+		wptr = amdgpu_ih_get_wptr(adev, ih);
+		/* Order reading of wptr vs. reading of IH ring data */
+		rmb();
+
+		old_rptr = ih->rptr;
+		if (amdgpu_ih_fix_loongarch_pcie_order_start(ih, old_rptr, wptr,
+							     true))
+			dev_warn_ratelimited(adev->dev,
+					     "IH ring data still missing, processing anyway\n");
+
+		while (ih->rptr != wptr) {
+			amdgpu_irq_dispatch(adev, ih);
+			ih->rptr &= ih->ptr_mask;
+		}
+
+		amdgpu_ih_fix_loongarch_pcie_order_end(ih, old_rptr, ih->rptr);
+
+		amdgpu_ih_set_rptr(adev, ih);
+		atomic_set(&ih->lock, 0);
+		/* Publish the unlock before sampling the write pointer again. */
+		mb();
+
+		if (ih->rptr == amdgpu_ih_get_wptr(adev, ih))
+			break;
+
+		relock = true;
+	}
+
+	atomic_set(&adev->irq.cs_lock, 0);
+}
+#endif
+
 /**
  * amdgpu_ih_ring_write - write IV to the ring buffer
  *
@@ -210,6 +420,10 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
 	unsigned int count;
 	u32 wptr;
+#ifdef CONFIG_MACH_LOONGSON64
+	u32 old_rptr;
+	int r;
+#endif
 
 	if (!ih->enabled || adev->shutdown)
 		return IRQ_NONE;
@@ -217,20 +431,47 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
+#ifdef CONFIG_MACH_LOONGSON64
+	/* is somebody else already processing irqs? */
+	if (atomic_xchg(&ih->lock, 1))
+		return IRQ_NONE;
+#endif
 	count = AMDGPU_IH_MAX_NUM_IVS;
 	dev_dbg(adev->dev, "%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs.
reading of IH ring data */ rmb(); +#ifdef CONFIG_MACH_LOONGSON64 + old_rptr = ih->rptr; + r = amdgpu_ih_fix_loongarch_pcie_order_start(ih, old_rptr, wptr, false); + if (r) { + if (old_rptr == ((wptr + 16) & ih->ptr_mask) || + old_rptr == ((wptr + 32) & ih->ptr_mask)) { + atomic_set(&ih->lock, 0); + return IRQ_NONE; + } + atomic_xchg(&adev->irq.cs_lock, 1); + schedule_work(&ih->fix_work); + return IRQ_NONE; + } +#endif + while (ih->rptr != wptr && --count) { amdgpu_irq_dispatch(adev, ih); ih->rptr &= ih->ptr_mask; } +#ifdef CONFIG_MACH_LOONGSON64 + amdgpu_ih_fix_loongarch_pcie_order_end(ih, old_rptr, ih->rptr); +#endif + if (!ih->overflow) amdgpu_ih_set_rptr(adev, ih); +#ifdef CONFIG_MACH_LOONGSON64 + atomic_set(&ih->lock, 0); +#endif wake_up_all(&ih->wait_process); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index f58b6be7fccc0..861a24101c373 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -73,6 +73,11 @@ struct amdgpu_ih_ring { wait_queue_head_t wait_process; uint64_t processed_timestamp; bool overflow; +#ifdef CONFIG_MACH_LOONGSON64 + atomic_t lock; + struct work_struct fix_work; + struct amdgpu_device *adev; +#endif }; /* return true if time stamp t2 is after t1 with 48bit wrap around */ @@ -115,4 +120,7 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, signed int offset); const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +#ifdef CONFIG_MACH_LOONGSON64 +int amdgpu_ih_fix_is_busy(struct amdgpu_device *adev); +#endif #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 8112ffc85995e..a2e45fdfe527d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -276,6 +276,10 @@ int 
amdgpu_irq_init(struct amdgpu_device *adev) spin_lock_init(&adev->irq.lock); +#ifdef CONFIG_MACH_LOONGSON64 + atomic_set(&adev->irq.cs_lock, 0); +#endif + /* Enable MSI if not disabled by module parameter */ adev->irq.msi_enabled = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 9f0417456abda..44f99a099bed0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -100,6 +100,9 @@ struct amdgpu_irq { uint32_t srbm_soft_reset; u32 retry_cam_doorbell_index; bool retry_cam_enabled; +#ifdef CONFIG_MACH_LOONGSON64 + atomic_t cs_lock; +#endif }; enum interrupt_node_id_per_aid { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 003bcece715eb..d28c8e12fbce7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4672,8 +4672,13 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; +#ifdef CONFIG_MACH_LOONGSON64 + return amdgpu_ring_init(adev, ring, 1024*2, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); +#else return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); +#endif } static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, @@ -4704,8 +4709,13 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ +#ifdef CONFIG_MACH_LOONGSON64 + return amdgpu_ring_init(adev, ring, 1024*2, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); +#else return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); +#endif } static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev) @@ -8715,7 +8725,31 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; +#ifdef CONFIG_MACH_LOONGSON64 + int i; + for (i = 0; i < 10; i++) { + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write( + ring, + (PACKET3_RELEASE_MEM_GCR_SEQ | + PACKET3_RELEASE_MEM_GCR_GL2_WB | + PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ + PACKET3_RELEASE_MEM_GCR_GLM_WB | + PACKET3_RELEASE_MEM_CACHE_POLICY(3) | + PACKET3_RELEASE_MEM_EVENT_TYPE( + CACHE_FLUSH_AND_INV_TS_EVENT) | + PACKET3_RELEASE_MEM_EVENT_INDEX(5))); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL( + write64bit ? 
2 : 1) | + PACKET3_RELEASE_MEM_INT_SEL(0))); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } +#endif /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | @@ -9842,7 +9876,11 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 4 + /* VM_FLUSH */ +#ifdef CONFIG_MACH_LOONGSON64 + 8*11 + /* FENCE for VM_FLUSH */ +#else 8 + /* FENCE for VM_FLUSH */ +#endif 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, * the first COND_EXEC jump to the place @@ -9855,7 +9893,11 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ +#ifdef CONFIG_MACH_LOONGSON64 + 8*11 + 8*11 + /* FENCE x2 */ +#else 8 + 8 + /* FENCE x2 */ +#endif 2 + /* SWITCH_BUFFER */ 8 + /* gfx_v10_0_emit_mem_sync */ 2, /* gfx_v10_0_ring_emit_cleaner_shader */ @@ -9901,7 +9943,11 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v10_0_ring_emit_vm_flush */ +#ifdef CONFIG_MACH_LOONGSON64 + 8*11 + 8*11 + 8*11 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ +#else 8 + 8 + 8 + /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */ +#endif 8 + /* gfx_v10_0_emit_mem_sync */ 2, /* gfx_v10_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 1dd9fd486eecf..8ddefbe4554d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1151,8 +1151,13 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int 
ring_id, irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; +#ifdef CONFIG_MACH_LOONGSON64 + return amdgpu_ring_init(adev, ring, 1024*2, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); +#else return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); +#endif } static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, @@ -1184,8 +1189,13 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ +#ifdef CONFIG_MACH_LOONGSON64 + r = amdgpu_ring_init(adev, ring, 1024*2, &adev->gfx.eop_irq, irq_type, + hw_prio, NULL); +#else r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); +#endif if (r) return r; @@ -5930,7 +5940,33 @@ static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; +#ifdef CONFIG_MACH_LOONGSON64 + int i; + for (i = 0; i < 10; i++) { + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | + PACKET3_RELEASE_MEM_GCR_GL2_WB | + PACKET3_RELEASE_MEM_GCR_GL2_INV | + PACKET3_RELEASE_MEM_GCR_GL2_US | + PACKET3_RELEASE_MEM_GCR_GL1_INV | + PACKET3_RELEASE_MEM_GCR_GLV_INV | + PACKET3_RELEASE_MEM_GCR_GLM_INV | + PACKET3_RELEASE_MEM_GCR_GLM_WB | + PACKET3_RELEASE_MEM_CACHE_POLICY(3) | + PACKET3_RELEASE_MEM_EVENT_TYPE( + CACHE_FLUSH_AND_INV_TS_EVENT) | + PACKET3_RELEASE_MEM_EVENT_INDEX(5))); + amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL( + write64bit ? 
2 : 1) | + PACKET3_RELEASE_MEM_INT_SEL(0))); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } +#endif /* RELEASE_MEM - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | @@ -7217,7 +7253,11 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 4 + /* VM_FLUSH */ +#ifdef CONFIG_MACH_LOONGSON64 + 8*11 + /* FENCE for VM_FLUSH */ +#else 8 + /* FENCE for VM_FLUSH */ +#endif 20 + /* GDS switch */ 5 + /* COND_EXEC */ 7 + /* HDP_flush */ @@ -7226,7 +7266,11 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 22 + /* SET_Q_PREEMPTION_MODE */ +#ifdef CONFIG_MACH_LOONGSON64 + 8*11 + 8*11 + /* FENCE x2 */ +#else 8 + 8 + /* FENCE x2 */ +#endif 8 + /* gfx_v11_0_emit_mem_sync */ 2, /* gfx_v11_0_ring_emit_cleaner_shader */ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ @@ -7272,7 +7316,11 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v11_0_ring_emit_vm_flush */ +#ifdef CONFIG_MACH_LOONGSON64 + 8*11 + 8*11 + 8*11 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ +#else 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ +#endif 8 + /* gfx_v11_0_emit_mem_sync */ 2, /* gfx_v11_0_ring_emit_cleaner_shader */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 80565392313f1..fe2eef0f95539 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1839,6 +1839,17 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, 
u64 addr, amdgpu_ring_write(ring, 0xFFFFFFFF); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 10); /* poll interval */ +#ifdef CONFIG_MACH_LOONGSON64 + /* EVENT_WRITE_EOP - flush caches, no send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); + amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | + ((write64bit ? 2 : 1) << CP_EOP_DONE_DATA_CNTL__DATA_SEL__SHIFT) | + (0 << CP_EOP_DONE_DATA_CNTL__INT_SEL__SHIFT)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); +#endif /* EVENT_WRITE_EOP - flush caches, send int */ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5)); @@ -3460,7 +3471,11 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { .set_wptr = gfx_v6_0_ring_set_wptr_gfx, .emit_frame_size = 5 + 5 + /* hdp flush / invalidate */ +#ifdef CONFIG_MACH_LOONGSON64 + 20 + 20 + 20 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#else 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#endif 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */ 3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */ @@ -3489,7 +3504,11 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = { 5 + 5 + /* hdp flush / invalidate */ 7 + /* gfx_v6_0_ring_emit_pipeline_sync */ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */ +#ifdef CONFIG_MACH_LOONGSON64 + 20 + 20 + 20 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#else 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */ +#endif 5, /* SURFACE_SYNC */ .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */ .emit_ib = gfx_v6_0_ring_emit_ib, diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index e9a1957806b79..dc6f965ff5305 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2125,14 +2125,6 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; - -/* This workaround causes instability for LoongArch/Loongson (MIPS) - * devices based on the 7A1000/2000 chipset under heavy I/O load. - * - * FIXME: Disable this workaround until we find a better fix (possibly in - * the platform-specific PCI code). - */ -#ifndef CONFIG_MACH_LOONGSON64 /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -2143,12 +2135,17 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EVENT_INDEX(5))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | +#ifdef CONFIG_MACH_LOONGSON64 + DATA_SEL(write64bit ? 2 : 1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); +#else DATA_SEL(1) | INT_SEL(0)); amdgpu_ring_write(ring, lower_32_bits(seq - 1)); amdgpu_ring_write(ring, upper_32_bits(seq - 1)); -#endif /* Then send the real EOP event down the pipe. 
*/ +#endif amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 61b999f2e8c80..98b4e7bf9f96a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6113,13 +6113,6 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; -/* This workaround causes instability for LoongArch/Loongson (MIPS) - * devices based on the 7A1000/2000 chipset under heavy I/O load. - * - * FIXME: Disable this workaround until we find a better fix (possibly in - * the platform-specific PCI code). - */ -#ifndef CONFIG_MACH_LOONGSON64 /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -6131,13 +6124,18 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, EVENT_INDEX(5))); amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | +#ifdef CONFIG_MACH_LOONGSON64 + DATA_SEL(write64bit ? 
2 : 1) | INT_SEL(0)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); +#else DATA_SEL(1) | INT_SEL(0)); amdgpu_ring_write(ring, lower_32_bits(seq - 1)); amdgpu_ring_write(ring, upper_32_bits(seq - 1)); -#endif /* Then send the real EOP event down the pipe: * EVENT_WRITE_EOP - flush caches, send int */ +#endif amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8e2e9eaa87b4f..909d51fdae016 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3543,13 +3543,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr; -/* This workaround causes instability for LoongArch/Loongson (MIPS) - * devices based on the 7A1000/2000 chipset under heavy I/O load. - * - * FIXME: Disable this workaround until we find a better fix (possibly in - * the platform-specific PCI code). - */ -#ifndef CONFIG_MACH_LOONGSON64 /* Workaround for cache flush problems. First send a dummy EOP * event down the pipe with seq one below. */ @@ -3561,9 +3554,12 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, addr & 0xfffffffc); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(0)); +#ifdef CONFIG_MACH_LOONGSON64 + radeon_ring_write(ring, fence->seq); +#else radeon_ring_write(ring, fence->seq - 1); - radeon_ring_write(ring, 0); #endif + radeon_ring_write(ring, 0); /* Then send the real EOP event down the pipe. 
*/ radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); @@ -8101,7 +8097,7 @@ int cik_irq_process(struct radeon_device *rdev) if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; -#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_MACH_LOONGSON64 WREG32(IH_RB_RPTR, rptr); #endif atomic_set(&rdev->ih.lock, 0); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index a0b3e6268e6be..b08b0ad796f36 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4919,7 +4919,7 @@ int evergreen_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; -#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_MACH_LOONGSON64 WREG32(IH_RB_RPTR, rptr); #endif atomic_set(&rdev->ih.lock, 0); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index df075e025801c..7d17c70de343b 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4328,7 +4328,7 @@ int r600_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; -#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_MACH_LOONGSON64 WREG32(IH_RB_RPTR, rptr); #endif atomic_set(&rdev->ih.lock, 0); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 479d12ed82dee..d086ee64dc0ff 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6423,7 +6423,7 @@ int si_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; -#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_MACH_LOONGSON64 WREG32(IH_RB_RPTR, rptr); #endif atomic_set(&rdev->ih.lock, 0);