|
| 1 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-enable-object-linking < %s | FileCheck -check-prefixes=ASM %s |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-enable-object-linking -filetype=obj < %s | llvm-readobj -r --syms - | FileCheck -check-prefixes=ELF %s |
| 3 | + |
| 4 | +; Test that with object linking enabled, external LDS declarations produce |
| 5 | +; @abs32@lo relocations, SHN_AMDGPU_LDS symbols, and .amdgpu_lds directives. |
| 6 | +; Covers multiple LDS variables with different sizes and alignments (including |
| 7 | +; zero-sized dynamic LDS), usage from both kernels and device functions, and |
| 8 | +; group_segment_fixed_size = 0 (the linker later fills in the final value via binary patching). |
| 9 | + |
; External addrspace(3) array declarations used by the two functions in this
; file. They differ in element count and alignment (256/16, 128/4, 0/8); the
; .amdgpu_lds checks in this file expect exactly those size/alignment pairs.
; @lds_dynamic is a zero-length array.
| 10 | +@lds_large = external addrspace(3) global [256 x i8], align 16 |
| 11 | +@lds_small = external addrspace(3) global [128 x i8], align 4 |
| 12 | +@lds_dynamic = external addrspace(3) global [0 x i8], align 8 |
| 13 | + |
| 14 | +; --- Assembly checks --- |
| 15 | +; ASM-LABEL: {{^}}device_func: |
| 16 | +; ASM: v_add_u32_e32 v{{[0-9]+}}, lds_large@abs32@lo, v{{[0-9]+}} |
| 17 | + |
| 18 | +; ASM-LABEL: {{^}}test_kernel: |
| 19 | +; ASM-DAG: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, lds_small@abs32@lo |
| 20 | +; ASM-DAG: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, lds_dynamic@abs32@lo |
| 21 | + |
| 22 | +; ASM-DAG: .amdgpu_lds lds_large, 256, 16 |
| 23 | +; ASM-DAG: .amdgpu_lds lds_small, 128, 4 |
| 24 | +; ASM-DAG: .amdgpu_lds lds_dynamic, 0, 8 |
| 25 | + |
| 26 | +; ASM: .group_segment_fixed_size: 0 |
| 27 | + |
| 28 | +; --- ELF checks (relocation names and symbol names only; the symbols' section index is not matched) --- |
| 29 | +; ELF-DAG: R_AMDGPU_ABS32_LO lds_large |
| 30 | +; ELF-DAG: R_AMDGPU_ABS32_LO lds_small |
| 31 | +; ELF-DAG: R_AMDGPU_ABS32_LO lds_dynamic |
| 32 | + |
| 33 | +; ELF-DAG: Name: lds_large |
| 34 | +; ELF-DAG: Name: lds_small |
| 35 | +; ELF-DAG: Name: lds_dynamic |
| 36 | + |
; Non-kernel function (plain `define void`, no amdgpu_kernel calling
; convention). Stores the byte value 1 into @lds_large at the byte index given
; by %idx. The assembly checks for this function expect the address
; computation to reference lds_large@abs32@lo.
| 37 | +define void @device_func(i32 %idx) { |
| 38 | + %gep = getelementptr [256 x i8], ptr addrspace(3) @lds_large, i32 0, i32 %idx |
| 39 | + store i8 1, ptr addrspace(3) %gep |
| 40 | + ret void |
| 41 | +} |
| 42 | + |
; Kernel entry point (amdgpu_kernel calling convention). Using the same %idx
; for every access, it:
;   - stores the byte value 2 into @lds_small,
;   - stores the byte value 3 into the zero-length array @lds_dynamic,
;   - calls @device_func, which in turn stores into @lds_large.
; The assembly checks for this function expect lds_small@abs32@lo and
; lds_dynamic@abs32@lo to appear as scalar add operands, in either order.
| 43 | +define amdgpu_kernel void @test_kernel(i32 %idx) { |
| 44 | + %gep1 = getelementptr [128 x i8], ptr addrspace(3) @lds_small, i32 0, i32 %idx |
| 45 | + store i8 2, ptr addrspace(3) %gep1 |
| 46 | + %gep2 = getelementptr [0 x i8], ptr addrspace(3) @lds_dynamic, i32 0, i32 %idx |
| 47 | + store i8 3, ptr addrspace(3) %gep2 |
| 48 | + call void @device_func(i32 %idx) |
| 49 | + ret void |
| 50 | +} |
0 commit comments