|
| 1 | +;=========================== begin_copyright_notice ============================ |
| 2 | +; |
| 3 | +; Copyright (C) 2026 Intel Corporation |
| 4 | +; |
| 5 | +; SPDX-License-Identifier: MIT |
| 6 | +; |
| 7 | +;============================ end_copyright_notice ============================= |
| 8 | + |
| 9 | +; Regression tests for the MemOpt fix in SymbolicPointer::checkTerms(): |
| 10 | +; when OpNum == 3 (both operands of the compared instructions are identical), |
| 11 | +; the merge must be REJECTED if the two terms carry different Scale values, |
| 12 | +; because the pointer difference is then runtime-dependent. |
| 13 | +; |
| 14 | +; Two patterns are covered, one per guard: |
| 15 | +; |
| 16 | +; Test 1 (first guard): |
| 17 | +; The differing terms' own top-level sub nsw instructions share both |
| 18 | +; operands (%add0 and %k), so the FIRST checkInstructions call sets |
| 19 | +; OpNum = 3. Scale(chain0)=4, Scale(chain1)=8 → reject. |
| 20 | +; |
| 21 | +; Test 2 (second guard): |
| 22 | +; The differing terms' top-level sub nsw instructions share only operand 1 |
| 23 | +; (%k); the FIRST checkInstructions call drills into operand 0, which is |
| 24 | +; an add nsw with identical operands (%base, %delta), so the SECOND |
| 25 | +; checkInstructions call sets OpNum = 3. |
| 26 | +; Scale(chain0)=4, Scale(chain1)=8 → reject. |
| 27 | +; |
| 28 | +; In both cases the loads must remain as two separate float loads rather than |
| 29 | +; being coalesced into a <2 x float> load. |
| 30 | +; |
| 31 | +; RUN: igc_opt --opaque-pointers %s -S -o - --basic-aa -igc-memopt -instcombine | FileCheck %s |
| 32 | + |
| 33 | +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32:64" |
| 34 | +target triple = "spir64-unknown-unknown" |
| 35 | + |
| 36 | +; --------------------------------------------------------------------------- |
| 37 | +; Test 1: first-guard path. |
| 38 | +; |
| 39 | +; Both differing terms use a top-level sub nsw whose two operands are the |
| 40 | +; same IR values (%add0 and %k), so the very first checkInstructions call |
| 41 | +; sets OpNum = 3. The GEP index for chain 1 is multiplied by 2, giving |
| 42 | +; Scale=8 vs Scale=4 for chain 0. The guard must return true (reject). |
| 43 | +; |
| 44 | +; Chain 0: ptr = src[sub0] sub0 = add0 - k scale 4 |
| 45 | +; Chain 1: ptr = src[sub1 * 2] sub1 = add0 - k scale 8 |
| 46 | +; |
| 47 | +; CHECK-LABEL: @check_terms_first_guard_different_scales |
| 48 | +; CHECK: load float |
| 49 | +; CHECK: load float |
| 50 | +; CHECK-NOT: load <2 x float> |
| 51 | +define spir_kernel void @check_terms_first_guard_different_scales( |
| 52 | + ptr addrspace(1) %dst, |
| 53 | + ptr addrspace(1) %src, |
| 54 | + i32 %base, |
| 55 | + i32 %delta, |
| 56 | + i32 %k) { |
| 57 | +entry: |
| 58 | + %add0 = add nsw i32 %base, %delta |
| 59 | + |
| 60 | + ; Chain 0: GEP scale = 4 (float) |
| 61 | + %sub0 = sub nsw i32 %add0, %k |
| 62 | + %sub0e = sext i32 %sub0 to i64 |
| 63 | + %ptr0 = getelementptr inbounds float, ptr addrspace(1) %src, i64 %sub0e |
| 64 | + %v0 = load float, ptr addrspace(1) %ptr0, align 4 |
| 65 | + |
| 66 | + ; Chain 1: same operands (%add0, %k) as sub0 → first checkInstructions |
| 67 | + ; sets OpNum = 3. Index doubled → net GEP scale = 8. |
| 68 | + %sub1 = sub nsw i32 %add0, %k |
| 69 | + %sub1e = sext i32 %sub1 to i64 |
| 70 | + %sub1s = mul i64 %sub1e, 2 |
| 71 | + %ptr1 = getelementptr inbounds float, ptr addrspace(1) %src, i64 %sub1s |
| 72 | + %v1 = load float, ptr addrspace(1) %ptr1, align 4 |
| 73 | + |
| 74 | + %sum = fadd float %v0, %v1 |
| 75 | + store float %sum, ptr addrspace(1) %dst, align 4 |
| 76 | + ret void |
| 77 | +} |
| 78 | + |
| 79 | +; --------------------------------------------------------------------------- |
| 80 | +; Test 2: second-guard path. |
| 81 | +; |
| 82 | +; The differing terms' top-level sub nsw instructions differ in operand 0 |
| 83 | +; (%add0 vs %add1), so the first checkInstructions call chooses OpNum = 0 |
| 84 | +; and drills into that operand. The nested add nsw instructions (%add0 and |
| 85 | +; %add1) share both operands (%base, %delta), so the SECOND |
| 86 | +; checkInstructions call sets OpNum = 3. The GEP index for chain 1 is |
| 87 | +; multiplied by 2, giving Scale=8 vs Scale=4 for chain 0. The guard must |
| 88 | +; return true (reject). |
| 89 | +; |
| 90 | +; Chain 0: ptr = src[sub0] sub0 = add0 - k, add0 = base + delta scale 4 |
| 91 | +; Chain 1: ptr = src[sub1 * 2] sub1 = add1 - k, add1 = base + delta scale 8 |
| 92 | +; |
| 93 | +; CHECK-LABEL: @check_terms_second_guard_different_scales |
| 94 | +; CHECK: load float |
| 95 | +; CHECK: load float |
| 96 | +; CHECK-NOT: load <2 x float> |
| 97 | +define spir_kernel void @check_terms_second_guard_different_scales( |
| 98 | + ptr addrspace(1) %dst, |
| 99 | + ptr addrspace(1) %src, |
| 100 | + i32 %base, |
| 101 | + i32 %delta, |
| 102 | + i32 %k) { |
| 103 | +entry: |
| 104 | + ; Chain 0: GEP scale = 4 (float) |
| 105 | + %add0 = add nsw i32 %base, %delta |
| 106 | + %sub0 = sub nsw i32 %add0, %k |
| 107 | + %sub0e = sext i32 %sub0 to i64 |
| 108 | + %ptr0 = getelementptr inbounds float, ptr addrspace(1) %src, i64 %sub0e |
| 109 | + %v0 = load float, ptr addrspace(1) %ptr0, align 4 |
| 110 | + |
| 111 | + ; Chain 1: add1 has same operands as add0 → second checkInstructions |
| 112 | + ; sets OpNum = 3. Index doubled → net GEP scale = 8. |
| 113 | + %add1 = add nsw i32 %base, %delta |
| 114 | + %sub1 = sub nsw i32 %add1, %k |
| 115 | + %sub1e = sext i32 %sub1 to i64 |
| 116 | + %sub1s = mul i64 %sub1e, 2 |
| 117 | + %ptr1 = getelementptr inbounds float, ptr addrspace(1) %src, i64 %sub1s |
| 118 | + %v1 = load float, ptr addrspace(1) %ptr1, align 4 |
| 119 | + |
| 120 | + %sum = fadd float %v0, %v1 |
| 121 | + store float %sum, ptr addrspace(1) %dst, align 4 |
| 122 | + ret void |
| 123 | +} |
0 commit comments