Skip to content

Commit fa5f762

Browse files
aratajew authored and igcbot committed
Gate fp16 round-trip fold on fast-math
LowPrecisionOpt folded fpext(fptrunc(x)) -> x unconditionally, dropping a lossy fp16 round-trip. In OpenCL, half is a precise IEEE-754 binary16 and the conversion is correctly rounded, so the fold is invalid under precise math. For OpenCL, apply it only under fast-relaxed-math (or when unsafe-math-optimizations and finite-math-only are both enabled); other shader types keep the unconditional fold.
1 parent 3e47abe commit fa5f762

8 files changed

Lines changed: 148 additions & 21 deletions

File tree

IGC/Compiler/LowPrecisionOptPass.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,15 @@ bool LowPrecisionOpt::runOnFunction(Function &F) {
6161
m_builder = &builder;
6262
m_currFunction = &F;
6363
shdrType = ctx->type;
64+
65+
// The fpext(fptrunc(x)) -> x fold in visitFPExtInst() drops a lossy fp16
66+
// round-trip. In OpenCL half is a precise IEEE-754 binary16, so it may only
67+
// be folded away under fast-relaxed-math (unsafe-math covers the rounding loss,
68+
// finite-math the overflow-to-inf case). Casts carry no fast-math flags, so
69+
// read the module-level compile options.
70+
auto &compOpt = ctx->getModuleMetaData()->compOpt;
71+
m_allowFp16RoundTripFold = (shdrType != ShaderType::OPENCL_SHADER) || compOpt.FastRelaxedMath ||
72+
(compOpt.UnsafeMathOptimizations && compOpt.FiniteMathOnly);
6473
bundles.clear();
6574
m_simplifyAlu = true;
6675
m_changeSample = false;
@@ -86,7 +95,8 @@ void LowPrecisionOpt::visitFPExtInst(llvm::FPExtInst &I) {
8695
Instruction *I0 = dyn_cast<Instruction>(I.getOperand(0));
8796
llvm::GenIntrinsicInst *callInst = llvm::dyn_cast<llvm::GenIntrinsicInst>(I.getOperand(0));
8897

89-
if (I0 && I0->getOpcode() == Instruction::FPTrunc && I.getDestTy() == I0->getOperand(0)->getType()) {
98+
if (m_allowFp16RoundTripFold && I0 && I0->getOpcode() == Instruction::FPTrunc &&
99+
I.getDestTy() == I0->getOperand(0)->getType()) {
90100
I.replaceAllUsesWith(I0->getOperand(0));
91101
I.eraseFromParent();
92102
m_changed = true;

IGC/Compiler/LowPrecisionOptPass.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class LowPrecisionOpt : public llvm::FunctionPass, public llvm::InstVisitor<LowP
4848
llvm::SmallVector<moveBundle, 11> bundles;
4949
bool m_changeSample = false;
5050
bool m_simplifyAlu = false;
51+
// Whether the lossy fpext(fptrunc(x)) -> x fp16 round-trip may be folded away.
52+
// For OpenCL this requires fast-relaxed-math. Set in runOnFunction().
53+
bool m_allowFp16RoundTripFold = false;
5154

5255
public:
5356
static char ID;

IGC/Compiler/tests/DebugInfo/LowPrecisionOpt/fpext-typed-pointers.ll

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
;
1717
; Debug MD for this test was created with debugify pass.
1818
; ------------------------------------------------
19+
; igc_opt defaults to the OpenCL shader type, where the lossy fpext(fptrunc(x))
20+
; round-trip is only folded away under fast-relaxed-math, set in the metadata
21+
; below so the debug-info update on the fold is exercised.
1922

2023
; ModuleID = './LowPrecisionOpt/fpext.ll'
2124
source_filename = "./LowPrecisionOpt/fpext.ll"
@@ -63,22 +66,22 @@ entry:
6366
}
6467

6568
; Testcase 1 MD:
66-
; CHECK-DAG: [[TRUNC_LOC]] = !DILocation(line: 3, column: 1, scope: !9)
67-
; CHECK-DAG: [[TRUNC_MD]] = !DILocalVariable(name: "3", scope: !9, file: !4, line: 3, type: !16)
69+
; CHECK-DAG: [[TRUNC_LOC]] = !DILocation(line: 3, column: 1, scope: !{{[0-9]+}})
70+
; CHECK-DAG: [[TRUNC_MD]] = !DILocalVariable(name: "3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 3, type: !{{[0-9]+}})
6871
;
69-
; CHECK-DAG: [[EXT_LOC]] = !DILocation(line: 4, column: 1, scope: !9)
70-
; CHECK-DAG: [[EXT_MD]] = !DILocalVariable(name: "4", scope: !9, file: !4, line: 4, type: !13)
72+
; CHECK-DAG: [[EXT_LOC]] = !DILocation(line: 4, column: 1, scope: !{{[0-9]+}})
73+
; CHECK-DAG: [[EXT_MD]] = !DILocalVariable(name: "4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 4, type: !{{[0-9]+}})
7174
;
72-
; CHECK-DAG: [[STORE_LOC]] = !DILocation(line: 6, column: 1, scope: !9)
73-
; CHECK-DAG: [[STORE_MD]] = !DILocalVariable(name: "5", scope: !9, file: !4, line: 5, type: !19)
75+
; CHECK-DAG: [[STORE_LOC]] = !DILocation(line: 6, column: 1, scope: !{{[0-9]+}})
76+
; CHECK-DAG: [[STORE_MD]] = !DILocalVariable(name: "5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 5, type: !{{[0-9]+}})
7477

7578
; Testcase 2 MD:
76-
; CHECK-DAG: [[CALL_LOC]] = !DILocation(line: 7, column: 1, scope: !9)
79+
; CHECK-DAG: [[CALL_LOC]] = !DILocation(line: 7, column: 1, scope: !{{[0-9]+}})
7780
;
78-
; CHECK-DAG: [[EXT2_LOC]] = !DILocation(line: 8, column: 1, scope: !9)
79-
; CHECK-DAG: [[EXT2_MD]] = !DILocalVariable(name: "7", scope: !9, file: !4, line: 8, type: !13)
81+
; CHECK-DAG: [[EXT2_LOC]] = !DILocation(line: 8, column: 1, scope: !{{[0-9]+}})
82+
; CHECK-DAG: [[EXT2_MD]] = !DILocalVariable(name: "7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 8, type: !{{[0-9]+}})
8083
;
81-
; CHECK-DAG: [[STORE2_LOC]] = !DILocation(line: 9, column: 1, scope: !9)
84+
; CHECK-DAG: [[STORE2_LOC]] = !DILocation(line: 9, column: 1, scope: !{{[0-9]+}})
8285

8386
declare half @llvm.genx.GenISA.DCL.inputVec.f16(i32, i32)
8487

@@ -91,6 +94,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
9194
attributes #0 = { nounwind readnone speculatable }
9295

9396
!igc.functions = !{!0}
97+
!IGCMetadata = !{!32}
9498
!llvm.dbg.cu = !{!3}
9599
!llvm.debugify = !{!6, !7}
96100
!llvm.module.flags = !{!8}
@@ -127,3 +131,6 @@ attributes #0 = { nounwind readnone speculatable }
127131
!29 = !DILocation(line: 8, column: 1, scope: !9)
128132
!30 = !DILocation(line: 9, column: 1, scope: !9)
129133
!31 = !DILocation(line: 10, column: 1, scope: !9)
134+
!32 = !{!"ModuleMD", !33}
135+
!33 = !{!"compOpt", !34}
136+
!34 = !{!"FastRelaxedMath", i1 true}

IGC/Compiler/tests/DebugInfo/LowPrecisionOpt/fpext.ll

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
;
1717
; Debug MD for this test was created with debugify pass.
1818
; ------------------------------------------------
19+
; igc_opt defaults to the OpenCL shader type, where the lossy fpext(fptrunc(x))
20+
; round-trip is only folded away under fast-relaxed-math, set in the metadata
21+
; below so the debug-info update on the fold is exercised.
1922

2023
; ModuleID = './LowPrecisionOpt/fpext.ll'
2124
source_filename = "./LowPrecisionOpt/fpext.ll"
@@ -63,22 +66,22 @@ entry:
6366
}
6467

6568
; Testcase 1 MD:
66-
; CHECK-DAG: [[TRUNC_LOC]] = !DILocation(line: 3, column: 1, scope: !9)
67-
; CHECK-DAG: [[TRUNC_MD]] = !DILocalVariable(name: "3", scope: !9, file: !4, line: 3, type: !16)
69+
; CHECK-DAG: [[TRUNC_LOC]] = !DILocation(line: 3, column: 1, scope: !{{[0-9]+}})
70+
; CHECK-DAG: [[TRUNC_MD]] = !DILocalVariable(name: "3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 3, type: !{{[0-9]+}})
6871
;
69-
; CHECK-DAG: [[EXT_LOC]] = !DILocation(line: 4, column: 1, scope: !9)
70-
; CHECK-DAG: [[EXT_MD]] = !DILocalVariable(name: "4", scope: !9, file: !4, line: 4, type: !13)
72+
; CHECK-DAG: [[EXT_LOC]] = !DILocation(line: 4, column: 1, scope: !{{[0-9]+}})
73+
; CHECK-DAG: [[EXT_MD]] = !DILocalVariable(name: "4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 4, type: !{{[0-9]+}})
7174
;
72-
; CHECK-DAG: [[STORE_LOC]] = !DILocation(line: 6, column: 1, scope: !9)
73-
; CHECK-DAG: [[STORE_MD]] = !DILocalVariable(name: "5", scope: !9, file: !4, line: 5, type: !19)
75+
; CHECK-DAG: [[STORE_LOC]] = !DILocation(line: 6, column: 1, scope: !{{[0-9]+}})
76+
; CHECK-DAG: [[STORE_MD]] = !DILocalVariable(name: "5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 5, type: !{{[0-9]+}})
7477

7578
; Testcase 2 MD:
76-
; CHECK-DAG: [[CALL_LOC]] = !DILocation(line: 7, column: 1, scope: !9)
79+
; CHECK-DAG: [[CALL_LOC]] = !DILocation(line: 7, column: 1, scope: !{{[0-9]+}})
7780
;
78-
; CHECK-DAG: [[EXT2_LOC]] = !DILocation(line: 8, column: 1, scope: !9)
79-
; CHECK-DAG: [[EXT2_MD]] = !DILocalVariable(name: "7", scope: !9, file: !4, line: 8, type: !13)
81+
; CHECK-DAG: [[EXT2_LOC]] = !DILocation(line: 8, column: 1, scope: !{{[0-9]+}})
82+
; CHECK-DAG: [[EXT2_MD]] = !DILocalVariable(name: "7", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: 8, type: !{{[0-9]+}})
8083
;
81-
; CHECK-DAG: [[STORE2_LOC]] = !DILocation(line: 9, column: 1, scope: !9)
84+
; CHECK-DAG: [[STORE2_LOC]] = !DILocation(line: 9, column: 1, scope: !{{[0-9]+}})
8285

8386
declare half @llvm.genx.GenISA.DCL.inputVec.f16(i32, i32)
8487

@@ -91,6 +94,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
9194
attributes #0 = { nounwind readnone speculatable }
9295

9396
!igc.functions = !{!0}
97+
!IGCMetadata = !{!32}
9498
!llvm.dbg.cu = !{!3}
9599
!llvm.debugify = !{!6, !7}
96100
!llvm.module.flags = !{!8}
@@ -127,3 +131,6 @@ attributes #0 = { nounwind readnone speculatable }
127131
!29 = !DILocation(line: 8, column: 1, scope: !9)
128132
!30 = !DILocation(line: 9, column: 1, scope: !9)
129133
!31 = !DILocation(line: 10, column: 1, scope: !9)
134+
!32 = !{!"ModuleMD", !33}
135+
!33 = !{!"compOpt", !34}
136+
!34 = !{!"FastRelaxedMath", i1 true}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2026 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
10+
; UNSUPPORTED: llvm-17-plus
11+
; RUN: igc_opt --typed-pointers -igc-low-precision-opt -S < %s | FileCheck %s
12+
; ------------------------------------------------
13+
; LowPrecisionOpt
14+
; ------------------------------------------------
15+
; In OpenCL half is a precise IEEE-754 binary16, so the lossy fpext(fptrunc(x))
16+
; round-trip must NOT be folded away under precise math (no fast-relaxed-math).
17+
; igc_opt defaults to the OpenCL shader type, so the round-trip is preserved.
18+
19+
define void @test_fptrunc_fpext(float %src1, float %src2) {
20+
; CHECK-LABEL: @test_fptrunc_fpext(
21+
; CHECK: [[TMP1:%.*]] = fadd float %src1, %src2
22+
; CHECK: [[TMP2:%.*]] = fptrunc float [[TMP1]] to half
23+
; CHECK: [[TMP3:%.*]] = fpext half [[TMP2]] to float
24+
; CHECK: call void @use.f16(half [[TMP2]])
25+
; CHECK: call void @use.f32(float [[TMP3]])
26+
; CHECK: ret void
27+
28+
%1 = fadd float %src1, %src2
29+
%2 = fptrunc float %1 to half
30+
%3 = fpext half %2 to float
31+
call void @use.f16(half %2)
32+
call void @use.f32(float %3)
33+
ret void
34+
}
35+
36+
37+
declare void @use.f16(half)
38+
declare void @use.f32(float)
39+
40+
!igc.functions = !{!0}
41+
42+
!0 = !{void (float,float)* @test_fptrunc_fpext, !1}
43+
!1 = !{!2}
44+
!2 = !{!"function_type", i32 0}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2026 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
10+
; REQUIRES: llvm-14-plus
11+
; RUN: igc_opt --opaque-pointers -igc-low-precision-opt -S < %s | FileCheck %s
12+
; ------------------------------------------------
13+
; LowPrecisionOpt
14+
; ------------------------------------------------
15+
; In OpenCL half is a precise IEEE-754 binary16, so the lossy fpext(fptrunc(x))
16+
; round-trip must NOT be folded away under precise math (no fast-relaxed-math).
17+
; igc_opt defaults to the OpenCL shader type, so the round-trip is preserved.
18+
19+
define void @test_fptrunc_fpext(float %src1, float %src2) {
20+
; CHECK-LABEL: @test_fptrunc_fpext(
21+
; CHECK: [[TMP1:%.*]] = fadd float %src1, %src2
22+
; CHECK: [[TMP2:%.*]] = fptrunc float [[TMP1]] to half
23+
; CHECK: [[TMP3:%.*]] = fpext half [[TMP2]] to float
24+
; CHECK: call void @use.f16(half [[TMP2]])
25+
; CHECK: call void @use.f32(float [[TMP3]])
26+
; CHECK: ret void
27+
28+
%1 = fadd float %src1, %src2
29+
%2 = fptrunc float %1 to half
30+
%3 = fpext half %2 to float
31+
call void @use.f16(half %2)
32+
call void @use.f32(float %3)
33+
ret void
34+
}
35+
36+
37+
declare void @use.f16(half)
38+
declare void @use.f32(float)
39+
40+
!igc.functions = !{!0}
41+
42+
!0 = !{void (float,float)* @test_fptrunc_fpext, !1}
43+
!1 = !{!2}
44+
!2 = !{!"function_type", i32 0}

IGC/Compiler/tests/LowPrecisionOpt/fpext-typed-pointers.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
; ------------------------------------------------
1111
; LowPrecisionOpt
1212
; ------------------------------------------------
13+
; igc_opt defaults to the OpenCL shader type, where the lossy fpext(fptrunc(x))
14+
; round-trip is only folded away under fast-relaxed-math, set in the metadata below.
1315

1416
define void @test_fptrunc_fpext(float %src1, float %src2) {
1517
; CHECK-LABEL: @test_fptrunc_fpext(
@@ -48,8 +50,12 @@ declare half @llvm.genx.GenISA.DCL.inputVec.f16(i32, i32)
4850
declare float @llvm.genx.GenISA.RuntimeValue.f32(i32)
4951

5052
!igc.functions = !{!0, !3}
53+
!IGCMetadata = !{!4}
5154

5255
!0 = !{void (float,float)* @test_fptrunc_fpext, !1}
5356
!1 = !{!2}
5457
!2 = !{!"function_type", i32 0}
5558
!3 = !{void (i32,i32)* @test_genx_fpext, !1}
59+
!4 = !{!"ModuleMD", !5}
60+
!5 = !{!"compOpt", !6}
61+
!6 = !{!"FastRelaxedMath", i1 true}

IGC/Compiler/tests/LowPrecisionOpt/fpext.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
; ------------------------------------------------
1313
; LowPrecisionOpt
1414
; ------------------------------------------------
15+
; igc_opt defaults to the OpenCL shader type, where the lossy fpext(fptrunc(x))
16+
; round-trip is only folded away under fast-relaxed-math, set in the metadata below.
1517

1618
define void @test_fptrunc_fpext(float %src1, float %src2) {
1719
; CHECK-LABEL: @test_fptrunc_fpext(
@@ -50,8 +52,12 @@ declare half @llvm.genx.GenISA.DCL.inputVec.f16(i32, i32)
5052
declare float @llvm.genx.GenISA.RuntimeValue.f32(i32)
5153

5254
!igc.functions = !{!0, !3}
55+
!IGCMetadata = !{!4}
5356

5457
!0 = !{void (float,float)* @test_fptrunc_fpext, !1}
5558
!1 = !{!2}
5659
!2 = !{!"function_type", i32 0}
5760
!3 = !{void (i32,i32)* @test_genx_fpext, !1}
61+
!4 = !{!"ModuleMD", !5}
62+
!5 = !{!"compOpt", !6}
63+
!6 = !{!"FastRelaxedMath", i1 true}

0 commit comments

Comments
 (0)