Skip to content

Commit 01b4490

Browse files
aratajewfda0
authored andcommitted
Avoid calling ambiguous intrinsics from emulation builtins
If GenISA intrinsics are declared in OpenCL C as follows: ```c extern double GenISA_fma_rtz(double, double, double); ``` Clang produces the following LLVM function declaration: ```llvm declare dso_local double @GenISA_fma_rtz(double, double, double) ``` The issue is that the declaration is not mangled, so the same function name will be produced for double and float types. If a kernel needs multiple emulation builtins to be imported and some of them need `float` version of `GenISA_fma_rtz` and some of them need `double` version of `GenISA_fma_rtz`, then LLVM treats them both as the same function, hence it inserts a `bitcast` instruction for one of them (depending on which one was imported first): ```llvm float bitcast (double (double, double, double)* @GenISA_fma_rtz to float (float, float, float)*)(float %div, float 0xBE98000000000000, float %div) ``` It is incorrect, because we call `double` version of the intrinsic, even though the intention was to call the `float` version. The solution for this issue is to add manual mangling to GenISA externs: ```c extern double GenISA_fma_rtz_f64(double, double, double); ``` (cherry picked from commit aecdc4c)
1 parent 78191e6 commit 01b4490

File tree

10 files changed

+4507
-3485
lines changed

10 files changed

+4507
-3485
lines changed

IGC/Compiler/Builtins/LibraryDPEmu.hpp

Lines changed: 3457 additions & 2558 deletions
Large diffs are not rendered by default.

IGC/Compiler/Builtins/LibraryIntS32DivRemEmu.hpp

Lines changed: 204 additions & 183 deletions
Large diffs are not rendered by default.

IGC/Compiler/Builtins/LibraryIntS32DivRemEmuSP.hpp

Lines changed: 300 additions & 278 deletions
Large diffs are not rendered by default.

IGC/Compiler/Builtins/LibraryIntU32DivRemEmu.hpp

Lines changed: 201 additions & 180 deletions
Large diffs are not rendered by default.

IGC/Compiler/Builtins/LibraryIntU32DivRemEmuSP.hpp

Lines changed: 292 additions & 270 deletions
Large diffs are not rendered by default.

IGC/Compiler/Optimizer/PreCompiledFuncImport.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2017-2022 Intel Corporation
3+
Copyright (C) 2017-2023 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -635,14 +635,10 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
635635
// When we test it, we need to set emuKind
636636
if (IGC_GET_FLAG_VALUE(TestIGCPreCompiledFunctions) == 1)
637637
{
638-
m_emuKind = EmuKind::EMU_DP;
639-
checkAndSetEnableSubroutine();
640-
}
641-
else if (IGC_GET_FLAG_VALUE(TestIGCPreCompiledFunctions) == 2)
642-
{
643-
m_emuKind = EmuKind::EMU_DP_DIV_SQRT;
638+
m_emuKind = IGC_GET_FLAG_VALUE(ForceEmuKind) ? IGC_GET_FLAG_VALUE(ForceEmuKind) : EmuKind::EMU_DP;
644639
checkAndSetEnableSubroutine();
645640
}
641+
646642
// sanity check
647643
if (m_emuKind == 0) {
648644
// Nothing to emulate
@@ -756,15 +752,15 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
756752
for (auto& I : BB) {
757753
if (CallInst* CI = dyn_cast<CallInst>(&I)) {
758754
if (Function* calledFunc = CI->getCalledFunction()) {
759-
if (calledFunc->getName().equals("GenISA_fma_rtz"))
755+
if (calledFunc->getName().startswith("GenISA_fma_rtz"))
760756
{
761757
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_fma_rtz);
762758
}
763-
else if (calledFunc->getName().equals("GenISA_fma_rtp"))
759+
else if (calledFunc->getName().startswith("GenISA_fma_rtp"))
764760
{
765761
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_fma_rtp);
766762
}
767-
else if (calledFunc->getName().equals("GenISA_fma_rtn"))
763+
else if (calledFunc->getName().startswith("GenISA_fma_rtn"))
768764
{
769765
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_fma_rtn);
770766
}
@@ -794,15 +790,15 @@ bool PreCompiledFuncImport::runOnModule(Module& M)
794790
{
795791
if (Function* calledFunc = CI->getCalledFunction())
796792
{
797-
if (calledFunc->getName().equals("GenISA_mul_rtz"))
793+
if (calledFunc->getName().startswith("GenISA_mul_rtz"))
798794
{
799795
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_mul_rtz);
800796
}
801-
else if (calledFunc->getName().equals("GenISA_add_rtz"))
797+
else if (calledFunc->getName().startswith("GenISA_add_rtz"))
802798
{
803799
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_add_rtz);
804800
}
805-
else if (calledFunc->getName().equals("GenISA_uitof_rtz"))
801+
else if (calledFunc->getName().startswith("GenISA_uitof_rtz"))
806802
{
807803
createIntrinsicCall(CI, GenISAIntrinsic::GenISA_uitof_rtz);
808804
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2023 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; REQUIRES: regkeys
10+
;
11+
; RUN: igc_opt -regkey TestIGCPreCompiledFunctions=1 -regkey ForceEmuKind=109 %s -S -o - --platformmtl --igc-precompiled-import | FileCheck %s
12+
; ------------------------------------------------
13+
; PreCompiledFuncImport
14+
;
15+
; ForceEmuKind=109 means EMU_I64DIVREM | EMU_DP_DIV_SQRT | EMU_SP_DIV | EMU_I32DIVREM_SP | EMU_FP64_FP16_CONV
16+
;
17+
; This test verifies if GenISA.fma intrinsic names clash doesn't occur when `sdiv i64` and `fdiv double`
18+
; are simultaniously emulated in the same LLVM module. Sdiv's implementation uses fma.rtz.f32, while
19+
; fdiv's implementation uses fma.rtz.f64. If IGC emulation builtins didn't add type mangling to GenISA_fma_rtz,
20+
; we would end up with incorrect bitcast instruction that would result in using float fma.rtz in places where
21+
; double version is expected.
22+
;
23+
; ------------------------------------------------
24+
25+
; CHECK-NOT: call float bitcast (double (double, double, double)* @GenISA_fma_rtz to float (float, float, float)*)(float {{.*}}, float {{.*}}, float {{.*}})
26+
27+
; CHECK-LABEL: @__igcbuiltin_dp_div_nomadm_ieee
28+
; CHECK: call double @llvm.genx.GenISA.fma.rtz.f64.f64.f64.f64(double {{.*}}, double {{.*}}, double {{.*}})
29+
30+
; CHECK-LABEL: @__igcbuiltin_s64_sdiv_sp
31+
; CHECK: call float @llvm.genx.GenISA.fma.rtz.f32.f32.f32.f32(float {{.*}}, float {{.*}}, float {{.*}})
32+
33+
define void @kernel(i64 addrspace(1)* %outA, double addrspace(1)* %outB, i64 %ix, i64 %iy, double %dx, double %dy) {
34+
entry:
35+
%iresult = sdiv i64 %ix, %iy
36+
store i64 %iresult, i64 addrspace(1)* %outA, align 8
37+
%dresult = fdiv double %dx, %dy
38+
store double %dresult, double addrspace(1)* %outB, align 8
39+
ret void
40+
}

IGC/Compiler/tests/PreCompiledFuncImport/split_and_mix_inline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
;============================ end_copyright_notice =============================
88
; REQUIRES: regkeys
99
;
10-
; RUN: igc_opt -regkey TestIGCPreCompiledFunctions=2 -regkey InlinedEmulationThreshold=500 --platformmtl --igc-precompiled-import -S < %s | FileCheck %s
10+
; RUN: igc_opt -regkey TestIGCPreCompiledFunctions=1 -regkey ForceEmuKind=4 -regkey InlinedEmulationThreshold=500 --platformmtl --igc-precompiled-import -S < %s | FileCheck %s
1111
; ------------------------------------------------
1212
; PreCompiledFuncImport
1313
; ------------------------------------------------

IGC/Compiler/tests/PreCompiledFuncImport/split_inline_noinline.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
;============================ end_copyright_notice =============================
88
; REQUIRES: regkeys
99
;
10-
; RUN: igc_opt -regkey TestIGCPreCompiledFunctions=2 -regkey InlinedEmulationThreshold=600 --platformmtl --igc-precompiled-import -S < %s | FileCheck %s
10+
; RUN: igc_opt -regkey TestIGCPreCompiledFunctions=1 -regkey ForceEmuKind=4 -regkey InlinedEmulationThreshold=600 --platformmtl --igc-precompiled-import -S < %s | FileCheck %s
1111
; ------------------------------------------------
1212
; PreCompiledFuncImport
1313
; ------------------------------------------------

IGC/common/igc_flags.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,8 @@ DECLARE_IGC_REGKEY(bool, SToSProducesPositivePointer, false, "This key is for
302302
DECLARE_IGC_REGKEY(bool, EnableSupportBufferOffset, false, "[debugging]For StatelessToStateful optimization [OCL], support implicit buffer offset argument (same as -cl-intel-has-buffer-offset-arg).", false)
303303
DECLARE_IGC_REGKEY(bool, EnableOptionalBufferOffset, true, "For StatelessToStateful optimization [OCL], if true, make buffer offset optional. Valid only if buffer offset is supported.", true)
304304
DECLARE_IGC_REGKEY(bool, EnableTestIGCBuiltin, false, "Enable testing igc builtin (precompiled kernels) using OCL.", false)
305-
DECLARE_IGC_REGKEY(DWORD, TestIGCPreCompiledFunctions, 0, "Enable testing for precompiled kernels. [TEST ONLY]", false)
305+
DECLARE_IGC_REGKEY(bool, TestIGCPreCompiledFunctions, false, "Enable testing for precompiled kernels. [TEST ONLY]", false)
306+
DECLARE_IGC_REGKEY(DWORD, ForceEmuKind, 0, "Force emuKind used by PreCompiledFuncImport pass. This flag takes emulation kind value that is defined in EmuKind enum in PreCompiledFuncImport.hpp [TEST ONLY]", false)
306307
DECLARE_IGC_REGKEY(bool, EnableCSSIMD32, false, "Enable computer shader SIMD32 mode, and fall back to lower SIMD when spill", false)
307308
DECLARE_IGC_REGKEY(bool, ForceCSSIMD32, false, "Force computer shader SIMD32 mode", false)
308309
DECLARE_IGC_REGKEY(bool, ForceCSSIMD16, false, "Force computer shader SIMD16 mode if allowed, otherwise it will use SIMD32", false)

0 commit comments

Comments
 (0)