Skip to content

Commit a7c6c64

Browse files
committed
[X86][LWP] Add clang support for LWP instructions.
This patch adds support for the LightWeight Profiling (LWP) instructions which are available on all AMD Bulldozer class CPUs (bdver1 to bdver4). Differential Revision: https://reviews.llvm.org/D32770 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@302418 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0d0d7d1 commit a7c6c64

File tree

8 files changed

+212
-0
lines changed

8 files changed

+212
-0
lines changed

include/clang/Basic/BuiltinsX86.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,12 @@ TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "", "bmi2")
668668
// TBM
669669
TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "", "tbm")
670670

671+
// LWP
672+
TARGET_BUILTIN(__builtin_ia32_llwpcb, "vv*", "", "lwp")
673+
TARGET_BUILTIN(__builtin_ia32_slwpcb, "v*", "", "lwp")
674+
TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiUi", "", "lwp")
675+
TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "", "lwp")
676+
671677
// SHA
672678
TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "", "sha")
673679
TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "", "sha")

include/clang/Basic/BuiltinsX86_64.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ TARGET_BUILTIN(__builtin_ia32_bzhi_di, "ULLiULLiULLi", "", "bmi2")
6969
TARGET_BUILTIN(__builtin_ia32_pdep_di, "ULLiULLiULLi", "", "bmi2")
7070
TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "", "bmi2")
7171
TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm")
72+
TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "", "lwp")
73+
TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "", "lwp")
7274
TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f")
7375
TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl")
7476
TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl")

include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1752,6 +1752,7 @@ def mno_bmi : Flag<["-"], "mno-bmi">, Group<m_x86_Features_Group>;
17521752
def mno_bmi2 : Flag<["-"], "mno-bmi2">, Group<m_x86_Features_Group>;
17531753
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
17541754
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
1755+
def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>;
17551756
def mno_fma4 : Flag<["-"], "mno-fma4">, Group<m_x86_Features_Group>;
17561757
def mno_fma : Flag<["-"], "mno-fma">, Group<m_x86_Features_Group>;
17571758
def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
@@ -1951,6 +1952,7 @@ def mbmi : Flag<["-"], "mbmi">, Group<m_x86_Features_Group>;
19511952
def mbmi2 : Flag<["-"], "mbmi2">, Group<m_x86_Features_Group>;
19521953
def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
19531954
def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
1955+
def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>;
19541956
def mfma4 : Flag<["-"], "mfma4">, Group<m_x86_Features_Group>;
19551957
def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;
19561958
def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;

lib/Basic/Targets.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2591,6 +2591,7 @@ class X86TargetInfo : public TargetInfo {
25912591
bool HasRDSEED = false;
25922592
bool HasADX = false;
25932593
bool HasTBM = false;
2594+
bool HasLWP = false;
25942595
bool HasFMA = false;
25952596
bool HasF16C = false;
25962597
bool HasAVX512CD = false;
@@ -3363,6 +3364,7 @@ bool X86TargetInfo::initFeatureMap(
33633364
case CK_BDVER1:
33643365
// xop implies avx, sse4a and fma4.
33653366
setFeatureEnabledImpl(Features, "xop", true);
3367+
setFeatureEnabledImpl(Features, "lwp", true);
33663368
setFeatureEnabledImpl(Features, "lzcnt", true);
33673369
setFeatureEnabledImpl(Features, "aes", true);
33683370
setFeatureEnabledImpl(Features, "pclmul", true);
@@ -3634,6 +3636,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
36343636
HasADX = true;
36353637
} else if (Feature == "+tbm") {
36363638
HasTBM = true;
3639+
} else if (Feature == "+lwp") {
3640+
HasLWP = true;
36373641
} else if (Feature == "+fma") {
36383642
HasFMA = true;
36393643
} else if (Feature == "+f16c") {
@@ -3949,6 +3953,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
39493953
if (HasTBM)
39503954
Builder.defineMacro("__TBM__");
39513955

3956+
if (HasLWP)
3957+
Builder.defineMacro("__LWP__");
3958+
39523959
if (HasMWAITX)
39533960
Builder.defineMacro("__MWAITX__");
39543961

@@ -4132,6 +4139,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
41324139
.Case("sse4.2", SSELevel >= SSE42)
41334140
.Case("sse4a", XOPLevel >= SSE4A)
41344141
.Case("tbm", HasTBM)
4142+
.Case("lwp", HasLWP)
41354143
.Case("x86", true)
41364144
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
41374145
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)

lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ set(files
4545
inttypes.h
4646
iso646.h
4747
limits.h
48+
lwpintrin.h
4849
lzcntintrin.h
4950
mm3dnow.h
5051
mmintrin.h

lib/Headers/lwpintrin.h

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===
2+
*
3+
* Permission is hereby granted, free of charge, to any person obtaining a copy
4+
* of this software and associated documentation files (the "Software"), to deal
5+
* in the Software without restriction, including without limitation the rights
6+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
* copies of the Software, and to permit persons to whom the Software is
8+
* furnished to do so, subject to the following conditions:
9+
*
10+
* The above copyright notice and this permission notice shall be included in
11+
* all copies or substantial portions of the Software.
12+
*
13+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
* THE SOFTWARE.
20+
*
21+
*===-----------------------------------------------------------------------===
22+
*/
23+
24+
#ifndef __X86INTRIN_H
25+
#error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
26+
#endif
27+
28+
#ifndef __LWPINTRIN_H
29+
#define __LWPINTRIN_H
30+
31+
/* Define the default attributes for the functions in this file. */
32+
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp")))
33+
34+
/// \brief Parses the LWPCB at the specified address and enables
35+
/// profiling if valid.
36+
///
37+
/// \headerfile <x86intrin.h>
38+
///
39+
/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.
40+
///
41+
/// \param __addr
42+
/// Address to the new Lightweight Profiling Control Block (LWPCB). If the
43+
/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables
44+
/// Lightweight Profiling.
45+
static __inline__ void __DEFAULT_FN_ATTRS
46+
__llwpcb (void *__addr)
47+
{
48+
__builtin_ia32_llwpcb(__addr);
49+
}
50+
51+
/// \brief Flushes the LWP state to memory and returns the address of the LWPCB.
52+
///
53+
/// \headerfile <x86intrin.h>
54+
///
55+
/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.
56+
///
57+
/// \return
58+
/// Address to the current Lightweight Profiling Control Block (LWPCB).
59+
/// If LWP is not currently enabled, returns NULL.
60+
static __inline__ void* __DEFAULT_FN_ATTRS
61+
__slwpcb ()
62+
{
63+
return __builtin_ia32_slwpcb();
64+
}
65+
66+
/// \brief Inserts programmed event record into the LWP event ring buffer
67+
/// and advances the ring buffer pointer.
68+
///
69+
/// \headerfile <x86intrin.h>
70+
///
71+
/// This intrinsic corresponds to the <c> LWPINS </c> instruction.
72+
///
73+
/// \param DATA2
74+
/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.
75+
/// \param DATA1
76+
/// A 32-bit value is inserted into the 32-bit Data1 field.
77+
/// \param FLAGS
78+
/// A 32-bit immediate value is inserted into the 32-bit Flags field.
79+
/// \returns If the ring buffer is full and LWP is running in Synchronized Mode,
80+
/// the event record overwrites the last record in the buffer, the MissedEvents
81+
/// counter in the LWPCB is incremented, the head pointer is not advanced, and
82+
/// 1 is returned. Otherwise 0 is returned.
83+
#define __lwpins32(DATA2, DATA1, FLAGS) \
84+
(__builtin_ia32_lwpins32((unsigned int) (DATA2), (unsigned int) (DATA1), \
85+
(unsigned int) (FLAGS)))
86+
87+
/// \brief Decrements the LWP programmed value sample event counter. If the result is
88+
/// negative, inserts an event record into the LWP event ring buffer in memory
89+
/// and advances the ring buffer pointer.
90+
///
91+
/// \headerfile <x86intrin.h>
92+
///
93+
/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.
94+
///
95+
/// \param DATA2
96+
/// A 32-bit value is zero-extended and inserted into the 64-bit Data2 field.
97+
/// \param DATA1
98+
/// A 32-bit value is inserted into the 32-bit Data1 field.
99+
/// \param FLAGS
100+
/// A 32-bit immediate value is inserted into the 32-bit Flags field.
101+
#define __lwpval32(DATA2, DATA1, FLAGS) \
102+
(__builtin_ia32_lwpval32((unsigned int) (DATA2), (unsigned int) (DATA1), \
103+
(unsigned int) (FLAGS)))
104+
105+
#ifdef __x86_64__
106+
107+
/// \brief Inserts programmed event record into the LWP event ring buffer
108+
/// and advances the ring buffer pointer.
109+
///
110+
/// \headerfile <x86intrin.h>
111+
///
112+
/// This intrinsic corresponds to the <c> LWPINS </c> instruction.
113+
///
114+
/// \param DATA2
115+
/// A 64-bit value is inserted into the 64-bit Data2 field.
116+
/// \param DATA1
117+
/// A 32-bit value is inserted into the 32-bit Data1 field.
118+
/// \param FLAGS
119+
/// A 32-bit immediate value is inserted into the 32-bit Flags field.
120+
/// \returns If the ring buffer is full and LWP is running in Synchronized Mode,
121+
/// the event record overwrites the last record in the buffer, the MissedEvents
122+
/// counter in the LWPCB is incremented, the head pointer is not advanced, and
123+
/// 1 is returned. Otherwise 0 is returned.
124+
#define __lwpins64(DATA2, DATA1, FLAGS) \
125+
(__builtin_ia32_lwpins64((unsigned long long) (DATA2), (unsigned int) (DATA1), \
126+
(unsigned int) (FLAGS)))
127+
128+
/// \brief Decrements the LWP programmed value sample event counter. If the result is
129+
/// negative, inserts an event record into the LWP event ring buffer in memory
130+
/// and advances the ring buffer pointer.
131+
///
132+
/// \headerfile <x86intrin.h>
133+
///
134+
/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.
135+
///
136+
/// \param DATA2
137+
/// A 64-bit value is inserted into the 64-bit Data2 field.
138+
/// \param DATA1
139+
/// A 32-bit value is inserted into the 32-bit Data1 field.
140+
/// \param FLAGS
141+
/// A 32-bit immediate value is inserted into the 32-bit Flags field.
142+
#define __lwpval64(DATA2, DATA1, FLAGS) \
143+
(__builtin_ia32_lwpval64((unsigned long long) (DATA2), (unsigned int) (DATA1), \
144+
(unsigned int) (FLAGS)))
145+
146+
#endif
147+
148+
#undef __DEFAULT_FN_ATTRS
149+
150+
#endif /* __LWPINTRIN_H */

lib/Headers/x86intrin.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@
7272
#include <tbmintrin.h>
7373
#endif
7474

75+
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)
76+
#include <lwpintrin.h>
77+
#endif
78+
7579
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
7680
#include <f16cintrin.h>
7781
#endif

test/CodeGen/lwp-builtins.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lwp -emit-llvm -o - -Wall -Werror | FileCheck %s
2+
3+
#include <x86intrin.h>
4+
5+
void test_llwpcb(void *ptr) {
6+
// CHECK-LABEL: @test_llwpcb
7+
// CHECK: call void @llvm.x86.llwpcb(i8* %{{.*}})
8+
__llwpcb(ptr);
9+
}
10+
11+
void* test_slwpcb() {
12+
// CHECK-LABEL: @test_slwpcb
13+
// CHECK: call i8* @llvm.x86.slwpcb()
14+
return __slwpcb();
15+
}
16+
17+
unsigned char test_lwpins32(unsigned val2, unsigned val1) {
18+
// CHECK-LABEL: @test_lwpins32
19+
// CHECK: call i8 @llvm.x86.lwpins32(i32
20+
return __lwpins32(val2, val1, 0x01234);
21+
}
22+
23+
unsigned char test_lwpins64(unsigned long long val2, unsigned val1) {
24+
// CHECK-LABEL: @test_lwpins64
25+
// CHECK: call i8 @llvm.x86.lwpins64(i64
26+
return __lwpins64(val2, val1, 0x56789);
27+
}
28+
29+
void test_lwpval32(unsigned val2, unsigned val1) {
30+
// CHECK-LABEL: @test_lwpval32
31+
// CHECK: call void @llvm.x86.lwpval32(i32
32+
__lwpval32(val2, val1, 0x01234);
33+
}
34+
35+
void test_lwpval64(unsigned long long val2, unsigned val1) {
36+
// CHECK-LABEL: @test_lwpval64
37+
// CHECK: call void @llvm.x86.lwpval64(i64
38+
__lwpval64(val2, val1, 0xABCDEF);
39+
}

0 commit comments

Comments
 (0)