Skip to content

Commit 5cd825d

Browse files
committed
[SelectionDAG] Legalize <1 x T> vector types for atomic load
`load atomic <1 x T>` is not valid. This change legalizes the vector types of atomic loads via scalarization in SelectionDAG so that, for example, an atomic load of `v1i32` is translated into an atomic load of `i32`.
1 parent af2fec2 commit 5cd825d

File tree

3 files changed

+257
-9
lines changed

3 files changed

+257
-9
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
879879
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
880880
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
881881
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
882+
SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
882883
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
883884
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
884885
SDValue ScalarizeVecRes_SELECT(SDNode *N);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
6565
R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
6666
break;
6767
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
68+
case ISD::ATOMIC_LOAD:
69+
R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
70+
break;
6871
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
6972
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
7073
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
455458
return Op;
456459
}
457460

461+
/// Scalarize the vector result of an atomic load node.
///
/// Builds a replacement atomic load whose memory type and result type are the
/// element types of \p N's memory VT and of \p N's value 0, reusing \p N's
/// chain, base pointer, and memory operand unchanged.
///
/// \param N the atomic load node whose result is being scalarized.
/// \returns the newly created atomic load; users of \p N's chain (result 1)
///          are redirected to the new node's chain before returning.
SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
462+
// The replacement is always created as a non-extending load
// (ISD::NON_EXTLOAD); only the two value types differ from the original node.
SDValue Result = DAG.getAtomicLoad(
463+
ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
464+
N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
465+
N->getMemOperand());
466+
467+
// Legalize the chain result - switch anything that used the old chain to
468+
// use the new one.
469+
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
470+
// Only the chain is replaced here; the scalar value is handed back to the
// caller (ScalarizeVectorResult stores it in R for the ATOMIC_LOAD case).
return Result;
471+
}
472+
458473
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
459474
assert(N->isUnindexed() && "Indexed vector load?");
460475

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 241 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
3-
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
4-
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
5-
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
3+
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3
4+
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
5+
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
66
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
7-
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
8-
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
9-
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
7+
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0
8+
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
9+
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
1010

1111
define void @test1(ptr %ptr, i32 %val1) {
1212
; CHECK-LABEL: test1:
@@ -34,6 +34,238 @@ define i32 @test3(ptr %ptr) {
3434
%val = load atomic i32, ptr %ptr seq_cst, align 4
3535
ret i32 %val
3636
}
37-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
38-
; CHECK-O0: {{.*}}
39-
; CHECK-O3: {{.*}}
37+
38+
define <1 x i32> @atomic_vec1_i32(ptr %x) {
39+
; CHECK-LABEL: atomic_vec1_i32:
40+
; CHECK: # %bb.0:
41+
; CHECK-NEXT: movl (%rdi), %eax
42+
; CHECK-NEXT: retq
43+
%ret = load atomic <1 x i32>, ptr %x acquire, align 4
44+
ret <1 x i32> %ret
45+
}
46+
47+
define <1 x i8> @atomic_vec1_i8(ptr %x) {
48+
; CHECK-O3-LABEL: atomic_vec1_i8:
49+
; CHECK-O3: # %bb.0:
50+
; CHECK-O3-NEXT: movzbl (%rdi), %eax
51+
; CHECK-O3-NEXT: retq
52+
;
53+
; CHECK-SSE-O3-LABEL: atomic_vec1_i8:
54+
; CHECK-SSE-O3: # %bb.0:
55+
; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
56+
; CHECK-SSE-O3-NEXT: retq
57+
;
58+
; CHECK-AVX-O3-LABEL: atomic_vec1_i8:
59+
; CHECK-AVX-O3: # %bb.0:
60+
; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
61+
; CHECK-AVX-O3-NEXT: retq
62+
;
63+
; CHECK-O0-LABEL: atomic_vec1_i8:
64+
; CHECK-O0: # %bb.0:
65+
; CHECK-O0-NEXT: movb (%rdi), %al
66+
; CHECK-O0-NEXT: retq
67+
;
68+
; CHECK-SSE-O0-LABEL: atomic_vec1_i8:
69+
; CHECK-SSE-O0: # %bb.0:
70+
; CHECK-SSE-O0-NEXT: movb (%rdi), %al
71+
; CHECK-SSE-O0-NEXT: retq
72+
;
73+
; CHECK-AVX-O0-LABEL: atomic_vec1_i8:
74+
; CHECK-AVX-O0: # %bb.0:
75+
; CHECK-AVX-O0-NEXT: movb (%rdi), %al
76+
; CHECK-AVX-O0-NEXT: retq
77+
%ret = load atomic <1 x i8>, ptr %x acquire, align 1
78+
ret <1 x i8> %ret
79+
}
80+
81+
define <1 x i16> @atomic_vec1_i16(ptr %x) {
82+
; CHECK-O3-LABEL: atomic_vec1_i16:
83+
; CHECK-O3: # %bb.0:
84+
; CHECK-O3-NEXT: movzwl (%rdi), %eax
85+
; CHECK-O3-NEXT: retq
86+
;
87+
; CHECK-SSE-O3-LABEL: atomic_vec1_i16:
88+
; CHECK-SSE-O3: # %bb.0:
89+
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
90+
; CHECK-SSE-O3-NEXT: retq
91+
;
92+
; CHECK-AVX-O3-LABEL: atomic_vec1_i16:
93+
; CHECK-AVX-O3: # %bb.0:
94+
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
95+
; CHECK-AVX-O3-NEXT: retq
96+
;
97+
; CHECK-O0-LABEL: atomic_vec1_i16:
98+
; CHECK-O0: # %bb.0:
99+
; CHECK-O0-NEXT: movw (%rdi), %ax
100+
; CHECK-O0-NEXT: retq
101+
;
102+
; CHECK-SSE-O0-LABEL: atomic_vec1_i16:
103+
; CHECK-SSE-O0: # %bb.0:
104+
; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
105+
; CHECK-SSE-O0-NEXT: retq
106+
;
107+
; CHECK-AVX-O0-LABEL: atomic_vec1_i16:
108+
; CHECK-AVX-O0: # %bb.0:
109+
; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
110+
; CHECK-AVX-O0-NEXT: retq
111+
%ret = load atomic <1 x i16>, ptr %x acquire, align 2
112+
ret <1 x i16> %ret
113+
}
114+
115+
define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
116+
; CHECK-O3-LABEL: atomic_vec1_i8_zext:
117+
; CHECK-O3: # %bb.0:
118+
; CHECK-O3-NEXT: movzbl (%rdi), %eax
119+
; CHECK-O3-NEXT: movzbl %al, %eax
120+
; CHECK-O3-NEXT: retq
121+
;
122+
; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext:
123+
; CHECK-SSE-O3: # %bb.0:
124+
; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
125+
; CHECK-SSE-O3-NEXT: movzbl %al, %eax
126+
; CHECK-SSE-O3-NEXT: retq
127+
;
128+
; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext:
129+
; CHECK-AVX-O3: # %bb.0:
130+
; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
131+
; CHECK-AVX-O3-NEXT: movzbl %al, %eax
132+
; CHECK-AVX-O3-NEXT: retq
133+
;
134+
; CHECK-O0-LABEL: atomic_vec1_i8_zext:
135+
; CHECK-O0: # %bb.0:
136+
; CHECK-O0-NEXT: movb (%rdi), %al
137+
; CHECK-O0-NEXT: movzbl %al, %eax
138+
; CHECK-O0-NEXT: retq
139+
;
140+
; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext:
141+
; CHECK-SSE-O0: # %bb.0:
142+
; CHECK-SSE-O0-NEXT: movb (%rdi), %al
143+
; CHECK-SSE-O0-NEXT: movzbl %al, %eax
144+
; CHECK-SSE-O0-NEXT: retq
145+
;
146+
; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext:
147+
; CHECK-AVX-O0: # %bb.0:
148+
; CHECK-AVX-O0-NEXT: movb (%rdi), %al
149+
; CHECK-AVX-O0-NEXT: movzbl %al, %eax
150+
; CHECK-AVX-O0-NEXT: retq
151+
%ret = load atomic <1 x i8>, ptr %x acquire, align 1
152+
%zret = zext <1 x i8> %ret to <1 x i32>
153+
ret <1 x i32> %zret
154+
}
155+
156+
define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
157+
; CHECK-O3-LABEL: atomic_vec1_i16_sext:
158+
; CHECK-O3: # %bb.0:
159+
; CHECK-O3-NEXT: movzwl (%rdi), %eax
160+
; CHECK-O3-NEXT: movswq %ax, %rax
161+
; CHECK-O3-NEXT: retq
162+
;
163+
; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext:
164+
; CHECK-SSE-O3: # %bb.0:
165+
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
166+
; CHECK-SSE-O3-NEXT: movswq %ax, %rax
167+
; CHECK-SSE-O3-NEXT: retq
168+
;
169+
; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext:
170+
; CHECK-AVX-O3: # %bb.0:
171+
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
172+
; CHECK-AVX-O3-NEXT: movswq %ax, %rax
173+
; CHECK-AVX-O3-NEXT: retq
174+
;
175+
; CHECK-O0-LABEL: atomic_vec1_i16_sext:
176+
; CHECK-O0: # %bb.0:
177+
; CHECK-O0-NEXT: movw (%rdi), %ax
178+
; CHECK-O0-NEXT: movswq %ax, %rax
179+
; CHECK-O0-NEXT: retq
180+
;
181+
; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext:
182+
; CHECK-SSE-O0: # %bb.0:
183+
; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
184+
; CHECK-SSE-O0-NEXT: movswq %ax, %rax
185+
; CHECK-SSE-O0-NEXT: retq
186+
;
187+
; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext:
188+
; CHECK-AVX-O0: # %bb.0:
189+
; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
190+
; CHECK-AVX-O0-NEXT: movswq %ax, %rax
191+
; CHECK-AVX-O0-NEXT: retq
192+
%ret = load atomic <1 x i16>, ptr %x acquire, align 2
193+
%sret = sext <1 x i16> %ret to <1 x i64>
194+
ret <1 x i64> %sret
195+
}
196+
197+
define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
198+
; CHECK-LABEL: atomic_vec1_ptr270:
199+
; CHECK: # %bb.0:
200+
; CHECK-NEXT: movl (%rdi), %eax
201+
; CHECK-NEXT: retq
202+
%ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4
203+
ret <1 x ptr addrspace(270)> %ret
204+
}
205+
206+
define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
207+
; CHECK-O3-LABEL: atomic_vec1_bfloat:
208+
; CHECK-O3: # %bb.0:
209+
; CHECK-O3-NEXT: movzwl (%rdi), %eax
210+
; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
211+
; CHECK-O3-NEXT: retq
212+
;
213+
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
214+
; CHECK-SSE-O3: # %bb.0:
215+
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
216+
; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
217+
; CHECK-SSE-O3-NEXT: retq
218+
;
219+
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
220+
; CHECK-AVX-O3: # %bb.0:
221+
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
222+
; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
223+
; CHECK-AVX-O3-NEXT: retq
224+
;
225+
; CHECK-O0-LABEL: atomic_vec1_bfloat:
226+
; CHECK-O0: # %bb.0:
227+
; CHECK-O0-NEXT: movw (%rdi), %cx
228+
; CHECK-O0-NEXT: # implicit-def: $eax
229+
; CHECK-O0-NEXT: movw %cx, %ax
230+
; CHECK-O0-NEXT: # implicit-def: $xmm0
231+
; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
232+
; CHECK-O0-NEXT: retq
233+
;
234+
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
235+
; CHECK-SSE-O0: # %bb.0:
236+
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
237+
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
238+
; CHECK-SSE-O0-NEXT: movw %cx, %ax
239+
; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
240+
; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
241+
; CHECK-SSE-O0-NEXT: retq
242+
;
243+
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
244+
; CHECK-AVX-O0: # %bb.0:
245+
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
246+
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
247+
; CHECK-AVX-O0-NEXT: movw %cx, %ax
248+
; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
249+
; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
250+
; CHECK-AVX-O0-NEXT: retq
251+
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
252+
ret <1 x bfloat> %ret
253+
}
254+
255+
define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind {
256+
; CHECK-LABEL: atomic_vec1_ptr_align:
257+
; CHECK: # %bb.0:
258+
; CHECK-NEXT: movq (%rdi), %rax
259+
; CHECK-NEXT: retq
260+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 8
261+
ret <1 x ptr> %ret
262+
}
263+
264+
define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
265+
; CHECK-LABEL: atomic_vec1_i64_align:
266+
; CHECK: # %bb.0:
267+
; CHECK-NEXT: movq (%rdi), %rax
268+
; CHECK-NEXT: retq
269+
%ret = load atomic <1 x i64>, ptr %x acquire, align 8
270+
ret <1 x i64> %ret
271+
}

0 commit comments

Comments
 (0)