-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[SelectionDAG] Legalize <1 x T> vector types for atomic load #148894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/gt/07-15-ir/verifier_allow_vector_type_in_atomic_load_and_store
Are you sure you want to change the base?
Conversation
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/148894.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index dd9af47da5287..d24b4517a460d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4d844f0036a75..d6cbf2211f053 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ SDValue Result = DAG.getAtomicLoad(
+ ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+ N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 45277ce3d26c4..4f5cb5a4e9247 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
define void @test1(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test1:
@@ -34,6 +34,238 @@ define i32 @test3(ptr %ptr) {
%val = load atomic i32, ptr %ptr seq_cst, align 4
ret i32 %val
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-O0: {{.*}}
-; CHECK-O3: {{.*}}
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+ ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movb (%rdi), %al
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movb (%rdi), %al
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+ ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i16:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i16:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %ax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+ ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: movzbl %al, %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movzbl %al, %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: movzbl %al, %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movb (%rdi), %al
+; CHECK-SSE-O0-NEXT: movzbl %al, %eax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movb (%rdi), %al
+; CHECK-AVX-O0-NEXT: movzbl %al, %eax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+ %zret = zext <1 x i8> %ret to <1 x i32>
+ ret <1 x i32> %zret
+}
+
+define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: movswq %ax, %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movswq %ax, %rax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: movswq %ax, %rax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %ax
+; CHECK-O0-NEXT: movswq %ax, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
+; CHECK-SSE-O0-NEXT: movswq %ax, %rax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
+; CHECK-AVX-O0-NEXT: movswq %ax, %rax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+ %sret = sext <1 x i16> %ret to <1 x i64>
+ ret <1 x i64> %sret
+}
+
+define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
+; CHECK-LABEL: atomic_vec1_ptr270:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4
+ ret <1 x ptr addrspace(270)> %ret
+}
+
+define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %cx
+; CHECK-O0-NEXT: # implicit-def: $eax
+; CHECK-O0-NEXT: movw %cx, %ax
+; CHECK-O0-NEXT: # implicit-def: $xmm0
+; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
+; CHECK-SSE-O0-NEXT: # implicit-def: $eax
+; CHECK-SSE-O0-NEXT: movw %cx, %ax
+; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
+; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
+; CHECK-AVX-O0-NEXT: # implicit-def: $eax
+; CHECK-AVX-O0-NEXT: movw %cx, %ax
+; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
+; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
+ ret <1 x bfloat> %ret
+}
+
+define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_ptr_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 8
+ ret <1 x ptr> %ret
+}
+
+define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_i64_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i64>, ptr %x acquire, align 8
+ ret <1 x i64> %ret
+}
|
@llvm/pr-subscribers-llvm-selectiondag Author: None (jofrn) Changes
Full diff: https://github.com/llvm/llvm-project/pull/148894.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index dd9af47da5287..d24b4517a460d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -879,6 +879,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+ SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4d844f0036a75..d6cbf2211f053 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ SDValue Result = DAG.getAtomicLoad(
+ ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+ N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
+ N->getMemOperand());
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+ return Result;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 45277ce3d26c4..4f5cb5a4e9247 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O3
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
-; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,CHECK-SSE-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -O0 -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX-O0
define void @test1(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test1:
@@ -34,6 +34,238 @@ define i32 @test3(ptr %ptr) {
%val = load atomic i32, ptr %ptr seq_cst, align 4
ret i32 %val
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-O0: {{.*}}
-; CHECK-O3: {{.*}}
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+ ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i8:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i8:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i8:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i8:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i8:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movb (%rdi), %al
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i8:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movb (%rdi), %al
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+ ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i16:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i16:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i16:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i16:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %ax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i16:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i16:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+ ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: movzbl %al, %eax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movzbl %al, %eax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i8_zext:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: movzbl %al, %eax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movb (%rdi), %al
+; CHECK-SSE-O0-NEXT: movzbl %al, %eax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i8_zext:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movb (%rdi), %al
+; CHECK-AVX-O0-NEXT: movzbl %al, %eax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+ %zret = zext <1 x i8> %ret to <1 x i32>
+ ret <1 x i32> %zret
+}
+
+define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: movswq %ax, %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: movswq %ax, %rax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i16_sext:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: movswq %ax, %rax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %ax
+; CHECK-O0-NEXT: movswq %ax, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %ax
+; CHECK-SSE-O0-NEXT: movswq %ax, %rax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i16_sext:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %ax
+; CHECK-AVX-O0-NEXT: movswq %ax, %rax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+ %sret = sext <1 x i16> %ret to <1 x i64>
+ ret <1 x i64> %sret
+}
+
+define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
+; CHECK-LABEL: atomic_vec1_ptr270:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4
+ ret <1 x ptr addrspace(270)> %ret
+}
+
+define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %cx
+; CHECK-O0-NEXT: # implicit-def: $eax
+; CHECK-O0-NEXT: movw %cx, %ax
+; CHECK-O0-NEXT: # implicit-def: $xmm0
+; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
+; CHECK-SSE-O0-NEXT: # implicit-def: $eax
+; CHECK-SSE-O0-NEXT: movw %cx, %ax
+; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
+; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
+; CHECK-AVX-O0-NEXT: # implicit-def: $eax
+; CHECK-AVX-O0-NEXT: movw %cx, %ax
+; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
+; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
+ ret <1 x bfloat> %ret
+}
+
+define <1 x ptr> @atomic_vec1_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_ptr_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 8
+ ret <1 x ptr> %ret
+}
+
+define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_i64_align:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: retq
+ %ret = load atomic <1 x i64>, ptr %x acquire, align 8
+ ret <1 x i64> %ret
+}
|
`load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`.
@@ -455,6 +458,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  return Op;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
  SDValue Result = DAG.getAtomicLoad(
      ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we preserve the extension type from the original ATOMIC_LOAD?
5cd825d
to
7bf4bb1
Compare
af2fec2
to
226a1b6
Compare
`load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`.