Skip to content

Commit c62accd

Browse files
committed
[WIP][DAG] getNode - freeze insert/compress node base values if the inserted/mask values are UNDEF
Ensure we don't expose any poison elements: when a fold replaces the node with one of its source vectors, that vector is frozen first.
1 parent b6e113a commit c62accd

File tree

213 files changed (+40362 / -31909 lines changed)

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

213 files changed (+40362 / -31909 lines changed)

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27641,7 +27641,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
2764127641

2764227642
// If inserting an UNDEF, just return the original vector.
2764327643
if (N1.isUndef())
27644-
return N0;
27644+
return DAG.getFreeze(N0);
2764527645

2764627646
// If this is an insert of an extracted vector into an undef vector, we can
2764727647
// just use the input to the extract if the types match, and can simplify

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7962,7 +7962,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
79627962

79637963
// If the inserted element is an UNDEF, just use the input vector.
79647964
if (N2.isUndef())
7965-
return N1;
7965+
return getFreeze(N1);
79667966

79677967
break;
79687968
}
@@ -8001,7 +8001,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
80018001
// can just use the input to the extract.
80028002
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
80038003
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
8004-
return N2.getOperand(0);
8004+
return getFreeze(N2.getOperand(0));
80058005
break;
80068006
}
80078007
case ISD::BITCAST:
@@ -8028,7 +8028,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
80288028
"Vector and mask must have same number of elements.");
80298029

80308030
if (N1.isUndef() || N2.isUndef())
8031-
return N3;
8031+
return getFreeze(N3);
80328032

80338033
break;
80348034
}

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45047,6 +45047,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4504745047
case X86ISD::UNPCKL:
4504845048
case X86ISD::UNPCKH:
4504945049
case X86ISD::VPERMILPI:
45050+
case X86ISD::VPERMV:
4505045051
case X86ISD::VPERMV3: {
4505145052
SmallVector<int, 8> Mask;
4505245053
SmallVector<SDValue, 2> Ops;
@@ -45094,10 +45095,13 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
4509445095
case X86ISD::BLENDV:
4509545096
return false;
4509645097
// SSE target shuffles.
45098+
case X86ISD::PACKSS:
45099+
case X86ISD::PACKUS:
4509745100
case X86ISD::PSHUFD:
4509845101
case X86ISD::UNPCKL:
4509945102
case X86ISD::UNPCKH:
4510045103
case X86ISD::VPERMILPI:
45104+
case X86ISD::VPERMV:
4510145105
case X86ISD::VPERMV3:
4510245106
return false;
4510345107
// SSE comparisons handle all icmp/fcmp cases.

llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
132132
define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
133133
; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
134134
; CHECK: // %bb.0:
135-
; CHECK-NEXT: mov z0.d, x8
135+
; CHECK-NEXT: mov z0.d, d0
136136
; CHECK-NEXT: ret
137137
%1 = extractelement <1 x i64> %data, i64 1
138138
%.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 29 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -304,9 +304,12 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
304304
;
305305
; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
306306
; CHECK-STREAMING: // %bb.0:
307-
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
308-
; CHECK-STREAMING-NEXT: mov z1.b, w0
307+
; CHECK-STREAMING-NEXT: index z0.b, #0, #1
309308
; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
309+
; CHECK-STREAMING-NEXT: mov z1.b, w0
310+
; CHECK-STREAMING-NEXT: mov z0.b, p0/m, z0.b
311+
; CHECK-STREAMING-NEXT: sel z1.b, p0, z1.b, z0.b
312+
; CHECK-STREAMING-NEXT: add z0.b, z1.b, z0.b
310313
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
311314
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
312315
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
@@ -331,9 +334,12 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
331334
;
332335
; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
333336
; CHECK-STREAMING: // %bb.0:
334-
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
335-
; CHECK-STREAMING-NEXT: mov z1.b, w0
337+
; CHECK-STREAMING-NEXT: index z0.b, #0, #1
336338
; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
339+
; CHECK-STREAMING-NEXT: mov z1.b, w0
340+
; CHECK-STREAMING-NEXT: mov z0.b, p0/m, z0.b
341+
; CHECK-STREAMING-NEXT: sel z1.b, p0, z1.b, z0.b
342+
; CHECK-STREAMING-NEXT: add z0.b, z1.b, z0.b
337343
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
338344
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
339345
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
@@ -362,15 +368,20 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
362368
;
363369
; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
364370
; CHECK-STREAMING: // %bb.0:
365-
; CHECK-STREAMING-NEXT: mov z1.h, w0
366-
; CHECK-STREAMING-NEXT: index z0.h, #0, #1
371+
; CHECK-STREAMING-NEXT: mov z0.h, #255 // =0xff
367372
; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
368-
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
369-
; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
370-
; CHECK-STREAMING-NEXT: mov z1.h, w1
371-
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
372-
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
373-
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
373+
; CHECK-STREAMING-NEXT: index z2.h, #0, #1
374+
; CHECK-STREAMING-NEXT: mov z1.h, w0
375+
; CHECK-STREAMING-NEXT: mov z3.h, w1
376+
; CHECK-STREAMING-NEXT: mov z0.h, p0/m, z0.h
377+
; CHECK-STREAMING-NEXT: sel z1.h, p0, z1.h, z0.h
378+
; CHECK-STREAMING-NEXT: sel z2.h, p0, z2.h, z0.h
379+
; CHECK-STREAMING-NEXT: sel z3.h, p0, z3.h, z0.h
380+
; CHECK-STREAMING-NEXT: and z1.d, z1.d, z0.d
381+
; CHECK-STREAMING-NEXT: add z1.h, z1.h, z2.h
382+
; CHECK-STREAMING-NEXT: and z2.d, z3.d, z0.d
383+
; CHECK-STREAMING-NEXT: umin z0.h, p0/m, z0.h, z1.h
384+
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z2.h, z0.h
374385
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
375386
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
376387
; CHECK-STREAMING-NEXT: ret
@@ -394,10 +405,14 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
394405
;
395406
; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
396407
; CHECK-STREAMING: // %bb.0:
397-
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
408+
; CHECK-STREAMING-NEXT: index z0.s, #0, #1
398409
; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
399-
; CHECK-STREAMING-NEXT: index z0.s, w8, #1
410+
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
411+
; CHECK-STREAMING-NEXT: mov z1.s, w8
400412
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
413+
; CHECK-STREAMING-NEXT: mov z0.s, p0/m, z0.s
414+
; CHECK-STREAMING-NEXT: sel z1.s, p0, z1.s, z0.s
415+
; CHECK-STREAMING-NEXT: add z0.s, z1.s, z0.s
401416
; CHECK-STREAMING-NEXT: mov z1.s, w8
402417
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
403418
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s

llvm/test/CodeGen/AArch64/arm64-build-vector.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,8 @@ define void @widen_f16_build_vector(ptr %addr) {
5757
; CHECK-LABEL: widen_f16_build_vector:
5858
; CHECK: // %bb.0:
5959
; CHECK-NEXT: mov w8, #13294 // =0x33ee
60-
; CHECK-NEXT: movk w8, #13294, lsl #16
61-
; CHECK-NEXT: str w8, [x0]
60+
; CHECK-NEXT: dup v0.4h, w8
61+
; CHECK-NEXT: str s0, [x0]
6262
; CHECK-NEXT: ret
6363
store <2 x half> <half 0xH33EE, half 0xH33EE>, ptr %addr, align 2
6464
ret void

llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -94,16 +94,14 @@ define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unname
9494
; CHECK-LABEL: combine_undef_add_8xi32:
9595
; CHECK: // %bb.0:
9696
; CHECK-NEXT: fmov s1, w0
97-
; CHECK-NEXT: movi v0.2d, #0000000000000000
97+
; CHECK-NEXT: dup v0.4s, w8
9898
; CHECK-NEXT: mov v1.s[1], w1
99-
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
10099
; CHECK-NEXT: mov v1.s[2], w2
101100
; CHECK-NEXT: mov v1.s[3], w3
102-
; CHECK-NEXT: xtn v2.4h, v1.4s
103-
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
104-
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
105-
; CHECK-NEXT: mov v1.d[1], v0.d[0]
106-
; CHECK-NEXT: uaddlv s0, v1.8h
101+
; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
102+
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
103+
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
104+
; CHECK-NEXT: uaddlv s0, v0.8h
107105
; CHECK-NEXT: fmov w0, s0
108106
; CHECK-NEXT: ret
109107
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0

llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ define void @vls_sve_and_64xi8(ptr %ap, ptr %out) nounwind {
99
; CHECK-NEXT: add x8, x8, :lo12:.LCPI0_0
1010
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
1111
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x8]
12+
; CHECK-NEXT: mov z0.b, p0/m, z0.b
13+
; CHECK-NEXT: sel z1.b, p0, z1.b, z0.b
1214
; CHECK-NEXT: and z0.d, z0.d, z1.d
1315
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
1416
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,15 +13,22 @@ define void @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_
1313
; CHECK-LABEL: fixed_bitselect_v8i32:
1414
; CHECK: // %bb.0:
1515
; CHECK-NEXT: ptrue p0.s, vl8
16-
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
1716
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
18-
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
19-
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
20-
; CHECK-NEXT: add z1.s, z0.s, z1.s
21-
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
17+
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
18+
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2]
19+
; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0
20+
; CHECK-NEXT: mov z4.s, p0/m, #-1 // =0xffffffffffffffff
21+
; CHECK-NEXT: mov z0.s, p0/m, z0.s
22+
; CHECK-NEXT: sel z1.s, p0, z1.s, z0.s
23+
; CHECK-NEXT: mov z0.s, p0/m, z0.s
24+
; CHECK-NEXT: sel z2.s, p0, z2.s, z0.s
25+
; CHECK-NEXT: sel z1.s, p0, z1.s, z0.s
26+
; CHECK-NEXT: sub z3.s, z3.s, z0.s
27+
; CHECK-NEXT: sel z2.s, p0, z2.s, z0.s
28+
; CHECK-NEXT: add z0.s, z0.s, z4.s
29+
; CHECK-NEXT: and z1.d, z3.d, z1.d
2230
; CHECK-NEXT: and z0.d, z0.d, z2.d
23-
; CHECK-NEXT: and z1.d, z1.d, z3.d
24-
; CHECK-NEXT: orr z0.d, z1.d, z0.d
31+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
2532
; CHECK-NEXT: st1w { z0.s }, p0, [x3]
2633
; CHECK-NEXT: ret
2734
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr

0 commit comments

Comments
 (0)