Skip to content

[WIP][DAG] getNode - freeze insert/compress node base values if the inserted/mask values are UNDEF #146332

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27641,7 +27641,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {

// If inserting an UNDEF, just return the original vector.
if (N1.isUndef())
-    return N0;
+    return DAG.getFreeze(N0);

// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract if the types match, and can simplify
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7962,7 +7962,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,

// If the inserted element is an UNDEF, just use the input vector.
if (N2.isUndef())
-    return N1;
+    return getFreeze(N1);

break;
}
Expand Down Expand Up @@ -8001,7 +8001,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// can just use the input to the extract.
if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
-    return N2.getOperand(0);
+    return getFreeze(N2.getOperand(0));
break;
}
case ISD::BITCAST:
Expand All @@ -8028,7 +8028,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"Vector and mask must have same number of elements.");

if (N1.isUndef() || N2.isUndef())
-    return N3;
+    return getFreeze(N3);

break;
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45047,6 +45047,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3: {
SmallVector<int, 8> Mask;
SmallVector<SDValue, 2> Ops;
Expand Down Expand Up @@ -45094,10 +45095,13 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case X86ISD::BLENDV:
return false;
// SSE target shuffles.
case X86ISD::PACKSS:
case X86ISD::PACKUS:
case X86ISD::PSHUFD:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VPERMILPI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
return false;
// SSE comparisons handle all icmp/fcmp cases.
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: ret
%1 = extractelement <1 x i64> %data, i64 1
%.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
Expand Down
43 changes: 29 additions & 14 deletions llvm/test/CodeGen/AArch64/active_lane_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,12 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
;
; CHECK-STREAMING-LABEL: lane_mask_v16i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: index z0.b, #0, #1
; CHECK-STREAMING-NEXT: ptrue p0.b, vl16
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: mov z0.b, p0/m, z0.b
; CHECK-STREAMING-NEXT: sel z1.b, p0, z1.b, z0.b
; CHECK-STREAMING-NEXT: add z0.b, z1.b, z0.b
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
Expand All @@ -331,9 +334,12 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
;
; CHECK-STREAMING-LABEL: lane_mask_v8i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: index z0.b, w0, #1
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: index z0.b, #0, #1
; CHECK-STREAMING-NEXT: ptrue p0.b, vl8
; CHECK-STREAMING-NEXT: mov z1.b, w0
; CHECK-STREAMING-NEXT: mov z0.b, p0/m, z0.b
; CHECK-STREAMING-NEXT: sel z1.b, p0, z1.b, z0.b
; CHECK-STREAMING-NEXT: add z0.b, z1.b, z0.b
; CHECK-STREAMING-NEXT: cmphi p1.b, p0/z, z1.b, z0.b
; CHECK-STREAMING-NEXT: mov z1.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: orr z0.d, z0.d, z1.d
Expand Down Expand Up @@ -362,15 +368,20 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
;
; CHECK-STREAMING-LABEL: lane_mask_v4i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: mov z1.h, w0
; CHECK-STREAMING-NEXT: index z0.h, #0, #1
; CHECK-STREAMING-NEXT: mov z0.h, #255 // =0xff
; CHECK-STREAMING-NEXT: ptrue p0.h, vl4
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: add z0.h, z1.h, z0.h
; CHECK-STREAMING-NEXT: mov z1.h, w1
; CHECK-STREAMING-NEXT: umin z0.h, z0.h, #255
; CHECK-STREAMING-NEXT: and z1.h, z1.h, #0xff
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-STREAMING-NEXT: index z2.h, #0, #1
; CHECK-STREAMING-NEXT: mov z1.h, w0
; CHECK-STREAMING-NEXT: mov z3.h, w1
; CHECK-STREAMING-NEXT: mov z0.h, p0/m, z0.h
; CHECK-STREAMING-NEXT: sel z1.h, p0, z1.h, z0.h
; CHECK-STREAMING-NEXT: sel z2.h, p0, z2.h, z0.h
; CHECK-STREAMING-NEXT: sel z3.h, p0, z3.h, z0.h
; CHECK-STREAMING-NEXT: and z1.d, z1.d, z0.d
; CHECK-STREAMING-NEXT: add z1.h, z1.h, z2.h
; CHECK-STREAMING-NEXT: and z2.d, z3.d, z0.d
; CHECK-STREAMING-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-STREAMING-NEXT: cmphi p0.h, p0/z, z2.h, z0.h
; CHECK-STREAMING-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-STREAMING-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-STREAMING-NEXT: ret
Expand All @@ -394,10 +405,14 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
;
; CHECK-STREAMING-LABEL: lane_mask_v2i1_i8:
; CHECK-STREAMING: // %bb.0:
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
; CHECK-STREAMING-NEXT: index z0.s, #0, #1
; CHECK-STREAMING-NEXT: ptrue p0.s, vl2
; CHECK-STREAMING-NEXT: index z0.s, w8, #1
; CHECK-STREAMING-NEXT: and w8, w0, #0xff
; CHECK-STREAMING-NEXT: mov z1.s, w8
; CHECK-STREAMING-NEXT: and w8, w1, #0xff
; CHECK-STREAMING-NEXT: mov z0.s, p0/m, z0.s
; CHECK-STREAMING-NEXT: sel z1.s, p0, z1.s, z0.s
; CHECK-STREAMING-NEXT: add z0.s, z1.s, z0.s
; CHECK-STREAMING-NEXT: mov z1.s, w8
; CHECK-STREAMING-NEXT: umin z0.s, z0.s, #255
; CHECK-STREAMING-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-build-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ define void @widen_f16_build_vector(ptr %addr) {
; CHECK-LABEL: widen_f16_build_vector:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #13294 // =0x33ee
; CHECK-NEXT: movk w8, #13294, lsl #16
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: dup v0.4h, w8
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
store <2 x half> <half 0xH33EE, half 0xH33EE>, ptr %addr, align 2
ret void
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/AArch64/concat-vector-add-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,14 @@ define i32 @combine_undef_add_8xi32(i32 %a, i32 %b, i32 %c, i32 %d) local_unname
; CHECK-LABEL: combine_undef_add_8xi32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, w0
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: dup v0.4s, w8
; CHECK-NEXT: mov v1.s[1], w1
; CHECK-NEXT: uhadd v0.4h, v0.4h, v0.4h
; CHECK-NEXT: mov v1.s[2], w2
; CHECK-NEXT: mov v1.s[3], w3
; CHECK-NEXT: xtn v2.4h, v1.4s
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: uhadd v1.4h, v2.4h, v1.4h
; CHECK-NEXT: mov v1.d[1], v0.d[0]
; CHECK-NEXT: uaddlv s0, v1.8h
; CHECK-NEXT: uzp2 v2.8h, v1.8h, v0.8h
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
; CHECK-NEXT: uhadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT: uaddlv s0, v0.8h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ define void @vls_sve_and_64xi8(ptr %ap, ptr %out) nounwind {
; CHECK-NEXT: add x8, x8, :lo12:.LCPI0_0
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x8]
; CHECK-NEXT: mov z0.b, p0/m, z0.b
; CHECK-NEXT: sel z1.b, p0, z1.b, z0.b
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x1]
; CHECK-NEXT: ret
Expand Down
21 changes: 14 additions & 7 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,22 @@ define void @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
; CHECK-NEXT: add z1.s, z0.s, z1.s
; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2]
; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0
; CHECK-NEXT: mov z4.s, p0/m, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.s, p0/m, z0.s
; CHECK-NEXT: sel z1.s, p0, z1.s, z0.s
; CHECK-NEXT: mov z0.s, p0/m, z0.s
; CHECK-NEXT: sel z2.s, p0, z2.s, z0.s
; CHECK-NEXT: sel z1.s, p0, z1.s, z0.s
; CHECK-NEXT: sub z3.s, z3.s, z0.s
; CHECK-NEXT: sel z2.s, p0, z2.s, z0.s
; CHECK-NEXT: add z0.s, z0.s, z4.s
; CHECK-NEXT: and z1.d, z3.d, z1.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z1.d, z0.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: st1w { z0.s }, p0, [x3]
; CHECK-NEXT: ret
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
Expand Down
Loading
Loading