Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2 #122362

Merged
merged 2 commits into from
Jan 11, 2025

Conversation

AmrDeveloper
Copy link
Member

@AmrDeveloper AmrDeveloper commented Jan 9, 2025

Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2

Alive2 proof: https://alive2.llvm.org/ce/z/3oTEop
Fixes: #122235

@llvmbot
Copy link
Member

llvmbot commented Jan 9, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Amr Hesham (AmrDeveloper)

Changes

Convert fshl(x, 0, y) to shl(X, and(Y, BitWidth - 1)) or to shl(x, y) if y within range (0, Bitwidth - 1)

Fixes: #122235


Full diff: https://github.com/llvm/llvm-project/pull/122362.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+13)
  • (modified) llvm/test/Transforms/InstCombine/fsh.ll (+95)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c55c40c88bc845..f0ff76ba57555b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2229,6 +2229,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         return BitOp;
     }
 
+    // fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))
+    // fshal(X, 0, Y) --> Shl(X, Y) if Y within the range 0 to type bit width
+    if (match(Op1, m_ZeroInt())) {
+      unsigned BitWidth = Ty->getScalarSizeInBits();
+      Value *Op2 = II->getArgOperand(2);
+      if (auto Range = II->getRange(); Range && Range->getLower().sge(0) &&
+                                       Range->getUpper().sle(BitWidth)) {
+        return BinaryOperator::CreateShl(Op0, Op2);
+      }
+      Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
+      return BinaryOperator::CreateShl(Op0, And);
+    }
+
     // Left or right might be masked.
     if (SimplifyDemandedInstructionBits(*II))
       return &CI;
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 434cd810296d8c..c0f1ee4a5976bb 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -6,6 +6,7 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i33 @llvm.fshr.i33(i33, i33, i33)
 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)
 declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
 declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
 
@@ -1010,3 +1011,97 @@ define <2 x i32> @fshr_vec_zero_elem(<2 x i32> %x, <2 x i32> %y) {
   %fsh = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 2, i32 0>)
   ret <2 x i32> %fsh
 }
+
+define i16 @fshl_i16_shl(i16 %x, i16 %y) {
+; CHECK-LABEL: @fshl_i16_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_i32_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}
+
+define <2 x i16> @fshl_vi16_shl(<2 x i16>  %x, <2 x i16> %y) {
+; CHECK-LABEL: @fshl_vi16_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i16> [[Y:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i16> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i16> [[RES]]
+;
+entry:
+  %res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
+  ret <2 x i16> %res
+}
+
+define <2 x i31> @fshl_vi31_shl(<2 x i31>  %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_vi31_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i31> [[Y:%.*]], splat (i31 30)
+; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i31> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i31> [[RES]]
+;
+entry:
+  %res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31>  %y)
+  ret <2 x i31>  %res
+}
+
+define i16 @fshl_i16_shl_with_range(i16 %x, i16 range(i16 0, 16) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range(i32 %x, i32 range(i32 0, 32) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}
+
+define i16 @fshl_i16_shl_with_range_ignored(i16 %x, i16 range(i16 0, 17) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range_ignored(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range_ignored(i32 %x, i32 range(i32 0, 33) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range_ignored(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}

@dtcxzyw dtcxzyw changed the title [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, and(Y, BitWidth - 1)) [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, urem(Y, BitWidth)) Jan 10, 2025
@dtcxzyw dtcxzyw changed the title [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, urem(Y, BitWidth)) [InstCombine] Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2 Jan 10, 2025
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Please update the PR description.

ret i32 %res
}

define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {
define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All comments addressed, Thank you

; CHECK-NEXT: ret <2 x i16> [[RES]]
;
entry:
%res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
%res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> zeroinitializer, <2 x i16> %y)

; CHECK-NEXT: ret <2 x i31> [[RES]]
;
entry:
%res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31> %y)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31> %y)
%res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> zeroinitializer, <2 x i31> %y)

@AmrDeveloper AmrDeveloper force-pushed the inst_combine_fshl_x_0_y branch from 345258d to f155039 Compare January 11, 2025 08:16
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add an alive2 proof to the PR description?

@@ -2229,6 +2229,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return BitOp;
}

// fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
// power-of-2
if (isPowerOf2_32(BitWidth) && match(Op1, m_ZeroInt())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't we missing a check that this is fshl rather than fshr? Can you please add a test using fshr?

Copy link
Member Author

@AmrDeveloper AmrDeveloper Jan 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we already have an assert that it's fshl

assert(IID == Intrinsic::fshl &&
"All funnel shifts by simple constants should go left");
// fshl(X, 0, C) --> shl X, C

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only for constant shift amount.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you are right, i will add a check and update tests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

@AmrDeveloper AmrDeveloper force-pushed the inst_combine_fshl_x_0_y branch from f155039 to af22b55 Compare January 11, 2025 09:19
@AmrDeveloper AmrDeveloper merged commit 642e493 into llvm:main Jan 11, 2025
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 11, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-ubuntu running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/3679

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
Input file: /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/test/tools/llvm-gsymutil/ARM_AArch64/Output/macho-merged-funcs-dwarf.yaml.tmp.dSYM
Output file (aarch64): /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/test/tools/llvm-gsymutil/ARM_AArch64/Output/macho-merged-funcs-dwarf.yaml.tmp.default.gSYM
Loaded 3 functions from DWARF.
Loaded 3 functions from symbol table.
warning: same address range contains different debug info. Removing:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000047
addr=0x0000000000000248, file=  3, line=  5
addr=0x0000000000000254, file=  3, line=  7
addr=0x0000000000000258, file=  3, line=  9
addr=0x000000000000025c, file=  3, line=  8
addr=0x0000000000000260, file=  3, line= 11
addr=0x0000000000000264, file=  3, line= 10
addr=0x0000000000000268, file=  3, line=  6


In favor of this one:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000030
addr=0x0000000000000248, file=  2, line=  5
addr=0x0000000000000254, file=  2, line=  7
addr=0x0000000000000258, file=  2, line=  9
addr=0x000000000000025c, file=  2, line=  8
addr=0x0000000000000260, file=  2, line= 11
addr=0x0000000000000264, file=  2, line= 10
addr=0x0000000000000268, file=  2, line=  6


warning: same address range contains different debug info. Removing:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000030
addr=0x0000000000000248, file=  2, line=  5
addr=0x0000000000000254, file=  2, line=  7
addr=0x0000000000000258, file=  2, line=  9
addr=0x000000000000025c, file=  2, line=  8
addr=0x0000000000000260, file=  2, line= 11
addr=0x0000000000000264, file=  2, line= 10
addr=0x0000000000000268, file=  2, line=  6


In favor of this one:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000001
addr=0x0000000000000248, file=  1, line=  5
addr=0x0000000000000254, file=  1, line=  7
addr=0x0000000000000258, file=  1, line=  9
addr=0x000000000000025c, file=  1, line=  8
addr=0x0000000000000260, file=  1, line= 11
addr=0x0000000000000264, file=  1, line= 10
...

BaiXilin pushed a commit to BaiXilin/llvm-fix-vnni-instr-types that referenced this pull request Jan 12, 2025
…hen BitWidth is pow2 (llvm#122362)

Convert `fshl(x, 0, y)` to `shl(x, and(y, BitWidth - 1))` when BitWidth
is pow2

Alive2 proof: https://alive2.llvm.org/ce/z/3oTEop
Fixes: llvm#122235
Mel-Chen pushed a commit to Mel-Chen/llvm-project that referenced this pull request Jan 13, 2025
…hen BitWidth is pow2 (llvm#122362)

Convert `fshl(x, 0, y)` to `shl(x, and(y, BitWidth - 1))` when BitWidth
is pow2

Alive2 proof: https://alive2.llvm.org/ce/z/3oTEop
Fixes: llvm#122235
DKLoehr pushed a commit to DKLoehr/llvm-project that referenced this pull request Jan 17, 2025
…hen BitWidth is pow2 (llvm#122362)

Convert `fshl(x, 0, y)` to `shl(x, and(y, BitWidth - 1))` when BitWidth
is pow2

Alive2 proof: https://alive2.llvm.org/ce/z/3oTEop
Fixes: llvm#122235
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[InstCombine] fshl(x, 0, y) with in-range y not converted to shl x, y
6 participants