Skip to content

Conversation

AmrDeveloper
Copy link
Member

@AmrDeveloper AmrDeveloper commented Jan 9, 2025

Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2

Alive2 proof: https://alive2.llvm.org/ce/z/3oTEop
Fixes: #122235

@AmrDeveloper AmrDeveloper requested a review from dtcxzyw January 9, 2025 20:29
@AmrDeveloper AmrDeveloper requested a review from nikic as a code owner January 9, 2025 20:29
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jan 9, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 9, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Amr Hesham (AmrDeveloper)

Changes

Convert fshl(x, 0, y) to shl(X, and(Y, BitWidth - 1)) or to shl(x, y) if y within range (0, Bitwidth - 1)

Fixes: #122235


Full diff: https://github.com/llvm/llvm-project/pull/122362.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+13)
  • (modified) llvm/test/Transforms/InstCombine/fsh.ll (+95)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c55c40c88bc845..f0ff76ba57555b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2229,6 +2229,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         return BitOp;
     }
 
+    // fshal(X, 0, Y) --> shl(X, and(Y, BitWidth - 1))
+    // fshal(X, 0, Y) --> Shl(X, Y) if Y within the range 0 to type bit width
+    if (match(Op1, m_ZeroInt())) {
+      unsigned BitWidth = Ty->getScalarSizeInBits();
+      Value *Op2 = II->getArgOperand(2);
+      if (auto Range = II->getRange(); Range && Range->getLower().sge(0) &&
+                                       Range->getUpper().sle(BitWidth)) {
+        return BinaryOperator::CreateShl(Op0, Op2);
+      }
+      Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
+      return BinaryOperator::CreateShl(Op0, And);
+    }
+
     // Left or right might be masked.
     if (SimplifyDemandedInstructionBits(*II))
       return &CI;
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 434cd810296d8c..c0f1ee4a5976bb 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -6,6 +6,7 @@ declare i16 @llvm.fshr.i16(i16, i16, i16)
 declare i32 @llvm.fshl.i32(i32, i32, i32)
 declare i33 @llvm.fshr.i33(i33, i33, i33)
 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)
 declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
 declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
 
@@ -1010,3 +1011,97 @@ define <2 x i32> @fshr_vec_zero_elem(<2 x i32> %x, <2 x i32> %y) {
   %fsh = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 2, i32 0>)
   ret <2 x i32> %fsh
 }
+
+define i16 @fshl_i16_shl(i16 %x, i16 %y) {
+; CHECK-LABEL: @fshl_i16_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl(i32 %x, i32 %y) {
+; CHECK-LABEL: @fshl_i32_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}
+
+define <2 x i16> @fshl_vi16_shl(<2 x i16>  %x, <2 x i16> %y) {
+; CHECK-LABEL: @fshl_vi16_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i16> [[Y:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i16> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i16> [[RES]]
+;
+entry:
+  %res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
+  ret <2 x i16> %res
+}
+
+define <2 x i31> @fshl_vi31_shl(<2 x i31>  %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_vi31_shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i31> [[Y:%.*]], splat (i31 30)
+; CHECK-NEXT:    [[RES:%.*]] = shl <2 x i31> [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i31> [[RES]]
+;
+entry:
+  %res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31>  %y)
+  ret <2 x i31>  %res
+}
+
+define i16 @fshl_i16_shl_with_range(i16 %x, i16 range(i16 0, 16) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range(i32 %x, i32 range(i32 0, 32) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}
+
+define i16 @fshl_i16_shl_with_range_ignored(i16 %x, i16 range(i16 0, 17) %y) {
+; CHECK-LABEL: @fshl_i16_shl_with_range_ignored(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[Y:%.*]], 15
+; CHECK-NEXT:    [[RES:%.*]] = shl i16 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+entry:
+  %res = call i16 @llvm.fshl.i16(i16 %x, i16 0, i16 %y)
+  ret i16 %res
+}
+
+define i32 @fshl_i32_shl_with_range_ignored(i32 %x, i32 range(i32 0, 33) %y) {
+; CHECK-LABEL: @fshl_i32_shl_with_range_ignored(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[Y:%.*]], 31
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X:%.*]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %res = call i32 @llvm.fshl.i32(i32 %x, i32 0, i32 %y)
+  ret i32 %res
+}

@dtcxzyw dtcxzyw changed the title [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, and(Y, BitWidth - 1)) [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, urem(Y, BitWidth)) Jan 10, 2025
@dtcxzyw dtcxzyw changed the title [InstCombine] Convert fshl(x, 0, y) to shl(x, y) or shl(X, urem(Y, BitWidth)) [InstCombine] Convert fshl(x, 0, y) to shl(x, and(y, BitWidth - 1)) when BitWidth is pow2 Jan 10, 2025
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Please update the PR description.

ret i32 %res
}

define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {
define <2 x i16> @fshl_vi16_shl(<2 x i16> %x, <2 x i16> %y) {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All comments addressed, Thank you

; CHECK-NEXT: ret <2 x i16> [[RES]]
;
entry:
%res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> <i16 0, i16 0>, <2 x i16> %y)
%res = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> zeroinitializer, <2 x i16> %y)

; CHECK-NEXT: ret <2 x i31> [[RES]]
;
entry:
%res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31> %y)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> <i31 0, i31 0>, <2 x i31> %y)
%res = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> zeroinitializer, <2 x i31> %y)

@AmrDeveloper AmrDeveloper force-pushed the inst_combine_fshl_x_0_y branch from 345258d to f155039 Compare January 11, 2025 08:16
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add an alive2 proof to the PR description?


// fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
// power-of-2
if (isPowerOf2_32(BitWidth) && match(Op1, m_ZeroInt())) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't we missing a check that this is fshl rather than fshr? Can you please add a test using fshr?

Copy link
Member Author

@AmrDeveloper AmrDeveloper Jan 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we already have an assert that it's fshl

assert(IID == Intrinsic::fshl &&
"All funnel shifts by simple constants should go left");
// fshl(X, 0, C) --> shl X, C

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only for constant shift amount.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you are right, i will add a check and update tests

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

@AmrDeveloper AmrDeveloper force-pushed the inst_combine_fshl_x_0_y branch from f155039 to af22b55 Compare January 11, 2025 09:19
@AmrDeveloper AmrDeveloper merged commit 642e493 into llvm:main Jan 11, 2025
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 11, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-ubuntu running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/3679

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
Input file: /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/test/tools/llvm-gsymutil/ARM_AArch64/Output/macho-merged-funcs-dwarf.yaml.tmp.dSYM
Output file (aarch64): /home/buildbot/worker/as-builder-4/ramdisk/expensive-checks/build/test/tools/llvm-gsymutil/ARM_AArch64/Output/macho-merged-funcs-dwarf.yaml.tmp.default.gSYM
Loaded 3 functions from DWARF.
Loaded 3 functions from symbol table.
warning: same address range contains different debug info. Removing:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000047
addr=0x0000000000000248, file=  3, line=  5
addr=0x0000000000000254, file=  3, line=  7
addr=0x0000000000000258, file=  3, line=  9
addr=0x000000000000025c, file=  3, line=  8
addr=0x0000000000000260, file=  3, line= 11
addr=0x0000000000000264, file=  3, line= 10
addr=0x0000000000000268, file=  3, line=  6


In favor of this one:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000030
addr=0x0000000000000248, file=  2, line=  5
addr=0x0000000000000254, file=  2, line=  7
addr=0x0000000000000258, file=  2, line=  9
addr=0x000000000000025c, file=  2, line=  8
addr=0x0000000000000260, file=  2, line= 11
addr=0x0000000000000264, file=  2, line= 10
addr=0x0000000000000268, file=  2, line=  6


warning: same address range contains different debug info. Removing:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000030
addr=0x0000000000000248, file=  2, line=  5
addr=0x0000000000000254, file=  2, line=  7
addr=0x0000000000000258, file=  2, line=  9
addr=0x000000000000025c, file=  2, line=  8
addr=0x0000000000000260, file=  2, line= 11
addr=0x0000000000000264, file=  2, line= 10
addr=0x0000000000000268, file=  2, line=  6


In favor of this one:
[0x0000000000000248 - 0x0000000000000270): Name=0x00000001
addr=0x0000000000000248, file=  1, line=  5
addr=0x0000000000000254, file=  1, line=  7
addr=0x0000000000000258, file=  1, line=  9
addr=0x000000000000025c, file=  1, line=  8
addr=0x0000000000000260, file=  1, line= 11
addr=0x0000000000000264, file=  1, line= 10
...

BaiXilin pushed a commit to BaiXilin/llvm-project that referenced this pull request Jan 12, 2025
…hen BitWidth is pow2 (llvm#122362)

Convert `fshl(x, 0, y)` to `shl(x, and(y, BitWidth - 1))` when BitWidth
is pow2

Alive2 proof: https://alive2.llvm.org/ce/z/3oTEop
Fixes: llvm#122235
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[InstCombine] fshl(x, 0, y) with in-range y not converted to shl x, y
6 participants