@@ -871,25 +871,177 @@ void GenSpecificPattern::visitSDiv(llvm::BinaryOperator& I)
871
871
}
872
872
}
873
873
874
+ /*
875
+ Optimizes bit reversing pattern:
876
+
877
+ %and = shl i32 %0, 1
878
+ %shl = and i32 %and, 0xAAAAAAAA
879
+ %and2 = lshr i32 %0, 1
880
+ %shr = and i32 %and2, 0x55555555
881
+ %or = or i32 %shl, %shr
882
+ %and3 = shl i32 %or, 2
883
+ %shl4 = and i32 %and3, 0xCCCCCCCC
884
+ %and5 = lshr i32 %or, 2
885
+ %shr6 = and i32 %and5, 0x33333333
886
+ %or7 = or i32 %shl4, %shr6
887
+ %and8 = shl i32 %or7, 4
888
+ %shl9 = and i32 %and8, 0xF0F0F0F0
889
+ %and10 = lshr i32 %or7, 4
890
+ %shr11 = and i32 %and10, 0x0F0F0F0F
891
+ %or12 = or i32 %shl9, %shr11
892
+ %and13 = shl i32 %or12, 8
893
+ %shl14 = and i32 %and13, 0xFF00FF00
894
+ %and15 = lshr i32 %or12, 8
895
+ %shr16 = and i32 %and15, 0x00FF00FF
896
+ %or17 = or i32 %shl14, %shr16
897
+ %shl19 = shl i32 %or17, 16
898
+ %shr21 = lshr i32 %or17, 16
899
+ %or22 = or i32 %shl19, %shr21
900
+
901
+ into:
902
+
903
+ %or22 = call i32 @llvm.genx.GenISA.bfrev.i32(i32 %0)
904
+
905
+ And similarly for patterns reversing 16 and 64 bit type values.
906
+ */
907
+ template <typename MaskType>
908
+ void GenSpecificPattern::matchReverse (BinaryOperator &I)
909
+ {
910
+ using namespace llvm ::PatternMatch;
911
+ assert (I.getType ()->isIntegerTy ());
912
+ Value *nextOrShl, *nextOrShr;
913
+ uint64_t currentShiftShl = 0 , currentShiftShr = 0 ;
914
+ uint64_t currentMaskShl = 0 , currentMaskShr = 0 ;
915
+ auto patternBfrevFirst =
916
+ m_Or (
917
+ m_Shl (m_Value (nextOrShl), m_ConstantInt (currentShiftShl)),
918
+ m_LShr (m_Value (nextOrShr), m_ConstantInt (currentShiftShr)));
919
+
920
+ auto patternBfrev =
921
+ m_Or (
922
+ m_And (
923
+ m_Shl (m_Value (nextOrShl), m_ConstantInt (currentShiftShl)),
924
+ m_ConstantInt (currentMaskShl)),
925
+ m_And (
926
+ m_LShr (m_Value (nextOrShr), m_ConstantInt (currentShiftShr)),
927
+ m_ConstantInt (currentMaskShr)));
928
+
929
+ unsigned int bitWidth = std::numeric_limits<MaskType>::digits;
930
+ assert (bitWidth == 16 || bitWidth == 32 || bitWidth == 64 );
931
+
932
+ unsigned int currentShift = bitWidth / 2 ;
933
+ // First mask is a value with all upper half bits present.
934
+ MaskType mask = std::numeric_limits<MaskType>::max () << currentShift;
935
+
936
+ bool isBfrevMatchFound = false ;
937
+ nextOrShl = &I;
938
+ if (match (nextOrShl, patternBfrevFirst) &&
939
+ nextOrShl == nextOrShr &&
940
+ currentShiftShl == currentShift &&
941
+ currentShiftShr == currentShift)
942
+ {
943
+ // NextOrShl is assigned to next one by match().
944
+ currentShift /= 2 ;
945
+ // Constructing next mask to match.
946
+ mask ^= mask >> currentShift;
947
+ }
948
+
949
+ while (currentShift > 0 )
950
+ {
951
+ if (match (nextOrShl, patternBfrev) &&
952
+ nextOrShl == nextOrShr &&
953
+ currentShiftShl == currentShift &&
954
+ currentShiftShr == currentShift &&
955
+ currentMaskShl == mask &&
956
+ currentMaskShr == (MaskType)~mask)
957
+ {
958
+ // NextOrShl is assigned to next one by match().
959
+ if (currentShift == 1 )
960
+ {
961
+ isBfrevMatchFound = true ;
962
+ break ;
963
+ }
964
+
965
+ currentShift /= 2 ;
966
+ // Constructing next mask to match.
967
+ mask ^= mask >> currentShift;
968
+ }
969
+ else
970
+ {
971
+ break ;
972
+ }
973
+ }
974
+
975
+ if (isBfrevMatchFound)
976
+ {
977
+ llvm::IRBuilder<> builder (&I);
978
+ Function *bfrevFunc = GenISAIntrinsic::getDeclaration (
979
+ I.getParent ()->getParent ()->getParent (), GenISAIntrinsic::GenISA_bfrev, builder.getInt32Ty ());
980
+ if (bitWidth == 16 )
981
+ {
982
+ Value* zext = builder.CreateZExt (nextOrShl, builder.getInt32Ty ());
983
+ Value* bfrev = builder.CreateCall (bfrevFunc, zext);
984
+ Value* lshr = builder.CreateLShr (bfrev, 16 );
985
+ Value* trunc = builder.CreateTrunc (lshr, I.getType ());
986
+ I.replaceAllUsesWith (trunc );
987
+ }
988
+ else if (bitWidth == 32 )
989
+ {
990
+ Value* bfrev = builder.CreateCall (bfrevFunc, nextOrShl);
991
+ I.replaceAllUsesWith (bfrev);
992
+ }
993
+ else
994
+ { // bitWidth == 64
995
+ Value* int32Source = builder.CreateBitCast (nextOrShl, llvm::VectorType::get (builder.getInt32Ty (), 2 ));
996
+ Value* extractElement0 = builder.CreateExtractElement (int32Source, builder.getInt32 (0 ));
997
+ Value* extractElement1 = builder.CreateExtractElement (int32Source, builder.getInt32 (1 ));
998
+ Value* bfrevLow = builder.CreateCall (bfrevFunc, extractElement0);
999
+ Value* bfrevHigh = builder.CreateCall (bfrevFunc, extractElement1);
1000
+ Value* bfrev64Result = llvm::UndefValue::get (int32Source->getType ());
1001
+ bfrev64Result = builder.CreateInsertElement (bfrev64Result, bfrevHigh, builder.getInt32 (0 ));
1002
+ bfrev64Result = builder.CreateInsertElement (bfrev64Result, bfrevLow, builder.getInt32 (1 ));
1003
+ Value* bfrevBitcast = builder.CreateBitCast (bfrev64Result, I.getType ());
1004
+ I.replaceAllUsesWith (bfrevBitcast);
1005
+ }
1006
+ }
1007
+ }
1008
+
874
1009
void GenSpecificPattern::visitBinaryOperator (BinaryOperator &I)
875
1010
{
876
- /*
877
- llvm changes ADD to OR when possible, and this optimization changes it back and allow 2 ADDs to merge.
878
- This can avoid scattered read for constant buffer when the index is calculated by shl + or + add.
1011
+ if (I. getOpcode () == Instruction::Or)
1012
+ {
1013
+ using namespace llvm ::PatternMatch ;
879
1014
880
- ex:
881
- from
1015
+ if (I.getType ()->isIntegerTy ())
1016
+ {
1017
+ unsigned int bitWidth = cast<IntegerType>(I.getType ())->getBitWidth ();
1018
+ switch (bitWidth)
1019
+ {
1020
+ case 16 :
1021
+ matchReverse<unsigned short >(I);
1022
+ break ;
1023
+ case 32 :
1024
+ matchReverse<unsigned int >(I);
1025
+ break ;
1026
+ case 64 :
1027
+ matchReverse<unsigned long long >(I);
1028
+ break ;
1029
+ }
1030
+ }
1031
+
1032
+ /*
1033
+ llvm changes ADD to OR when possible, and this optimization changes it back and allow 2 ADDs to merge.
1034
+ This can avoid scattered read for constant buffer when the index is calculated by shl + or + add.
1035
+
1036
+ ex:
1037
+ from
882
1038
%22 = shl i32 %14, 2
883
1039
%23 = or i32 %22, 3
884
1040
%24 = add i32 %23, 16
885
- to
1041
+ to
886
1042
%22 = shl i32 %14, 2
887
1043
%23 = add i32 %22, 19
888
- */
889
-
890
- if (I.getOpcode () == Instruction::Or)
891
- {
892
- using namespace llvm ::PatternMatch;
1044
+ */
893
1045
Value *AndOp1, *EltOp1;
894
1046
auto pattern1 = m_Or (
895
1047
m_And (m_Value (AndOp1), m_SpecificInt (0xFFFFFFFF )),
0 commit comments