Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1189,11 +1189,13 @@
(Add32x4)
(Add64x2)
(Add128)
(Add128Cout)
(Sub8x16)
(Sub16x8)
(Sub32x4)
(Sub64x2)
(Sub128)
(Sub128Cout)
;; Multiplication
(Mul8x16)
(Mul16x8)
Expand Down Expand Up @@ -3842,6 +3844,10 @@
;; Add `lhs` and `rhs` through `alu_rrr`, using the opcode that `aluop_add`
;; selects for `ty`.
(decl add_reg (Type Reg Reg) Reg)
(rule (add_reg ty lhs rhs)
      (alu_rrr ty (aluop_add ty) lhs rhs))

;; Same opcode selection as `add_reg`, but built with
;; `alu_rrr_with_flags_paired` so the caller receives a `ProducesFlags`
;; instead of a bare result register.
(decl add_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (add_reg_with_flags_paired ty lhs rhs)
      (alu_rrr_with_flags_paired ty (aluop_add ty) lhs rhs))

;; Add through `alu_rr` with the opcode chosen by `aluop_add_sext32`.
;; NOTE(review): by its name, `rhs` is presumably a 32-bit value that the
;; instruction sign-extends -- confirm against `aluop_add_sext32`.
(decl add_reg_sext32 (Type Reg Reg) Reg)
(rule (add_reg_sext32 ty lhs rhs)
      (alu_rr ty (aluop_add_sext32 ty) lhs rhs))

Expand Down Expand Up @@ -3915,6 +3921,11 @@
(rule (add_logical_mem_zext32_with_flags_paired ty x y)
(alu_rx_with_flags_paired ty (aluop_add_logical_zext32 ty) x y))

;; Opcode selector for the add variant `Add128Cout`; only `$I128` is handled.
(decl vecop_add_logical_cout (Type) VecBinaryOp)
(rule (vecop_add_logical_cout $I128)
      (VecBinaryOp.Add128Cout))

;; Emit the `vecop_add_logical_cout` operation on `lhs` and `rhs` through
;; `vec_rrr`, returning the register it writes.
(decl vec_add_logical_cout (Type Reg Reg) Reg)
(rule (vec_add_logical_cout ty lhs rhs)
      (vec_rrr ty (vecop_add_logical_cout ty) lhs rhs))

;; Helpers for generating `sub` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -3935,6 +3946,10 @@
;; Subtract `rhs` from `lhs` through `alu_rrr`, using the opcode that
;; `aluop_sub` selects for `ty`.
(decl sub_reg (Type Reg Reg) Reg)
(rule (sub_reg ty lhs rhs)
      (alu_rrr ty (aluop_sub ty) lhs rhs))

;; Same opcode selection as `sub_reg`, but built with
;; `alu_rrr_with_flags_paired` so the caller receives a `ProducesFlags`
;; instead of a bare result register.
(decl sub_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (sub_reg_with_flags_paired ty lhs rhs)
      (alu_rrr_with_flags_paired ty (aluop_sub ty) lhs rhs))

;; Subtract through `alu_rr` with the opcode chosen by `aluop_sub_sext32`.
;; NOTE(review): by its name, `rhs` is presumably a 32-bit value that the
;; instruction sign-extends -- confirm against `aluop_sub_sext32`.
(decl sub_reg_sext32 (Type Reg Reg) Reg)
(rule (sub_reg_sext32 ty lhs rhs)
      (alu_rr ty (aluop_sub_sext32 ty) lhs rhs))

Expand Down Expand Up @@ -3970,6 +3985,10 @@
;; Logical subtract of `rhs` from `lhs` through `alu_rrr`, using the opcode
;; that `aluop_sub_logical` selects for `ty`.
(decl sub_logical_reg (Type Reg Reg) Reg)
(rule (sub_logical_reg ty lhs rhs)
      (alu_rrr ty (aluop_sub_logical ty) lhs rhs))

;; Same opcode selection as `sub_logical_reg`, but built with
;; `alu_rrr_with_flags_paired` so the caller receives a `ProducesFlags`
;; instead of a bare result register.
(decl sub_logical_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (sub_logical_reg_with_flags_paired ty lhs rhs)
      (alu_rrr_with_flags_paired ty (aluop_sub_logical ty) lhs rhs))

;; Logical subtract through `alu_rr` with the opcode chosen by
;; `aluop_sub_logical_zext32`.
;; NOTE(review): by its name, `rhs` is presumably a 32-bit value that the
;; instruction zero-extends -- confirm against `aluop_sub_logical_zext32`.
(decl sub_logical_reg_zext32 (Type Reg Reg) Reg)
(rule (sub_logical_reg_zext32 ty lhs rhs)
      (alu_rr ty (aluop_sub_logical_zext32 ty) lhs rhs))

Expand All @@ -3982,6 +4001,11 @@
;; Logical subtract of the memory operand `y` from `x` through `alu_rx`.
;;
;; The opcode comes from `aluop_sub_logical_zext32`, the same selector that
;; `sub_logical_reg_zext32` uses and the counterpart of the
;; `aluop_add_logical_zext32` selector used by the `add_logical_mem_zext32*`
;; helpers. Plain `aluop_sub_logical` is the selector of the non-extending
;; helpers (e.g. `sub_logical_reg`) and would make this helper identical to
;; the non-`zext32` memory form.
;; NOTE(review): assumes `alu_rx` accepts the opcode returned by
;; `aluop_sub_logical_zext32` -- confirm in the emitter.
(decl sub_logical_mem_zext32 (Type Reg MemArg) Reg)
(rule (sub_logical_mem_zext32 ty x y)
      (alu_rx ty (aluop_sub_logical_zext32 ty) x y))

;; Opcode selector for the subtract variant `Sub128Cout`; only `$I128` is
;; handled.
(decl vecop_sub_logical_cout (Type) VecBinaryOp)
(rule (vecop_sub_logical_cout $I128)
      (VecBinaryOp.Sub128Cout))

;; Emit the `vecop_sub_logical_cout` operation on `lhs` and `rhs` through
;; `vec_rrr`, returning the register it writes.
(decl vec_sub_logical_cout (Type Reg Reg) Reg)
(rule (vec_sub_logical_cout ty lhs rhs)
      (vec_rrr ty (vecop_sub_logical_cout ty) lhs rhs))

;; Helpers for generating `mul` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -4002,6 +4026,10 @@
;; Multiply `lhs` by `rhs` through `alu_rrr`, using the opcode that
;; `aluop_mul` selects for `ty`.
(decl mul_reg (Type Reg Reg) Reg)
(rule (mul_reg ty lhs rhs)
      (alu_rrr ty (aluop_mul ty) lhs rhs))

;; Same opcode selection as `mul_reg`, but built with
;; `alu_rrr_with_flags_paired` so the caller receives a `ProducesFlags`
;; instead of a bare result register.
(decl mul_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
(rule (mul_reg_with_flags_paired ty lhs rhs)
      (alu_rrr_with_flags_paired ty (aluop_mul ty) lhs rhs))

;; Multiply through `alu_rr` with the opcode chosen by `aluop_mul_sext32`.
;; NOTE(review): by its name, `rhs` is presumably a 32-bit value that the
;; instruction sign-extends -- confirm against `aluop_mul_sext32`.
(decl mul_reg_sext32 (Type Reg Reg) Reg)
(rule (mul_reg_sext32 ty lhs rhs)
      (alu_rr ty (aluop_mul_sext32 ty) lhs rhs))

Expand Down
6 changes: 4 additions & 2 deletions cranelift/codegen/src/isa/s390x/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1515,8 +1515,8 @@ impl Inst {
ALUOp::Sub64 => (0xb9e9, true), // SGRK
ALUOp::SubLogical32 => (0xb9fb, true), // SLRK
ALUOp::SubLogical64 => (0xb9eb, true), // SLGRK
ALUOp::Mul32 => (0xb9fd, true), // MSRKC
ALUOp::Mul64 => (0xb9ed, true), // MSGRKC
ALUOp::Mul32 => (0xb9fd, false), // MSRKC
ALUOp::Mul64 => (0xb9ed, false), // MSGRKC
ALUOp::And32 => (0xb9f4, true), // NRK
ALUOp::And64 => (0xb9e4, true), // NGRK
ALUOp::Orr32 => (0xb9f6, true), // ORK
Expand Down Expand Up @@ -2719,11 +2719,13 @@ impl Inst {
VecBinaryOp::Add32x4 => (0xe7f3, 2), // VAF
VecBinaryOp::Add64x2 => (0xe7f3, 3), // VAG
VecBinaryOp::Add128 => (0xe7f3, 4), // VAQ
VecBinaryOp::Add128Cout => (0xe7f1, 4), // VACCQ
VecBinaryOp::Sub8x16 => (0xe7f7, 0), // VSB
VecBinaryOp::Sub16x8 => (0xe7f7, 1), // VSH
VecBinaryOp::Sub32x4 => (0xe7f7, 2), // VSF
VecBinaryOp::Sub64x2 => (0xe7f7, 3), // VSG
VecBinaryOp::Sub128 => (0xe7f7, 4), // VSQ
VecBinaryOp::Sub128Cout => (0xe7f5, 4), // VSCBIQ
VecBinaryOp::Mul8x16 => (0xe7a2, 0), // VMLB
VecBinaryOp::Mul16x8 => (0xe7a2, 1), // VMLHW
VecBinaryOp::Mul32x4 => (0xe7a2, 2), // VMLF
Expand Down
6 changes: 4 additions & 2 deletions cranelift/codegen/src/isa/s390x/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1295,8 +1295,8 @@ impl Inst {
ALUOp::Sub64 => ("sgrk", true),
ALUOp::SubLogical32 => ("slrk", true),
ALUOp::SubLogical64 => ("slgrk", true),
ALUOp::Mul32 => ("msrkc", true),
ALUOp::Mul64 => ("msgrkc", true),
ALUOp::Mul32 => ("msrkc", false),
ALUOp::Mul64 => ("msgrkc", false),
ALUOp::And32 => ("nrk", true),
ALUOp::And64 => ("ngrk", true),
ALUOp::Orr32 => ("ork", true),
Expand Down Expand Up @@ -2511,11 +2511,13 @@ impl Inst {
VecBinaryOp::Add32x4 => "vaf",
VecBinaryOp::Add64x2 => "vag",
VecBinaryOp::Add128 => "vaq",
VecBinaryOp::Add128Cout => "vaccq",
VecBinaryOp::Sub8x16 => "vsb",
VecBinaryOp::Sub16x8 => "vsh",
VecBinaryOp::Sub32x4 => "vsf",
VecBinaryOp::Sub64x2 => "vsg",
VecBinaryOp::Sub128 => "vsq",
VecBinaryOp::Sub128Cout => "vscbiq",
VecBinaryOp::Mul8x16 => "vmlb",
VecBinaryOp::Mul16x8 => "vmlhw",
VecBinaryOp::Mul32x4 => "vmlf",
Expand Down
141 changes: 136 additions & 5 deletions cranelift/codegen/src/isa/s390x/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -4311,12 +4311,143 @@

;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; For 8 & 16 bit widths, there is no instruction set support, so we have to do a
;; wider add and split manually

;; Left-shift amount that moves a narrow value into the most-significant bits
;; of a 32-bit register: 32 minus the type's width. Defined for `$I8` and
;; `$I16` only.
(decl type_shift_up (Type) u8)
(rule (type_shift_up $I16)
      16)
(rule (type_shift_up $I8)
      24)

;; For integers that fit in 16 bits, shift both operands into the
;; most-significant bits of their 32-bit registers, apply `op`, and derive the
;; overflow byte from the resulting flags using `cond`. The result is then
;; shifted back down into the least-significant position.
;;
;; Arguments: the narrow type, the ALU opcode to apply, the two operands, and
;; the condition that signals overflow. Returns the pair (result, overflow).
(decl overflow_and_result_from_shifted (Type ALUOp Reg Reg Cond) InstOutput)
(rule (overflow_and_result_from_shifted (fits_in_16 ty) op x y cond)
(let ((x_shifted Reg (lshl_imm $I32 x (type_shift_up ty)))
(y_shifted Reg (lshl_imm $I32 y (type_shift_up ty)))
(producer ProducesFlags (alu_rrr_with_flags_paired ty op x_shifted y_shifted))
;; The flags are consumed here; the arithmetic result is read from the same
;; producer below via `produces_flags_get_reg`.
(overflow Reg (lower_bool $I8 (bool (produces_flags_ignore producer) cond)))
(out Reg (lshr_imm $I32 (produces_flags_get_reg producer) (type_shift_up ty))))
(output_pair out overflow)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is more of a cosmetic issue, but I think I'd prefer to keep these types of helpers in inst.isle - see e.g. what we're doing for the ..._sat_reg helpers. Ideally, we'd have an interface between inst.isle and lower.isle that looks more symmetrical across the different types ...


;; 8/16-bit unsigned add: run the 32-bit logical add on the shifted operands
;; and report overflow under condition mask 3.
(rule 1 (lower (has_type (fits_in_16 ty) (uadd_overflow lhs rhs)))
      (overflow_and_result_from_shifted
        ty
        (aluop_add_logical $I32)
        lhs
        rhs
        (mask_as_cond 3)))

;; Turn a flag-producing instruction into the (result, overflow) output pair:
;; the result is the register written by `producer`, and the overflow byte is
;; an `$I8` boolean materialized from its flags under `cond`.
(decl overflow_and_result_from_producer (ProducesFlags Cond) InstOutput)
(rule (overflow_and_result_from_producer producer cond)
      (let ((result Reg (produces_flags_get_reg producer))
            (overflow Reg
              (lower_bool $I8 (bool (produces_flags_ignore producer) cond))))
        (output_pair result overflow)))

;; For 32 & 64 bit widths, the overflow byte is derived directly from the
;; flags of the logical add (condition mask 3), in the same way as the
;; `usub_overflow` / `sadd_overflow` / `ssub_overflow` rules for these widths.
;; This replaces the earlier add-then-compare sequence; keeping both bodies
;; would leave a stray top-level expression after the rule.
(rule 0 (lower (has_type (ty_32_or_64 ty) (uadd_overflow x y)))
      (overflow_and_result_from_producer
        (add_logical_reg_with_flags_paired ty x y)
        (mask_as_cond 3)))

;; 128-bit unsigned add: the sum comes from `vec_add`; the overflow value is
;; lane 1 (viewed as `$I64X2`) of the separate `vec_add_logical_cout` result.
(rule 2 (lower (has_type $I128 (uadd_overflow x y)))
      (let ((sum Reg (vec_add $I128 x y))
            (carry_vec Reg (vec_add_logical_cout $I128 x y))
            (carry Reg (vec_extract_lane $I64X2 carry_vec 1 (zero_reg))))
        (output_pair sum carry)))

;;;; Rules for `usub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Note: s390x stores and computes the borrow bit as 0 when an overflow is
;; present, so all of the conditions and computed borrows are inverted.

;; 8/16-bit unsigned subtract: run the 32-bit logical subtract on the shifted
;; operands and report overflow under the inverse of condition mask 3.
(rule 1 (lower (has_type (fits_in_16 ty) (usub_overflow lhs rhs)))
      (overflow_and_result_from_shifted
        ty
        (aluop_sub_logical $I32)
        lhs
        rhs
        (invert_cond (mask_as_cond 3))))

;; 32/64-bit unsigned subtract: take result and flags from the logical
;; subtract; overflow is the inverse of condition mask 3.
(rule 0 (lower (has_type (ty_32_or_64 ty) (usub_overflow lhs rhs)))
      (overflow_and_result_from_producer
        (sub_logical_reg_with_flags_paired ty lhs rhs)
        (invert_cond (mask_as_cond 3))))

;; 128-bit unsigned subtract: the difference comes from `vec_sub`; the
;; overflow value is lane 1 (viewed as `$I64X2`) of the
;; `vec_sub_logical_cout` result, XORed with 1 to invert it.
(rule 2 (lower (has_type $I128 (usub_overflow x y)))
      (let ((diff Reg (vec_sub $I128 x y))
            (borrow_vec Reg (vec_sub_logical_cout $I128 x y))
            (borrow_ind Reg (vec_extract_lane $I64X2 borrow_vec 1 (zero_reg))))
        (output_pair
          diff
          (xor_uimm32shifted $I8 borrow_ind (uimm32shifted 1 0)))))

;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; 8/16-bit signed add: run the 32-bit add on the shifted operands and report
;; overflow under condition mask 1.
(rule 1 (lower (has_type (fits_in_16 ty) (sadd_overflow lhs rhs)))
      (overflow_and_result_from_shifted
        ty
        (aluop_add $I32)
        lhs
        rhs
        (mask_as_cond 1)))

;; 32/64-bit signed add: take result and flags from the add instruction;
;; overflow is condition mask 1.
(rule 0 (lower (has_type (ty_32_or_64 ty) (sadd_overflow lhs rhs)))
      (overflow_and_result_from_producer
        (add_reg_with_flags_paired ty lhs rhs)
        (mask_as_cond 1)))

;; 128-bit signed add. Lane 0 (viewed as `$I64X2`) of each operand and of the
;; sum is extracted; the overflow value is bit 63 of
;; `(x_top ^ sum_top) & (y_top ^ sum_top)`, i.e. it is set when the sum's top
;; bit differs from the top bit of both operands.
(rule 2 (lower (has_type $I128 (sadd_overflow x y)))
      (let ((sum Reg (vec_add $I128 x y))
            (sum_top Reg (vec_extract_lane $I64X2 sum 0 (zero_reg)))
            (x_top Reg (vec_extract_lane $I64X2 x 0 (zero_reg)))
            (y_top Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
            (sign_mix Reg
              (and_reg $I64
                (xor_reg $I64 x_top sum_top)
                (xor_reg $I64 y_top sum_top))))
        (output_pair sum (lshr_imm $I64 sign_mix 63))))

;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; 8/16-bit signed subtract: run the 32-bit subtract on the shifted operands
;; and report overflow under condition mask 1.
(rule 1 (lower (has_type (fits_in_16 ty) (ssub_overflow lhs rhs)))
      (overflow_and_result_from_shifted
        ty
        (aluop_sub $I32)
        lhs
        rhs
        (mask_as_cond 1)))

;; Use flags generated by the subtract instruction to handle overflow
;; (condition mask 1).
(rule 0 (lower (has_type (ty_32_or_64 ty) (ssub_overflow lhs rhs)))
      (overflow_and_result_from_producer
        (sub_reg_with_flags_paired ty lhs rhs)
        (mask_as_cond 1)))

;; 128-bit signed subtract. Lane 0 (viewed as `$I64X2`) of each operand and of
;; the difference is extracted; the overflow value is bit 63 of
;; `(x_top ^ diff_top) & (x_top ^ y_top)`, i.e. it is set when the operands'
;; top bits differ and the difference's top bit differs from that of `x`.
(rule 2 (lower (has_type $I128 (ssub_overflow x y)))
      (let ((diff Reg (vec_sub $I128 x y))
            (diff_top Reg (vec_extract_lane $I64X2 diff 0 (zero_reg)))
            (x_top Reg (vec_extract_lane $I64X2 x 0 (zero_reg)))
            (y_top Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
            (sign_mix Reg
              (and_reg $I64
                (xor_reg $I64 x_top diff_top)
                (xor_reg $I64 x_top y_top))))
        (output_pair diff (lshr_imm $I64 sign_mix 63))))

;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Unsigned multiply for types up to 32 bits: zero-extend both operands to 64
;; bits and multiply. Overflow is reported when the product shifted right by
;; the type's bit width is non-zero.
(rule 1 (lower (has_type (fits_in_32 ty) (umul_overflow x y)))
      (let ((product Reg (mul_reg $I64 (zext64_reg ty y) (zext64_reg ty x)))
            (excess Reg (lshr_imm $I64 product (ty_bits ty)))
            (overflow Reg
              (lower_bool $I8
                (invert_bool
                  (bool (icmps_simm16 $I32 excess 0)
                        (intcc_as_cond (IntCC.Equal)))))))
        (output_pair product overflow)))

;; 64-bit unsigned multiply: form the widened product with `umul_wide`. The
;; low half of the pair is the result; overflow is reported when the high half
;; is non-zero.
(rule 0 (lower (has_type ty @ $I64 (umul_overflow x y)))
      (let ((wide RegPair (umul_wide y x))
            (low Reg (regpair_lo wide))
            (overflow Reg
              (lower_bool $I8
                (invert_bool
                  (bool (icmps_simm16 ty (regpair_hi wide) 0)
                        (intcc_as_cond (IntCC.Equal)))))))
        (output_pair low overflow)))

;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; For integers that fit in 16 bits, only one operand (`x`) is shifted into
;; the most-significant bits of its 32-bit register; the other (`y`) is
;; sign-extended to 32 bits. The 32-bit multiply's condition code (mask 1)
;; supplies the overflow, and the product is shifted back down into the
;; least-significant position to produce the result.
(rule 1 (lower (has_type (fits_in_16 ty) (smul_overflow x y)))
(let ((y_ext Reg (sext32_reg ty y))
(x_shifted Reg (lshl_imm $I32 x (type_shift_up ty)))
(producer ProducesFlags
(mul_reg_with_flags_paired $I32 x_shifted y_ext))
;; The flags are consumed here; the product is read from the same producer
;; below via `produces_flags_get_reg`.
(overflow Reg (lower_bool $I8 (bool
(produces_flags_ignore producer)
(mask_as_cond 1))))
(out Reg (lshr_imm $I32
(produces_flags_get_reg producer)
(type_shift_up ty))))
(output_pair out overflow)))

;; Use flags generated by the multiply instruction to handle overflow
;; (condition mask 1).
(rule 0 (lower (has_type (ty_32_or_64 ty) (smul_overflow lhs rhs)))
      (overflow_and_result_from_producer
        (mul_reg_with_flags_paired ty lhs rhs)
        (mask_as_cond 1)))

;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
28 changes: 14 additions & 14 deletions cranelift/filetests/filetests/isa/s390x/arithmetic.clif
Original file line number Diff line number Diff line change
Expand Up @@ -956,8 +956,8 @@ block0(v0: i128, v1: i128):
; vlgvg %r12, %v3, 1
; lgr %r3, %r5
; mlgr %r2, %r12
; msgr %r5, %r10
; msgr %r4, %r12
; msgrkc %r5, %r5, %r10
; msgrkc %r4, %r4, %r12
; agrk %r2, %r5, %r2
; agrk %r2, %r4, %r2
; vlvgp %v25, %r2, %r3
Expand All @@ -979,8 +979,8 @@ block0(v0: i128, v1: i128):
; vlgvg %r12, %v3, 1
; lgr %r3, %r5
; mlgr %r2, %r12
; msgr %r5, %r10
; msgr %r4, %r12
; msgrkc %r5, %r5, %r10
; msgrkc %r4, %r4, %r12
; agrk %r2, %r5, %r2
; agrk %r2, %r4, %r2
; vlvgp %v25, %r2, %r3
Expand All @@ -997,12 +997,12 @@ block0(v0: i64, v1: i64):

; VCode:
; block0:
; msgr %r2, %r3
; msgrkc %r2, %r2, %r3
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; msgr %r2, %r3
; msgrkc %r2, %r2, %r3
; br %r14

function %imul_i64_imm16(i64) -> i64 {
Expand Down Expand Up @@ -1098,12 +1098,12 @@ block0(v0: i32, v1: i32):

; VCode:
; block0:
; msr %r2, %r3
; msrkc %r2, %r2, %r3
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; msr %r2, %r3
; msrkc %r2, %r2, %r3
; br %r14

function %imul_i32_imm16(i32) -> i32 {
Expand Down Expand Up @@ -1216,12 +1216,12 @@ block0(v0: i16, v1: i16):

; VCode:
; block0:
; msr %r2, %r3
; msrkc %r2, %r2, %r3
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; msr %r2, %r3
; msrkc %r2, %r2, %r3
; br %r14

function %imul_i16_imm(i16) -> i16 {
Expand Down Expand Up @@ -1266,12 +1266,12 @@ block0(v0: i8, v1: i8):

; VCode:
; block0:
; msr %r2, %r3
; msrkc %r2, %r2, %r3
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; msr %r2, %r3
; msrkc %r2, %r2, %r3
; br %r14

function %imul_i8_imm(i8) -> i8 {
Expand Down Expand Up @@ -1301,13 +1301,13 @@ block0(v0: i8, v1: i64):
; VCode:
; block0:
; llc %r4, 0(%r3)
; msr %r2, %r4
; msrkc %r2, %r2, %r4
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; llc %r4, 0(%r3) ; trap: heap_oob
; msr %r2, %r4
; msrkc %r2, %r2, %r4
; br %r14

function %umulhi_i64(i64, i64) -> i64 {
Expand Down
Loading
Loading