diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index cabbffc34348..405caffac681 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -2738,7 +2738,7 @@ (rule (mul_hi ty signed src1 src2) (let ((dst_lo WritableGpr (temp_writable_gpr)) (dst_hi WritableGpr (temp_writable_gpr)) - (size OperandSize (operand_size_of_type_32_64 ty)) + (size OperandSize (raw_operand_size_of_type ty)) (_ Unit (emit (MInst.MulHi size signed src1 @@ -3587,6 +3587,25 @@ (rule (bitcast_gpr_to_xmm $I64 src) (gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64))) +;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl stack_addr_impl (StackSlot Offset32) Gpr) +(rule (stack_addr_impl stack_slot offset) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (abi_stackslot_addr dst stack_slot offset)))) + dst)) + +;;;; Division/Remainders ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl emit_div_or_rem (DivOrRemKind Type WritableGpr Gpr Gpr) Unit) +(extern constructor emit_div_or_rem emit_div_or_rem) + +(decl div_or_rem (DivOrRemKind Value Value) Gpr) +(rule (div_or_rem kind a @ (value_type ty) b) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit_div_or_rem kind ty dst a b))) + dst)) + ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (convert Gpr InstOutput output_gpr) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index eddfc6cd948b..f364747a917e 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1575,7 +1575,7 @@ impl fmt::Display for ShiftKind { } /// What kind of division or remainer instruction this is? -#[derive(Clone)] +#[derive(Clone, Eq, PartialEq)] pub enum DivOrRemKind { SignedDiv, UnsignedDiv, diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index b9a3a94ffbcd..ee01f41a1346 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -49,6 +49,23 @@ impl Inst { dst: WritableXmm::from_writable_reg(dst).unwrap(), } } + + fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst { + debug_assert!(size.is_one_of(&[ + OperandSize::Size16, + OperandSize::Size32, + OperandSize::Size64 + ])); + rhs.assert_regclass_is(RegClass::Int); + Inst::MulHi { + size, + signed, + src1: Gpr::new(regs::rax()).unwrap(), + src2: GprMem::new(rhs).unwrap(), + dst_lo: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + dst_hi: WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + } + } } #[test] diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index a7f221c0261c..83cca60a32f6 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -208,23 +208,6 @@ impl Inst { } } - pub(crate) fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst { - debug_assert!(size.is_one_of(&[ - OperandSize::Size16, - OperandSize::Size32, - OperandSize::Size64 - ])); - rhs.assert_regclass_is(RegClass::Int); - Inst::MulHi { - size, - signed, - src1: Gpr::new(regs::rax()).unwrap(), - src2: GprMem::new(rhs).unwrap(), - dst_lo: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), - dst_hi: WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), - } - } - pub(crate) fn checked_div_or_rem_seq( kind: DivOrRemKind, size: OperandSize, diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 10cdd7fd861a..98fffdb53ff2 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3426,3 +3426,62 @@ (rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2)))) (x64_roundpd a (RoundImm.RoundZero))) + +;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (stack_addr stack_slot offset)) + (stack_addr_impl stack_slot offset)) + +;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (udiv a @ (value_type ty) b)) + (div_or_rem (DivOrRemKind.UnsignedDiv) a b)) + +;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (sdiv a @ (value_type ty) b)) + (div_or_rem (DivOrRemKind.SignedDiv) a b)) + +;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (urem a @ (value_type ty) b)) + (div_or_rem (DivOrRemKind.UnsignedRem) a b)) + +;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (srem a @ (value_type ty) b)) + (div_or_rem (DivOrRemKind.SignedRem) a b)) + +;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (umulhi a @ (value_type $I16) b)) + (let ((res ValueRegs (mul_hi $I16 $false a b)) + (hi Gpr (value_regs_get_gpr res 1))) + hi)) + +(rule (lower (umulhi a @ (value_type $I32) b)) + (let ((res ValueRegs (mul_hi $I32 $false a b)) + (hi Gpr (value_regs_get_gpr res 1))) + hi)) + +(rule (lower (umulhi a @ (value_type $I64) b)) + (let ((res ValueRegs (mul_hi $I64 $false a b)) + (hi Gpr (value_regs_get_gpr res 1))) + hi)) + +;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (smulhi a @ (value_type $I16) b)) + (let ((res ValueRegs (mul_hi $I16 $true a b)) + (hi Gpr (value_regs_get_gpr res 1))) + hi)) + +(rule (lower (smulhi a @ (value_type $I32) b)) + (let ((res ValueRegs (mul_hi $I32 $true a b)) + (hi Gpr (value_regs_get_gpr res 1))) + hi)) + +(rule (lower (smulhi a @ (value_type $I64) b)) + (let ((res ValueRegs (mul_hi $I64 $true a b)) + (hi Gpr (value_regs_get_gpr res 1))) + hi)) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 3fab7062a491..c074521c48cd 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -14,7 +14,6 @@ use crate::machinst::*; use crate::result::CodegenResult; use crate::settings::{Flags, TlsModel}; use smallvec::SmallVec; -use std::convert::TryFrom; use target_lexicon::Triple; //============================================================================= @@ -574,150 +573,19 @@ fn lower_insn_to_regs( | Opcode::Ceil | Opcode::Floor | Opcode::Nearest - | Opcode::Trunc => { + | Opcode::Trunc + | Opcode::StackAddr + | Opcode::Udiv + | Opcode::Urem + | Opcode::Sdiv + | Opcode::Srem + | Opcode::Umulhi + | Opcode::Smulhi => { implemented_in_isle(ctx); } Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), - Opcode::StackAddr => { - let (stack_slot, offset) = match *ctx.data(insn) { - InstructionData::StackLoad { - opcode: Opcode::StackAddr, - stack_slot, - offset, - } => (stack_slot, offset), - _ => unreachable!(), - }; - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let offset: i32 = offset.into(); - let inst = - ctx.abi() - .sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst); - ctx.emit(inst); - } - - Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => { - let kind = match op { - Opcode::Udiv => DivOrRemKind::UnsignedDiv, - Opcode::Sdiv => DivOrRemKind::SignedDiv, - Opcode::Urem => DivOrRemKind::UnsignedRem, - Opcode::Srem => DivOrRemKind::SignedRem, - _ => unreachable!(), - }; - let is_div = kind.is_div(); - - let input_ty = ctx.input_ty(insn, 0); - let size = OperandSize::from_ty(input_ty); - - let dividend = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - ctx.emit(Inst::gen_move( - Writable::from_reg(regs::rax()), - dividend, - input_ty, - )); - - // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. - if flags.avoid_div_traps() || op == Opcode::Srem { - // A vcode meta-instruction is used to lower the inline checks, since they embed - // pc-relative offsets that must not change, thus requiring regalloc to not - // interfere by introducing spills and reloads. - // - // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that - // regalloc is aware of the coalescing opportunity between rax/rdx and the - // destination register. - let divisor = put_input_in_reg(ctx, inputs[1]); - - let divisor_copy = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - ctx.emit(Inst::gen_move(divisor_copy, divisor, types::I64)); - - let tmp = if op == Opcode::Sdiv && size == OperandSize::Size64 { - Some(ctx.alloc_tmp(types::I64).only_reg().unwrap()) - } else { - None - }; - // TODO use xor - ctx.emit(Inst::imm( - OperandSize::Size32, - 0, - Writable::from_reg(regs::rdx()), - )); - ctx.emit(Inst::checked_div_or_rem_seq(kind, size, divisor_copy, tmp)); - } else { - // We don't want more than one trap record for a single instruction, - // so let's not allow the "mem" case (load-op merging) here; force - // divisor into a register instead. - let divisor = RegMem::reg(put_input_in_reg(ctx, inputs[1])); - - // Fill in the high parts: - if kind.is_signed() { - // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for - // signed opcodes. - ctx.emit(Inst::sign_extend_data(size)); - } else if input_ty == types::I8 { - ctx.emit(Inst::movzx_rm_r( - ExtMode::BL, - RegMem::reg(regs::rax()), - Writable::from_reg(regs::rax()), - )); - } else { - // zero for unsigned opcodes. - ctx.emit(Inst::imm( - OperandSize::Size64, - 0, - Writable::from_reg(regs::rdx()), - )); - } - - // Emit the actual idiv. - ctx.emit(Inst::div(size, kind.is_signed(), divisor)); - } - - // Move the result back into the destination reg. - if is_div { - // The quotient is in rax. - ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty)); - } else { - if size == OperandSize::Size8 { - // The remainder is in AH. Right-shift by 8 bits then move from rax. - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(8), - Writable::from_reg(regs::rax()), - )); - ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty)); - } else { - // The remainder is in rdx. - ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty)); - } - } - } - - Opcode::Umulhi | Opcode::Smulhi => { - let input_ty = ctx.input_ty(insn, 0); - - let lhs = put_input_in_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem(ctx, inputs[1]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - // Move lhs in %rax. - ctx.emit(Inst::gen_move( - Writable::from_reg(regs::rax()), - lhs, - input_ty, - )); - - // Emit the actual mul or imul. - let signed = op == Opcode::Smulhi; - ctx.emit(Inst::mul_hi(OperandSize::from_ty(input_ty), signed, rhs)); - - // Read the result from the high part (stored in %rdx). - ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty)); - } - Opcode::GetPinnedReg => { let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64)); diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 4d0dfea4a4fd..504fd18bf411 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -848,6 +848,108 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { self.lower_ctx .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK)) } + + fn emit_div_or_rem( + &mut self, + kind: &DivOrRemKind, + ty: Type, + dst: WritableGpr, + dividend: Gpr, + divisor: Gpr, + ) { + let is_div = kind.is_div(); + let size = OperandSize::from_ty(ty); + + self.lower_ctx.emit(MInst::gen_move( + Writable::from_reg(regs::rax()), + dividend.to_reg(), + ty, + )); + + // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. + if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem { + // A vcode meta-instruction is used to lower the inline checks, since they embed + // pc-relative offsets that must not change, thus requiring regalloc to not + // interfere by introducing spills and reloads. + // + // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that + // regalloc is aware of the coalescing opportunity between rax/rdx and the + // destination register. + let divisor_copy = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + self.lower_ctx + .emit(MInst::gen_move(divisor_copy, divisor.to_reg(), types::I64)); + + let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 { + Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap()) + } else { + None + }; + // TODO use xor + self.lower_ctx.emit(MInst::imm( + OperandSize::Size32, + 0, + Writable::from_reg(regs::rdx()), + )); + self.lower_ctx.emit(MInst::checked_div_or_rem_seq( + kind.clone(), + size, + divisor_copy, + tmp, + )); + } else { + // We don't want more than one trap record for a single instruction, + // so let's not allow the "mem" case (load-op merging) here; force + // divisor into a register instead. + let divisor = RegMem::reg(divisor.to_reg()); + + // Fill in the high parts: + if kind.is_signed() { + // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for + // signed opcodes. + self.lower_ctx.emit(MInst::sign_extend_data(size)); + } else if ty == types::I8 { + self.lower_ctx.emit(MInst::movzx_rm_r( + ExtMode::BL, + RegMem::reg(regs::rax()), + Writable::from_reg(regs::rax()), + )); + } else { + // zero for unsigned opcodes. + self.lower_ctx.emit(MInst::imm( + OperandSize::Size64, + 0, + Writable::from_reg(regs::rdx()), + )); + } + + // Emit the actual idiv. + self.lower_ctx + .emit(MInst::div(size, kind.is_signed(), divisor)); + } + + // Move the result back into the destination reg. + if is_div { + // The quotient is in rax. + self.lower_ctx + .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty)); + } else { + if size == OperandSize::Size8 { + // The remainder is in AH. Right-shift by 8 bits then move from rax. + self.lower_ctx.emit(MInst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftRightLogical, + Some(8), + Writable::from_reg(regs::rax()), + )); + self.lower_ctx + .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty)); + } else { + // The remainder is in rdx. + self.lower_ctx + .emit(MInst::gen_move(dst.to_writable_reg(), regs::rdx(), ty)); + } + } + } } impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif new file mode 100644 index 000000000000..c0f486c71f1c --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif @@ -0,0 +1,67 @@ +test compile precise-output +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sdiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cbw %al, %dl +; idiv %al, (none), %sil, %al, %dl +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sdiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cwd %ax, %dx +; idiv %ax, %dx, %si, %ax, %dx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cdq %eax, %edx +; idiv %eax, %edx, %esi, %eax, %edx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cqo %rax, %rdx +; idiv %rax, %rdx, %rsi, %rax, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/smulhi.clif b/cranelift/filetests/filetests/isa/x64/smulhi.clif new file mode 100644 index 000000000000..0958ce301b62 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/smulhi.clif @@ -0,0 +1,51 @@ +test compile precise-output +target x86_64 + +function %f1(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = smulhi v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; imul %ax, %si, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = smulhi v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; imul %eax, %esi, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = smulhi v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; imul %rax, %rsi, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif new file mode 100644 index 000000000000..99b137d56665 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/srem.clif @@ -0,0 +1,71 @@ +test compile precise-output +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif new file mode 100644 index 000000000000..a49b5a027ef5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/udiv.clif @@ -0,0 +1,67 @@ +test compile precise-output +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = udiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movzbl %al, %eax +; div %al, (none), %sil, %al, %dl +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = udiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; div %ax, %dx, %si, %ax, %dx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; div %eax, %edx, %esi, %eax, %edx +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; div %rax, %rdx, %rsi, %rax, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/umulhi.clif b/cranelift/filetests/filetests/isa/x64/umulhi.clif new file mode 100644 index 000000000000..c5b0b73a26ac --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/umulhi.clif @@ -0,0 +1,51 @@ +test compile precise-output +target x86_64 + +function %f1(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = umulhi v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; mul %ax, %si, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = umulhi v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; mul %eax, %esi, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = umulhi v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; mul %rax, %rsi, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif new file mode 100644 index 000000000000..5f4e80251f76 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/urem.clif @@ -0,0 +1,71 @@ +test compile precise-output +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = urem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movzbl %al, %eax +; div %al, (none), %sil, %al, %dl +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = urem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; div %ax, %dx, %si, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; div %eax, %edx, %esi, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem v0, v1 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $0, %edx +; div %rax, %rdx, %rsi, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +