diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index cabbffc34348..405caffac681 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -2738,7 +2738,7 @@
 (rule (mul_hi ty signed src1 src2)
       (let ((dst_lo WritableGpr (temp_writable_gpr))
             (dst_hi WritableGpr (temp_writable_gpr))
-            (size OperandSize (operand_size_of_type_32_64 ty))
+            (size OperandSize (raw_operand_size_of_type ty))
             (_ Unit (emit (MInst.MulHi size
                                        signed
                                        src1
@@ -3587,6 +3587,25 @@
 (rule (bitcast_gpr_to_xmm $I64 src)
       (gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))
 
+;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl stack_addr_impl (StackSlot Offset32) Gpr)
+(rule (stack_addr_impl slot off)
+      (let ((addr WritableGpr (temp_writable_gpr))
+            (_ Unit (emit (abi_stackslot_addr addr slot off))))
+        addr))
+
+;;;; Division/Remainders ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl emit_div_or_rem (DivOrRemKind Type WritableGpr Gpr Gpr) Unit)
+(extern constructor emit_div_or_rem emit_div_or_rem)
+
+(decl div_or_rem (DivOrRemKind Value Value) Gpr)
+(rule (div_or_rem op dividend @ (value_type int_ty) divisor)
+      (let ((result WritableGpr (temp_writable_gpr))
+            (_ Unit (emit_div_or_rem op int_ty result dividend divisor)))
+        result))
+
 ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (convert Gpr InstOutput output_gpr)
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index eddfc6cd948b..f364747a917e 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -1575,7 +1575,7 @@ impl fmt::Display for ShiftKind {
 }
 
 /// What kind of division or remainer instruction this is?
-#[derive(Clone)]
+#[derive(Clone, Eq, PartialEq)]
 pub enum DivOrRemKind {
     SignedDiv,
     UnsignedDiv,
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index b9a3a94ffbcd..ee01f41a1346 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -49,6 +49,23 @@ impl Inst {
             dst: WritableXmm::from_writable_reg(dst).unwrap(),
         }
     }
+
+    fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst {
+        // Only 16-, 32- and 64-bit operand sizes are accepted here.
+        let sizes = [OperandSize::Size16, OperandSize::Size32, OperandSize::Size64];
+        debug_assert!(size.is_one_of(&sizes));
+        rhs.assert_regclass_is(RegClass::Int);
+        // Fixed registers: rax is the first source and the low result, rdx the high result.
+        let gpr = |reg| Gpr::new(reg).unwrap();
+        Inst::MulHi {
+            size,
+            signed,
+            src1: gpr(regs::rax()),
+            src2: GprMem::new(rhs).unwrap(),
+            dst_lo: WritableGpr::from_reg(gpr(regs::rax())),
+            dst_hi: WritableGpr::from_reg(gpr(regs::rdx())),
+        }
+    }
 }
 
 #[test]
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index a7f221c0261c..83cca60a32f6 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -208,23 +208,6 @@ impl Inst {
         }
     }
 
-    pub(crate) fn mul_hi(size: OperandSize, signed: bool, rhs: RegMem) -> Inst {
-        debug_assert!(size.is_one_of(&[
-            OperandSize::Size16,
-            OperandSize::Size32,
-            OperandSize::Size64
-        ]));
-        rhs.assert_regclass_is(RegClass::Int);
-        Inst::MulHi {
-            size,
-            signed,
-            src1: Gpr::new(regs::rax()).unwrap(),
-            src2: GprMem::new(rhs).unwrap(),
-            dst_lo: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
-            dst_hi: WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
-        }
-    }
-
     pub(crate) fn checked_div_or_rem_seq(
         kind: DivOrRemKind,
         size: OperandSize,
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 10cdd7fd861a..98fffdb53ff2 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -3426,3 +3426,62 @@
 
 (rule (lower (has_type (use_sse41) (trunc a @ (value_type $F64X2))))
       (x64_roundpd a (RoundImm.RoundZero)))
+
+;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (stack_addr slot off))
+      (stack_addr_impl slot off))
+
+;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (udiv a b))
+      (div_or_rem (DivOrRemKind.UnsignedDiv) a b))
+
+;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (sdiv a b))
+      (div_or_rem (DivOrRemKind.SignedDiv) a b))
+
+;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (urem a b))
+      (div_or_rem (DivOrRemKind.UnsignedRem) a b))
+
+;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (srem a b))
+      (div_or_rem (DivOrRemKind.SignedRem) a b))
+
+;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (umulhi x @ (value_type $I16) y))
+      (let ((prod ValueRegs (mul_hi $I16 $false x y))
+            (upper Gpr (value_regs_get_gpr prod 1)))
+        upper))
+
+(rule (lower (umulhi x @ (value_type $I32) y))
+      (let ((prod ValueRegs (mul_hi $I32 $false x y))
+            (upper Gpr (value_regs_get_gpr prod 1)))
+        upper))
+
+(rule (lower (umulhi x @ (value_type $I64) y))
+      (let ((prod ValueRegs (mul_hi $I64 $false x y))
+            (upper Gpr (value_regs_get_gpr prod 1)))
+        upper))
+
+;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (smulhi x @ (value_type $I16) y))
+      (let ((prod ValueRegs (mul_hi $I16 $true x y))
+            (upper Gpr (value_regs_get_gpr prod 1)))
+        upper))
+
+(rule (lower (smulhi x @ (value_type $I32) y))
+      (let ((prod ValueRegs (mul_hi $I32 $true x y))
+            (upper Gpr (value_regs_get_gpr prod 1)))
+        upper))
+
+(rule (lower (smulhi x @ (value_type $I64) y))
+      (let ((prod ValueRegs (mul_hi $I64 $true x y))
+            (upper Gpr (value_regs_get_gpr prod 1)))
+        upper))
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 3fab7062a491..c074521c48cd 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -14,7 +14,6 @@ use crate::machinst::*;
 use crate::result::CodegenResult;
 use crate::settings::{Flags, TlsModel};
 use smallvec::SmallVec;
-use std::convert::TryFrom;
 use target_lexicon::Triple;
 
 //=============================================================================
@@ -574,150 +573,19 @@ fn lower_insn_to_regs(
         | Opcode::Ceil
         | Opcode::Floor
         | Opcode::Nearest
-        | Opcode::Trunc => {
+        | Opcode::Trunc
+        | Opcode::StackAddr
+        | Opcode::Udiv
+        | Opcode::Urem
+        | Opcode::Sdiv
+        | Opcode::Srem
+        | Opcode::Umulhi
+        | Opcode::Smulhi => {
             implemented_in_isle(ctx);
         }
 
         Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"),
 
-        Opcode::StackAddr => {
-            let (stack_slot, offset) = match *ctx.data(insn) {
-                InstructionData::StackLoad {
-                    opcode: Opcode::StackAddr,
-                    stack_slot,
-                    offset,
-                } => (stack_slot, offset),
-                _ => unreachable!(),
-            };
-            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let offset: i32 = offset.into();
-            let inst =
-                ctx.abi()
-                    .sized_stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
-            ctx.emit(inst);
-        }
-
-        Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
-            let kind = match op {
-                Opcode::Udiv => DivOrRemKind::UnsignedDiv,
-                Opcode::Sdiv => DivOrRemKind::SignedDiv,
-                Opcode::Urem => DivOrRemKind::UnsignedRem,
-                Opcode::Srem => DivOrRemKind::SignedRem,
-                _ => unreachable!(),
-            };
-            let is_div = kind.is_div();
-
-            let input_ty = ctx.input_ty(insn, 0);
-            let size = OperandSize::from_ty(input_ty);
-
-            let dividend = put_input_in_reg(ctx, inputs[0]);
-            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(regs::rax()),
-                dividend,
-                input_ty,
-            ));
-
-            // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
-            if flags.avoid_div_traps() || op == Opcode::Srem {
-                // A vcode meta-instruction is used to lower the inline checks, since they embed
-                // pc-relative offsets that must not change, thus requiring regalloc to not
-                // interfere by introducing spills and reloads.
-                //
-                // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
-                // regalloc is aware of the coalescing opportunity between rax/rdx and the
-                // destination register.
-                let divisor = put_input_in_reg(ctx, inputs[1]);
-
-                let divisor_copy = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                ctx.emit(Inst::gen_move(divisor_copy, divisor, types::I64));
-
-                let tmp = if op == Opcode::Sdiv && size == OperandSize::Size64 {
-                    Some(ctx.alloc_tmp(types::I64).only_reg().unwrap())
-                } else {
-                    None
-                };
-                // TODO use xor
-                ctx.emit(Inst::imm(
-                    OperandSize::Size32,
-                    0,
-                    Writable::from_reg(regs::rdx()),
-                ));
-                ctx.emit(Inst::checked_div_or_rem_seq(kind, size, divisor_copy, tmp));
-            } else {
-                // We don't want more than one trap record for a single instruction,
-                // so let's not allow the "mem" case (load-op merging) here; force
-                // divisor into a register instead.
-                let divisor = RegMem::reg(put_input_in_reg(ctx, inputs[1]));
-
-                // Fill in the high parts:
-                if kind.is_signed() {
-                    // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
-                    // signed opcodes.
-                    ctx.emit(Inst::sign_extend_data(size));
-                } else if input_ty == types::I8 {
-                    ctx.emit(Inst::movzx_rm_r(
-                        ExtMode::BL,
-                        RegMem::reg(regs::rax()),
-                        Writable::from_reg(regs::rax()),
-                    ));
-                } else {
-                    // zero for unsigned opcodes.
-                    ctx.emit(Inst::imm(
-                        OperandSize::Size64,
-                        0,
-                        Writable::from_reg(regs::rdx()),
-                    ));
-                }
-
-                // Emit the actual idiv.
-                ctx.emit(Inst::div(size, kind.is_signed(), divisor));
-            }
-
-            // Move the result back into the destination reg.
-            if is_div {
-                // The quotient is in rax.
-                ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
-            } else {
-                if size == OperandSize::Size8 {
-                    // The remainder is in AH. Right-shift by 8 bits then move from rax.
-                    ctx.emit(Inst::shift_r(
-                        OperandSize::Size64,
-                        ShiftKind::ShiftRightLogical,
-                        Some(8),
-                        Writable::from_reg(regs::rax()),
-                    ));
-                    ctx.emit(Inst::gen_move(dst, regs::rax(), input_ty));
-                } else {
-                    // The remainder is in rdx.
-                    ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
-                }
-            }
-        }
-
-        Opcode::Umulhi | Opcode::Smulhi => {
-            let input_ty = ctx.input_ty(insn, 0);
-
-            let lhs = put_input_in_reg(ctx, inputs[0]);
-            let rhs = input_to_reg_mem(ctx, inputs[1]);
-            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            // Move lhs in %rax.
-            ctx.emit(Inst::gen_move(
-                Writable::from_reg(regs::rax()),
-                lhs,
-                input_ty,
-            ));
-
-            // Emit the actual mul or imul.
-            let signed = op == Opcode::Smulhi;
-            ctx.emit(Inst::mul_hi(OperandSize::from_ty(input_ty), signed, rhs));
-
-            // Read the result from the high part (stored in %rdx).
-            ctx.emit(Inst::gen_move(dst, regs::rdx(), input_ty));
-        }
-
         Opcode::GetPinnedReg => {
             let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64));
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index 4d0dfea4a4fd..504fd18bf411 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -848,6 +848,108 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         self.lower_ctx
             .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
     }
+
+    /// Emits the code for a `udiv`/`sdiv`/`urem`/`srem` (selected by `kind`) on values of type
+    /// `ty`: `dividend` is moved into rax, divided by `divisor`, and the result copied to `dst`.
+    fn emit_div_or_rem(
+        &mut self,
+        kind: &DivOrRemKind,
+        ty: Type,
+        dst: WritableGpr,
+        dividend: Gpr,
+        divisor: Gpr,
+    ) {
+        let is_div = kind.is_div();
+        let size = OperandSize::from_ty(ty);
+
+        self.lower_ctx.emit(MInst::gen_move(
+            Writable::from_reg(regs::rax()),
+            dividend.to_reg(),
+            ty,
+        ));
+
+        // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
+        if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem {
+            // A vcode meta-instruction is used to lower the inline checks, since they embed
+            // pc-relative offsets that must not change, thus requiring regalloc to not
+            // interfere by introducing spills and reloads.
+            //
+            // Note it keeps the result in rax (for divide) or rdx (for rem), so that
+            // regalloc is aware of the coalescing opportunity between rax/rdx and the
+            // destination register.
+            let divisor_copy = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
+            self.lower_ctx
+                .emit(MInst::gen_move(divisor_copy, divisor.to_reg(), types::I64));
+
+            let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 {
+                Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap())
+            } else {
+                None
+            };
+            // TODO: use `xor` to zero rdx instead of an immediate move.
+            self.lower_ctx.emit(MInst::imm(
+                OperandSize::Size32,
+                0,
+                Writable::from_reg(regs::rdx()),
+            ));
+            self.lower_ctx.emit(MInst::checked_div_or_rem_seq(
+                kind.clone(),
+                size,
+                divisor_copy,
+                tmp,
+            ));
+        } else {
+            // We don't want more than one trap record for a single instruction,
+            // so let's not allow the "mem" case (load-op merging) here; force
+            // divisor into a register instead.
+            let divisor = RegMem::reg(divisor.to_reg());
+
+            // Fill in the high parts:
+            if kind.is_signed() {
+                // Signed opcodes: sign-extend al into ah (8-bit) or rax into rdx (wider sizes).
+                self.lower_ctx.emit(MInst::sign_extend_data(size));
+            } else if ty == types::I8 {
+                // Unsigned 8-bit: zero-extend al so that ah is cleared.
+                self.lower_ctx.emit(MInst::movzx_rm_r(
+                    ExtMode::BL,
+                    RegMem::reg(regs::rax()),
+                    Writable::from_reg(regs::rax()),
+                ));
+            } else {
+                // Zero rdx for unsigned opcodes.
+                self.lower_ctx.emit(MInst::imm(
+                    OperandSize::Size64,
+                    0,
+                    Writable::from_reg(regs::rdx()),
+                ));
+            }
+
+            // Emit the actual `div` (unsigned) or `idiv` (signed).
+            self.lower_ctx
+                .emit(MInst::div(size, kind.is_signed(), divisor));
+        }
+
+        // Move the result back into the destination reg.
+        if is_div {
+            // The quotient is in rax.
+            self.lower_ctx
+                .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty));
+        } else if size == OperandSize::Size8 {
+            // The remainder is in AH. Right-shift by 8 bits then move from rax.
+            self.lower_ctx.emit(MInst::shift_r(
+                OperandSize::Size64,
+                ShiftKind::ShiftRightLogical,
+                Some(8),
+                Writable::from_reg(regs::rax()),
+            ));
+            self.lower_ctx
+                .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty));
+        } else {
+            // The remainder is in rdx.
+            self.lower_ctx
+                .emit(MInst::gen_move(dst.to_writable_reg(), regs::rdx(), ty));
+        }
+    }
 }
 
 impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif
new file mode 100644
index 000000000000..c0f486c71f1c
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif
@@ -0,0 +1,67 @@
+test compile precise-output
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cbw %al, %dl
+;   idiv    %al, (none), %sil, %al, %dl
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cwd %ax, %dx
+;   idiv    %ax, %dx, %si, %ax, %dx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cdq %eax, %edx
+;   idiv    %eax, %edx, %esi, %eax, %edx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = sdiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   cqo %rax, %rdx
+;   idiv    %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/smulhi.clif b/cranelift/filetests/filetests/isa/x64/smulhi.clif
new file mode 100644
index 000000000000..0958ce301b62
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/smulhi.clif
@@ -0,0 +1,51 @@
+test compile precise-output
+target x86_64
+
+function %f1(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = smulhi v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   imul    %ax, %si, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f2(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = smulhi v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   imul    %eax, %esi, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = smulhi v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   imul    %rax, %rsi, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif
new file mode 100644
index 000000000000..99b137d56665
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/srem.clif
@@ -0,0 +1,71 @@
+test compile precise-output
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = srem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   srem_seq %al, %dl, %sil, %al, %dl, tmp=(none)
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = srem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none)
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = srem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none)
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = srem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none)
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif
new file mode 100644
index 000000000000..a49b5a027ef5
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/udiv.clif
@@ -0,0 +1,67 @@
+test compile precise-output
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = udiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movzbl  %al, %eax
+;   div     %al, (none), %sil, %al, %dl
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = udiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   div     %ax, %dx, %si, %ax, %dx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = udiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   div     %eax, %edx, %esi, %eax, %edx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = udiv v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   div     %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/umulhi.clif b/cranelift/filetests/filetests/isa/x64/umulhi.clif
new file mode 100644
index 000000000000..c5b0b73a26ac
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/umulhi.clif
@@ -0,0 +1,51 @@
+test compile precise-output
+target x86_64
+
+function %f1(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = umulhi v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   mul     %ax, %si, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f2(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = umulhi v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   mul     %eax, %esi, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = umulhi v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   mul     %rax, %rsi, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif
new file mode 100644
index 000000000000..5f4e80251f76
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/urem.clif
@@ -0,0 +1,71 @@
+test compile precise-output
+target x86_64
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = urem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movzbl  %al, %eax
+;   div     %al, (none), %sil, %al, %dl
+;   shrq    $8, %rax, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f2(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+  v2 = urem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   div     %ax, %dx, %si, %ax, %dx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+  v2 = urem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   div     %eax, %edx, %esi, %eax, %edx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f4(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+  v2 = urem v0, v1
+  return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rdi, %rax
+;   movl    $0, %edx
+;   div     %rax, %rdx, %rsi, %rax, %rdx
+;   movq    %rdx, %rax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+