diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index ba037d7cfd..9f3025309a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -957,13 +957,10 @@ (result Reg (a64_sdiv $I64 valid_x64 y64))) result)) -;; Helper for extracting an immediate that's not 0 and not -1 from an imm64. -(decl safe_divisor_from_imm64 (u64) Imm64) -(extern extractor safe_divisor_from_imm64 safe_divisor_from_imm64) - ;; Special case for `sdiv` where no checks are needed due to division by a ;; constant meaning the checks are always passed. -(rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst (safe_divisor_from_imm64 y))))) +(rule 1 (lower (has_type (fits_in_64 ty) (sdiv x (iconst imm)))) + (if-let y (safe_divisor_from_imm64 ty imm)) (a64_sdiv $I64 (put_in_reg_sext64 x) (imm ty (ImmExtend.Sign) y))) ;; Helper for placing a `Value` into a `Reg` and validating that it's nonzero. diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 883cb41a3f..a3fe4eb02b 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -392,13 +392,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { writable_zero_reg() } - fn safe_divisor_from_imm64(&mut self, val: Imm64) -> Option { - match val.bits() { - 0 | -1 => None, - n => Some(n as u64), - } - } - fn shift_mask(&mut self, ty: Type) -> ImmLogic { debug_assert!(ty.lane_bits().is_power_of_two()); diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 9b1b6ad8b2..faf5bc0452 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -58,14 +58,23 @@ (dst WritableGpr)) ;; Integer quotient and remainder: (div idiv) $rax $rdx (reg addr) - (Div (size OperandSize) ;; 1, 2, 4, or 8 - (signed bool) + ;; + ;; Note that this isn't 
used for 8-bit division which has its own `Div8` + ;; instruction. + (Div (size OperandSize) ;; 2, 4, or 8 + (sign DivSignedness) (divisor GprMem) (dividend_lo Gpr) (dividend_hi Gpr) (dst_quotient WritableGpr) (dst_remainder WritableGpr)) + ;; Same as `Div`, but for 8-bits where the regalloc behavior is different + (Div8 (sign DivSignedness) + (divisor GprMem) + (dividend Gpr) + (dst WritableGpr)) + ;; The high (and low) bits of a (un)signed multiply: `RDX:RAX := RAX * ;; rhs`. (MulHi (size OperandSize) @@ -75,19 +84,47 @@ (dst_lo WritableGpr) (dst_hi WritableGpr)) - ;; A synthetic sequence to implement the right inline checks for - ;; remainder and division, assuming the dividend is in %rax. + ;; A synthetic instruction sequence used as part of the lowering of the + ;; `srem` instruction which returns 0 if the divisor is -1 and + ;; otherwise executes an `idiv` instruction. ;; - ;; The generated code sequence is described in the emit's function match - ;; arm for this instruction. - (CheckedDivOrRemSeq (kind DivOrRemKind) - (size OperandSize) - (dividend_lo Gpr) - (dividend_hi Gpr) - (divisor Gpr) - (dst_quotient WritableGpr) - (dst_remainder WritableGpr) - (tmp OptionWritableGpr)) + ;; Note that this does not check for 0 as that's expected to be done + ;; separately. Also note that 8-bit types don't use this and use + ;; `CheckedSRemSeq8` instead. + (CheckedSRemSeq (size OperandSize) + (dividend_lo Gpr) + (dividend_hi Gpr) + (divisor Gpr) + (dst_quotient WritableGpr) + (dst_remainder WritableGpr)) + + ;; Same as above but for 8-bit types. + (CheckedSRemSeq8 (dividend Gpr) + (divisor Gpr) + (dst WritableGpr)) + + ;; Validates that the `divisor` can be safely divided into the + ;; `dividend`. + ;; + ;; This is a separate pseudo-instruction because it has some jumps in + ;; ways that can't be modeled otherwise with instructions right now. This + ;; will trap if the `divisor` is zero or if it's -1 and `dividend` is + ;; INT_MIN for the associated type. 
+ ;; + ;; Note that 64-bit types must use `ValidateSdivDivisor64`. + (ValidateSdivDivisor (size OperandSize) + (dividend Gpr) + (divisor Gpr)) + + ;; Same as `ValidateSdivDivisor` but for 64-bit types. + ;; + ;; This is a distinct instruction because the emission in `emit.rs` + ;; requires a temporary register to load an immediate into, hence the + ;; `tmp` field in this instruction not present in the non-64-bit one. + (ValidateSdivDivisor64 (dividend Gpr) + (divisor Gpr) + (tmp WritableGpr)) + ;; Do a sign-extend based on the sign of the value in rax into rdx: (cwd ;; cdq cqo) or al into ah: (cbw) @@ -628,6 +665,10 @@ Size32 Size64)) +(type DivSignedness + (enum Signed + Unsigned)) + (type FenceKind extern (enum MFence LFence @@ -690,12 +731,6 @@ Tzcnt Popcnt)) -(type DivOrRemKind extern - (enum SignedDiv - UnsignedDiv - SignedRem - UnsignedRem)) - (type SseOpcode extern (enum Addps Addpd @@ -4521,15 +4556,70 @@ ;;;; Division/Remainders ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl emit_div_or_rem (DivOrRemKind Type WritableGpr Gpr Gpr) Unit) -(extern constructor emit_div_or_rem emit_div_or_rem) +;; Helper for creating `CheckedSRemSeq` instructions. 
+(decl x64_checked_srem_seq (OperandSize Gpr Gpr Gpr) ValueRegs) +(rule (x64_checked_srem_seq size dividend_lo dividend_hi divisor) + (let ((dst_quotient WritableGpr (temp_writable_gpr)) + (dst_remainder WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.CheckedSRemSeq size dividend_lo dividend_hi divisor dst_quotient dst_remainder)))) + (value_regs dst_quotient dst_remainder))) -(decl div_or_rem (DivOrRemKind Value Value) Gpr) -(rule (div_or_rem kind a @ (value_type ty) b) +(decl x64_checked_srem_seq8 (Gpr Gpr) Gpr) +(rule (x64_checked_srem_seq8 dividend divisor) (let ((dst WritableGpr (temp_writable_gpr)) - (_ Unit (emit_div_or_rem kind ty dst a b))) + (_ Unit (emit (MInst.CheckedSRemSeq8 dividend divisor dst)))) dst)) +;; Helper for creating `Div8` instructions +(decl x64_div8 (Gpr GprMem DivSignedness) Gpr) +(rule (x64_div8 dividend divisor sign) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.Div8 sign divisor dividend dst)))) + dst)) + +;; Helper for creating `Div` instructions +;; +;; Two registers are returned through `ValueRegs` where the first is the +;; quotient and the second is the remainder. +(decl x64_div (Gpr Gpr GprMem OperandSize DivSignedness) ValueRegs) +(rule (x64_div dividend_lo dividend_hi divisor size sign) + (let ((dst_quotient WritableGpr (temp_writable_gpr)) + (dst_remainder WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.Div size sign divisor dividend_lo dividend_hi dst_quotient dst_remainder)))) + (value_regs dst_quotient dst_remainder))) + +;; Helper for `Div`, returning the quotient and discarding the remainder. +(decl x64_div_quotient (Gpr Gpr GprMem OperandSize DivSignedness) ValueRegs) +(rule (x64_div_quotient dividend_lo dividend_hi divisor size sign) + (value_regs_get (x64_div dividend_lo dividend_hi divisor size sign) 0)) + +;; Helper for `Div`, returning the remainder and discarding the quotient. 
+(decl x64_div_remainder (Gpr Gpr GprMem OperandSize DivSignedness) ValueRegs) +(rule (x64_div_remainder dividend_lo dividend_hi divisor size sign) + (value_regs_get (x64_div dividend_lo dividend_hi divisor size sign) 1)) + +;; Helper for creating `SignExtendData` instructions +(decl x64_sign_extend_data (Gpr OperandSize) Gpr) +(rule (x64_sign_extend_data src size) + (let ((dst WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.SignExtendData size src dst)))) + dst)) + +;; Helper for creating `ValidateSdivDivisor` instructions. +(decl validate_sdiv_divisor (OperandSize Gpr Gpr) Gpr) +(rule (validate_sdiv_divisor size dividend divisor) + (let ((_ Unit (emit (MInst.ValidateSdivDivisor size dividend divisor)))) + divisor)) + +;; Helper for creating `ValidateSdivDivisor64` instructions. +(decl validate_sdiv_divisor64 (Gpr Gpr) Gpr) +(rule (validate_sdiv_divisor64 dividend divisor) + (let ( + (tmp WritableGpr (temp_writable_gpr)) + (_ Unit (emit (MInst.ValidateSdivDivisor64 dividend divisor tmp))) + ) + divisor)) + ;;;; Pinned Register ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl read_pinned_gpr () Gpr) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 069c38f316..024313c45f 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -12,6 +12,8 @@ use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::String; +pub use crate::isa::x64::lower::isle::generated_code::DivSignedness; + /// An extenstion trait for converting `Writable{Xmm,Gpr}` to `Writable`. pub trait ToWritableReg { /// Convert `Writable{Xmm,Gpr}` to `Writable`. @@ -1878,35 +1880,6 @@ impl fmt::Display for ShiftKind { } } -/// What kind of division or remainder instruction this is? -#[derive(Clone, Eq, PartialEq)] -pub enum DivOrRemKind { - /// Signed division. - SignedDiv, - /// Unsigned division. - UnsignedDiv, - /// Signed remainder. 
- SignedRem, - /// Unsigned remainder. - UnsignedRem, -} - -impl DivOrRemKind { - pub(crate) fn is_signed(&self) -> bool { - match self { - DivOrRemKind::SignedDiv | DivOrRemKind::SignedRem => true, - _ => false, - } - } - - pub(crate) fn is_div(&self) -> bool { - match self { - DivOrRemKind::SignedDiv | DivOrRemKind::UnsignedDiv => true, - _ => false, - } - } -} - /// These indicate condition code tests. Not all are represented since not all are useful in /// compiler-generated code. #[derive(Copy, Clone)] diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index a6da7d867c..4e409979dd 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -399,25 +399,36 @@ pub(crate) fn emit( emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags) } - Inst::Div { - size, - signed, - dividend_lo, - dividend_hi, - divisor, - dst_quotient, - dst_remainder, - } => { - let dividend_lo = allocs.next(dividend_lo.to_reg()); - let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); - debug_assert_eq!(dividend_lo, regs::rax()); - debug_assert_eq!(dst_quotient, regs::rax()); - if size.to_bits() > 8 { - let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); - debug_assert_eq!(dst_remainder, regs::rdx()); - let dividend_hi = allocs.next(dividend_hi.to_reg()); - debug_assert_eq!(dividend_hi, regs::rdx()); - } + Inst::Div { sign, divisor, .. } | Inst::Div8 { sign, divisor, .. } => { + let divisor = divisor.clone().to_reg_mem().with_allocs(allocs); + let size = match inst { + Inst::Div { + size, + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + .. 
+ } => { + let dividend_lo = allocs.next(dividend_lo.to_reg()); + let dividend_hi = allocs.next(dividend_hi.to_reg()); + let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + debug_assert_eq!(dividend_lo, regs::rax()); + debug_assert_eq!(dividend_hi, regs::rdx()); + debug_assert_eq!(dst_quotient, regs::rax()); + debug_assert_eq!(dst_remainder, regs::rdx()); + *size + } + Inst::Div8 { dividend, dst, .. } => { + let dividend = allocs.next(dividend.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(dividend, regs::rax()); + debug_assert_eq!(dst, regs::rax()); + OperandSize::Size8 + } + _ => unreachable!(), + }; let (opcode, prefix) = match size { OperandSize::Size8 => (0xF6, LegacyPrefixes::None), @@ -428,10 +439,12 @@ pub(crate) fn emit( sink.add_trap(TrapCode::IntegerDivisionByZero); - let subopcode = if *signed { 7 } else { 6 }; - match divisor.clone().to_reg_mem() { + let subopcode = match sign { + DivSignedness::Signed => 7, + DivSignedness::Unsigned => 6, + }; + match divisor { RegMem::Reg { reg } => { - let reg = allocs.next(reg); let src = int_reg_enc(reg); emit_std_enc_enc( sink, @@ -440,11 +453,11 @@ pub(crate) fn emit( 1, subopcode, src, - RexFlags::from((*size, reg)), + RexFlags::from((size, reg)), ) } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink).with_allocs(allocs); + let amode = src.finalize(state, sink); emit_std_enc_mem( sink, prefix, @@ -452,7 +465,7 @@ pub(crate) fn emit( 1, subopcode, &amode, - RexFlags::from(*size), + RexFlags::from(size), 0, ); } @@ -522,164 +535,149 @@ pub(crate) fn emit( } } - Inst::CheckedDivOrRemSeq { - kind, - size, - dividend_lo, - dividend_hi, - divisor, - tmp, - dst_quotient, - dst_remainder, - } => { - let dividend_lo = allocs.next(dividend_lo.to_reg()); - let dividend_hi = allocs.next(dividend_hi.to_reg()); + Inst::CheckedSRemSeq { divisor, .. } | Inst::CheckedSRemSeq8 { divisor, .. 
} => { let divisor = allocs.next(divisor.to_reg()); - let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); - let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); - let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg())); - debug_assert_eq!(dividend_lo, regs::rax()); - debug_assert_eq!(dividend_hi, regs::rdx()); - debug_assert_eq!(dst_quotient, regs::rax()); - debug_assert_eq!(dst_remainder, regs::rdx()); + + // Validate that the register constraints of the dividend and the + // destination are all as expected. + let (dst, size) = match inst { + Inst::CheckedSRemSeq { + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + size, + .. + } => { + let dividend_lo = allocs.next(dividend_lo.to_reg()); + let dividend_hi = allocs.next(dividend_hi.to_reg()); + let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + debug_assert_eq!(dividend_lo, regs::rax()); + debug_assert_eq!(dividend_hi, regs::rdx()); + debug_assert_eq!(dst_quotient, regs::rax()); + debug_assert_eq!(dst_remainder, regs::rdx()); + (regs::rdx(), *size) + } + Inst::CheckedSRemSeq8 { dividend, dst, .. 
} => { + let dividend = allocs.next(dividend.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(dividend, regs::rax()); + debug_assert_eq!(dst, regs::rax()); + (regs::rax(), OperandSize::Size8) + } + _ => unreachable!(), + }; // Generates the following code sequence: // - // ;; check divide by zero: - // cmp 0 %divisor - // jnz $after_trap - // ud2 - // $after_trap: - // - // ;; for signed modulo/div: // cmp -1 %divisor // jnz $do_op - // ;; for signed modulo, result is 0 - // mov #0, %rdx - // j $done - // ;; for signed div, check for integer overflow against INT_MIN of the right size - // cmp INT_MIN, %rax - // jnz $do_op - // ud2 + // + // ;; for srem, result is 0 + // mov #0, %dst + // j $done // // $do_op: - // ;; if signed - // cdq ;; sign-extend from rax into rdx - // ;; else - // mov #0, %rdx // idiv %divisor // // $done: - // Check if the divisor is zero, first. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor); + let do_op = sink.get_label(); + let done_label = sink.get_label(); + + // Check if the divisor is -1, and if it isn't then immediately + // go to the `idiv`. + let inst = Inst::cmp_rmi_r(size, RegMemImm::imm(0xffffffff), divisor); + inst.emit(&[], sink, info, state); + one_way_jmp(sink, CC::NZ, do_op); + + // ... otherwise the divisor is -1 and the result is always 0. This + // is written to the destination register which will be %rax for + // 8-bit srem and %rdx otherwise. + // + // Note that for 16-to-64-bit srem operations this leaves the + // second destination, %rax, unchanged. This isn't semantically + // correct if a lowering actually tries to use the `dst_quotient` + // output but for srem only the `dst_remainder` output is used for + // now. 
+ let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); + let inst = Inst::jmp_known(done_label); inst.emit(&[], sink, info, state); + // Here the `idiv` is executed, which is different depending on the + // size + sink.bind_label(do_op); + let inst = match size { + OperandSize::Size8 => Inst::div8( + DivSignedness::Signed, + RegMem::reg(divisor), + Gpr::new(regs::rax()).unwrap(), + Writable::from_reg(Gpr::new(regs::rax()).unwrap()), + ), + _ => Inst::div( + size, + DivSignedness::Signed, + RegMem::reg(divisor), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + Writable::from_reg(Gpr::new(regs::rax()).unwrap()), + Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), + }; + inst.emit(&[], sink, info, state); + + sink.bind_label(done_label); + } + + Inst::ValidateSdivDivisor { + dividend, divisor, .. + } + | Inst::ValidateSdivDivisor64 { + dividend, divisor, .. + } => { + let orig_inst = &inst; + let divisor = allocs.next(divisor.to_reg()); + let dividend = allocs.next(dividend.to_reg()); + let size = match inst { + Inst::ValidateSdivDivisor { size, .. } => *size, + _ => OperandSize::Size64, + }; + + // First trap if the divisor is zero + let inst = Inst::cmp_rmi_r(size, RegMemImm::imm(0), divisor); + inst.emit(&[], sink, info, state); let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero); inst.emit(&[], sink, info, state); - let (do_op, done_label) = if kind.is_signed() { - // Now check if the divisor is -1. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor); - inst.emit(&[], sink, info, state); - let do_op = sink.get_label(); - - // If not equal, jump to do-op. - one_way_jmp(sink, CC::NZ, do_op); - - // Here, divisor == -1. - if !kind.is_div() { - // x % -1 = 0; put the result into the destination, $rax. 
- let done_label = sink.get_label(); - - let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rax())); + // Now check if the divisor is -1. If it is then additionally + // check if the dividend is INT_MIN. If it isn't then jump to the + // end. If both conditions here are true then trap. + let inst = Inst::cmp_rmi_r(size, RegMemImm::imm(0xffffffff), divisor); + inst.emit(&[], sink, info, state); + let done = sink.get_label(); + one_way_jmp(sink, CC::NZ, done); + let int_min = match orig_inst { + Inst::ValidateSdivDivisor64 { tmp, .. } => { + let tmp = allocs.next(tmp.to_reg().to_reg()); + let inst = Inst::imm(size, i64::MIN as u64, Writable::from_reg(tmp)); inst.emit(&[], sink, info, state); - - let inst = Inst::jmp_known(done_label); - inst.emit(&[], sink, info, state); - - (Some(do_op), Some(done_label)) - } else { - // Check for integer overflow. - if *size == OperandSize::Size64 { - let tmp = tmp.expect("temporary for i64 sdiv"); - - let inst = Inst::imm( - OperandSize::Size64, - 0x8000000000000000, - Writable::from_reg(tmp), - ); - inst.emit(&[], sink, info, state); - - let inst = - Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(tmp), regs::rax()); - inst.emit(&[], sink, info, state); - } else { - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); - inst.emit(&[], sink, info, state); - } - - // If not equal, jump over the trap. 
- let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow); - inst.emit(&[], sink, info, state); - - (Some(do_op), None) + RegMemImm::reg(tmp) } - } else { - (None, None) + _ => RegMemImm::imm(match size { + OperandSize::Size8 => 0x80, + OperandSize::Size16 => 0x8000, + OperandSize::Size32 => 0x80000000, + OperandSize::Size64 => unreachable!(), + }), }; - - if let Some(do_op) = do_op { - sink.bind_label(do_op); - } - - let dividend_lo = Gpr::new(regs::rax()).unwrap(); - let dst_quotient = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()); - let (dividend_hi, dst_remainder) = if *size == OperandSize::Size8 { - ( - Gpr::new(regs::rax()).unwrap(), - Writable::from_reg(Gpr::new(regs::rax()).unwrap()), - ) - } else { - ( - Gpr::new(regs::rdx()).unwrap(), - Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), - ) - }; - - // Fill in the high parts: - if kind.is_signed() { - // sign-extend the sign-bit of rax into rdx, for signed opcodes. - let inst = - Inst::sign_extend_data(*size, dividend_lo, WritableGpr::from_reg(dividend_hi)); - inst.emit(&[], sink, info, state); - } else if *size != OperandSize::Size8 { - // zero for unsigned opcodes. - let inst = Inst::imm( - OperandSize::Size64, - 0, - Writable::from_reg(dividend_hi.to_reg()), - ); - inst.emit(&[], sink, info, state); - } - - let inst = Inst::div( - *size, - kind.is_signed(), - RegMem::reg(divisor), - dividend_lo, - dividend_hi, - dst_quotient, - dst_remainder, - ); + let inst = Inst::cmp_rmi_r(size, int_min, dividend); + inst.emit(&[], sink, info, state); + let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow); inst.emit(&[], sink, info, state); - // Lowering takes care of moving the result back into the right register, see comment - // there. 
- - if let Some(done) = done_label { - sink.bind_label(done); - } + sink.bind_label(done); } Inst::Imm { diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index d94828c557..7e258bb8fe 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -1749,7 +1749,7 @@ fn test_x64_emit() { insns.push(( Inst::div( OperandSize::Size32, - true, /*signed*/ + DivSignedness::Signed, RegMem::reg(regs::rsi()), Gpr::new(regs::rax()).unwrap(), Gpr::new(regs::rdx()).unwrap(), @@ -1762,7 +1762,7 @@ fn test_x64_emit() { insns.push(( Inst::div( OperandSize::Size64, - true, /*signed*/ + DivSignedness::Signed, RegMem::reg(regs::r15()), Gpr::new(regs::rax()).unwrap(), Gpr::new(regs::rdx()).unwrap(), @@ -1775,7 +1775,7 @@ fn test_x64_emit() { insns.push(( Inst::div( OperandSize::Size32, - false, /*signed*/ + DivSignedness::Unsigned, RegMem::reg(regs::r14()), Gpr::new(regs::rax()).unwrap(), Gpr::new(regs::rdx()).unwrap(), @@ -1788,7 +1788,7 @@ fn test_x64_emit() { insns.push(( Inst::div( OperandSize::Size64, - false, /*signed*/ + DivSignedness::Unsigned, RegMem::reg(regs::rdi()), Gpr::new(regs::rax()).unwrap(), Gpr::new(regs::rdx()).unwrap(), @@ -1799,30 +1799,24 @@ fn test_x64_emit() { "div %rax, %rdx, %rdi, %rax, %rdx", )); insns.push(( - Inst::div( - OperandSize::Size8, - false, + Inst::div8( + DivSignedness::Unsigned, RegMem::reg(regs::rax()), Gpr::new(regs::rax()).unwrap(), - Gpr::new(regs::rdx()).unwrap(), WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), - WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "F6F0", - "div %al, (none), %al, %al, (none)", + "div %al, %al, %al", )); insns.push(( - Inst::div( - OperandSize::Size8, - false, + Inst::div8( + DivSignedness::Unsigned, RegMem::reg(regs::rsi()), Gpr::new(regs::rax()).unwrap(), - Gpr::new(regs::rdx()).unwrap(), WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), - 
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "40F6F6", - "div %al, (none), %sil, %al, (none)", + "div %al, %sil, %al", )); // ======================================================== @@ -1864,48 +1858,6 @@ fn test_x64_emit() { "mul %rax, %rdi, %rax, %rdx", )); - // ======================================================== - // cbw - insns.push(( - Inst::sign_extend_data( - OperandSize::Size8, - Gpr::new(regs::rax()).unwrap(), - WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), - ), - "6698", - "cbw %al, %al", - )); - - // ======================================================== - // cdq family: SignExtendRaxRdx - insns.push(( - Inst::sign_extend_data( - OperandSize::Size16, - Gpr::new(regs::rax()).unwrap(), - WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), - ), - "6699", - "cwd %ax, %dx", - )); - insns.push(( - Inst::sign_extend_data( - OperandSize::Size32, - Gpr::new(regs::rax()).unwrap(), - WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), - ), - "99", - "cdq %eax, %edx", - )); - insns.push(( - Inst::sign_extend_data( - OperandSize::Size64, - Gpr::new(regs::rax()).unwrap(), - WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), - ), - "4899", - "cqo %rax, %rdx", - )); - // ======================================================== // Imm_R // diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 95e39293dd..b7865b21b5 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -71,13 +71,17 @@ impl Inst { | Inst::Bswap { .. } | Inst::CallKnown { .. } | Inst::CallUnknown { .. } - | Inst::CheckedDivOrRemSeq { .. } + | Inst::CheckedSRemSeq { .. } + | Inst::CheckedSRemSeq8 { .. } + | Inst::ValidateSdivDivisor { .. } + | Inst::ValidateSdivDivisor64 { .. } | Inst::Cmove { .. } | Inst::CmpRmiR { .. } | Inst::CvtFloatToSintSeq { .. } | Inst::CvtFloatToUintSeq { .. } | Inst::CvtUint64ToFloatSeq { .. } | Inst::Div { .. } + | Inst::Div8 { .. 
} | Inst::Fence { .. } | Inst::Hlt | Inst::Imm { .. } @@ -220,7 +224,7 @@ impl Inst { pub(crate) fn div( size: OperandSize, - signed: bool, + sign: DivSignedness, divisor: RegMem, dividend_lo: Gpr, dividend_hi: Gpr, @@ -230,7 +234,7 @@ impl Inst { divisor.assert_regclass_is(RegClass::Int); Inst::Div { size, - signed, + sign, divisor: GprMem::new(divisor).unwrap(), dividend_lo, dividend_hi, @@ -239,36 +243,21 @@ impl Inst { } } - pub(crate) fn checked_div_or_rem_seq( - kind: DivOrRemKind, - size: OperandSize, - divisor: Reg, - dividend_lo: Gpr, - dividend_hi: Gpr, - dst_quotient: WritableGpr, - dst_remainder: WritableGpr, - tmp: Option>, + pub(crate) fn div8( + sign: DivSignedness, + divisor: RegMem, + dividend: Gpr, + dst: WritableGpr, ) -> Inst { - debug_assert!(divisor.class() == RegClass::Int); - debug_assert!(tmp - .map(|tmp| tmp.to_reg().class() == RegClass::Int) - .unwrap_or(true)); - Inst::CheckedDivOrRemSeq { - kind, - size, - divisor: Gpr::new(divisor).unwrap(), - dividend_lo, - dividend_hi, - dst_quotient, - dst_remainder, - tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()), + divisor.assert_regclass_is(RegClass::Int); + Inst::Div8 { + sign, + divisor: GprMem::new(divisor).unwrap(), + dividend, + dst, } } - pub(crate) fn sign_extend_data(size: OperandSize, src: Gpr, dst: WritableGpr) -> Inst { - Inst::SignExtendData { size, src, dst } - } - pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable) -> Inst { debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst.to_reg().class() == RegClass::Int); @@ -780,33 +769,25 @@ impl PrettyPrint for Inst { Inst::Div { size, - signed, + sign, divisor, dividend_lo, dividend_hi, dst_quotient, dst_remainder, } => { + let divisor = divisor.pretty_print(size.to_bytes(), allocs); let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); + let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); let 
dst_quotient = pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); - let dst_remainder = if size.to_bits() > 8 { - pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs) - } else { - "(none)".to_string() - }; - let dividend_hi = if size.to_bits() > 8 { - pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs) - } else { - "(none)".to_string() - }; - let divisor = divisor.pretty_print(size.to_bytes(), allocs); + let dst_remainder = + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); format!( "{} {}, {}, {}, {}, {}", - ljustify(if *signed { - "idiv".to_string() - } else { - "div".into() + ljustify(match sign { + DivSignedness::Signed => "idiv".to_string(), + DivSignedness::Unsigned => "div".to_string(), }), dividend_lo, dividend_hi, @@ -816,6 +797,24 @@ impl PrettyPrint for Inst { ) } + Inst::Div8 { + sign, + divisor, + dividend, + dst, + } => { + let divisor = divisor.pretty_print(1, allocs); + let dividend = pretty_print_reg(dividend.to_reg(), 1, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 1, allocs); + format!( + "{} {dividend}, {divisor}, {dst}", + ljustify(match sign { + DivSignedness::Signed => "idiv".to_string(), + DivSignedness::Unsigned => "div".to_string(), + }), + ) + } + Inst::MulHi { size, signed, @@ -842,43 +841,59 @@ impl PrettyPrint for Inst { ) } - Inst::CheckedDivOrRemSeq { - kind, + Inst::CheckedSRemSeq { size, divisor, dividend_lo, dividend_hi, dst_quotient, dst_remainder, - tmp, } => { + let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs); let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); - let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs); let dst_quotient = pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); let dst_remainder = 
pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); - let tmp = tmp - .map(|tmp| pretty_print_reg(tmp.to_reg().to_reg(), size.to_bytes(), allocs)) - .unwrap_or("(none)".to_string()); format!( - "{} {}, {}, {}, {}, {}, tmp={}", - match kind { - DivOrRemKind::SignedDiv => "sdiv_seq", - DivOrRemKind::UnsignedDiv => "udiv_seq", - DivOrRemKind::SignedRem => "srem_seq", - DivOrRemKind::UnsignedRem => "urem_seq", - }, - dividend_lo, - dividend_hi, - divisor, - dst_quotient, - dst_remainder, - tmp, + "checked_srem_seq {dividend_lo}, {dividend_hi}, \ + {divisor}, {dst_quotient}, {dst_remainder}", ) } + Inst::CheckedSRemSeq8 { + divisor, + dividend, + dst, + } => { + let divisor = pretty_print_reg(divisor.to_reg(), 1, allocs); + let dividend = pretty_print_reg(dividend.to_reg(), 1, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 1, allocs); + format!("checked_srem_seq {dividend}, {divisor}, {dst}") + } + + Inst::ValidateSdivDivisor { + dividend, + divisor, + size, + } => { + let dividend = pretty_print_reg(dividend.to_reg(), size.to_bytes(), allocs); + let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs); + format!("validate_sdiv_divisor {dividend}, {divisor}") + } + + Inst::ValidateSdivDivisor64 { + dividend, + divisor, + tmp, + } => { + let dividend = pretty_print_reg(dividend.to_reg(), 8, allocs); + let divisor = pretty_print_reg(divisor.to_reg(), 8, allocs); + let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8, allocs); + format!("validate_sdiv_divisor {dividend}, {divisor} {tmp}") + } + Inst::SignExtendData { size, src, dst } => { let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); @@ -1857,21 +1872,37 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_reuse_def(dst.to_writable_reg(), 0); } Inst::Div { - divisor, dividend_lo, dividend_hi, dst_quotient, dst_remainder, - size, + .. 
+ } + | Inst::CheckedSRemSeq { + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, .. } => { - collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); - collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); - if size.to_bits() > 8 { - collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); - collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); + match inst { + Inst::Div { divisor, .. } => divisor.get_operands(collector), + Inst::CheckedSRemSeq { divisor, .. } => collector.reg_use(divisor.to_reg()), + _ => {} } - divisor.get_operands(collector); + collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); + collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); + collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); + } + Inst::Div8 { dividend, dst, .. } | Inst::CheckedSRemSeq8 { dividend, dst, .. } => { + match inst { + Inst::Div8 { divisor, .. } => divisor.get_operands(collector), + Inst::CheckedSRemSeq8 { divisor, .. } => collector.reg_use(divisor.to_reg()), + _ => {} + } + collector.reg_fixed_use(dividend.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); } Inst::MulHi { src1, @@ -1885,25 +1916,20 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_fixed_def(dst_hi.to_writable_reg(), regs::rdx()); src2.get_operands(collector); } - Inst::CheckedDivOrRemSeq { - divisor, - dividend_lo, - dividend_hi, - dst_quotient, - dst_remainder, - tmp, - .. + Inst::ValidateSdivDivisor { + dividend, divisor, .. 
} => { - collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); - collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); collector.reg_use(divisor.to_reg()); - collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); - collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); - if let Some(tmp) = tmp { - // Early def so that the temporary register does not - // conflict with inputs or outputs. - collector.reg_early_def(tmp.to_writable_reg()); - } + collector.reg_use(dividend.to_reg()); + } + Inst::ValidateSdivDivisor64 { + dividend, + divisor, + tmp, + } => { + collector.reg_use(divisor.to_reg()); + collector.reg_use(dividend.to_reg()); + collector.reg_early_def(tmp.to_writable_reg()); } Inst::SignExtendData { size, src, dst } => { match size { diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 6a4f73620d..b5fcffe480 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3491,23 +3491,154 @@ ;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (udiv a @ (value_type ty) b)) - (div_or_rem (DivOrRemKind.UnsignedDiv) a b)) +;; The inputs to the `div` instruction are different for 8-bit division so +;; it needs a special case here since the instruction being crafted has a +;; different shape. +(rule 2 (lower (udiv a @ (value_type $I8) b)) + (x64_div8 (extend_to_gpr a $I32 (ExtendKind.Zero)) + (nonzero_divisor $I8 b) + (DivSignedness.Unsigned))) + +;; 16-to-64-bit division is all done with a similar instruction and the only +;; tricky requirement here is that when div traps are disallowed the divisor +;; must not be zero. +(rule 1 (lower (udiv a @ (value_type (fits_in_64 ty)) b)) + (x64_div_quotient a + (imm $I64 0) + (nonzero_divisor ty b) + (raw_operand_size_of_type ty) + (DivSignedness.Unsigned))) + +;; Helper to place `Value` into a `Gpr` while possibly trapping if it's zero. 
+;; +;; If the `avoid_div_traps=true` codegen setting is specified then the value +;; is checked for zero and a trap happens before the value is returned as a +;; register here. +(decl nonzero_divisor (Type Value) Gpr) + +;; As a special-case if the divisor is a constant number which is nonzero then +;; no matter what there's no checks necessary. +(rule 2 (nonzero_divisor ty (iconst (u64_from_imm64 (u64_nonzero n)))) + (imm ty n)) + +;; No checks necessary when `avoid_div_traps=false` +(rule 1 (nonzero_divisor ty val) + (if-let $false (avoid_div_traps)) + val) + +;; Base case traps if `val` is zero by using a `test` + `trap_if` combo +(rule (nonzero_divisor ty val) + (let ( + (val Reg val) + (_ InstOutput (side_effect (with_flags_side_effect + (x64_test (raw_operand_size_of_type ty) val val) + (trap_if (CC.Z) (TrapCode.IntegerDivisionByZero))))) + ) + val)) ;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (sdiv a @ (value_type ty) b)) - (div_or_rem (DivOrRemKind.SignedDiv) a b)) +(rule 2 (lower (sdiv a @ (value_type $I8) b)) + (let ( + (a Gpr (x64_sign_extend_data a (OperandSize.Size8))) + ) + (x64_div8 a (safe_sdiv_divisor $I8 a b) (DivSignedness.Signed)))) + +(rule 1 (lower (sdiv a @ (value_type (fits_in_64 ty)) b)) + (let ( + (a Gpr a) + (size OperandSize (raw_operand_size_of_type ty)) + (b Gpr (safe_sdiv_divisor ty a b)) + ) + (x64_div_quotient a (x64_sign_extend_data a size) b size (DivSignedness.Signed)))) + +;; Similar to `nonzero_divisor` except this checks to make sure that the divisor +;; provided as a `Value` is safe to divide into the dividend `Gpr` provided. +(decl safe_sdiv_divisor (Type Gpr Value) Reg) + +;; If the divisor is a constant that isn't 0 or -1, then it's always safe so +;; materialize it into a register. +(rule 3 (safe_sdiv_divisor ty a (iconst imm)) + (if-let n (safe_divisor_from_imm64 ty imm)) + (imm ty n)) + +;; With `avoid_div_traps=false` the divisor can be plumbed through. 
+;; +;; Note that CLIF semantics dictate that division-by-zero and INT_MIN/-1 both +;; trap, but this matches the hardware semantics of `idiv` on x64 so they're +;; fine to get plumbed through as-is. +(rule 2 (safe_sdiv_divisor ty a b) + (if-let $false (avoid_div_traps)) + b) + +;; The base cases here rely on some pseudo-instructions to do the checks to +;; jump around with labels and such. +(rule 1 (safe_sdiv_divisor $I64 a b) (validate_sdiv_divisor64 a b)) +(rule 0 (safe_sdiv_divisor ty a b) (validate_sdiv_divisor (raw_operand_size_of_type ty) a b)) ;; Rules for `urem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (urem a @ (value_type ty) b)) - (div_or_rem (DivOrRemKind.UnsignedRem) a b)) +;; The remainder is in AH, so take the result of the division and right-shift +;; by 8. +(rule 2 (lower (urem a @ (value_type $I8) b)) + (let ( + (a Gpr (extend_to_gpr a $I32 (ExtendKind.Zero))) + (b Gpr (nonzero_divisor $I8 b)) + (result Gpr (x64_div8 a b (DivSignedness.Unsigned))) + ) + (x64_shr $I64 result (Imm8Reg.Imm8 8)))) + +(rule 1 (lower (urem a @ (value_type (fits_in_64 ty)) b)) + (x64_div_remainder a + (imm $I64 0) + (nonzero_divisor ty b) + (raw_operand_size_of_type ty) + (DivSignedness.Unsigned))) ;; Rules for `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Special-cases first for constant `srem` where the checks for 0 and -1 aren't +;; applicable. +;; +;; Note that like `urem` for i8 types the result is in AH so to get the result +;; it's right-shifted down. +(rule 3 (lower (srem a @ (value_type $I8) (iconst imm))) + (if-let n (safe_divisor_from_imm64 $I8 imm)) + (let ( + (a Gpr (x64_sign_extend_data a (OperandSize.Size8))) + (result Gpr (x64_div8 a (imm $I8 n) (DivSignedness.Signed))) + ) + (x64_shr $I64 result (Imm8Reg.Imm8 8)))) + +;; Same as the above rule but for 16-to-64 bit types. 
+(rule 2 (lower (srem a @ (value_type ty) (iconst imm))) + (if-let n (safe_divisor_from_imm64 ty imm)) + (let ( + (a Gpr a) + (size OperandSize (raw_operand_size_of_type ty)) + ) + (x64_div_remainder a + (x64_sign_extend_data a size) + (imm ty n) + size + (DivSignedness.Signed)))) + +(rule 1 (lower (srem a @ (value_type $I8) b)) + (let ( + (a Gpr (x64_sign_extend_data a (OperandSize.Size8))) + (b Gpr (nonzero_divisor $I8 b)) + ) + (x64_shr $I64 (x64_checked_srem_seq8 a b) (Imm8Reg.Imm8 8)))) + (rule (lower (srem a @ (value_type ty) b)) - (div_or_rem (DivOrRemKind.SignedRem) a b)) + (let ( + (a Gpr a) + (b Gpr (nonzero_divisor ty b)) + (size OperandSize (raw_operand_size_of_type ty)) + (hi Gpr (x64_sign_extend_data a size)) + (tmp ValueRegs (x64_checked_srem_seq size a hi b)) + ) + (value_regs_get tmp 1))) ;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index bff7c42807..4d0a960151 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -848,138 +848,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> { .use_constant(VCodeConstantData::WellKnown(&UINT_MASK)) } - fn emit_div_or_rem( - &mut self, - kind: &DivOrRemKind, - ty: Type, - dst: WritableGpr, - dividend: Gpr, - divisor: Gpr, - ) { - let is_div = kind.is_div(); - let size = OperandSize::from_ty(ty); - - let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - - // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. 
- if self.backend.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem { - // A vcode meta-instruction is used to lower the inline checks, since they embed - // pc-relative offsets that must not change, thus requiring regalloc to not - // interfere by introducing spills and reloads. - let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 { - Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap()) - } else { - None - }; - let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - self.lower_ctx.emit(MInst::AluConstOp { - op: AluRmiROpcode::Xor, - size: OperandSize::Size32, - dst: WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()), - }); - self.lower_ctx.emit(MInst::checked_div_or_rem_seq( - kind.clone(), - size, - divisor.to_reg(), - Gpr::new(dividend.to_reg()).unwrap(), - Gpr::new(dividend_hi.to_reg()).unwrap(), - WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()), - WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()), - tmp, - )); - } else { - // We don't want more than one trap record for a single instruction, - // so let's not allow the "mem" case (load-op merging) here; force - // divisor into a register instead. - let divisor = RegMem::reg(divisor.to_reg()); - - let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - - // Fill in the high parts: - let dividend_lo = if kind.is_signed() && ty == types::I8 { - let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - // 8-bit div takes its dividend in only the `lo` reg. - self.lower_ctx.emit(MInst::sign_extend_data( - size, - Gpr::new(dividend.to_reg()).unwrap(), - WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()), - )); - // `dividend_hi` is not used by the Div below, so we - // don't def it here. - - dividend_lo.to_reg() - } else if kind.is_signed() { - // 16-bit and higher div takes its operand in hi:lo - // with half in each (64:64, 32:32 or 16:16). 
- self.lower_ctx.emit(MInst::sign_extend_data( - size, - Gpr::new(dividend.to_reg()).unwrap(), - WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()), - )); - - dividend.to_reg() - } else if ty == types::I8 { - let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - self.lower_ctx.emit(MInst::movzx_rm_r( - ExtMode::BL, - RegMem::reg(dividend.to_reg()), - dividend_lo, - )); - - dividend_lo.to_reg() - } else { - // zero for unsigned opcodes. - self.lower_ctx - .emit(MInst::imm(OperandSize::Size64, 0, dividend_hi)); - - dividend.to_reg() - }; - - // Emit the actual idiv. - self.lower_ctx.emit(MInst::div( - size, - kind.is_signed(), - divisor, - Gpr::new(dividend_lo).unwrap(), - Gpr::new(dividend_hi.to_reg()).unwrap(), - WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()), - WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()), - )); - } - - // Move the result back into the destination reg. - if is_div { - // The quotient is in rax. - self.lower_ctx.emit(MInst::gen_move( - dst.to_writable_reg(), - dst_quotient.to_reg(), - ty, - )); - } else { - if size == OperandSize::Size8 { - let tmp = self.temp_writable_reg(ty); - // The remainder is in AH. Right-shift by 8 bits then move from rax. - self.lower_ctx.emit(MInst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(), - dst_quotient.to_reg(), - tmp, - )); - self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), tmp.to_reg(), ty)); - } else { - // The remainder is in rdx. 
- self.lower_ctx.emit(MInst::gen_move( - dst.to_writable_reg(), - dst_remainder.to_reg(), - ty, - )); - } - } - } - fn xmm_mem_to_xmm_mem_aligned(&mut self, arg: &XmmMem) -> XmmMemAligned { match XmmMemAligned::new(arg.clone().into()) { Some(aligned) => aligned, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 9b6adcab9c..ca39d0feb3 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -285,12 +285,8 @@ macro_rules! isle_lower_prelude_methods { } } - fn avoid_div_traps(&mut self, _: Type) -> Option<()> { - if self.backend.flags().avoid_div_traps() { - Some(()) - } else { - None - } + fn avoid_div_traps(&mut self) -> bool { + self.backend.flags().avoid_div_traps() } #[inline] @@ -637,6 +633,20 @@ macro_rules! isle_lower_prelude_methods { shuffle_imm_as_le_lane_idx(2, &bytes[14..16])?, )) } + + fn safe_divisor_from_imm64(&mut self, ty: Type, val: Imm64) -> Option { + let minus_one = if ty.bytes() == 8 { + -1 + } else { + (1 << (ty.bytes() * 8)) - 1 + }; + let bits = val.bits() & minus_one; + if bits == 0 || bits == minus_one { + None + } else { + Some(bits as u64) + } + } }; } diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index a0498fae4d..8db2f73556 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -530,8 +530,8 @@ ;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl avoid_div_traps () Type) -(extern extractor avoid_div_traps avoid_div_traps) +(decl pure avoid_div_traps () bool) +(extern constructor avoid_div_traps avoid_div_traps) ;; This definition should be kept up to date with the values defined in ;; cranelift/codegen/meta/src/shared/settings.rs @@ -722,6 +722,10 @@ (decl gen_return (ValueSlice) Unit) (extern constructor gen_return gen_return) +;; Helper for extracting an immediate that's not 0 and not -1 from an imm64. 
+(decl pure partial safe_divisor_from_imm64 (Type Imm64) u64) +(extern constructor safe_divisor_from_imm64 safe_divisor_from_imm64) + ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (convert Inst Value def_inst) diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index b147c49dca..381a66b63c 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -19,8 +19,8 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) +; cbw %al, %al +; checked_srem_seq %al, %sil, %al ; shrq $8, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -32,15 +32,11 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpb $0, %sil -; jne 0x15 -; ud2 ; trap: int_divz -; cmpb $0xff, %sil -; jne 0x29 -; movl $0, %eax -; jmp 0x2e ; cbtw +; cmpb $0xff, %sil +; jne 0x1d +; movl $0, %eax +; jmp 0x20 ; idivb %sil ; trap: int_divz ; shrq $8, %rax ; movq %rbp, %rsp @@ -59,8 +55,8 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) +; cwd %ax, %dx +; checked_srem_seq %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -72,15 +68,11 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpw $0, %si -; jne 0x15 -; ud2 ; trap: int_divz -; cmpw $-1, %si -; jne 0x29 -; movl $0, %eax -; jmp 0x2e ; cwtd +; cmpw $-1, %si +; jne 0x1d +; movl $0, %edx +; jmp 0x20 ; idivw %si ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp @@ -99,8 +91,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) +; cdq %eax, %edx +; checked_srem_seq 
%eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -112,15 +104,11 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpl $0, %esi -; jne 0x14 -; ud2 ; trap: int_divz -; cmpl $-1, %esi -; jne 0x27 -; movl $0, %eax -; jmp 0x2a ; cltd +; cmpl $-1, %esi +; jne 0x1b +; movl $0, %edx +; jmp 0x1d ; idivl %esi ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp @@ -139,8 +127,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) +; cqo %rax, %rdx +; checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -152,15 +140,11 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpq $0, %rsi -; jne 0x15 -; ud2 ; trap: int_divz -; cmpq $-1, %rsi -; jne 0x29 -; movl $0, %eax -; jmp 0x2e ; cqto +; cmpq $-1, %rsi +; jne 0x1d +; movl $0, %edx +; jmp 0x20 ; idivq %rsi ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/sdiv-checked.clif b/cranelift/filetests/filetests/isa/x64/sdiv-checked.clif new file mode 100644 index 0000000000..7505a30789 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/sdiv-checked.clif @@ -0,0 +1,285 @@ +test compile precise-output +set avoid_div_traps=true +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cbw %al, %al +; validate_sdiv_divisor %sil, %al +; idiv %al, %sil, %al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cbtw +; cmpb $0, %sil +; jne 0x15 +; ud2 ; trap: int_divz +; cmpb $0xff, %sil +; jne 0x2a +; cmpb $0x80, %al +; jne 0x2a +; ud2 ; trap: int_ovf +; idivb 
%sil ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; validate_sdiv_divisor %si, %di +; movq %rdi, %rax +; cwd %ax, %dx +; idiv %ax, %dx, %si, %ax, %dx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; cmpw $0, %si +; jne 0x10 +; ud2 ; trap: int_divz +; cmpw $-1, %si +; jne 0x27 +; cmpw $0x8000, %di +; jne 0x27 +; ud2 ; trap: int_ovf +; movq %rdi, %rax +; cwtd +; idivw %si ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; validate_sdiv_divisor %esi, %edi +; movq %rdi, %rax +; cdq %eax, %edx +; idiv %eax, %edx, %esi, %eax, %edx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; cmpl $0, %esi +; jne 0xf +; ud2 ; trap: int_divz +; cmpl $-1, %esi +; jne 0x26 +; cmpl $0x80000000, %edi +; jne 0x26 +; ud2 ; trap: int_ovf +; movq %rdi, %rax +; cltd +; idivl %esi ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; validate_sdiv_divisor %rsi, %rdi %rcx +; movq %rdi, %rax +; cqo %rax, %rdx +; idiv %rax, %rdx, %rsi, %rax, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; cmpq $0, %rsi +; jne 0x10 +; ud2 ; trap: int_divz +; cmpq $-1, %rsi +; jne 0x2f +; movabsq $9223372036854775808, %rcx +; cmpq %rcx, %rdi +; jne 0x2f +; ud2 ; trap: int_ovf +; movq %rdi, %rax +; cqto +; idivq %rsi ; trap: int_divz +; movq %rbp, %rsp +; popq 
%rbp +; retq + +function %i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 17 + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cbw %al, %al +; movl $17, %edx +; idiv %al, %dl, %al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cbtw +; movl $0x11, %edx +; idivb %dl ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 17 + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $17, %ecx +; movq %rdi, %rax +; cwd %ax, %dx +; idiv %ax, %dx, %cx, %ax, %dx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl $0x11, %ecx +; movq %rdi, %rax +; cwtd +; idivw %cx ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $17, %ecx +; movq %rdi, %rax +; cdq %eax, %edx +; idiv %eax, %edx, %ecx, %eax, %edx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl $0x11, %ecx +; movq %rdi, %rax +; cltd +; idivl %ecx ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 17 + v2 = sdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $17, %ecx +; movq %rdi, %rax +; cqo %rax, %rdx +; idiv %rax, %rdx, %rcx, %rax, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movl $0x11, %ecx +; movq %rdi, %rax +; cqto +; idivq %rcx ; trap: int_divz 
+; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif index 17c79168dc..657a1e7fa4 100644 --- a/cranelift/filetests/filetests/isa/x64/sdiv.clif +++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif @@ -13,7 +13,7 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rdi, %rax ; cbw %al, %al -; idiv %al, (none), %sil, %al, (none) +; idiv %al, %sil, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/srem-checked.clif b/cranelift/filetests/filetests/isa/x64/srem-checked.clif new file mode 100644 index 0000000000..e4bb829b3a --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/srem-checked.clif @@ -0,0 +1,300 @@ +test compile precise-output +set avoid_div_traps=true +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cbw %al, %al +; testb %sil, %sil +; jnz ; ud2 int_divz ; +; checked_srem_seq %al, %sil, %al +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cbtw +; testb %sil, %sil +; jne 0x14 +; ud2 ; trap: int_divz +; cmpb $0xff, %sil +; jne 0x28 +; movl $0, %eax +; jmp 0x2b +; idivb %sil ; trap: int_divz +; shrq $8, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testw %si, %si +; jnz ; ud2 int_divz ; +; movq %rdi, %rax +; cwd %ax, %dx +; checked_srem_seq %ax, %dx, %si, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testw %si, %si +; jne 0xf +; ud2 ; trap: int_divz +; movq %rdi, %rax +; cwtd +; cmpw 
$-1, %si +; jne 0x28 +; movl $0, %edx +; jmp 0x2b +; idivw %si ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testl %esi, %esi +; jnz ; ud2 int_divz ; +; movq %rdi, %rax +; cdq %eax, %edx +; checked_srem_seq %eax, %edx, %esi, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testl %esi, %esi +; jne 0xe +; ud2 ; trap: int_divz +; movq %rdi, %rax +; cltd +; cmpl $-1, %esi +; jne 0x25 +; movl $0, %edx +; jmp 0x27 +; idivl %esi ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testq %rsi, %rsi +; jnz ; ud2 int_divz ; +; movq %rdi, %rax +; cqo %rax, %rdx +; checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; testq %rsi, %rsi +; jne 0xf +; ud2 ; trap: int_divz +; movq %rdi, %rax +; cqto +; cmpq $-1, %rsi +; jne 0x28 +; movl $0, %edx +; jmp 0x2b +; idivq %rsi ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cbw %al, %al +; movl $17, %edx +; idiv %al, %dl, %al +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cbtw +; movl $0x11, %edx +; idivb %dl ; trap: int_divz +; shrq $8, %rax +; movq %rbp, %rsp +; 
popq %rbp +; retq + +function %i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cwd %ax, %dx +; movl $17, %r8d +; idiv %ax, %dx, %r8w, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cwtd +; movl $0x11, %r8d +; idivw %r8w ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cdq %eax, %edx +; movl $17, %r8d +; idiv %eax, %edx, %r8d, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cltd +; movl $0x11, %r8d +; idivl %r8d ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cqo %rax, %rdx +; movl $17, %r8d +; idiv %rax, %rdx, %r8, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cqto +; movl $0x11, %r8d +; idivq %r8 ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif index e7fd115784..973a9f0bee 100644 --- a/cranelift/filetests/filetests/isa/x64/srem.clif +++ b/cranelift/filetests/filetests/isa/x64/srem.clif @@ -12,8 +12,8 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq 
%rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) +; cbw %al, %al +; checked_srem_seq %al, %sil, %al ; shrq $8, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -25,15 +25,11 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpb $0, %sil -; jne 0x15 -; ud2 ; trap: int_divz -; cmpb $0xff, %sil -; jne 0x29 -; movl $0, %eax -; jmp 0x2e ; cbtw +; cmpb $0xff, %sil +; jne 0x1d +; movl $0, %eax +; jmp 0x20 ; idivb %sil ; trap: int_divz ; shrq $8, %rax ; movq %rbp, %rsp @@ -51,8 +47,8 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) +; cwd %ax, %dx +; checked_srem_seq %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -64,15 +60,11 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpw $0, %si -; jne 0x15 -; ud2 ; trap: int_divz -; cmpw $-1, %si -; jne 0x29 -; movl $0, %eax -; jmp 0x2e ; cwtd +; cmpw $-1, %si +; jne 0x1d +; movl $0, %edx +; jmp 0x20 ; idivw %si ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp @@ -90,8 +82,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) +; cdq %eax, %edx +; checked_srem_seq %eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -103,15 +95,11 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpl $0, %esi -; jne 0x14 -; ud2 ; trap: int_divz -; cmpl $-1, %esi -; jne 0x27 -; movl $0, %eax -; jmp 0x2a ; cltd +; cmpl $-1, %esi +; jne 0x1b +; movl $0, %edx +; jmp 0x1d ; idivl %esi ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp @@ -129,8 +117,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; xorl %edx, %edx, %edx -; srem_seq %rax, %rdx, %rsi, %rax, 
%rdx, tmp=(none) +; cqo %rax, %rdx +; checked_srem_seq %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp @@ -142,18 +130,150 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; xorl %edx, %edx -; cmpq $0, %rsi -; jne 0x15 -; ud2 ; trap: int_divz -; cmpq $-1, %rsi -; jne 0x29 -; movl $0, %eax -; jmp 0x2e ; cqto +; cmpq $-1, %rsi +; jne 0x1d +; movl $0, %edx +; jmp 0x20 ; idivq %rsi ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp ; popq %rbp ; retq +function %i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cbw %al, %al +; movl $17, %edx +; idiv %al, %dl, %al +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cbtw +; movl $0x11, %edx +; idivb %dl ; trap: int_divz +; shrq $8, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cwd %ax, %dx +; movl $17, %r8d +; idiv %ax, %dx, %r8w, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cwtd +; movl $0x11, %r8d +; idivw %r8w ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cdq %eax, %edx +; movl $17, %r8d +; idiv %eax, %edx, %r8d, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: 
; offset 0x4 +; movq %rdi, %rax +; cltd +; movl $0x11, %r8d +; idivl %r8d ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 17 + v2 = srem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; cqo %rax, %rdx +; movl $17, %r8d +; idiv %rax, %rdx, %r8, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; cqto +; movl $0x11, %r8d +; idivq %r8 ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/udiv-checked.clif b/cranelift/filetests/filetests/isa/x64/udiv-checked.clif new file mode 100644 index 0000000000..4eec243fb1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/udiv-checked.clif @@ -0,0 +1,264 @@ +test compile precise-output +set avoid_div_traps=true +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbl %dil, %eax +; testb %sil, %sil +; jnz ; ud2 int_divz ; +; div %al, %sil, %al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbl %dil, %eax +; testb %sil, %sil +; jne 0x13 +; ud2 ; trap: int_divz +; divb %sil ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; testw %si, %si +; jnz ; ud2 int_divz ; +; div %ax, %dx, %si, %ax, %dx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq 
%rdx, %rdx +; testw %si, %si +; jne 0x15 +; ud2 ; trap: int_divz +; divw %si ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; testl %esi, %esi +; jnz ; ud2 int_divz ; +; div %eax, %edx, %esi, %eax, %edx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; testl %esi, %esi +; jne 0x14 +; ud2 ; trap: int_divz +; divl %esi ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; testq %rsi, %rsi +; jnz ; ud2 int_divz ; +; div %rax, %rdx, %rsi, %rax, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; testq %rsi, %rsi +; jne 0x15 +; ud2 ; trap: int_divz +; divq %rsi ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 17 + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbl %dil, %eax +; movl $17, %edx +; div %al, %dl, %al +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbl %dil, %eax +; movl $0x11, %edx +; divb %dl ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 17 + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; movl $17, %r8d +; div 
%ax, %dx, %r8w, %ax, %dx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; movl $0x11, %r8d +; divw %r8w ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; movl $17, %r8d +; div %eax, %edx, %r8d, %eax, %edx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; movl $0x11, %r8d +; divl %r8d ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 17 + v2 = udiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; movl $17, %r8d +; div %rax, %rdx, %r8, %rax, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; movl $0x11, %r8d +; divq %r8 ; trap: int_divz +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif index b0affd746d..391abff20a 100644 --- a/cranelift/filetests/filetests/isa/x64/udiv.clif +++ b/cranelift/filetests/filetests/isa/x64/udiv.clif @@ -12,7 +12,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbl %dil, %eax -; div %al, (none), %sil, %al, (none) +; div %al, %sil, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -39,7 +39,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; div %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -51,7 
+51,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx ; divw %si ; trap: int_divz ; movq %rbp, %rsp ; popq %rbp @@ -68,7 +68,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; div %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -80,7 +80,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx ; divl %esi ; trap: int_divz ; movq %rbp, %rsp ; popq %rbp @@ -97,7 +97,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -109,7 +109,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx ; divq %rsi ; trap: int_divz ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/udivrem.clif b/cranelift/filetests/filetests/isa/x64/udivrem.clif index 9385c4b870..d0368a430b 100644 --- a/cranelift/filetests/filetests/isa/x64/udivrem.clif +++ b/cranelift/filetests/filetests/isa/x64/udivrem.clif @@ -15,13 +15,13 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbl %dil, %eax -; div %al, (none), %sil, %al, (none) -; movq %rax, %rcx +; div %al, %sil, %al +; movq %rax, %r11 ; movzbl %dil, %eax -; div %al, (none), %sil, %al, (none) +; div %al, %sil, %al ; movq %rax, %rdx ; shrq $8, %rdx, %rdx -; movq %rcx, %rax +; movq %r11, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -33,12 +33,12 @@ block0(v0: i8, v1: i8): ; block1: ; offset 0x4 ; movzbl %dil, %eax ; divb %sil ; trap: int_divz -; movq %rax, %rcx +; movq %rax, %r11 ; movzbl %dil, %eax ; divb %sil ; trap: int_divz ; movq %rax, %rdx ; shrq $8, %rdx -; movq %rcx, %rax +; movq %r11, %rax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -54,15 +54,14 @@ block0(v0: i16, v1: i16): ; pushq 
%rbp ; movq %rsp, %rbp ; block0: -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; movq %rdi, %rax ; div %ax, %dx, %si, %ax, %dx -; movq %rdi, %rcx -; movq %rax, %r8 -; movl $0, %edx -; movq %rcx, %rax +; movq %rax, %rcx +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx ; div %ax, %dx, %si, %ax, %dx -; movq %r8, %rax +; movq %rcx, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -72,15 +71,14 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movl $0, %edx +; xorq %rdx, %rdx ; movq %rdi, %rax ; divw %si ; trap: int_divz -; movq %rdi, %rcx -; movq %rax, %r8 -; movl $0, %edx -; movq %rcx, %rax +; movq %rax, %rcx +; movq %rdi, %rax +; xorq %rdx, %rdx ; divw %si ; trap: int_divz -; movq %r8, %rax +; movq %rcx, %rax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -96,15 +94,14 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; movq %rdi, %rax ; div %eax, %edx, %esi, %eax, %edx -; movq %rdi, %rcx -; movq %rax, %r8 -; movl $0, %edx -; movq %rcx, %rax +; movq %rax, %rcx +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx ; div %eax, %edx, %esi, %eax, %edx -; movq %r8, %rax +; movq %rcx, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -114,15 +111,14 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movl $0, %edx +; xorq %rdx, %rdx ; movq %rdi, %rax ; divl %esi ; trap: int_divz -; movq %rdi, %rcx -; movq %rax, %r8 -; movl $0, %edx -; movq %rcx, %rax +; movq %rax, %rcx +; movq %rdi, %rax +; xorq %rdx, %rdx ; divl %esi ; trap: int_divz -; movq %r8, %rax +; movq %rcx, %rax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -138,15 +134,14 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; movq %rdi, %rax ; div %rax, %rdx, %rsi, %rax, %rdx -; movq %rdi, %rcx -; movq %rax, %r8 -; movl $0, %edx -; movq %rcx, %rax +; movq %rax, %rcx +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx -; movq %r8, %rax +; 
movq %rcx, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -156,15 +151,14 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movl $0, %edx +; xorq %rdx, %rdx ; movq %rdi, %rax ; divq %rsi ; trap: int_divz -; movq %rdi, %rcx -; movq %rax, %r8 -; movl $0, %edx -; movq %rcx, %rax +; movq %rax, %rcx +; movq %rdi, %rax +; xorq %rdx, %rdx ; divq %rsi ; trap: int_divz -; movq %r8, %rax +; movq %rcx, %rax ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/urem-checked.clif b/cranelift/filetests/filetests/isa/x64/urem-checked.clif new file mode 100644 index 0000000000..df6e20e1c3 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/urem-checked.clif @@ -0,0 +1,280 @@ +test compile precise-output +set avoid_div_traps=true +target x86_64 + +function %f1(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbl %dil, %eax +; testb %sil, %sil +; jnz ; ud2 int_divz ; +; div %al, %sil, %al +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbl %dil, %eax +; testb %sil, %sil +; jne 0x13 +; ud2 ; trap: int_divz +; divb %sil ; trap: int_divz +; shrq $8, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f2(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; testw %si, %si +; jnz ; ud2 int_divz ; +; div %ax, %dx, %si, %ax, %dx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; testw %si, %si +; jne 0x15 +; ud2 ; trap: int_divz +; divw %si ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function 
%f3(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; testl %esi, %esi +; jnz ; ud2 int_divz ; +; div %eax, %edx, %esi, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; testl %esi, %esi +; jne 0x14 +; ud2 ; trap: int_divz +; divl %esi ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f4(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; testq %rsi, %rsi +; jnz ; ud2 int_divz ; +; div %rax, %rdx, %rsi, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; testq %rsi, %rsi +; jne 0x15 +; ud2 ; trap: int_divz +; divq %rsi ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 17 + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbl %dil, %eax +; movl $17, %edx +; div %al, %dl, %al +; shrq $8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movzbl %dil, %eax +; movl $0x11, %edx +; divb %dl ; trap: int_divz +; shrq $8, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 17 + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; movl $17, %r8d +; div %ax, %dx, %r8w, %ax, %dx +; movq 
%rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; movl $0x11, %r8d +; divw %r8w ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; movl $17, %r8d +; div %eax, %edx, %r8d, %eax, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; movl $0x11, %r8d +; divl %r8d ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 17 + v2 = urem v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; xorq %rdx, %rdx, %rdx +; movl $17, %r8d +; div %rax, %rdx, %r8, %rax, %rdx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; movq %rdi, %rax +; xorq %rdx, %rdx +; movl $0x11, %r8d +; divq %r8 ; trap: int_divz +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif index 05b8949432..63daeaade0 100644 --- a/cranelift/filetests/filetests/isa/x64/urem.clif +++ b/cranelift/filetests/filetests/isa/x64/urem.clif @@ -12,7 +12,7 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbl %dil, %eax -; div %al, (none), %sil, %al, (none) +; div %al, %sil, %al ; shrq $8, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -41,7 +41,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; movl $0, %edx 
+; xorq %rdx, %rdx, %rdx ; div %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -54,7 +54,7 @@ block0(v0: i16, v1: i16): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx ; divw %si ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp @@ -72,7 +72,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; div %eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -85,7 +85,7 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx ; divl %esi ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp @@ -103,7 +103,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -116,7 +116,7 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; movq %rdi, %rax -; movl $0, %edx +; xorq %rdx, %rdx ; divq %rsi ; trap: int_divz ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index 03bddaac62..f60dde22d6 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -5,10 +5,11 @@ use crate::{ masm::{DivKind, OperandSize, RemKind}, }; use cranelift_codegen::{ + ir::TrapCode, isa::x64::{ args::{ - self, AluRmiROpcode, Amode, DivOrRemKind, ExtMode, FromWritableReg, Gpr, GprMem, - GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, + self, AluRmiROpcode, Amode, CmpOpcode, DivSignedness, ExtMode, FromWritableReg, Gpr, + GprMem, GprMemImm, RegMem, RegMemImm, SyntheticAmode, WritableGpr, CC, }, settings as x64_settings, EmitInfo, EmitState, Inst, }, @@ -64,20 +65,11 @@ impl From for args::OperandSize { } } -impl From for DivOrRemKind { - fn from(kind: DivKind) -> Self { +impl From for DivSignedness { + fn from(kind: DivKind) -> DivSignedness 
{ match kind { - DivKind::Signed => DivOrRemKind::SignedDiv, - DivKind::Unsigned => DivOrRemKind::UnsignedDiv, - } - } -} - -impl From for DivOrRemKind { - fn from(kind: RemKind) -> Self { - match kind { - RemKind::Signed => DivOrRemKind::SignedRem, - RemKind::Unsigned => DivOrRemKind::UnsignedRem, + DivKind::Signed => DivSignedness::Signed, + DivKind::Unsigned => DivSignedness::Unsigned, } } } @@ -290,21 +282,61 @@ impl Assembler { /// caller has correctly allocated the dividend as `(rdx:rax)` and /// accounted for the quotient to be stored in `rax`. pub fn div(&mut self, divisor: Reg, dst: (Reg, Reg), kind: DivKind, size: OperandSize) { - let tmp = if size == OperandSize::S64 && kind == DivKind::Signed { - Some(regs::scratch()) - } else { - None - }; + match kind { + // Signed division goes through a pseudo-instruction to validate + // the divisor followed by a sign extension to initialize `rdx`. + DivKind::Signed => { + if size == OperandSize::S64 { + self.emit(Inst::ValidateSdivDivisor64 { + dividend: dst.0.into(), + divisor: divisor.into(), + tmp: regs::scratch().into(), + }); + } else { + self.emit(Inst::ValidateSdivDivisor { + dividend: dst.0.into(), + divisor: divisor.into(), + size: size.into(), + }); + } + self.emit(Inst::SignExtendData { + size: size.into(), + src: dst.0.into(), + dst: dst.1.into(), + }); + } - self.emit(Inst::CheckedDivOrRemSeq { - kind: kind.into(), + // Unsigned division only needs to check for 0 and then the `rdx` + // dividend_hi is initialized with zero through an xor-against-itself + // op. 
+ DivKind::Unsigned => { + self.emit(Inst::CmpRmiR { + size: size.into(), + src: GprMemImm::new(RegMemImm::imm(0)).unwrap(), + dst: divisor.into(), + opcode: CmpOpcode::Cmp, + }); + self.emit(Inst::TrapIf { + cc: CC::Z, + trap_code: TrapCode::IntegerDivisionByZero, + }); + self.emit(Inst::AluRmiR { + size: size.into(), + op: AluRmiROpcode::Xor, + src1: dst.1.into(), + src2: dst.1.into(), + dst: dst.1.into(), + }); + } + } + self.emit(Inst::Div { + sign: kind.into(), size: size.into(), - divisor: divisor.into(), + divisor: GprMem::new(RegMem::reg(divisor.into())).unwrap(), dividend_lo: dst.0.into(), dividend_hi: dst.1.into(), dst_quotient: dst.0.into(), dst_remainder: dst.1.into(), - tmp: tmp.map(|reg| reg.into()), }); } @@ -316,16 +348,58 @@ impl Assembler { /// caller has correctly allocated the dividend as `(rdx:rax)` and /// accounted for the remainder to be stored in `rdx`. pub fn rem(&mut self, divisor: Reg, dst: (Reg, Reg), kind: RemKind, size: OperandSize) { - self.emit(Inst::CheckedDivOrRemSeq { - kind: kind.into(), + // First check for zero and explicitly trap. + self.emit(Inst::CmpRmiR { size: size.into(), - divisor: divisor.into(), - dividend_lo: dst.0.into(), - dividend_hi: dst.1.into(), - dst_quotient: dst.0.into(), - dst_remainder: dst.1.into(), - tmp: None, + src: GprMemImm::new(RegMemImm::imm(0)).unwrap(), + dst: divisor.into(), + opcode: CmpOpcode::Cmp, }); + self.emit(Inst::TrapIf { + cc: CC::Z, + trap_code: TrapCode::IntegerDivisionByZero, + }); + match kind { + // Signed remainder goes through a pseudo-instruction which has + // some internal branching. The `dividend_hi`, or `rdx`, is + // initialized here with a `SignExtendData` instruction. 
+ RemKind::Signed => { + self.emit(Inst::SignExtendData { + size: size.into(), + src: dst.0.into(), + dst: dst.1.into(), + }); + self.emit(Inst::CheckedSRemSeq { + size: size.into(), + divisor: divisor.into(), + dividend_lo: dst.0.into(), + dividend_hi: dst.1.into(), + dst_quotient: dst.0.into(), + dst_remainder: dst.1.into(), + }); + } + + // Unsigned remainder initializes `dividend_hi` with zero and + // then executes a normal `div` instruction. + RemKind::Unsigned => { + self.emit(Inst::AluRmiR { + size: size.into(), + op: AluRmiROpcode::Xor, + src1: dst.1.into(), + src2: dst.1.into(), + dst: dst.1.into(), + }); + self.emit(Inst::Div { + sign: DivSignedness::Unsigned, + size: size.into(), + divisor: GprMem::new(RegMem::reg(divisor.into())).unwrap(), + dividend_lo: dst.0.into(), + dividend_hi: dst.1.into(), + dst_quotient: dst.0.into(), + dst_remainder: dst.1.into(), + }); + } + } } /// Multiply immediate and register. diff --git a/winch/filetests/filetests/x64/i32_divu/const.wat b/winch/filetests/filetests/x64/i32_divu/const.wat index d7be12fd17..3ba1c3dbb8 100644 --- a/winch/filetests/filetests/x64/i32_divu/const.wat +++ b/winch/filetests/filetests/x64/i32_divu/const.wat @@ -14,7 +14,7 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 5d pop rbp -;; 21: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 5d pop rbp +;; 1e: c3 ret diff --git a/winch/filetests/filetests/x64/i32_divu/one_zero.wat b/winch/filetests/filetests/x64/i32_divu/one_zero.wat index 8eb16cfb28..938f787785 100644 --- a/winch/filetests/filetests/x64/i32_divu/one_zero.wat +++ b/winch/filetests/filetests/x64/i32_divu/one_zero.wat @@ -14,7 +14,7 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 5d pop rbp -;; 21: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 5d pop rbp +;; 1e: c3 ret diff --git 
a/winch/filetests/filetests/x64/i32_divu/params.wat b/winch/filetests/filetests/x64/i32_divu/params.wat index f3a57b8648..e732e05bc8 100644 --- a/winch/filetests/filetests/x64/i32_divu/params.wat +++ b/winch/filetests/filetests/x64/i32_divu/params.wat @@ -17,8 +17,8 @@ ;; 16: 83f900 cmp ecx, 0 ;; 19: 0f8502000000 jne 0x21 ;; 1f: 0f0b ud2 -;; 21: ba00000000 mov edx, 0 -;; 26: f7f1 div ecx -;; 28: 4883c408 add rsp, 8 -;; 2c: 5d pop rbp -;; 2d: c3 ret +;; 21: 31d2 xor edx, edx +;; 23: f7f1 div ecx +;; 25: 4883c408 add rsp, 8 +;; 29: 5d pop rbp +;; 2a: c3 ret diff --git a/winch/filetests/filetests/x64/i32_divu/signed.wat b/winch/filetests/filetests/x64/i32_divu/signed.wat index 925c2a038e..ce39cff780 100644 --- a/winch/filetests/filetests/x64/i32_divu/signed.wat +++ b/winch/filetests/filetests/x64/i32_divu/signed.wat @@ -14,7 +14,7 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 5d pop rbp -;; 21: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 5d pop rbp +;; 1e: c3 ret diff --git a/winch/filetests/filetests/x64/i32_divu/zero_zero.wat b/winch/filetests/filetests/x64/i32_divu/zero_zero.wat index c67d1a06ce..418d084329 100644 --- a/winch/filetests/filetests/x64/i32_divu/zero_zero.wat +++ b/winch/filetests/filetests/x64/i32_divu/zero_zero.wat @@ -14,7 +14,7 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 5d pop rbp -;; 21: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 5d pop rbp +;; 1e: c3 ret diff --git a/winch/filetests/filetests/x64/i32_rems/const.wat b/winch/filetests/filetests/x64/i32_rems/const.wat index 1dc8224b72..1edb0d8962 100644 --- a/winch/filetests/filetests/x64/i32_rems/const.wat +++ b/winch/filetests/filetests/x64/i32_rems/const.wat @@ -14,11 +14,11 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: 83f9ff cmp ecx, -1 -;; 1c: 
0f850a000000 jne 0x2c -;; 22: b800000000 mov eax, 0 -;; 27: e903000000 jmp 0x2f -;; 2c: 99 cdq +;; 19: 99 cdq +;; 1a: 83f9ff cmp ecx, -1 +;; 1d: 0f850a000000 jne 0x2d +;; 23: ba00000000 mov edx, 0 +;; 28: e902000000 jmp 0x2f ;; 2d: f7f9 idiv ecx ;; 2f: 4889d0 mov rax, rdx ;; 32: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i32_rems/one_zero.wat b/winch/filetests/filetests/x64/i32_rems/one_zero.wat index bc7804ede1..3b618c3c10 100644 --- a/winch/filetests/filetests/x64/i32_rems/one_zero.wat +++ b/winch/filetests/filetests/x64/i32_rems/one_zero.wat @@ -14,11 +14,11 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: 83f9ff cmp ecx, -1 -;; 1c: 0f850a000000 jne 0x2c -;; 22: b800000000 mov eax, 0 -;; 27: e903000000 jmp 0x2f -;; 2c: 99 cdq +;; 19: 99 cdq +;; 1a: 83f9ff cmp ecx, -1 +;; 1d: 0f850a000000 jne 0x2d +;; 23: ba00000000 mov edx, 0 +;; 28: e902000000 jmp 0x2f ;; 2d: f7f9 idiv ecx ;; 2f: 4889d0 mov rax, rdx ;; 32: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i32_rems/overflow.wat b/winch/filetests/filetests/x64/i32_rems/overflow.wat index 7211182b2f..b42f2ce35d 100644 --- a/winch/filetests/filetests/x64/i32_rems/overflow.wat +++ b/winch/filetests/filetests/x64/i32_rems/overflow.wat @@ -14,11 +14,11 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: 83f9ff cmp ecx, -1 -;; 1c: 0f850a000000 jne 0x2c -;; 22: b800000000 mov eax, 0 -;; 27: e903000000 jmp 0x2f -;; 2c: 99 cdq +;; 19: 99 cdq +;; 1a: 83f9ff cmp ecx, -1 +;; 1d: 0f850a000000 jne 0x2d +;; 23: ba00000000 mov edx, 0 +;; 28: e902000000 jmp 0x2f ;; 2d: f7f9 idiv ecx ;; 2f: 4889d0 mov rax, rdx ;; 32: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i32_rems/params.wat b/winch/filetests/filetests/x64/i32_rems/params.wat index d91d3937e9..752f5abee9 100644 --- a/winch/filetests/filetests/x64/i32_rems/params.wat +++ b/winch/filetests/filetests/x64/i32_rems/params.wat @@ -17,11 +17,11 @@ ;; 16: 83f900 cmp ecx, 0 ;; 19: 0f8502000000 jne 0x21 
;; 1f: 0f0b ud2 -;; 21: 83f9ff cmp ecx, -1 -;; 24: 0f850a000000 jne 0x34 -;; 2a: b800000000 mov eax, 0 -;; 2f: e903000000 jmp 0x37 -;; 34: 99 cdq +;; 21: 99 cdq +;; 22: 83f9ff cmp ecx, -1 +;; 25: 0f850a000000 jne 0x35 +;; 2b: ba00000000 mov edx, 0 +;; 30: e902000000 jmp 0x37 ;; 35: f7f9 idiv ecx ;; 37: 4889d0 mov rax, rdx ;; 3a: 4883c408 add rsp, 8 diff --git a/winch/filetests/filetests/x64/i32_rems/zero_zero.wat b/winch/filetests/filetests/x64/i32_rems/zero_zero.wat index dd56aff9b0..01358814fb 100644 --- a/winch/filetests/filetests/x64/i32_rems/zero_zero.wat +++ b/winch/filetests/filetests/x64/i32_rems/zero_zero.wat @@ -14,11 +14,11 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: 83f9ff cmp ecx, -1 -;; 1c: 0f850a000000 jne 0x2c -;; 22: b800000000 mov eax, 0 -;; 27: e903000000 jmp 0x2f -;; 2c: 99 cdq +;; 19: 99 cdq +;; 1a: 83f9ff cmp ecx, -1 +;; 1d: 0f850a000000 jne 0x2d +;; 23: ba00000000 mov edx, 0 +;; 28: e902000000 jmp 0x2f ;; 2d: f7f9 idiv ecx ;; 2f: 4889d0 mov rax, rdx ;; 32: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i32_remu/const.wat b/winch/filetests/filetests/x64/i32_remu/const.wat index 65dc05cef9..bbcb387cb6 100644 --- a/winch/filetests/filetests/x64/i32_remu/const.wat +++ b/winch/filetests/filetests/x64/i32_remu/const.wat @@ -14,8 +14,8 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 4889d0 mov rax, rdx -;; 23: 5d pop rbp -;; 24: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 4889d0 mov rax, rdx +;; 20: 5d pop rbp +;; 21: c3 ret diff --git a/winch/filetests/filetests/x64/i32_remu/one_zero.wat b/winch/filetests/filetests/x64/i32_remu/one_zero.wat index 9a482a68f5..404264c863 100644 --- a/winch/filetests/filetests/x64/i32_remu/one_zero.wat +++ b/winch/filetests/filetests/x64/i32_remu/one_zero.wat @@ -14,8 +14,8 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 
-;; 1e: f7f1 div ecx -;; 20: 4889d0 mov rax, rdx -;; 23: 5d pop rbp -;; 24: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 4889d0 mov rax, rdx +;; 20: 5d pop rbp +;; 21: c3 ret diff --git a/winch/filetests/filetests/x64/i32_remu/params.wat b/winch/filetests/filetests/x64/i32_remu/params.wat index 743ce4446d..9d6a2e632e 100644 --- a/winch/filetests/filetests/x64/i32_remu/params.wat +++ b/winch/filetests/filetests/x64/i32_remu/params.wat @@ -17,9 +17,9 @@ ;; 16: 83f900 cmp ecx, 0 ;; 19: 0f8502000000 jne 0x21 ;; 1f: 0f0b ud2 -;; 21: ba00000000 mov edx, 0 -;; 26: f7f1 div ecx -;; 28: 4889d0 mov rax, rdx -;; 2b: 4883c408 add rsp, 8 -;; 2f: 5d pop rbp -;; 30: c3 ret +;; 21: 31d2 xor edx, edx +;; 23: f7f1 div ecx +;; 25: 4889d0 mov rax, rdx +;; 28: 4883c408 add rsp, 8 +;; 2c: 5d pop rbp +;; 2d: c3 ret diff --git a/winch/filetests/filetests/x64/i32_remu/signed.wat b/winch/filetests/filetests/x64/i32_remu/signed.wat index c29c66e5c5..f886e5ac7c 100644 --- a/winch/filetests/filetests/x64/i32_remu/signed.wat +++ b/winch/filetests/filetests/x64/i32_remu/signed.wat @@ -14,8 +14,8 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 4889d0 mov rax, rdx -;; 23: 5d pop rbp -;; 24: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 4889d0 mov rax, rdx +;; 20: 5d pop rbp +;; 21: c3 ret diff --git a/winch/filetests/filetests/x64/i32_remu/zero_zero.wat b/winch/filetests/filetests/x64/i32_remu/zero_zero.wat index 671840c328..4654eafd47 100644 --- a/winch/filetests/filetests/x64/i32_remu/zero_zero.wat +++ b/winch/filetests/filetests/x64/i32_remu/zero_zero.wat @@ -14,8 +14,8 @@ ;; e: 83f900 cmp ecx, 0 ;; 11: 0f8502000000 jne 0x19 ;; 17: 0f0b ud2 -;; 19: ba00000000 mov edx, 0 -;; 1e: f7f1 div ecx -;; 20: 4889d0 mov rax, rdx -;; 23: 5d pop rbp -;; 24: c3 ret +;; 19: 31d2 xor edx, edx +;; 1b: f7f1 div ecx +;; 1d: 4889d0 mov rax, rdx +;; 20: 5d pop rbp +;; 21: c3 ret diff --git 
a/winch/filetests/filetests/x64/i64_divu/const.wat b/winch/filetests/filetests/x64/i64_divu/const.wat index 3e6b33ba96..c44bc60915 100644 --- a/winch/filetests/filetests/x64/i64_divu/const.wat +++ b/winch/filetests/filetests/x64/i64_divu/const.wat @@ -14,7 +14,7 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 5d pop rbp -;; 27: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 5d pop rbp +;; 25: c3 ret diff --git a/winch/filetests/filetests/x64/i64_divu/one_zero.wat b/winch/filetests/filetests/x64/i64_divu/one_zero.wat index 53bddb442c..7c5accf236 100644 --- a/winch/filetests/filetests/x64/i64_divu/one_zero.wat +++ b/winch/filetests/filetests/x64/i64_divu/one_zero.wat @@ -14,7 +14,7 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 5d pop rbp -;; 27: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 5d pop rbp +;; 25: c3 ret diff --git a/winch/filetests/filetests/x64/i64_divu/params.wat b/winch/filetests/filetests/x64/i64_divu/params.wat index 75aeddef3c..8dfbdbe85b 100644 --- a/winch/filetests/filetests/x64/i64_divu/params.wat +++ b/winch/filetests/filetests/x64/i64_divu/params.wat @@ -17,8 +17,8 @@ ;; 1a: 4883f900 cmp rcx, 0 ;; 1e: 0f8502000000 jne 0x26 ;; 24: 0f0b ud2 -;; 26: ba00000000 mov edx, 0 -;; 2b: 48f7f1 div rcx -;; 2e: 4883c410 add rsp, 0x10 -;; 32: 5d pop rbp -;; 33: c3 ret +;; 26: 4831d2 xor rdx, rdx +;; 29: 48f7f1 div rcx +;; 2c: 4883c410 add rsp, 0x10 +;; 30: 5d pop rbp +;; 31: c3 ret diff --git a/winch/filetests/filetests/x64/i64_divu/signed.wat b/winch/filetests/filetests/x64/i64_divu/signed.wat index 6fb06fa866..af7cc2d208 100644 --- a/winch/filetests/filetests/x64/i64_divu/signed.wat +++ b/winch/filetests/filetests/x64/i64_divu/signed.wat @@ -14,7 +14,7 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: 
ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 5d pop rbp -;; 27: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 5d pop rbp +;; 25: c3 ret diff --git a/winch/filetests/filetests/x64/i64_divu/zero_zero.wat b/winch/filetests/filetests/x64/i64_divu/zero_zero.wat index d0c087eee9..458e4965af 100644 --- a/winch/filetests/filetests/x64/i64_divu/zero_zero.wat +++ b/winch/filetests/filetests/x64/i64_divu/zero_zero.wat @@ -14,7 +14,7 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 5d pop rbp -;; 27: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 5d pop rbp +;; 25: c3 ret diff --git a/winch/filetests/filetests/x64/i64_rems/const.wat b/winch/filetests/filetests/x64/i64_rems/const.wat index 4eb4e83520..f8e1c34395 100644 --- a/winch/filetests/filetests/x64/i64_rems/const.wat +++ b/winch/filetests/filetests/x64/i64_rems/const.wat @@ -14,11 +14,11 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: 4883f9ff cmp rcx, -1 -;; 22: 0f850a000000 jne 0x32 -;; 28: b800000000 mov eax, 0 -;; 2d: e905000000 jmp 0x37 -;; 32: 4899 cqo +;; 1e: 4899 cqo +;; 20: 4883f9ff cmp rcx, -1 +;; 24: 0f850a000000 jne 0x34 +;; 2a: ba00000000 mov edx, 0 +;; 2f: e903000000 jmp 0x37 ;; 34: 48f7f9 idiv rcx ;; 37: 4889d0 mov rax, rdx ;; 3a: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i64_rems/one_zero.wat b/winch/filetests/filetests/x64/i64_rems/one_zero.wat index dc748d6221..3ef0f3289f 100644 --- a/winch/filetests/filetests/x64/i64_rems/one_zero.wat +++ b/winch/filetests/filetests/x64/i64_rems/one_zero.wat @@ -14,11 +14,11 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: 4883f9ff cmp rcx, -1 -;; 22: 0f850a000000 jne 0x32 -;; 28: b800000000 mov eax, 0 -;; 2d: e905000000 jmp 0x37 -;; 32: 4899 cqo +;; 1e: 4899 cqo +;; 20: 4883f9ff cmp rcx, -1 +;; 24: 0f850a000000 jne 0x34 +;; 2a: ba00000000 mov edx, 0 +;; 2f: 
e903000000 jmp 0x37 ;; 34: 48f7f9 idiv rcx ;; 37: 4889d0 mov rax, rdx ;; 3a: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i64_rems/overflow.wat b/winch/filetests/filetests/x64/i64_rems/overflow.wat index 33bd76cddd..d3c69a9c63 100644 --- a/winch/filetests/filetests/x64/i64_rems/overflow.wat +++ b/winch/filetests/filetests/x64/i64_rems/overflow.wat @@ -15,11 +15,11 @@ ;; 15: 4883f900 cmp rcx, 0 ;; 19: 0f8502000000 jne 0x21 ;; 1f: 0f0b ud2 -;; 21: 4883f9ff cmp rcx, -1 -;; 25: 0f850a000000 jne 0x35 -;; 2b: b800000000 mov eax, 0 -;; 30: e905000000 jmp 0x3a -;; 35: 4899 cqo +;; 21: 4899 cqo +;; 23: 4883f9ff cmp rcx, -1 +;; 27: 0f850a000000 jne 0x37 +;; 2d: ba00000000 mov edx, 0 +;; 32: e903000000 jmp 0x3a ;; 37: 48f7f9 idiv rcx ;; 3a: 4889d0 mov rax, rdx ;; 3d: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i64_rems/params.wat b/winch/filetests/filetests/x64/i64_rems/params.wat index 5fc16387e8..9851aa14c4 100644 --- a/winch/filetests/filetests/x64/i64_rems/params.wat +++ b/winch/filetests/filetests/x64/i64_rems/params.wat @@ -17,11 +17,11 @@ ;; 1a: 4883f900 cmp rcx, 0 ;; 1e: 0f8502000000 jne 0x26 ;; 24: 0f0b ud2 -;; 26: 4883f9ff cmp rcx, -1 -;; 2a: 0f850a000000 jne 0x3a -;; 30: b800000000 mov eax, 0 -;; 35: e905000000 jmp 0x3f -;; 3a: 4899 cqo +;; 26: 4899 cqo +;; 28: 4883f9ff cmp rcx, -1 +;; 2c: 0f850a000000 jne 0x3c +;; 32: ba00000000 mov edx, 0 +;; 37: e903000000 jmp 0x3f ;; 3c: 48f7f9 idiv rcx ;; 3f: 4889d0 mov rax, rdx ;; 42: 4883c410 add rsp, 0x10 diff --git a/winch/filetests/filetests/x64/i64_rems/zero_zero.wat b/winch/filetests/filetests/x64/i64_rems/zero_zero.wat index ad5a74bdc1..3a745e384a 100644 --- a/winch/filetests/filetests/x64/i64_rems/zero_zero.wat +++ b/winch/filetests/filetests/x64/i64_rems/zero_zero.wat @@ -14,11 +14,11 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: 4883f9ff cmp rcx, -1 -;; 22: 0f850a000000 jne 0x32 -;; 28: b800000000 mov eax, 0 -;; 2d: e905000000 jmp 0x37 -;; 32: 4899 cqo +;; 1e: 
4899 cqo +;; 20: 4883f9ff cmp rcx, -1 +;; 24: 0f850a000000 jne 0x34 +;; 2a: ba00000000 mov edx, 0 +;; 2f: e903000000 jmp 0x37 ;; 34: 48f7f9 idiv rcx ;; 37: 4889d0 mov rax, rdx ;; 3a: 5d pop rbp diff --git a/winch/filetests/filetests/x64/i64_remu/const.wat b/winch/filetests/filetests/x64/i64_remu/const.wat index c139c6d72d..3b86a6769f 100644 --- a/winch/filetests/filetests/x64/i64_remu/const.wat +++ b/winch/filetests/filetests/x64/i64_remu/const.wat @@ -14,8 +14,8 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 4889d0 mov rax, rdx -;; 29: 5d pop rbp -;; 2a: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 4889d0 mov rax, rdx +;; 27: 5d pop rbp +;; 28: c3 ret diff --git a/winch/filetests/filetests/x64/i64_remu/one_zero.wat b/winch/filetests/filetests/x64/i64_remu/one_zero.wat index 2c79349fa3..e31033b33c 100644 --- a/winch/filetests/filetests/x64/i64_remu/one_zero.wat +++ b/winch/filetests/filetests/x64/i64_remu/one_zero.wat @@ -14,8 +14,8 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 4889d0 mov rax, rdx -;; 29: 5d pop rbp -;; 2a: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 4889d0 mov rax, rdx +;; 27: 5d pop rbp +;; 28: c3 ret diff --git a/winch/filetests/filetests/x64/i64_remu/params.wat b/winch/filetests/filetests/x64/i64_remu/params.wat index e37f955349..773debc841 100644 --- a/winch/filetests/filetests/x64/i64_remu/params.wat +++ b/winch/filetests/filetests/x64/i64_remu/params.wat @@ -17,9 +17,9 @@ ;; 1a: 4883f900 cmp rcx, 0 ;; 1e: 0f8502000000 jne 0x26 ;; 24: 0f0b ud2 -;; 26: ba00000000 mov edx, 0 -;; 2b: 48f7f1 div rcx -;; 2e: 4889d0 mov rax, rdx -;; 31: 4883c410 add rsp, 0x10 -;; 35: 5d pop rbp -;; 36: c3 ret +;; 26: 4831d2 xor rdx, rdx +;; 29: 48f7f1 div rcx +;; 2c: 4889d0 mov rax, rdx +;; 2f: 4883c410 add rsp, 0x10 +;; 33: 5d pop rbp +;; 34: c3 
ret diff --git a/winch/filetests/filetests/x64/i64_remu/signed.wat b/winch/filetests/filetests/x64/i64_remu/signed.wat index b0b4cdce9a..77b1859824 100644 --- a/winch/filetests/filetests/x64/i64_remu/signed.wat +++ b/winch/filetests/filetests/x64/i64_remu/signed.wat @@ -14,8 +14,8 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 4889d0 mov rax, rdx -;; 29: 5d pop rbp -;; 2a: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 4889d0 mov rax, rdx +;; 27: 5d pop rbp +;; 28: c3 ret diff --git a/winch/filetests/filetests/x64/i64_remu/zero_zero.wat b/winch/filetests/filetests/x64/i64_remu/zero_zero.wat index 7fbf7a9ae8..df358d9227 100644 --- a/winch/filetests/filetests/x64/i64_remu/zero_zero.wat +++ b/winch/filetests/filetests/x64/i64_remu/zero_zero.wat @@ -14,8 +14,8 @@ ;; 12: 4883f900 cmp rcx, 0 ;; 16: 0f8502000000 jne 0x1e ;; 1c: 0f0b ud2 -;; 1e: ba00000000 mov edx, 0 -;; 23: 48f7f1 div rcx -;; 26: 4889d0 mov rax, rdx -;; 29: 5d pop rbp -;; 2a: c3 ret +;; 1e: 4831d2 xor rdx, rdx +;; 21: 48f7f1 div rcx +;; 24: 4889d0 mov rax, rdx +;; 27: 5d pop rbp +;; 28: c3 ret