From 186c7c3b89c0e172ff39c0126ca71bf9e995dc8c Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 30 Aug 2022 17:21:14 -0700 Subject: [PATCH] x64: clean up regalloc-related semantics on several instructions. (#4811) * x64: clean up regalloc-related semantics on several instructions. This PR removes all uses of "modify" operands on instructions in the x64 backend, and also removes all uses of "pinned vregs", or vregs that are explicitly tied to particular physical registers. In place of both of these mechanisms, which are legacies of the old regalloc design and supported via compatibility code, the backend now uses operand constraints. This is more flexible as it allows the regalloc to see the liveranges and constraints without "reverse-engineering" move instructions. Eventually, after removing all such uses (including in other backends and by the ABI code), we can remove the compatibility code in regalloc2, significantly simplifying its liverange-construction frontend and thus allowing for higher confidence in correctness as well as possibly a bit more compilation speed. Curiously, there are a few extra move instructions now; they are likely poor splitting decisions and I can try to chase these down later. * Fix cranelift-codegen tests. * Review feedback. 
--- cranelift/codegen/src/isa/x64/inst.isle | 76 ++---- cranelift/codegen/src/isa/x64/inst/emit.rs | 77 +++++-- .../codegen/src/isa/x64/inst/emit_tests.rs | 216 ++++++++++++++---- cranelift/codegen/src/isa/x64/inst/mod.rs | 149 ++++++++---- cranelift/codegen/src/isa/x64/lower/isle.rs | 112 +++++---- cranelift/codegen/src/isa/x64/mod.rs | 55 +++-- .../filetests/filetests/isa/x64/branches.clif | 3 +- .../filetests/isa/x64/div-checks.clif | 20 +- .../filetests/filetests/isa/x64/fcvt.clif | 50 ++-- .../filetests/filetests/isa/x64/sdiv.clif | 8 +- .../filetests/isa/x64/shuffle-avx512.clif | 19 +- .../filetests/filetests/isa/x64/srem.clif | 12 +- .../filetests/filetests/isa/x64/udiv.clif | 15 +- .../filetests/filetests/isa/x64/urem.clif | 15 +- 14 files changed, 543 insertions(+), 284 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index bd25f388ea..ff6bbd1775 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -64,24 +64,13 @@ ;; A synthetic sequence to implement the right inline checks for ;; remainder and division, assuming the dividend is in %rax. ;; - ;; Puts the result back into %rax if is_div, %rdx if !is_div, to mimic - ;; what the div instruction does. - ;; ;; The generated code sequence is described in the emit's function match ;; arm for this instruction. - ;; - ;; Note: %rdx is marked as modified by this instruction, to avoid an - ;; early clobber problem with the temporary and divisor registers. Make - ;; sure to zero %rdx right before this instruction, or you might run into - ;; regalloc failures where %rdx is live before its first def! (CheckedDivOrRemSeq (kind DivOrRemKind) (size OperandSize) (dividend_lo Gpr) (dividend_hi Gpr) - ;; The divisor operand. Note it's marked as modified - ;; so that it gets assigned a register different from - ;; the temporary. 
- (divisor WritableGpr) + (divisor Gpr) (dst_quotient WritableGpr) (dst_remainder WritableGpr) (tmp OptionWritableGpr)) @@ -205,12 +194,21 @@ (src3 XmmMem) (dst WritableXmm)) - ;; XMM (scalar or vector) binary op that relies on the EVEX prefix. + ;; XMM (scalar or vector) binary op that relies on the EVEX + ;; prefix. Takes two inputs. (XmmRmREvex (op Avx512Opcode) (src1 XmmMem) (src2 Xmm) (dst WritableXmm)) + ;; XMM (scalar or vector) binary op that relies on the EVEX + ;; prefix. Takes three inputs. + (XmmRmREvex3 (op Avx512Opcode) + (src1 XmmMem) + (src2 Xmm) + (src3 Xmm) + (dst WritableXmm)) + ;; XMM (scalar or vector) unary op: mov between XMM registers (32 64) ;; (reg addr) reg, sqrt, etc. ;; @@ -255,13 +253,7 @@ ;; Converts an unsigned int64 to a float32/float64. (CvtUint64ToFloatSeq (dst_size OperandSize) ;; 4 or 8 - ;; A copy of the source register, fed by - ;; lowering. It is marked as modified during - ;; register allocation to make sure that the - ;; temporary registers differ from the src register, - ;; since both registers are live at the same time in - ;; the generated code sequence. - (src WritableGpr) + (src Gpr) (dst WritableXmm) (tmp_gpr1 WritableGpr) (tmp_gpr2 WritableGpr)) @@ -270,13 +262,7 @@ (CvtFloatToSintSeq (dst_size OperandSize) (src_size OperandSize) (is_saturating bool) - ;; A copy of the source register, fed by - ;; lowering. It is marked as modified during - ;; register allocation to make sure that the - ;; temporary registers differ from the src register, - ;; since both registers are live at the same time in - ;; the generated code sequence. - (src WritableXmm) + (src Xmm) (dst WritableGpr) (tmp_gpr WritableGpr) (tmp_xmm WritableXmm)) @@ -285,13 +271,7 @@ (CvtFloatToUintSeq (dst_size OperandSize) (src_size OperandSize) (is_saturating bool) - ;; A copy of the source register, fed by - ;; lowering. 
It is marked as modified during - ;; register allocation to make sure that the - ;; temporary registers differ from the src register, - ;; since both registers are live at the same time in - ;; the generated code sequence. - (src WritableXmm) + (src Xmm) (dst WritableGpr) (tmp_gpr WritableGpr) (tmp_xmm WritableXmm)) @@ -2769,11 +2749,11 @@ (decl x64_vpermi2b (Xmm Xmm Xmm) Xmm) (rule (x64_vpermi2b src1 src2 src3) (let ((dst WritableXmm (temp_writable_xmm)) - (_ Unit (emit (gen_move $I8X16 dst src3))) - (_ Unit (emit (MInst.XmmRmREvex (Avx512Opcode.Vpermi2b) - src1 - src2 - dst)))) + (_ Unit (emit (MInst.XmmRmREvex3 (Avx512Opcode.Vpermi2b) + src1 + src2 + src3 + dst)))) dst)) ;; Helper for creating `MInst.MulHi` instructions. @@ -3214,12 +3194,10 @@ (decl cvt_u64_to_float_seq (Type Gpr) Xmm) (rule (cvt_u64_to_float_seq ty src) (let ((size OperandSize (raw_operand_size_of_type ty)) - (src_copy WritableGpr (temp_writable_gpr)) (dst WritableXmm (temp_writable_xmm)) (tmp_gpr1 WritableGpr (temp_writable_gpr)) (tmp_gpr2 WritableGpr (temp_writable_gpr)) - (_ Unit (emit (gen_move $I64 src_copy src))) - (_ Unit (emit (MInst.CvtUint64ToFloatSeq size src_copy dst tmp_gpr1 tmp_gpr2)))) + (_ Unit (emit (MInst.CvtUint64ToFloatSeq size src dst tmp_gpr1 tmp_gpr2)))) dst)) (decl cvt_float_to_uint_seq (Type Value bool) Gpr) @@ -3227,13 +3205,10 @@ (let ((out_size OperandSize (raw_operand_size_of_type out_ty)) (src_size OperandSize (raw_operand_size_of_type src_ty)) - (tmp WritableXmm (temp_writable_xmm)) - (_ Unit (emit (gen_move src_ty tmp src))) - (dst WritableGpr (temp_writable_gpr)) (tmp_xmm WritableXmm (temp_writable_xmm)) (tmp_gpr WritableGpr (temp_writable_gpr)) - (_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating tmp dst tmp_gpr tmp_xmm)))) + (_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm)))) dst)) (decl cvt_float_to_sint_seq (Type Value bool) Gpr) @@ -3241,13 +3216,10 @@ (let ((out_size OperandSize 
(raw_operand_size_of_type out_ty)) (src_size OperandSize (raw_operand_size_of_type src_ty)) - (tmp WritableXmm (temp_writable_xmm)) - (_ Unit (emit (gen_move src_ty tmp src))) - (dst WritableGpr (temp_writable_gpr)) (tmp_xmm WritableXmm (temp_writable_xmm)) (tmp_gpr WritableGpr (temp_writable_gpr)) - (_ Unit (emit (MInst.CvtFloatToSintSeq out_size src_size is_saturating tmp dst tmp_gpr tmp_xmm)))) + (_ Unit (emit (MInst.CvtFloatToSintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm)))) dst)) (decl fcvt_uint_mask_const () VCodeConstant) @@ -3396,10 +3368,6 @@ ;; addresses). (tmp1 WritableGpr (temp_writable_gpr)) - ;; Put a zero in tmp1. This is needed for Spectre mitigations (a - ;; CMOV that zeroes the index on misspeculation). - (_ Unit (emit (MInst.Imm (OperandSize.Size32) 0 tmp1))) - ;; This temporary is used as a signed integer of 32-bits (for the ;; wasm-table index) and then 64-bits (address addend). The small ;; lie about the I64 type is benign, since the temporary is dead diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 6e6ef44bd5..8489338054 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -377,11 +377,11 @@ pub(crate) fn emit( } => { let dividend_lo = allocs.next(dividend_lo.to_reg()); let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); - let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); debug_assert_eq!(dividend_lo, regs::rax()); debug_assert_eq!(dst_quotient, regs::rax()); - debug_assert_eq!(dst_remainder, regs::rdx()); if size.to_bits() > 8 { + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + debug_assert_eq!(dst_remainder, regs::rdx()); let dividend_hi = allocs.next(dividend_hi.to_reg()); debug_assert_eq!(dividend_hi, regs::rdx()); } @@ -468,7 +468,11 @@ pub(crate) fn emit( let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); debug_assert_eq!(src, 
regs::rax()); - debug_assert_eq!(dst, regs::rdx()); + if *size == OperandSize::Size8 { + debug_assert_eq!(dst, regs::rax()); + } else { + debug_assert_eq!(dst, regs::rdx()); + } match size { OperandSize::Size8 => { sink.put1(0x66); @@ -498,7 +502,7 @@ pub(crate) fn emit( } => { let dividend_lo = allocs.next(dividend_lo.to_reg()); let dividend_hi = allocs.next(dividend_hi.to_reg()); - let divisor = allocs.next(divisor.to_reg().to_reg()); + let divisor = allocs.next(divisor.to_reg()); let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg())); @@ -597,18 +601,45 @@ pub(crate) fn emit( sink.bind_label(do_op); } + let dividend_lo = Gpr::new(regs::rax()).unwrap(); + let dst_quotient = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()); + let (dividend_hi, dst_remainder) = if *size == OperandSize::Size8 { + ( + Gpr::new(regs::rax()).unwrap(), + Writable::from_reg(Gpr::new(regs::rax()).unwrap()), + ) + } else { + ( + Gpr::new(regs::rdx()).unwrap(), + Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + ) + }; + // Fill in the high parts: if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. - let inst = Inst::sign_extend_data(*size); + let inst = + Inst::sign_extend_data(*size, dividend_lo, WritableGpr::from_reg(dividend_hi)); inst.emit(&[], sink, info, state); - } else { + } else if *size != OperandSize::Size8 { // zero for unsigned opcodes. 
- let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); + let inst = Inst::imm( + OperandSize::Size64, + 0, + Writable::from_reg(dividend_hi.to_reg()), + ); inst.emit(&[], sink, info, state); } - let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor)); + let inst = Inst::div( + *size, + kind.is_signed(), + RegMem::reg(divisor), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + ); inst.emit(&[], sink, info, state); // Lowering takes care of moving the result back into the right register, see comment @@ -1393,7 +1424,8 @@ pub(crate) fn emit( // ;; generated by lowering: cmp #jmp_table_size, %idx // jnb $default_target // movl %idx, %tmp2 - // cmovnb %tmp1, %tmp2 ;; Spectre mitigation; we require tmp1 to be zero on entry. + // mov $0, %tmp1 + // cmovnb %tmp1, %tmp2 ;; Spectre mitigation. // lea start_of_jump_table_offset(%rip), %tmp1 // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4 // addq %tmp2, %tmp1 @@ -1406,6 +1438,13 @@ pub(crate) fn emit( let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2); inst.emit(&[], sink, info, state); + // Zero `tmp1` to overwrite `tmp2` with zeroes on the + // out-of-bounds case (Spectre mitigation using CMOV). + // Note that we need to do this with a move-immediate + // form, because we cannot clobber the flags. + let inst = Inst::imm(OperandSize::Size32, 0, tmp1); + inst.emit(&[], sink, info, state); + // Spectre mitigation: CMOV to zero the index if the out-of-bounds branch above misspeculated. let inst = Inst::cmove( OperandSize::Size64, @@ -1768,9 +1807,21 @@ pub(crate) fn emit( src1, src2, dst, + } + | Inst::XmmRmREvex3 { + op, + src1, + src2, + dst, + // `dst` reuses `src3`. + .. } => { let dst = allocs.next(dst.to_reg().to_reg()); let src2 = allocs.next(src2.to_reg()); + if let Inst::XmmRmREvex3 { src3, .. 
} = inst { + let src3 = allocs.next(src3.to_reg()); + debug_assert_eq!(src3, dst); + } let src1 = src1.clone().to_reg_mem().with_allocs(allocs); let (w, opcode) = match op { @@ -2086,7 +2137,7 @@ pub(crate) fn emit( tmp_gpr1, tmp_gpr2, } => { - let src = allocs.next(src.to_reg().to_reg()); + let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr1 = allocs.next(tmp_gpr1.to_reg().to_reg()); let tmp_gpr2 = allocs.next(tmp_gpr2.to_reg().to_reg()); @@ -2155,7 +2206,7 @@ pub(crate) fn emit( let inst = Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(1), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 1 }).unwrap(), Writable::from_reg(tmp_gpr1), ); inst.emit(&[], sink, info, state); @@ -2208,7 +2259,7 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { - let src = allocs.next(src.to_reg().to_reg()); + let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); @@ -2417,7 +2468,7 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { - let src = allocs.next(src.to_reg().to_reg()); + let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index d0dde74727..9cbde12668 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -1723,6 +1723,10 @@ fn test_x64_emit() { OperandSize::Size32, true, /*signed*/ RegMem::reg(regs::rsi()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "F7FE", "idiv %eax, %edx, %esi, %eax, %edx", @@ -1732,6 +1736,10 @@ fn test_x64_emit() { 
OperandSize::Size64, true, /*signed*/ RegMem::reg(regs::r15()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "49F7FF", "idiv %rax, %rdx, %r15, %rax, %rdx", @@ -1741,6 +1749,10 @@ fn test_x64_emit() { OperandSize::Size32, false, /*signed*/ RegMem::reg(regs::r14()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "41F7F6", "div %eax, %edx, %r14d, %eax, %edx", @@ -1750,19 +1762,39 @@ fn test_x64_emit() { OperandSize::Size64, false, /*signed*/ RegMem::reg(regs::rdi()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "48F7F7", "div %rax, %rdx, %rdi, %rax, %rdx", )); insns.push(( - Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rax())), + Inst::div( + OperandSize::Size8, + false, + RegMem::reg(regs::rax()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "F6F0", - "div %al, (none), %al, %al, %dl", + "div %al, (none), %al, %al, (none)", )); insns.push(( - Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rsi())), + Inst::div( + OperandSize::Size8, + false, + RegMem::reg(regs::rsi()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "40F6F6", - "div %al, (none), %sil, %al, %dl", + "div %al, (none), %sil, %al, (none)", )); // ======================================================== @@ -1807,25 +1839,41 @@ fn test_x64_emit() { // 
======================================================== // cbw insns.push(( - Inst::sign_extend_data(OperandSize::Size8), + Inst::sign_extend_data( + OperandSize::Size8, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + ), "6698", - "cbw %al, %dl", + "cbw %al, %al", )); // ======================================================== // cdq family: SignExtendRaxRdx insns.push(( - Inst::sign_extend_data(OperandSize::Size16), + Inst::sign_extend_data( + OperandSize::Size16, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "6699", "cwd %ax, %dx", )); insns.push(( - Inst::sign_extend_data(OperandSize::Size32), + Inst::sign_extend_data( + OperandSize::Size32, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "99", "cdq %eax, %edx", )); insns.push(( - Inst::sign_extend_data(OperandSize::Size64), + Inst::sign_extend_data( + OperandSize::Size64, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "4899", "cqo %rax, %rdx", )); @@ -2813,47 +2861,92 @@ fn test_x64_emit() { // ======================================================== // Shift_R insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_rdi), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rdi, + ), "D3E7", "shll %cl, %edi, %edi", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_r12), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_r12, + ), "41D3E4", "shll %cl, %r12d, %r12d", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(2), w_r8), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + w_r8, + ), "41C1E002", "shll $2, %r8d, %r8d", 
)); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(31), w_r13), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), + w_r13, + ), "41C1E51F", "shll $31, %r13d, %r13d", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_r13), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_r13, + ), "49D3E5", "shlq %cl, %r13, %r13", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_rdi), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rdi, + ), "48D3E7", "shlq %cl, %rdi, %rdi", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(2), w_r8), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + w_r8, + ), "49C1E002", "shlq $2, %r8, %r8", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(3), w_rbx), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(), + w_rbx, + ), "48C1E303", "shlq $3, %rbx, %rbx", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(63), w_r13), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), + w_r13, + ), "49C1E53F", "shlq $63, %r13, %r13", )); @@ -2861,7 +2954,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightLogical, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "D3EF", @@ -2871,7 +2964,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightLogical, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "41C1E802", @@ -2881,7 +2974,7 @@ fn test_x64_emit() { 
Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightLogical, - Some(31), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), w_r13, ), "41C1ED1F", @@ -2891,7 +2984,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "48D3EF", @@ -2901,7 +2994,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "49C1E802", @@ -2911,7 +3004,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(63), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), w_r13, ), "49C1ED3F", @@ -2921,7 +3014,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightArithmetic, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "D3FF", @@ -2931,7 +3024,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightArithmetic, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "41C1F802", @@ -2941,7 +3034,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightArithmetic, - Some(31), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), w_r13, ), "41C1FD1F", @@ -2951,7 +3044,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightArithmetic, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "48D3FF", @@ -2961,7 +3054,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightArithmetic, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "49C1F802", @@ -2971,54 +3064,99 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightArithmetic, - Some(63), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), w_r13, ), "49C1FD3F", "sarq $63, %r13, %r13", )); insns.push(( - Inst::shift_r(OperandSize::Size64, 
ShiftKind::RotateLeft, None, w_r8), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::RotateLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_r8, + ), "49D3C0", "rolq %cl, %r8, %r8", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::RotateLeft, Some(3), w_r9), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::RotateLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(), + w_r9, + ), "41C1C103", "roll $3, %r9d, %r9d", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::RotateRight, None, w_rsi), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rsi, + ), "D3CE", "rorl %cl, %esi, %esi", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::RotateRight, Some(5), w_r15), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + w_r15, + ), "49C1CF05", "rorq $5, %r15, %r15", )); insns.push(( - Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rsi), + Inst::shift_r( + OperandSize::Size8, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rsi, + ), "40D2CE", "rorb %cl, %sil, %sil", )); insns.push(( - Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rax), + Inst::shift_r( + OperandSize::Size8, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rax, + ), "D2C8", "rorb %cl, %al, %al", )); insns.push(( - Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, Some(5), w_r15), + Inst::shift_r( + OperandSize::Size8, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + w_r15, + ), "41C0CF05", "rorb $5, %r15b, %r15b", )); insns.push(( - Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, None, w_rsi), + Inst::shift_r( + OperandSize::Size16, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rsi, 
+ ), "66D3CE", "rorw %cl, %si, %si", )); insns.push(( - Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, Some(5), w_r15), + Inst::shift_r( + OperandSize::Size16, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + w_r15, + ), "6641C1CF05", "rorw $5, %r15w, %r15w", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9d7f1bd0f4..09b5993298 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -131,7 +131,9 @@ impl Inst { | Inst::XmmToGpr { op, .. } | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()], - Inst::XmmUnaryRmREvex { op, .. } | Inst::XmmRmREvex { op, .. } => op.available_from(), + Inst::XmmUnaryRmREvex { op, .. } + | Inst::XmmRmREvex { op, .. } + | Inst::XmmRmREvex3 { op, .. } => op.available_from(), Inst::XmmRmRVex { op, .. } => op.available_from(), } @@ -195,47 +197,55 @@ impl Inst { } } - pub(crate) fn div(size: OperandSize, signed: bool, divisor: RegMem) -> Inst { + pub(crate) fn div( + size: OperandSize, + signed: bool, + divisor: RegMem, + dividend_lo: Gpr, + dividend_hi: Gpr, + dst_quotient: WritableGpr, + dst_remainder: WritableGpr, + ) -> Inst { divisor.assert_regclass_is(RegClass::Int); Inst::Div { size, signed, divisor: GprMem::new(divisor).unwrap(), - dividend_lo: Gpr::new(regs::rax()).unwrap(), - dividend_hi: Gpr::new(regs::rdx()).unwrap(), - dst_quotient: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), - dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, } } pub(crate) fn checked_div_or_rem_seq( kind: DivOrRemKind, size: OperandSize, - divisor: Writable<Reg>, + divisor: Reg, + dividend_lo: Gpr, + dividend_hi: Gpr, + dst_quotient: WritableGpr, + dst_remainder: WritableGpr, tmp: Option<Writable<Reg>>, ) -> Inst { - debug_assert!(divisor.to_reg().class() == RegClass::Int); + debug_assert!(divisor.class() == RegClass::Int); 
debug_assert!(tmp .map(|tmp| tmp.to_reg().class() == RegClass::Int) .unwrap_or(true)); Inst::CheckedDivOrRemSeq { kind, size, - divisor: WritableGpr::from_writable_reg(divisor).unwrap(), - dividend_lo: Gpr::new(regs::rax()).unwrap(), - dividend_hi: Gpr::new(regs::rdx()).unwrap(), - dst_quotient: Writable::from_reg(Gpr::new(regs::rax()).unwrap()), - dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + divisor: Gpr::new(divisor).unwrap(), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()), } } - pub(crate) fn sign_extend_data(size: OperandSize) -> Inst { - Inst::SignExtendData { - size, - src: Gpr::new(regs::rax()).unwrap(), - dst: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), - } + pub(crate) fn sign_extend_data(size: OperandSize, src: Gpr, dst: WritableGpr) -> Inst { + Inst::SignExtendData { size, src, dst } } pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst { @@ -415,24 +425,18 @@ impl Inst { pub(crate) fn shift_r( size: OperandSize, kind: ShiftKind, - num_bits: Option<u8>, + num_bits: Imm8Gpr, dst: Writable<Reg>, ) -> Inst { - debug_assert!(if let Some(num_bits) = num_bits { - num_bits < size.to_bits() - } else { - true - }); + if let Imm8Reg::Imm8 { imm: num_bits } = num_bits.clone().to_imm8_reg() { + debug_assert!(num_bits < size.to_bits()); + } debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::ShiftR { size, kind, src: Gpr::new(dst.to_reg()).unwrap(), - num_bits: Imm8Gpr::new(match num_bits { - Some(imm) => Imm8Reg::Imm8 { imm }, - None => Imm8Reg::Reg { reg: regs::rcx() }, - }) - .unwrap(), + num_bits, dst: WritableGpr::from_writable_reg(dst).unwrap(), } } @@ -781,8 +785,11 @@ impl PrettyPrint for Inst { let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); let dst_quotient = pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); - let dst_remainder = - 
pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_remainder = if size.to_bits() > 8 { + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs) + } else { + "(none)".to_string() + }; let dividend_hi = if size.to_bits() > 8 { pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs) } else { @@ -842,7 +849,7 @@ impl PrettyPrint for Inst { } => { let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); - let divisor = pretty_print_reg(divisor.to_reg().to_reg(), size.to_bytes(), allocs); + let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs); let dst_quotient = pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); let dst_remainder = @@ -949,12 +956,34 @@ impl PrettyPrint for Inst { dst, .. } => { - let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); let src1 = src1.pretty_print(8, allocs); format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst) } + Inst::XmmRmREvex3 { + op, + src1, + src2, + src3, + dst, + .. + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); + let src3 = pretty_print_reg(src3.to_reg(), 8, allocs); + let src1 = src1.pretty_print(8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(op.to_string()), + src1, + src2, + src3, + dst + ) + } + Inst::XmmMinMaxSeq { lhs, rhs, @@ -1084,7 +1113,7 @@ impl PrettyPrint for Inst { tmp_gpr2, .. 
} => { - let src = pretty_print_reg(src.to_reg().to_reg(), 8, allocs); + let src = pretty_print_reg(src.to_reg(), 8, allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8, allocs); let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8, allocs); @@ -1114,7 +1143,7 @@ impl PrettyPrint for Inst { tmp_gpr, is_saturating, } => { - let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); @@ -1142,7 +1171,7 @@ impl PrettyPrint for Inst { tmp_xmm, is_saturating, } => { - let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); @@ -1424,9 +1453,19 @@ impl PrettyPrint for Inst { not_taken.to_string() ), - Inst::JmpTableSeq { idx, .. } => { + Inst::JmpTableSeq { + idx, tmp1, tmp2, .. 
+ } => { let idx = pretty_print_reg(*idx, 8, allocs); - format!("{} {}", ljustify("br_table".into()), idx) + let tmp1 = pretty_print_reg(tmp1.to_reg(), 8, allocs); + let tmp2 = pretty_print_reg(tmp2.to_reg(), 8, allocs); + format!( + "{} {}, {}, {}", + ljustify("br_table".into()), + idx, + tmp1, + tmp2 + ) } Inst::JmpUnknown { target } => { @@ -1605,8 +1644,8 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol } => { collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); - collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); if size.to_bits() > 8 { + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); } divisor.get_operands(collector); @@ -1634,10 +1673,12 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol } => { collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); - collector.reg_mod(divisor.to_writable_reg()); + collector.reg_use(divisor.to_reg()); collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); if let Some(tmp) = tmp { + // Early def so that the temporary register does not + // conflict with inputs or outputs. collector.reg_early_def(tmp.to_writable_reg()); } } @@ -1718,13 +1759,25 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol dst, .. } => { - match *op { - Avx512Opcode::Vpermi2b => collector.reg_mod(dst.to_writable_reg()), - _ => collector.reg_def(dst.to_writable_reg()), - } + assert_ne!(*op, Avx512Opcode::Vpermi2b); + collector.reg_def(dst.to_writable_reg()); collector.reg_use(src2.to_reg()); src1.get_operands(collector); } + Inst::XmmRmREvex3 { + op, + src1, + src2, + src3, + dst, + .. 
+ } => { + assert_eq!(*op, Avx512Opcode::Vpermi2b); + collector.reg_reuse_def(dst.to_writable_reg(), 2); // Reuse `src3`. + collector.reg_use(src2.to_reg()); + collector.reg_use(src3.to_reg()); + src1.get_operands(collector); + } Inst::XmmRmRImm { op, src1, @@ -1795,7 +1848,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol tmp_gpr2, .. } => { - collector.reg_mod(src.to_writable_reg()); + collector.reg_use(src.to_reg()); collector.reg_def(dst.to_writable_reg()); collector.reg_early_def(tmp_gpr1.to_writable_reg()); collector.reg_early_def(tmp_gpr2.to_writable_reg()); @@ -1814,7 +1867,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol tmp_xmm, .. } => { - collector.reg_mod(src.to_writable_reg()); + collector.reg_use(src.to_reg()); collector.reg_def(dst.to_writable_reg()); collector.reg_early_def(tmp_gpr.to_writable_reg()); collector.reg_early_def(tmp_xmm.to_writable_reg()); @@ -1911,7 +1964,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol .. } => { collector.reg_use(*idx); - collector.reg_mod(*tmp1); + collector.reg_early_def(*tmp1); collector.reg_early_def(*tmp2); } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index d76e72f88c..2148d4f400 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -955,40 +955,34 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { let is_div = kind.is_div(); let size = OperandSize::from_ty(ty); - self.lower_ctx.emit(MInst::gen_move( - Writable::from_reg(regs::rax()), - dividend.to_reg(), - ty, - )); + let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. 
if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem { // A vcode meta-instruction is used to lower the inline checks, since they embed // pc-relative offsets that must not change, thus requiring regalloc to not // interfere by introducing spills and reloads. - // - // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that - // regalloc is aware of the coalescing opportunity between rax/rdx and the - // destination register. - let divisor_copy = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - self.lower_ctx - .emit(MInst::gen_move(divisor_copy, divisor.to_reg(), types::I64)); - let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 { Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap()) } else { None }; - // TODO use xor - self.lower_ctx.emit(MInst::imm( + let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + self.lower_ctx.emit(MInst::alu_rmi_r( OperandSize::Size32, - 0, - Writable::from_reg(regs::rdx()), + AluRmiROpcode::Xor, + RegMemImm::reg(dividend_hi.to_reg()), + dividend_hi, )); self.lower_ctx.emit(MInst::checked_div_or_rem_seq( kind.clone(), size, - divisor_copy, + divisor.to_reg(), + Gpr::new(dividend.to_reg()).unwrap(), + Gpr::new(dividend_hi.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()), + WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()), tmp, )); } else { @@ -997,51 +991,89 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { // divisor into a register instead. let divisor = RegMem::reg(divisor.to_reg()); + let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + // Fill in the high parts: - if kind.is_signed() { - // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for - // signed opcodes. 
- self.lower_ctx.emit(MInst::sign_extend_data(size)); + let dividend_lo = if kind.is_signed() && ty == types::I8 { + let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + // 8-bit div takes its dividend in only the `lo` reg. + self.lower_ctx.emit(MInst::sign_extend_data( + size, + Gpr::new(dividend.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()), + )); + // `dividend_hi` is not used by the Div below, so we + // don't def it here. + + dividend_lo.to_reg() + } else if kind.is_signed() { + // 16-bit and higher div takes its operand in hi:lo + // with half in each (64:64, 32:32 or 16:16). + self.lower_ctx.emit(MInst::sign_extend_data( + size, + Gpr::new(dividend.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()), + )); + + dividend.to_reg() } else if ty == types::I8 { + let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); self.lower_ctx.emit(MInst::movzx_rm_r( ExtMode::BL, - RegMem::reg(regs::rax()), - Writable::from_reg(regs::rax()), + RegMem::reg(dividend.to_reg()), + dividend_lo, )); + + dividend_lo.to_reg() } else { // zero for unsigned opcodes. - self.lower_ctx.emit(MInst::imm( - OperandSize::Size64, - 0, - Writable::from_reg(regs::rdx()), - )); - } + self.lower_ctx + .emit(MInst::imm(OperandSize::Size64, 0, dividend_hi)); + + dividend.to_reg() + }; // Emit the actual idiv. - self.lower_ctx - .emit(MInst::div(size, kind.is_signed(), divisor)); + self.lower_ctx.emit(MInst::div( + size, + kind.is_signed(), + divisor, + Gpr::new(dividend_lo).unwrap(), + Gpr::new(dividend_hi.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()), + WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()), + )); } // Move the result back into the destination reg. if is_div { // The quotient is in rax. 
- self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty)); + self.lower_ctx.emit(MInst::gen_move( + dst.to_writable_reg(), + dst_quotient.to_reg(), + ty, + )); } else { if size == OperandSize::Size8 { // The remainder is in AH. Right-shift by 8 bits then move from rax. self.lower_ctx.emit(MInst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(8), - Writable::from_reg(regs::rax()), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(), + dst_quotient, + )); + self.lower_ctx.emit(MInst::gen_move( + dst.to_writable_reg(), + dst_quotient.to_reg(), + ty, )); - self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty)); } else { // The remainder is in rdx. - self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), regs::rdx(), ty)); + self.lower_ctx.emit(MInst::gen_move( + dst.to_writable_reg(), + dst_remainder.to_reg(), + ty, + )); } } } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index c6093e5b71..303b90d3ab 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -427,37 +427,34 @@ mod test { // 00000000 55 push rbp // 00000001 4889E5 mov rbp,rsp - // 00000004 41B900000000 mov r9d,0x0 - // 0000000A 83FF02 cmp edi,byte +0x2 - // 0000000D 0F8320000000 jnc near 0x33 - // 00000013 8BF7 mov esi,edi - // 00000015 490F43F1 cmovnc rsi,r9 - // 00000019 4C8D0D0B000000 lea r9,[rel 0x2b] - // 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0] - // 00000025 4901F1 add r9,rsi - // 00000028 41FFE1 jmp r9 - // 0000002B 1200 adc al,[rax] - // 0000002D 0000 add [rax],al - // 0000002F 1C00 sbb al,0x0 - // 00000031 0000 add [rax],al - // 00000033 B803000000 mov eax,0x3 - // 00000038 4889EC mov rsp,rbp - // 0000003B 5D pop rbp - // 0000003C C3 ret - // 0000003D B801000000 mov eax,0x1 - // 00000042 4889EC mov rsp,rbp - // 00000045 5D pop rbp - // 00000046 C3 ret - // 00000047 B802000000 mov eax,0x2 - // 0000004C 4889EC mov rsp,rbp - // 
0000004F 5D pop rbp - // 00000050 C3 ret + // 00000004 83FF02 cmp edi,byte +0x2 + // 00000007 0F8327000000 jnc near 0x34 + // 0000000D 448BDF mov r11d,edi + // 00000010 41BA00000000 mov r10d,0x0 + // 00000016 4D0F43DA cmovnc r11,r10 + // 0000001A 4C8D150B000000 lea r10,[rel 0x2c] + // 00000021 4F635C9A00 movsxd r11,dword [r10+r11*4+0x0] + // 00000026 4D01DA add r10,r11 + // 00000029 41FFE2 jmp r10 + // 0000002C 120000001C000000 (jumptable data) + // 00000034 B803000000 mov eax,0x3 + // 00000039 4889EC mov rsp,rbp + // 0000003C 5D pop rbp + // 0000003D C3 ret + // 0000003E B801000000 mov eax,0x1 + // 00000043 4889EC mov rsp,rbp + // 00000046 5D pop rbp + // 00000047 C3 ret + // 00000048 B802000000 mov eax,0x2 + // 0000004D 4889EC mov rsp,rbp + // 00000050 5D pop rbp + // 00000051 C3 ret let golden = vec![ - 85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73, - 15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225, - 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72, - 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195, + 85, 72, 137, 229, 131, 255, 2, 15, 131, 39, 0, 0, 0, 68, 139, 223, 65, 186, 0, 0, 0, 0, + 77, 15, 67, 218, 76, 141, 21, 11, 0, 0, 0, 79, 99, 92, 154, 0, 77, 1, 218, 65, 255, + 226, 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, + 72, 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index ecb8800842..9bdd14e2b7 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -205,9 +205,8 @@ block2: ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r8d ; cmpl $2, %edi -; br_table %rdi +; br_table %rdi, %r9, %r10 ; block1: ; jmp label3 ; block2: diff --git 
a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 132f939818..8361e2880d 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -10,8 +10,9 @@ target x86_64 function %i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): v2 = srem.i8 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; nextln: shrq $$8, %rax, %rax @@ -21,8 +22,9 @@ block0(v0: i8, v1: i8): function %i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): v2 = srem.i16 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; nextln: movq %rdx, %rax @@ -32,8 +34,9 @@ block0(v0: i16, v1: i16): function %i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): v2 = srem.i32 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; nextln: movq %rdx, %rax @@ -43,8 +46,9 @@ block0(v0: i32, v1: i32): function %i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = srem.i64 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; nextln: movq %rdx, %rax diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index 09c6093c54..3429078f59 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -146,16 +146,16 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64): ; pushq %rbp ; movq %rsp, %rbp ; 
block0: -; movzbq %dil, %rax -; cvtsi2ss %rax, %xmm0 -; movzwq %si, %rax -; cvtsi2ss %rax, %xmm6 -; movl %edx, %eax -; cvtsi2ss %rax, %xmm7 -; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx +; movzbq %dil, %rdi +; cvtsi2ss %rdi, %xmm0 +; movzwq %si, %rdi +; cvtsi2ss %rdi, %xmm5 +; movl %edx, %edi +; cvtsi2ss %rdi, %xmm6 +; u64_to_f32_seq %rcx, %xmm2, %rdi, %rax +; addss %xmm0, %xmm5, %xmm0 ; addss %xmm0, %xmm6, %xmm0 -; addss %xmm0, %xmm7, %xmm0 -; addss %xmm0, %xmm4, %xmm0 +; addss %xmm0, %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -209,7 +209,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_uint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -223,7 +223,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_uint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -237,7 +237,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_uint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -251,7 +251,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_uint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -265,7 +265,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -279,7 +279,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -293,7 +293,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint32_sat_seq %xmm0, 
%eax, %r10, %xmm6 +; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -307,7 +307,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -321,7 +321,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_sint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -335,7 +335,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_sint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -349,7 +349,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_sint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -363,7 +363,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_sint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -377,7 +377,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_sint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -391,7 +391,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_sint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -405,7 +405,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_sint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -419,7 +419,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; 
cvt_float64_to_sint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_sint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif index c0f486c71f..6c13154db7 100644 --- a/cranelift/filetests/filetests/isa/x64/sdiv.clif +++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif @@ -11,8 +11,9 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; cbw %al, %dl -; idiv %al, (none), %sil, %al, %dl +; cbw %al, %al +; movq %rax, %rdi +; idiv %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp ; ret @@ -28,6 +29,7 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rdi, %rax ; cwd %ax, %dx +; movq %rdx, %r8 ; idiv %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -44,6 +46,7 @@ block0(v0: i32, v1: i32): ; block0: ; movq %rdi, %rax ; cdq %eax, %edx +; movq %rdx, %r8 ; idiv %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -60,6 +63,7 @@ block0(v0: i64, v1: i64): ; block0: ; movq %rdi, %rax ; cqo %rax, %rdx +; movq %rdx, %r8 ; idiv %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif b/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif index 29221415ca..827c80ffe2 100644 --- a/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif +++ b/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif @@ -12,9 +12,10 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm9 +; movdqa %xmm0, %xmm6 ; load_const VCodeConstant(0), %xmm0 -; vpermi2b %xmm1, %xmm0, %xmm9 +; movdqa %xmm6, %xmm8 +; vpermi2b %xmm1, %xmm8, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -31,11 +32,12 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm12 +; movdqa %xmm0, %xmm9 ; load_const VCodeConstant(1), %xmm0 -; load_const VCodeConstant(0), %xmm7 -; vpermi2b %xmm1, 
%xmm7, %xmm12 -; andps %xmm0, %xmm7, %xmm0 +; load_const VCodeConstant(0), %xmm8 +; movdqa %xmm9, %xmm11 +; vpermi2b %xmm1, %xmm11, %xmm8, %xmm8 +; andps %xmm0, %xmm8, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -49,9 +51,10 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm9 +; movdqa %xmm0, %xmm6 ; load_const VCodeConstant(0), %xmm0 -; vpermi2b %xmm1, %xmm0, %xmm9 +; movdqa %xmm6, %xmm8 +; vpermi2b %xmm1, %xmm8, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif index 99b137d566..fa7ee252fe 100644 --- a/cranelift/filetests/filetests/isa/x64/srem.clif +++ b/cranelift/filetests/filetests/isa/x64/srem.clif @@ -10,8 +10,9 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -27,8 +28,9 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -44,8 +46,9 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -61,8 +64,9 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif index a49b5a027e..75efb2d9d7 100644 --- a/cranelift/filetests/filetests/isa/x64/udiv.clif +++ 
b/cranelift/filetests/filetests/isa/x64/udiv.clif @@ -10,9 +10,9 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rax -; movzbl %al, %eax -; div %al, (none), %sil, %al, %dl +; movzbl %dil, %r10d +; movq %r10, %rax +; div %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp ; ret @@ -26,8 +26,9 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -42,8 +43,9 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -58,8 +60,9 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif index 5f4e80251f..dc21776f6a 100644 --- a/cranelift/filetests/filetests/isa/x64/urem.clif +++ b/cranelift/filetests/filetests/isa/x64/urem.clif @@ -10,9 +10,9 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rax -; movzbl %al, %eax -; div %al, (none), %sil, %al, %dl +; movzbl %dil, %r10d +; movq %r10, %rax +; div %al, (none), %sil, %al, (none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -27,8 +27,9 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -44,8 +45,9 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -61,8 +63,9 @@ 
block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp