diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index bd25f388ea..ff6bbd1775 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -64,24 +64,13 @@ ;; A synthetic sequence to implement the right inline checks for ;; remainder and division, assuming the dividend is in %rax. ;; - ;; Puts the result back into %rax if is_div, %rdx if !is_div, to mimic - ;; what the div instruction does. - ;; ;; The generated code sequence is described in the emit's function match ;; arm for this instruction. - ;; - ;; Note: %rdx is marked as modified by this instruction, to avoid an - ;; early clobber problem with the temporary and divisor registers. Make - ;; sure to zero %rdx right before this instruction, or you might run into - ;; regalloc failures where %rdx is live before its first def! (CheckedDivOrRemSeq (kind DivOrRemKind) (size OperandSize) (dividend_lo Gpr) (dividend_hi Gpr) - ;; The divisor operand. Note it's marked as modified - ;; so that it gets assigned a register different from - ;; the temporary. - (divisor WritableGpr) + (divisor Gpr) (dst_quotient WritableGpr) (dst_remainder WritableGpr) (tmp OptionWritableGpr)) @@ -205,12 +194,21 @@ (src3 XmmMem) (dst WritableXmm)) - ;; XMM (scalar or vector) binary op that relies on the EVEX prefix. + ;; XMM (scalar or vector) binary op that relies on the EVEX + ;; prefix. Takes two inputs. (XmmRmREvex (op Avx512Opcode) (src1 XmmMem) (src2 Xmm) (dst WritableXmm)) + ;; XMM (scalar or vector) binary op that relies on the EVEX + ;; prefix. Takes three inputs. + (XmmRmREvex3 (op Avx512Opcode) + (src1 XmmMem) + (src2 Xmm) + (src3 Xmm) + (dst WritableXmm)) + ;; XMM (scalar or vector) unary op: mov between XMM registers (32 64) ;; (reg addr) reg, sqrt, etc. ;; @@ -255,13 +253,7 @@ ;; Converts an unsigned int64 to a float32/float64. (CvtUint64ToFloatSeq (dst_size OperandSize) ;; 4 or 8 - ;; A copy of the source register, fed by - ;; lowering. It is marked as modified during - ;; register allocation to make sure that the - ;; temporary registers differ from the src register, - ;; since both registers are live at the same time in - ;; the generated code sequence. - (src WritableGpr) + (src Gpr) (dst WritableXmm) (tmp_gpr1 WritableGpr) (tmp_gpr2 WritableGpr)) @@ -270,13 +262,7 @@ (CvtFloatToSintSeq (dst_size OperandSize) (src_size OperandSize) (is_saturating bool) - ;; A copy of the source register, fed by - ;; lowering. It is marked as modified during - ;; register allocation to make sure that the - ;; temporary registers differ from the src register, - ;; since both registers are live at the same time in - ;; the generated code sequence. - (src WritableXmm) + (src Xmm) (dst WritableGpr) (tmp_gpr WritableGpr) (tmp_xmm WritableXmm)) @@ -285,13 +271,7 @@ (CvtFloatToUintSeq (dst_size OperandSize) (src_size OperandSize) (is_saturating bool) - ;; A copy of the source register, fed by - ;; lowering. It is marked as modified during - ;; register allocation to make sure that the - ;; temporary registers differ from the src register, - ;; since both registers are live at the same time in - ;; the generated code sequence. - (src WritableXmm) + (src Xmm) (dst WritableGpr) (tmp_gpr WritableGpr) (tmp_xmm WritableXmm)) @@ -2769,11 +2749,11 @@ (decl x64_vpermi2b (Xmm Xmm Xmm) Xmm) (rule (x64_vpermi2b src1 src2 src3) (let ((dst WritableXmm (temp_writable_xmm)) - (_ Unit (emit (gen_move $I8X16 dst src3))) - (_ Unit (emit (MInst.XmmRmREvex (Avx512Opcode.Vpermi2b) - src1 - src2 - dst)))) + (_ Unit (emit (MInst.XmmRmREvex3 (Avx512Opcode.Vpermi2b) + src1 + src2 + src3 + dst)))) dst)) ;; Helper for creating `MInst.MulHi` instructions. @@ -3214,12 +3194,10 @@ (decl cvt_u64_to_float_seq (Type Gpr) Xmm) (rule (cvt_u64_to_float_seq ty src) (let ((size OperandSize (raw_operand_size_of_type ty)) - (src_copy WritableGpr (temp_writable_gpr)) (dst WritableXmm (temp_writable_xmm)) (tmp_gpr1 WritableGpr (temp_writable_gpr)) (tmp_gpr2 WritableGpr (temp_writable_gpr)) - (_ Unit (emit (gen_move $I64 src_copy src))) - (_ Unit (emit (MInst.CvtUint64ToFloatSeq size src_copy dst tmp_gpr1 tmp_gpr2)))) + (_ Unit (emit (MInst.CvtUint64ToFloatSeq size src dst tmp_gpr1 tmp_gpr2)))) dst)) (decl cvt_float_to_uint_seq (Type Value bool) Gpr) @@ -3227,13 +3205,10 @@ (let ((out_size OperandSize (raw_operand_size_of_type out_ty)) (src_size OperandSize (raw_operand_size_of_type src_ty)) - (tmp WritableXmm (temp_writable_xmm)) - (_ Unit (emit (gen_move src_ty tmp src))) - (dst WritableGpr (temp_writable_gpr)) (tmp_xmm WritableXmm (temp_writable_xmm)) (tmp_gpr WritableGpr (temp_writable_gpr)) - (_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating tmp dst tmp_gpr tmp_xmm)))) + (_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm)))) dst)) (decl cvt_float_to_sint_seq (Type Value bool) Gpr) @@ -3241,13 +3216,10 @@ (let ((out_size OperandSize (raw_operand_size_of_type out_ty)) (src_size OperandSize (raw_operand_size_of_type src_ty)) - (tmp WritableXmm (temp_writable_xmm)) - (_ Unit (emit (gen_move src_ty tmp src))) - (dst WritableGpr (temp_writable_gpr)) (tmp_xmm WritableXmm (temp_writable_xmm)) (tmp_gpr WritableGpr (temp_writable_gpr)) - (_ Unit (emit (MInst.CvtFloatToSintSeq out_size src_size is_saturating tmp dst tmp_gpr tmp_xmm)))) + (_ Unit (emit (MInst.CvtFloatToSintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm)))) dst)) (decl fcvt_uint_mask_const () VCodeConstant) @@ -3396,10 +3368,6 @@ ;; addresses). (tmp1 WritableGpr (temp_writable_gpr)) - ;; Put a zero in tmp1. This is needed for Spectre mitigations (a - ;; CMOV that zeroes the index on misspeculation). - (_ Unit (emit (MInst.Imm (OperandSize.Size32) 0 tmp1))) - ;; This temporary is used as a signed integer of 32-bits (for the ;; wasm-table index) and then 64-bits (address addend). The small ;; lie about the I64 type is benign, since the temporary is dead diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 6e6ef44bd5..8489338054 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -377,11 +377,11 @@ pub(crate) fn emit( } => { let dividend_lo = allocs.next(dividend_lo.to_reg()); let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); - let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); debug_assert_eq!(dividend_lo, regs::rax()); debug_assert_eq!(dst_quotient, regs::rax()); - debug_assert_eq!(dst_remainder, regs::rdx()); if size.to_bits() > 8 { + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + debug_assert_eq!(dst_remainder, regs::rdx()); let dividend_hi = allocs.next(dividend_hi.to_reg()); debug_assert_eq!(dividend_hi, regs::rdx()); } @@ -468,7 +468,11 @@ pub(crate) fn emit( let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); debug_assert_eq!(src, regs::rax()); - debug_assert_eq!(dst, regs::rdx()); + if *size == OperandSize::Size8 { + debug_assert_eq!(dst, regs::rax()); + } else { + debug_assert_eq!(dst, regs::rdx()); + } match size { OperandSize::Size8 => { sink.put1(0x66); @@ -498,7 +502,7 @@ pub(crate) fn emit( } => { let dividend_lo = allocs.next(dividend_lo.to_reg()); let dividend_hi = allocs.next(dividend_hi.to_reg()); - let divisor = allocs.next(divisor.to_reg().to_reg()); + let divisor = allocs.next(divisor.to_reg()); let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg())); @@ -597,18 +601,45 @@ pub(crate) fn emit( sink.bind_label(do_op); } + let dividend_lo = Gpr::new(regs::rax()).unwrap(); + let dst_quotient = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()); + let (dividend_hi, dst_remainder) = if *size == OperandSize::Size8 { + ( + Gpr::new(regs::rax()).unwrap(), + Writable::from_reg(Gpr::new(regs::rax()).unwrap()), + ) + } else { + ( + Gpr::new(regs::rdx()).unwrap(), + Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + ) + }; + // Fill in the high parts: if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. - let inst = Inst::sign_extend_data(*size); + let inst = + Inst::sign_extend_data(*size, dividend_lo, WritableGpr::from_reg(dividend_hi)); inst.emit(&[], sink, info, state); - } else { + } else if *size != OperandSize::Size8 { // zero for unsigned opcodes. - let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); + let inst = Inst::imm( + OperandSize::Size64, + 0, + Writable::from_reg(dividend_hi.to_reg()), + ); inst.emit(&[], sink, info, state); } - let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor)); + let inst = Inst::div( + *size, + kind.is_signed(), + RegMem::reg(divisor), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + ); inst.emit(&[], sink, info, state); // Lowering takes care of moving the result back into the right register, see comment @@ -1393,7 +1424,8 @@ pub(crate) fn emit( // ;; generated by lowering: cmp #jmp_table_size, %idx // jnb $default_target // movl %idx, %tmp2 - // cmovnb %tmp1, %tmp2 ;; Spectre mitigation; we require tmp1 to be zero on entry. + // mov $0, %tmp1 + // cmovnb %tmp1, %tmp2 ;; Spectre mitigation. // lea start_of_jump_table_offset(%rip), %tmp1 // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4 // addq %tmp2, %tmp1 @@ -1406,6 +1438,13 @@ pub(crate) fn emit( let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2); inst.emit(&[], sink, info, state); + // Zero `tmp1` to overwrite `tmp2` with zeroes on the + // out-of-bounds case (Spectre mitigation using CMOV). + // Note that we need to do this with a move-immediate + // form, because we cannot clobber the flags. + let inst = Inst::imm(OperandSize::Size32, 0, tmp1); + inst.emit(&[], sink, info, state); + // Spectre mitigation: CMOV to zero the index if the out-of-bounds branch above misspeculated. let inst = Inst::cmove( OperandSize::Size64, @@ -1768,9 +1807,21 @@ pub(crate) fn emit( src1, src2, dst, + } + | Inst::XmmRmREvex3 { + op, + src1, + src2, + dst, + // `dst` reuses `src3`. + .. } => { let dst = allocs.next(dst.to_reg().to_reg()); let src2 = allocs.next(src2.to_reg()); + if let Inst::XmmRmREvex3 { src3, .. } = inst { + let src3 = allocs.next(src3.to_reg()); + debug_assert_eq!(src3, dst); + } let src1 = src1.clone().to_reg_mem().with_allocs(allocs); let (w, opcode) = match op { @@ -2086,7 +2137,7 @@ pub(crate) fn emit( tmp_gpr1, tmp_gpr2, } => { - let src = allocs.next(src.to_reg().to_reg()); + let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr1 = allocs.next(tmp_gpr1.to_reg().to_reg()); let tmp_gpr2 = allocs.next(tmp_gpr2.to_reg().to_reg()); @@ -2155,7 +2206,7 @@ pub(crate) fn emit( let inst = Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(1), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 1 }).unwrap(), Writable::from_reg(tmp_gpr1), ); inst.emit(&[], sink, info, state); @@ -2208,7 +2259,7 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { - let src = allocs.next(src.to_reg().to_reg()); + let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); @@ -2417,7 +2468,7 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { - let src = allocs.next(src.to_reg().to_reg()); + let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index d0dde74727..9cbde12668 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -1723,6 +1723,10 @@ fn test_x64_emit() { OperandSize::Size32, true, /*signed*/ RegMem::reg(regs::rsi()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "F7FE", "idiv %eax, %edx, %esi, %eax, %edx", @@ -1732,6 +1736,10 @@ fn test_x64_emit() { OperandSize::Size64, true, /*signed*/ RegMem::reg(regs::r15()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "49F7FF", "idiv %rax, %rdx, %r15, %rax, %rdx", @@ -1741,6 +1749,10 @@ fn test_x64_emit() { OperandSize::Size32, false, /*signed*/ RegMem::reg(regs::r14()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "41F7F6", "div %eax, %edx, %r14d, %eax, %edx", @@ -1750,19 +1762,39 @@ fn test_x64_emit() { OperandSize::Size64, false, /*signed*/ RegMem::reg(regs::rdi()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), ), "48F7F7", "div %rax, %rdx, %rdi, %rax, %rdx", )); insns.push(( - Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rax())), + Inst::div( + OperandSize::Size8, + false, + RegMem::reg(regs::rax()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "F6F0", - "div %al, (none), %al, %al, %dl", + "div %al, (none), %al, %al, (none)", )); insns.push(( - Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rsi())), + Inst::div( + OperandSize::Size8, + false, + RegMem::reg(regs::rsi()), + Gpr::new(regs::rax()).unwrap(), + Gpr::new(regs::rdx()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "40F6F6", - "div %al, (none), %sil, %al, %dl", + "div %al, (none), %sil, %al, (none)", )); // ======================================================== @@ -1807,25 +1839,41 @@ fn test_x64_emit() { // ======================================================== // cbw insns.push(( - Inst::sign_extend_data(OperandSize::Size8), + Inst::sign_extend_data( + OperandSize::Size8, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + ), "6698", - "cbw %al, %dl", + "cbw %al, %al", )); // ======================================================== // cdq family: SignExtendRaxRdx insns.push(( - Inst::sign_extend_data(OperandSize::Size16), + Inst::sign_extend_data( + OperandSize::Size16, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "6699", "cwd %ax, %dx", )); insns.push(( - Inst::sign_extend_data(OperandSize::Size32), + Inst::sign_extend_data( + OperandSize::Size32, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "99", "cdq %eax, %edx", )); insns.push(( - Inst::sign_extend_data(OperandSize::Size64), + Inst::sign_extend_data( + OperandSize::Size64, + Gpr::new(regs::rax()).unwrap(), + WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()), + ), "4899", "cqo %rax, %rdx", )); @@ -2813,47 +2861,92 @@ fn test_x64_emit() { // ======================================================== // Shift_R insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_rdi), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rdi, + ), "D3E7", "shll %cl, %edi, %edi", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_r12), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_r12, + ), "41D3E4", "shll %cl, %r12d, %r12d", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(2), w_r8), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + w_r8, + ), "41C1E002", "shll $2, %r8d, %r8d", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(31), w_r13), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), + w_r13, + ), "41C1E51F", "shll $31, %r13d, %r13d", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_r13), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_r13, + ), "49D3E5", "shlq %cl, %r13, %r13", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_rdi), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rdi, + ), "48D3E7", "shlq %cl, %rdi, %rdi", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(2), w_r8), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), + w_r8, + ), "49C1E002", "shlq $2, %r8, %r8", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(3), w_rbx), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(), + w_rbx, + ), "48C1E303", "shlq $3, %rbx, %rbx", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(63), w_r13), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::ShiftLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), + w_r13, + ), "49C1E53F", "shlq $63, %r13, %r13", )); @@ -2861,7 +2954,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightLogical, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "D3EF", @@ -2871,7 +2964,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightLogical, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "41C1E802", @@ -2881,7 +2974,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightLogical, - Some(31), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), w_r13, ), "41C1ED1F", @@ -2891,7 +2984,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "48D3EF", @@ -2901,7 +2994,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "49C1E802", @@ -2911,7 +3004,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(63), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), w_r13, ), "49C1ED3F", @@ -2921,7 +3014,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightArithmetic, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "D3FF", @@ -2931,7 +3024,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightArithmetic, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "41C1F802", @@ -2941,7 +3034,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size32, ShiftKind::ShiftRightArithmetic, - Some(31), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(), w_r13, ), "41C1FD1F", @@ -2951,7 +3044,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightArithmetic, - None, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), w_rdi, ), "48D3FF", @@ -2961,7 +3054,7 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightArithmetic, - Some(2), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(), w_r8, ), "49C1F802", @@ -2971,54 +3064,99 @@ fn test_x64_emit() { Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightArithmetic, - Some(63), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(), w_r13, ), "49C1FD3F", "sarq $63, %r13, %r13", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::RotateLeft, None, w_r8), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::RotateLeft, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_r8, + ), "49D3C0", "rolq %cl, %r8, %r8", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::RotateLeft, Some(3), w_r9), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::RotateLeft, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(), + w_r9, + ), "41C1C103", "roll $3, %r9d, %r9d", )); insns.push(( - Inst::shift_r(OperandSize::Size32, ShiftKind::RotateRight, None, w_rsi), + Inst::shift_r( + OperandSize::Size32, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rsi, + ), "D3CE", "rorl %cl, %esi, %esi", )); insns.push(( - Inst::shift_r(OperandSize::Size64, ShiftKind::RotateRight, Some(5), w_r15), + Inst::shift_r( + OperandSize::Size64, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + w_r15, + ), "49C1CF05", "rorq $5, %r15, %r15", )); insns.push(( - Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rsi), + Inst::shift_r( + OperandSize::Size8, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rsi, + ), "40D2CE", "rorb %cl, %sil, %sil", )); insns.push(( - Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rax), + Inst::shift_r( + OperandSize::Size8, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rax, + ), "D2C8", "rorb %cl, %al, %al", )); insns.push(( - Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, Some(5), w_r15), + Inst::shift_r( + OperandSize::Size8, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + w_r15, + ), "41C0CF05", "rorb $5, %r15b, %r15b", )); insns.push(( - Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, None, w_rsi), + Inst::shift_r( + OperandSize::Size16, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(), + w_rsi, + ), "66D3CE", "rorw %cl, %si, %si", )); insns.push(( - Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, Some(5), w_r15), + Inst::shift_r( + OperandSize::Size16, + ShiftKind::RotateRight, + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(), + w_r15, + ), "6641C1CF05", "rorw $5, %r15w, %r15w", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9d7f1bd0f4..09b5993298 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -131,7 +131,9 @@ impl Inst { | Inst::XmmToGpr { op, .. } | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()], - Inst::XmmUnaryRmREvex { op, .. } | Inst::XmmRmREvex { op, .. } => op.available_from(), + Inst::XmmUnaryRmREvex { op, .. } + | Inst::XmmRmREvex { op, .. } + | Inst::XmmRmREvex3 { op, .. } => op.available_from(), Inst::XmmRmRVex { op, .. } => op.available_from(), } @@ -195,47 +197,55 @@ impl Inst { } } - pub(crate) fn div(size: OperandSize, signed: bool, divisor: RegMem) -> Inst { + pub(crate) fn div( + size: OperandSize, + signed: bool, + divisor: RegMem, + dividend_lo: Gpr, + dividend_hi: Gpr, + dst_quotient: WritableGpr, + dst_remainder: WritableGpr, + ) -> Inst { divisor.assert_regclass_is(RegClass::Int); Inst::Div { size, signed, divisor: GprMem::new(divisor).unwrap(), - dividend_lo: Gpr::new(regs::rax()).unwrap(), - dividend_hi: Gpr::new(regs::rdx()).unwrap(), - dst_quotient: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), - dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, } } pub(crate) fn checked_div_or_rem_seq( kind: DivOrRemKind, size: OperandSize, - divisor: Writable, + divisor: Reg, + dividend_lo: Gpr, + dividend_hi: Gpr, + dst_quotient: WritableGpr, + dst_remainder: WritableGpr, tmp: Option>, ) -> Inst { - debug_assert!(divisor.to_reg().class() == RegClass::Int); + debug_assert!(divisor.class() == RegClass::Int); debug_assert!(tmp .map(|tmp| tmp.to_reg().class() == RegClass::Int) .unwrap_or(true)); Inst::CheckedDivOrRemSeq { kind, size, - divisor: WritableGpr::from_writable_reg(divisor).unwrap(), - dividend_lo: Gpr::new(regs::rax()).unwrap(), - dividend_hi: Gpr::new(regs::rdx()).unwrap(), - dst_quotient: Writable::from_reg(Gpr::new(regs::rax()).unwrap()), - dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), + divisor: Gpr::new(divisor).unwrap(), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()), } } - pub(crate) fn sign_extend_data(size: OperandSize) -> Inst { - Inst::SignExtendData { - size, - src: Gpr::new(regs::rax()).unwrap(), - dst: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), - } + pub(crate) fn sign_extend_data(size: OperandSize, src: Gpr, dst: WritableGpr) -> Inst { + Inst::SignExtendData { size, src, dst } } pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable) -> Inst { @@ -415,24 +425,18 @@ impl Inst { pub(crate) fn shift_r( size: OperandSize, kind: ShiftKind, - num_bits: Option, + num_bits: Imm8Gpr, dst: Writable, ) -> Inst { - debug_assert!(if let Some(num_bits) = num_bits { - num_bits < size.to_bits() - } else { - true - }); + if let Imm8Reg::Imm8 { imm: num_bits } = num_bits.clone().to_imm8_reg() { + debug_assert!(num_bits < size.to_bits()); + } debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::ShiftR { size, kind, src: Gpr::new(dst.to_reg()).unwrap(), - num_bits: Imm8Gpr::new(match num_bits { - Some(imm) => Imm8Reg::Imm8 { imm }, - None => Imm8Reg::Reg { reg: regs::rcx() }, - }) - .unwrap(), + num_bits, dst: WritableGpr::from_writable_reg(dst).unwrap(), } } @@ -781,8 +785,11 @@ impl PrettyPrint for Inst { let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); let dst_quotient = pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); - let dst_remainder = - pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_remainder = if size.to_bits() > 8 { + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs) + } else { + "(none)".to_string() + }; let dividend_hi = if size.to_bits() > 8 { pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs) } else { @@ -842,7 +849,7 @@ impl PrettyPrint for Inst { } => { let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); - let divisor = pretty_print_reg(divisor.to_reg().to_reg(), size.to_bytes(), allocs); + let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs); let dst_quotient = pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); let dst_remainder = @@ -949,12 +956,34 @@ impl PrettyPrint for Inst { dst, .. } => { - let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); let src1 = src1.pretty_print(8, allocs); format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst) } + Inst::XmmRmREvex3 { + op, + src1, + src2, + src3, + dst, + .. + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); + let src3 = pretty_print_reg(src3.to_reg(), 8, allocs); + let src1 = src1.pretty_print(8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(op.to_string()), + src1, + src2, + src3, + dst + ) + } + Inst::XmmMinMaxSeq { lhs, rhs, @@ -1084,7 +1113,7 @@ impl PrettyPrint for Inst { tmp_gpr2, .. } => { - let src = pretty_print_reg(src.to_reg().to_reg(), 8, allocs); + let src = pretty_print_reg(src.to_reg(), 8, allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8, allocs); let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8, allocs); @@ -1114,7 +1143,7 @@ impl PrettyPrint for Inst { tmp_gpr, is_saturating, } => { - let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); @@ -1142,7 +1171,7 @@ impl PrettyPrint for Inst { tmp_xmm, is_saturating, } => { - let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); @@ -1424,9 +1453,19 @@ impl PrettyPrint for Inst { not_taken.to_string() ), - Inst::JmpTableSeq { idx, .. } => { + Inst::JmpTableSeq { + idx, tmp1, tmp2, .. + } => { let idx = pretty_print_reg(*idx, 8, allocs); - format!("{} {}", ljustify("br_table".into()), idx) + let tmp1 = pretty_print_reg(tmp1.to_reg(), 8, allocs); + let tmp2 = pretty_print_reg(tmp2.to_reg(), 8, allocs); + format!( + "{} {}, {}, {}", + ljustify("br_table".into()), + idx, + tmp1, + tmp2 + ) } Inst::JmpUnknown { target } => { @@ -1605,8 +1644,8 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol } => { collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); - collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); if size.to_bits() > 8 { + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); } divisor.get_operands(collector); @@ -1634,10 +1673,12 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol } => { collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); - collector.reg_mod(divisor.to_writable_reg()); + collector.reg_use(divisor.to_reg()); collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); if let Some(tmp) = tmp { + // Early def so that the temporary register does not + // conflict with inputs or outputs. collector.reg_early_def(tmp.to_writable_reg()); } } @@ -1718,13 +1759,25 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol dst, .. } => { - match *op { - Avx512Opcode::Vpermi2b => collector.reg_mod(dst.to_writable_reg()), - _ => collector.reg_def(dst.to_writable_reg()), - } + assert_ne!(*op, Avx512Opcode::Vpermi2b); + collector.reg_def(dst.to_writable_reg()); collector.reg_use(src2.to_reg()); src1.get_operands(collector); } + Inst::XmmRmREvex3 { + op, + src1, + src2, + src3, + dst, + .. + } => { + assert_eq!(*op, Avx512Opcode::Vpermi2b); + collector.reg_reuse_def(dst.to_writable_reg(), 2); // Reuse `src3`. + collector.reg_use(src2.to_reg()); + collector.reg_use(src3.to_reg()); + src1.get_operands(collector); + } Inst::XmmRmRImm { op, src1, @@ -1795,7 +1848,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol tmp_gpr2, .. } => { - collector.reg_mod(src.to_writable_reg()); + collector.reg_use(src.to_reg()); collector.reg_def(dst.to_writable_reg()); collector.reg_early_def(tmp_gpr1.to_writable_reg()); collector.reg_early_def(tmp_gpr2.to_writable_reg()); @@ -1814,7 +1867,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol tmp_xmm, .. } => { - collector.reg_mod(src.to_writable_reg()); + collector.reg_use(src.to_reg()); collector.reg_def(dst.to_writable_reg()); collector.reg_early_def(tmp_gpr.to_writable_reg()); collector.reg_early_def(tmp_xmm.to_writable_reg()); @@ -1911,7 +1964,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol .. } => { collector.reg_use(*idx); - collector.reg_mod(*tmp1); + collector.reg_early_def(*tmp1); collector.reg_early_def(*tmp2); } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index d76e72f88c..2148d4f400 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -955,40 +955,34 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { let is_div = kind.is_div(); let size = OperandSize::from_ty(ty); - self.lower_ctx.emit(MInst::gen_move( - Writable::from_reg(regs::rax()), - dividend.to_reg(), - ty, - )); + let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem { // A vcode meta-instruction is used to lower the inline checks, since they embed // pc-relative offsets that must not change, thus requiring regalloc to not // interfere by introducing spills and reloads. - // - // Note it keeps the result in $rax (for divide) or $rdx (for rem), so that - // regalloc is aware of the coalescing opportunity between rax/rdx and the - // destination register. - let divisor_copy = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); - self.lower_ctx - .emit(MInst::gen_move(divisor_copy, divisor.to_reg(), types::I64)); - let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 { Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap()) } else { None }; - // TODO use xor - self.lower_ctx.emit(MInst::imm( + let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + self.lower_ctx.emit(MInst::alu_rmi_r( OperandSize::Size32, - 0, - Writable::from_reg(regs::rdx()), + AluRmiROpcode::Xor, + RegMemImm::reg(dividend_hi.to_reg()), + dividend_hi, )); self.lower_ctx.emit(MInst::checked_div_or_rem_seq( kind.clone(), size, - divisor_copy, + divisor.to_reg(), + Gpr::new(dividend.to_reg()).unwrap(), + Gpr::new(dividend_hi.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()), + WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()), tmp, )); } else { @@ -997,51 +991,89 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { // divisor into a register instead. let divisor = RegMem::reg(divisor.to_reg()); + let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + // Fill in the high parts: - if kind.is_signed() { - // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for - // signed opcodes. - self.lower_ctx.emit(MInst::sign_extend_data(size)); + let dividend_lo = if kind.is_signed() && ty == types::I8 { + let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); + // 8-bit div takes its dividend in only the `lo` reg. + self.lower_ctx.emit(MInst::sign_extend_data( + size, + Gpr::new(dividend.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()), + )); + // `dividend_hi` is not used by the Div below, so we + // don't def it here. + + dividend_lo.to_reg() + } else if kind.is_signed() { + // 16-bit and higher div takes its operand in hi:lo + // with half in each (64:64, 32:32 or 16:16). + self.lower_ctx.emit(MInst::sign_extend_data( + size, + Gpr::new(dividend.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()), + )); + + dividend.to_reg() } else if ty == types::I8 { + let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); self.lower_ctx.emit(MInst::movzx_rm_r( ExtMode::BL, - RegMem::reg(regs::rax()), - Writable::from_reg(regs::rax()), + RegMem::reg(dividend.to_reg()), + dividend_lo, )); + + dividend_lo.to_reg() } else { // zero for unsigned opcodes. - self.lower_ctx.emit(MInst::imm( - OperandSize::Size64, - 0, - Writable::from_reg(regs::rdx()), - )); - } + self.lower_ctx + .emit(MInst::imm(OperandSize::Size64, 0, dividend_hi)); + + dividend.to_reg() + }; // Emit the actual idiv. - self.lower_ctx - .emit(MInst::div(size, kind.is_signed(), divisor)); + self.lower_ctx.emit(MInst::div( + size, + kind.is_signed(), + divisor, + Gpr::new(dividend_lo).unwrap(), + Gpr::new(dividend_hi.to_reg()).unwrap(), + WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()), + WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()), + )); } // Move the result back into the destination reg. if is_div { // The quotient is in rax. - self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty)); + self.lower_ctx.emit(MInst::gen_move( + dst.to_writable_reg(), + dst_quotient.to_reg(), + ty, + )); } else { if size == OperandSize::Size8 { // The remainder is in AH. Right-shift by 8 bits then move from rax. self.lower_ctx.emit(MInst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, - Some(8), - Writable::from_reg(regs::rax()), + Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(), + dst_quotient, + )); + self.lower_ctx.emit(MInst::gen_move( + dst.to_writable_reg(), + dst_quotient.to_reg(), + ty, )); - self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty)); } else { // The remainder is in rdx. - self.lower_ctx - .emit(MInst::gen_move(dst.to_writable_reg(), regs::rdx(), ty)); + self.lower_ctx.emit(MInst::gen_move( + dst.to_writable_reg(), + dst_remainder.to_reg(), + ty, + )); } } } diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index c6093e5b71..303b90d3ab 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -427,37 +427,34 @@ mod test { // 00000000 55 push rbp // 00000001 4889E5 mov rbp,rsp - // 00000004 41B900000000 mov r9d,0x0 - // 0000000A 83FF02 cmp edi,byte +0x2 - // 0000000D 0F8320000000 jnc near 0x33 - // 00000013 8BF7 mov esi,edi - // 00000015 490F43F1 cmovnc rsi,r9 - // 00000019 4C8D0D0B000000 lea r9,[rel 0x2b] - // 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0] - // 00000025 4901F1 add r9,rsi - // 00000028 41FFE1 jmp r9 - // 0000002B 1200 adc al,[rax] - // 0000002D 0000 add [rax],al - // 0000002F 1C00 sbb al,0x0 - // 00000031 0000 add [rax],al - // 00000033 B803000000 mov eax,0x3 - // 00000038 4889EC mov rsp,rbp - // 0000003B 5D pop rbp - // 0000003C C3 ret - // 0000003D B801000000 mov eax,0x1 - // 00000042 4889EC mov rsp,rbp - // 00000045 5D pop rbp - // 00000046 C3 ret - // 00000047 B802000000 mov eax,0x2 - // 0000004C 4889EC mov rsp,rbp - // 0000004F 5D pop rbp - // 00000050 C3 ret + // 00000004 83FF02 cmp edi,byte +0x2 + // 00000007 0F8327000000 jnc near 0x34 + // 0000000D 448BDF mov r11d,edi + // 00000010 41BA00000000 mov r10d,0x0 + // 00000016 4D0F43DA cmovnc r11,r10 + // 0000001A 4C8D150B000000 lea r10,[rel 0x2c] + // 00000021 4F635C9A00 movsxd r11,dword [r10+r11*4+0x0] + // 00000026 4D01DA add r10,r11 + // 00000029 41FFE2 jmp r10 + // 0000002C 120000001C000000 (jumptable data) + // 00000034 B803000000 mov eax,0x3 + // 00000039 4889EC mov rsp,rbp + // 0000003C 5D pop rbp + // 0000003D C3 ret + // 0000003E B801000000 mov eax,0x1 + // 00000043 4889EC mov rsp,rbp + // 00000046 5D pop rbp + // 00000047 C3 ret + // 00000048 B802000000 mov eax,0x2 + // 0000004D 4889EC mov rsp,rbp + // 00000050 5D pop rbp + // 00000051 C3 ret let golden = vec![ - 85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73, - 15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225, - 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72, - 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195, + 85, 72, 137, 229, 131, 255, 2, 15, 131, 39, 0, 0, 0, 68, 139, 223, 65, 186, 0, 0, 0, 0, + 77, 15, 67, 218, 76, 141, 21, 11, 0, 0, 0, 79, 99, 92, 154, 0, 77, 1, 218, 65, 255, + 226, 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, + 72, 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index ecb8800842..9bdd14e2b7 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -205,9 +205,8 @@ block2: ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r8d ; cmpl $2, %edi -; br_table %rdi +; br_table %rdi, %r9, %r10 ; block1: ; jmp label3 ; block2: diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 132f939818..8361e2880d 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -10,8 +10,9 @@ target x86_64 function %i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): v2 = srem.i8 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; nextln: shrq $$8, %rax, %rax @@ -21,8 +22,9 @@ block0(v0: i8, v1: i8): function %i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): v2 = srem.i16 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; nextln: movq %rdx, %rax @@ -32,8 +34,9 @@ block0(v0: i16, v1: i16): function %i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): v2 = srem.i32 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; nextln: movq %rdx, %rax @@ -43,8 +46,9 @@ block0(v0: i32, v1: i32): function %i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = srem.i64 v0, v1 -; check: movq %rdi, %rax -; nextln: movl $$0, %edx +; check: xorl %r11d, %r11d, %r11d +; nextln: movq %rdi, %rax +; nextln: movq %r11, %rdx ; nextln: srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; nextln: movq %rdx, %rax diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index 09c6093c54..3429078f59 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -146,16 +146,16 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movzbq %dil, %rax -; cvtsi2ss %rax, %xmm0 -; movzwq %si, %rax -; cvtsi2ss %rax, %xmm6 -; movl %edx, %eax -; cvtsi2ss %rax, %xmm7 -; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx +; movzbq %dil, %rdi +; cvtsi2ss %rdi, %xmm0 +; movzwq %si, %rdi +; cvtsi2ss %rdi, %xmm5 +; movl %edx, %edi +; cvtsi2ss %rdi, %xmm6 +; u64_to_f32_seq %rcx, %xmm2, %rdi, %rax +; addss %xmm0, %xmm5, %xmm0 ; addss %xmm0, %xmm6, %xmm0 -; addss %xmm0, %xmm7, %xmm0 -; addss %xmm0, %xmm4, %xmm0 +; addss %xmm0, %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -209,7 +209,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_uint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -223,7 +223,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_uint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -237,7 +237,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_uint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -251,7 +251,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_uint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -265,7 +265,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -279,7 +279,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -293,7 +293,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -307,7 +307,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -321,7 +321,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_sint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -335,7 +335,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_sint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -349,7 +349,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint32_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_sint32_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -363,7 +363,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint64_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_sint64_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -377,7 +377,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float32_to_sint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -391,7 +391,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_sint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float32_to_sint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -405,7 +405,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint32_sat_seq %xmm0, %eax, %r10, %xmm6 +; cvt_float64_to_sint32_sat_seq %xmm0, %eax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -419,7 +419,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_sint64_sat_seq %xmm0, %rax, %r10, %xmm6 +; cvt_float64_to_sint64_sat_seq %xmm0, %rax, %r8, %xmm4 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif index c0f486c71f..6c13154db7 100644 --- a/cranelift/filetests/filetests/isa/x64/sdiv.clif +++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif @@ -11,8 +11,9 @@ block0(v0: i8, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; cbw %al, %dl -; idiv %al, (none), %sil, %al, %dl +; cbw %al, %al +; movq %rax, %rdi +; idiv %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp ; ret @@ -28,6 +29,7 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rdi, %rax ; cwd %ax, %dx +; movq %rdx, %r8 ; idiv %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -44,6 +46,7 @@ block0(v0: i32, v1: i32): ; block0: ; movq %rdi, %rax ; cdq %eax, %edx +; movq %rdx, %r8 ; idiv %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -60,6 +63,7 @@ block0(v0: i64, v1: i64): ; block0: ; movq %rdi, %rax ; cqo %rax, %rdx +; movq %rdx, %r8 ; idiv %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif b/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif index 29221415ca..827c80ffe2 100644 --- a/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif +++ b/cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif @@ -12,9 +12,10 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm9 +; movdqa %xmm0, %xmm6 ; load_const VCodeConstant(0), %xmm0 -; vpermi2b %xmm1, %xmm0, %xmm9 +; movdqa %xmm6, %xmm8 +; vpermi2b %xmm1, %xmm8, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -31,11 +32,12 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm12 +; movdqa %xmm0, %xmm9 ; load_const VCodeConstant(1), %xmm0 -; load_const VCodeConstant(0), %xmm7 -; vpermi2b %xmm1, %xmm7, %xmm12 -; andps %xmm0, %xmm7, %xmm0 +; load_const VCodeConstant(0), %xmm8 +; movdqa %xmm9, %xmm11 +; vpermi2b %xmm1, %xmm11, %xmm8, %xmm8 +; andps %xmm0, %xmm8, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -49,9 +51,10 @@ block0(v0: i8x16, v1: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movdqa %xmm0, %xmm9 +; movdqa %xmm0, %xmm6 ; load_const VCodeConstant(0), %xmm0 -; vpermi2b %xmm1, %xmm0, %xmm9 +; movdqa %xmm6, %xmm8 +; vpermi2b %xmm1, %xmm8, %xmm0, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif index 99b137d566..fa7ee252fe 100644 --- a/cranelift/filetests/filetests/isa/x64/srem.clif +++ b/cranelift/filetests/filetests/isa/x64/srem.clif @@ -10,8 +10,9 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -27,8 +28,9 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -44,8 +46,9 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -61,8 +64,9 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; xorl %r11d, %r11d, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif index a49b5a027e..75efb2d9d7 100644 --- a/cranelift/filetests/filetests/isa/x64/udiv.clif +++ b/cranelift/filetests/filetests/isa/x64/udiv.clif @@ -10,9 +10,9 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rax -; movzbl %al, %eax -; div %al, (none), %sil, %al, %dl +; movzbl %dil, %r10d +; movq %r10, %rax +; div %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp ; ret @@ -26,8 +26,9 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -42,8 +43,9 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -58,8 +60,9 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif index 5f4e80251f..dc21776f6a 100644 --- a/cranelift/filetests/filetests/isa/x64/urem.clif +++ b/cranelift/filetests/filetests/isa/x64/urem.clif @@ -10,9 +10,9 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rax -; movzbl %al, %eax -; div %al, (none), %sil, %al, %dl +; movzbl %dil, %r10d +; movq %r10, %rax +; div %al, (none), %sil, %al, (none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -27,8 +27,9 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -44,8 +45,9 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -61,8 +63,9 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movl $0, %r11d ; movq %rdi, %rax -; movl $0, %edx +; movq %r11, %rdx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp