diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index ca9059f016..f7bd6a2fef 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -289,6 +289,26 @@ impl ToString for AluRmiROpcode {
     }
 }
 
+#[derive(Clone, PartialEq)]
+pub enum ReadOnlyGprRmROpcode {
+    /// Bit-scan reverse.
+    Bsr,
+}
+
+impl fmt::Debug for ReadOnlyGprRmROpcode {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            ReadOnlyGprRmROpcode::Bsr => write!(fmt, "bsr"),
+        }
+    }
+}
+
+impl ToString for ReadOnlyGprRmROpcode {
+    fn to_string(&self) -> String {
+        format!("{:?}", self)
+    }
+}
+
 pub(crate) enum InstructionSet {
     SSE,
     SSE2,
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index 414f033194..f27bfa8b31 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -556,6 +556,40 @@ pub(crate) fn emit(
             }
         }
 
+        Inst::ReadOnly_Gpr_Rm_R { size, op, src, dst } => {
+            let (prefix, rex_flags) = match size {
+                2 => (LegacyPrefix::_66, RexFlags::clear_w()),
+                4 => (LegacyPrefix::None, RexFlags::clear_w()),
+                8 => (LegacyPrefix::None, RexFlags::set_w()),
+                _ => unreachable!(),
+            };
+
+            let (opcode, num_opcodes) = match op {
+                ReadOnlyGprRmROpcode::Bsr => (0x0fbd, 2),
+            };
+
+            match src {
+                RegMem::Reg { reg: src } => emit_std_reg_reg(
+                    sink,
+                    prefix,
+                    opcode,
+                    num_opcodes,
+                    dst.to_reg(),
+                    *src,
+                    rex_flags,
+                ),
+                RegMem::Mem { addr: src } => emit_std_reg_mem(
+                    sink,
+                    prefix,
+                    opcode,
+                    num_opcodes,
+                    dst.to_reg(),
+                    &src.finalize(state),
+                    rex_flags,
+                ),
+            }
+        }
+
         Inst::Div {
             size,
             signed,
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
index b18115a814..da1aed2442 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -1154,6 +1154,20 @@ fn test_x64_emit() {
         "imull $76543210, %esi",
     ));
 
+    // ========================================================
+    // ReadOnly_Gpr_Rm_R
+
+    insns.push((
+        Inst::read_only_gpr_rm_r(4, ReadOnlyGprRmROpcode::Bsr, RegMem::reg(rsi), w_rdi),
+        "0FBDFE",
+        "bsrl %esi, %edi",
+    ));
+    insns.push((
+        Inst::read_only_gpr_rm_r(8, ReadOnlyGprRmROpcode::Bsr, RegMem::reg(r15), w_rax),
+        "490FBDC7",
+        "bsrq %r15, %rax",
+    ));
+
     // ========================================================
     // Div
     insns.push((
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 327d35f828..d4b0e3eace 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -49,6 +49,14 @@ pub enum Inst {
         dst: Writable<Reg>,
     },
 
+    /// Instructions on GPRs that only read src and define dst (dst is not modified): bsr, etc.
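+    /// The source may be a register or a memory operand; the destination is written but never
+    /// read, so register allocation treats it as a `def` rather than a `mod`.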
+    ReadOnly_Gpr_Rm_R {
+        size: u8, // 2, 4 or 8
+        op: ReadOnlyGprRmROpcode,
+        src: RegMem,
+        dst: Writable<Reg>,
+    },
+
     /// Integer quotient and remainder: (div idiv) $rax $rdx (reg addr)
     Div {
         size: u8, // 1, 2, 4 or 8
@@ -295,6 +303,17 @@ impl Inst {
         }
     }
 
+    pub(crate) fn read_only_gpr_rm_r(
+        size: u8,
+        op: ReadOnlyGprRmROpcode,
+        src: RegMem,
+        dst: Writable<Reg>,
+    ) -> Self {
+        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+        debug_assert!(size == 8 || size == 4 || size == 2);
+        Self::ReadOnly_Gpr_Rm_R { size, op, src, dst }
+    }
+
     pub(crate) fn div(size: u8, signed: bool, divisor: RegMem, loc: SourceLoc) -> Inst {
         debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
         Inst::Div {
@@ -357,6 +376,11 @@ impl Inst {
         Inst::MovZX_RM_R { ext_mode, src, dst }
     }
 
+    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
+        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
+        Inst::MovSX_RM_R { ext_mode, src, dst }
+    }
+
     pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
         debug_assert!(dst.to_reg().get_class() == RegClass::I64);
         Inst::Mov64_M_R {
@@ -373,11 +397,6 @@ impl Inst {
         }
     }
 
-    pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
-        debug_assert!(dst.to_reg().get_class() == RegClass::I64);
-        Inst::MovSX_RM_R { ext_mode, src, dst }
-    }
-
     pub(crate) fn mov_r_m(
         size: u8, // 1, 2, 4 or 8
         src: Reg,
@@ -565,6 +584,7 @@ impl ShowWithRRU for Inst {
 
         match self {
             Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
+
             Inst::Alu_RMI_R {
                 is_64,
                 op,
@@ -576,6 +596,14 @@ impl ShowWithRRU for Inst {
                 src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
                 show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
             ),
+
+            Inst::ReadOnly_Gpr_Rm_R { src, dst, op, size } => format!(
+                "{} {}, {}",
+                ljustify2(op.to_string(), suffixBWLQ(*size)),
+                src.show_rru_sized(mb_rru, *size),
+                show_ireg_sized(dst.to_reg(), mb_rru, *size),
+            ),
+
             Inst::Div {
                 size,
                 signed,
@@ -830,7 +858,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_use(regs::rax());
             collector.add_mod(Writable::from_reg(regs::rdx()));
         }
-        Inst::XMM_Mov_RM_R { src, dst, .. } => {
+        Inst::ReadOnly_Gpr_Rm_R { src, dst, .. } | Inst::XMM_Mov_RM_R { src, dst, .. } => {
            src.get_regs_as_uses(collector);
            collector.add_def(*dst);
        }
@@ -1010,10 +1038,9 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
     match inst {
         // ** Nop
         Inst::Alu_RMI_R {
-            is_64: _,
-            op: _,
             ref mut src,
             ref mut dst,
+            ..
         } => {
             src.map_uses(mapper);
             map_mod(mapper, dst);
@@ -1028,6 +1055,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             ref mut src,
             ref mut dst,
             ..
+        }
+        | Inst::ReadOnly_Gpr_Rm_R {
+            ref mut src,
+            ref mut dst,
+            ..
         } => {
             src.map_uses(mapper);
             map_def(mapper, dst);
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index a2034e754c..6a0abd5c7b 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -120,12 +120,51 @@ struct InsnOutput {
     output: usize,
 }
 
-fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg {
+fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg {
     let inputs = ctx.get_input(spec.insn, spec.input);
     ctx.use_input_reg(inputs);
     inputs.reg
 }
 
+enum ExtSpec {
+    ZeroExtend32,
+    ZeroExtend64,
+    SignExtend32,
+    SignExtend64,
+}
+
+fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
+    let requested_size = match ext_spec {
+        ExtSpec::ZeroExtend32 | ExtSpec::SignExtend32 => 32,
+        ExtSpec::ZeroExtend64 | ExtSpec::SignExtend64 => 64,
+    };
+    let input_size = ctx.input_ty(spec.insn, spec.input).bits();
+
+    let ext_mode = match (input_size, requested_size) {
+        (a, b) if a == b => return input_to_reg(ctx, spec),
+        (a, 32) if a == 1 || a == 8 => ExtMode::BL,
+        (a, 64) if a == 1 || a == 8 => ExtMode::BQ,
+        (16, 32) => ExtMode::WL,
+        (16, 64) => ExtMode::WQ,
+        (32, 64) => ExtMode::LQ,
+        _ => unreachable!(),
+    };
+
+    let requested_ty = if requested_size == 32 { I32 } else { I64 };
+
+    let src = input_to_reg_mem(ctx, spec);
+    let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
+    match ext_spec {
+        ExtSpec::ZeroExtend32 | ExtSpec::ZeroExtend64 => {
+            ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst))
+        }
+        ExtSpec::SignExtend32 | ExtSpec::SignExtend64 => {
+            ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst))
+        }
+    }
+    dst.to_reg()
+}
+
 fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
     // TODO handle memory.
     RegMem::reg(input_to_reg(ctx, spec))
@@ -267,6 +306,60 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst));
         }
 
+        Opcode::Clz => {
+            // TODO when the x86 flags have use_lzcnt, we can use LZCNT.
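+            // (LZCNT computes the leading-zero count directly and gives a defined result for a
+            // zero input, whereas BSR sets ZF and leaves its destination undefined in that case,
+            // which is why the cmovz below is needed.)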
+
+            // General formula using bit-scan reverse (BSR):
+            // mov -1, %dst
+            // bsr %src, %tmp
+            // cmovz %dst, %tmp
+            // mov $(size_bits - 1), %dst
+            // sub %tmp, %dst
+
+            let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
+                I8 | I16 => (Some(ExtSpec::ZeroExtend32), I32),
+                a if a == I32 || a == I64 => (None, a),
+                _ => unreachable!(),
+            };
+
+            let src = if let Some(ext_spec) = ext_spec {
+                RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
+            } else {
+                input_to_reg_mem(ctx, inputs[0])
+            };
+            let dst = output_to_reg(ctx, outputs[0]);
+
+            let tmp = ctx.alloc_tmp(RegClass::I64, ty);
+            ctx.emit(Inst::imm_r(ty == I64, u64::max_value(), dst));
+
+            ctx.emit(Inst::read_only_gpr_rm_r(
+                ty.bytes() as u8,
+                ReadOnlyGprRmROpcode::Bsr,
+                src,
+                tmp,
+            ));
+
+            ctx.emit(Inst::cmove(
+                ty.bytes() as u8,
+                CC::Z,
+                RegMem::reg(dst.to_reg()),
+                tmp,
+            ));
+
+            ctx.emit(Inst::imm_r(
+                ty == I64,
+                ty.bits() as u64 - 1,
+                dst,
+            ));
+
+            ctx.emit(Inst::alu_rmi_r(
+                ty == I64,
+                AluRmiROpcode::Sub,
+                RegMemImm::reg(tmp.to_reg()),
+                dst,
+            ));
+        }
+
         Opcode::Uextend
         | Opcode::Sextend
         | Opcode::Bint
@@ -636,7 +729,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             };
             let dst = output_to_reg(ctx, outputs[0]);
             let offset: i32 = offset.into();
-            println!("stackslot_addr: {:?} @ off{}", stack_slot, offset);
             let inst = ctx
                 .abi()
                 .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), dst);
@@ -919,35 +1011,14 @@ impl LowerBackend for X64Backend {
                 assert!(jt_size <= u32::max_value() as usize);
                 let jt_size = jt_size as u32;
 
-                let idx_size_bits = ctx.input_ty(branches[0], 0).bits();
-
-                // Zero-extend to 32-bits if needed.
-                // TODO consider factoring this out?
-                let idx = if idx_size_bits < 32 {
-                    let ext_mode = match idx_size_bits {
-                        1 | 8 => ExtMode::BL,
-                        16 => ExtMode::WL,
-                        _ => unreachable!(),
-                    };
-                    let idx = input_to_reg_mem(
-                        ctx,
-                        InsnInput {
-                            insn: branches[0],
-                            input: 0,
-                        },
-                    );
-                    let tmp_idx = ctx.alloc_tmp(RegClass::I64, I32);
-                    ctx.emit(Inst::movzx_rm_r(ext_mode, idx, tmp_idx));
-                    tmp_idx.to_reg()
-                } else {
-                    input_to_reg(
-                        ctx,
-                        InsnInput {
-                            insn: branches[0],
-                            input: 0,
-                        },
-                    )
-                };
+                let idx = extend_input_to_reg(
+                    ctx,
+                    InsnInput {
+                        insn: branches[0],
+                        input: 0,
+                    },
+                    ExtSpec::ZeroExtend32,
+                );
 
                 // Bounds-check (compute flags from idx - jt_size) and branch to default.
                 ctx.emit(Inst::cmp_rmi_r(4, RegMemImm::imm(jt_size), idx));