machinst x64: implement cmov
This commit is contained in:
@@ -988,16 +988,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||||||
(false, true) => NarrowValueMode::SignExtend64,
|
(false, true) => NarrowValueMode::SignExtend64,
|
||||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
(false, false) => NarrowValueMode::ZeroExtend64,
|
||||||
};
|
};
|
||||||
let inputs = [
|
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
|
||||||
InsnInput {
|
|
||||||
insn: insn,
|
|
||||||
input: 0,
|
|
||||||
},
|
|
||||||
InsnInput {
|
|
||||||
insn: insn,
|
|
||||||
input: 1,
|
|
||||||
},
|
|
||||||
];
|
|
||||||
let ty = ctx.input_ty(insn, 0);
|
let ty = ctx.input_ty(insn, 0);
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
|
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||||
@@ -1010,16 +1001,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags<C: LowerCtx<I = Inst>>(
|
|||||||
pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
||||||
let ty = ctx.input_ty(insn, 0);
|
let ty = ctx.input_ty(insn, 0);
|
||||||
let bits = ty_bits(ty);
|
let bits = ty_bits(ty);
|
||||||
let inputs = [
|
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
|
||||||
InsnInput {
|
|
||||||
insn: insn,
|
|
||||||
input: 0,
|
|
||||||
},
|
|
||||||
InsnInput {
|
|
||||||
insn: insn,
|
|
||||||
input: 1,
|
|
||||||
},
|
|
||||||
];
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
match bits {
|
match bits {
|
||||||
|
|||||||
@@ -849,6 +849,30 @@ pub(crate) fn emit(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::Cmove {
|
||||||
|
size,
|
||||||
|
cc,
|
||||||
|
src,
|
||||||
|
dst: reg_g,
|
||||||
|
} => {
|
||||||
|
let (prefix, rex_flags) = match size {
|
||||||
|
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
|
||||||
|
4 => (LegacyPrefix::None, RexFlags::clear_w()),
|
||||||
|
8 => (LegacyPrefix::None, RexFlags::set_w()),
|
||||||
|
_ => unreachable!("invalid size spec for cmove"),
|
||||||
|
};
|
||||||
|
let opcode = 0x0F40 + cc.get_enc() as u32;
|
||||||
|
match src {
|
||||||
|
RegMem::Reg { reg: reg_e } => {
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags);
|
||||||
|
}
|
||||||
|
RegMem::Mem { addr } => {
|
||||||
|
let addr = &addr.finalize(state);
|
||||||
|
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex_flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Inst::Push64 { src } => {
|
Inst::Push64 { src } => {
|
||||||
match src {
|
match src {
|
||||||
RegMemImm::Reg { reg } => {
|
RegMemImm::Reg { reg } => {
|
||||||
|
|||||||
@@ -2481,6 +2481,44 @@ fn test_x64_emit() {
|
|||||||
insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b"));
|
insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b"));
|
||||||
insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
|
insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
|
||||||
|
|
||||||
|
// ========================================================
|
||||||
|
// Cmove
|
||||||
|
insns.push((
|
||||||
|
Inst::cmove(2, CC::O, RegMem::reg(rdi), w_rsi),
|
||||||
|
"660F40F7",
|
||||||
|
"cmovow %di, %si",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::cmove(
|
||||||
|
2,
|
||||||
|
CC::NO,
|
||||||
|
RegMem::mem(Amode::imm_reg_reg_shift(37, rdi, rsi, 2)),
|
||||||
|
w_r15,
|
||||||
|
),
|
||||||
|
"66440F417CB725",
|
||||||
|
"cmovnow 37(%rdi,%rsi,4), %r15w",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::cmove(4, CC::LE, RegMem::reg(rdi), w_rsi),
|
||||||
|
"0F4EF7",
|
||||||
|
"cmovlel %edi, %esi",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::cmove(4, CC::NLE, RegMem::mem(Amode::imm_reg(0, r15)), w_rsi),
|
||||||
|
"410F4F37",
|
||||||
|
"cmovnlel 0(%r15), %esi",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::cmove(8, CC::Z, RegMem::reg(rdi), w_r14),
|
||||||
|
"4C0F44F7",
|
||||||
|
"cmovzq %rdi, %r14",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::cmove(8, CC::NZ, RegMem::mem(Amode::imm_reg(13, rdi)), w_r14),
|
||||||
|
"4C0F45770D",
|
||||||
|
"cmovnzq 13(%rdi), %r14",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// Push64
|
// Push64
|
||||||
insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi"));
|
insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi"));
|
||||||
|
|||||||
@@ -118,6 +118,16 @@ pub enum Inst {
|
|||||||
/// Materializes the requested condition code in the destination reg.
|
/// Materializes the requested condition code in the destination reg.
|
||||||
Setcc { cc: CC, dst: Writable<Reg> },
|
Setcc { cc: CC, dst: Writable<Reg> },
|
||||||
|
|
||||||
|
/// Integer conditional move.
|
||||||
|
/// Overwrites the destination register only when `cc` holds; otherwise `dst` keeps its previous value.
|
||||||
|
Cmove {
|
||||||
|
/// Possible values are 2, 4 or 8. Checked in the related factory.
|
||||||
|
size: u8,
|
||||||
|
cc: CC,
|
||||||
|
src: RegMem,
|
||||||
|
dst: Writable<Reg>,
|
||||||
|
},
|
||||||
|
|
||||||
// =====================================
|
// =====================================
|
||||||
// Stack manipulation.
|
// Stack manipulation.
|
||||||
/// pushq (reg addr imm)
|
/// pushq (reg addr imm)
|
||||||
@@ -350,6 +360,12 @@ impl Inst {
|
|||||||
Inst::Setcc { cc, dst }
|
Inst::Setcc { cc, dst }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn cmove(size: u8, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||||
|
debug_assert!(size == 8 || size == 4 || size == 2);
|
||||||
|
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
|
||||||
|
Inst::Cmove { size, cc, src, dst }
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn push64(src: RegMemImm) -> Inst {
|
pub(crate) fn push64(src: RegMemImm) -> Inst {
|
||||||
Inst::Push64 { src }
|
Inst::Push64 { src }
|
||||||
}
|
}
|
||||||
@@ -585,6 +601,12 @@ impl ShowWithRRU for Inst {
|
|||||||
ljustify2("set".to_string(), cc.to_string()),
|
ljustify2("set".to_string(), cc.to_string()),
|
||||||
show_ireg_sized(dst.to_reg(), mb_rru, 1)
|
show_ireg_sized(dst.to_reg(), mb_rru, 1)
|
||||||
),
|
),
|
||||||
|
Inst::Cmove { size, cc, src, dst } => format!(
|
||||||
|
"{} {}, {}",
|
||||||
|
ljustify(format!("cmov{}{}", cc.to_string(), suffixBWLQ(*size))),
|
||||||
|
src.show_rru_sized(mb_rru, *size),
|
||||||
|
show_ireg_sized(dst.to_reg(), mb_rru, *size)
|
||||||
|
),
|
||||||
Inst::Push64 { src } => {
|
Inst::Push64 { src } => {
|
||||||
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
|
||||||
}
|
}
|
||||||
@@ -701,6 +723,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
Inst::Setcc { dst, .. } => {
|
Inst::Setcc { dst, .. } => {
|
||||||
collector.add_def(*dst);
|
collector.add_def(*dst);
|
||||||
}
|
}
|
||||||
|
Inst::Cmove { src, dst, .. } => {
|
||||||
|
src.get_regs_as_uses(collector);
|
||||||
|
collector.add_def(*dst);
|
||||||
|
}
|
||||||
Inst::Push64 { src } => {
|
Inst::Push64 { src } => {
|
||||||
src.get_regs_as_uses(collector);
|
src.get_regs_as_uses(collector);
|
||||||
collector.add_mod(Writable::from_reg(regs::rsp()));
|
collector.add_mod(Writable::from_reg(regs::rsp()));
|
||||||
@@ -899,6 +925,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_use(mapper, dst);
|
map_use(mapper, dst);
|
||||||
}
|
}
|
||||||
Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
|
Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst),
|
||||||
|
Inst::Cmove {
|
||||||
|
ref mut src,
|
||||||
|
ref mut dst,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
src.map_uses(mapper);
|
||||||
|
map_def(mapper, dst)
|
||||||
|
}
|
||||||
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
Inst::Push64 { ref mut src } => src.map_uses(mapper),
|
||||||
Inst::Pop64 { ref mut dst } => {
|
Inst::Pop64 { ref mut dst } => {
|
||||||
map_def(mapper, dst);
|
map_def(mapper, dst);
|
||||||
|
|||||||
@@ -123,6 +123,11 @@ fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg {
|
|||||||
inputs.reg
|
inputs.reg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
|
||||||
|
// TODO handle memory.
|
||||||
|
RegMem::reg(input_to_reg(ctx, spec))
|
||||||
|
}
|
||||||
|
|
||||||
/// Try to use an immediate for constant inputs, and a register otherwise.
|
/// Try to use an immediate for constant inputs, and a register otherwise.
|
||||||
/// TODO: handle memory as well!
|
/// TODO: handle memory as well!
|
||||||
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
|
||||||
@@ -146,6 +151,20 @@ fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable<Reg> {
|
|||||||
ctx.get_output(spec.insn, spec.output)
|
ctx.get_output(spec.insn, spec.output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn emit_cmp(ctx: Ctx, insn: IRInst) {
|
||||||
|
let ty = ctx.input_ty(insn, 0);
|
||||||
|
|
||||||
|
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
|
||||||
|
|
||||||
|
// TODO Try to commute the operands (and invert the condition) if one is an immediate.
|
||||||
|
let lhs = input_to_reg(ctx, inputs[0]);
|
||||||
|
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
||||||
|
|
||||||
|
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
|
||||||
|
// us dst - src at the machine instruction level, so invert operands.
|
||||||
|
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
|
||||||
|
}
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
// Top-level instruction lowering entry point, for one instruction.
|
// Top-level instruction lowering entry point, for one instruction.
|
||||||
|
|
||||||
@@ -269,18 +288,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) -> Codeg
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Icmp => {
|
Opcode::Icmp => {
|
||||||
|
emit_cmp(ctx, insn);
|
||||||
|
|
||||||
let condcode = inst_condcode(ctx.data(insn));
|
let condcode = inst_condcode(ctx.data(insn));
|
||||||
let cc = CC::from_intcc(condcode);
|
let cc = CC::from_intcc(condcode);
|
||||||
let ty = ctx.input_ty(insn, 0);
|
|
||||||
|
|
||||||
// TODO Try to commute the operands (and invert the condition) if one is an immediate.
|
|
||||||
let lhs = input_to_reg(ctx, inputs[0]);
|
|
||||||
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
|
|
||||||
let dst = output_to_reg(ctx, outputs[0]);
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
|
|
||||||
// us dst - src at the machine instruction level, so invert operands.
|
|
||||||
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
|
|
||||||
ctx.emit(Inst::setcc(cc, dst));
|
ctx.emit(Inst::setcc(cc, dst));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -603,6 +615,47 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) -> Codeg
|
|||||||
ctx.emit(inst);
|
ctx.emit(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Select | Opcode::Selectif => {
|
||||||
|
let cc = if op == Opcode::Select {
|
||||||
|
// The input is a boolean value, compare it against zero.
|
||||||
|
let size = ctx.input_ty(insn, 0).bytes() as u8;
|
||||||
|
let test = input_to_reg(ctx, inputs[0]);
|
||||||
|
ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
|
||||||
|
|
||||||
|
CC::NZ
|
||||||
|
} else {
|
||||||
|
// Verification ensures that the input is always a single-def ifcmp.
|
||||||
|
let cmp_insn = ctx
|
||||||
|
.get_input(inputs[0].insn, inputs[0].input)
|
||||||
|
.inst
|
||||||
|
.unwrap()
|
||||||
|
.0;
|
||||||
|
debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
|
||||||
|
emit_cmp(ctx, cmp_insn);
|
||||||
|
|
||||||
|
CC::from_intcc(inst_condcode(ctx.data(insn)))
|
||||||
|
};
|
||||||
|
|
||||||
|
let lhs = input_to_reg_mem(ctx, inputs[1]);
|
||||||
|
let rhs = input_to_reg(ctx, inputs[2]);
|
||||||
|
let dst = output_to_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
let ty = ctx.output_ty(insn, 0);
|
||||||
|
assert!(is_int_ty(ty), "float cmov NYI");
|
||||||
|
|
||||||
|
let size = ty.bytes() as u8;
|
||||||
|
if size == 1 {
|
||||||
|
// Sign-extend operands to 32, then do a cmove of size 4.
|
||||||
|
let lhs_se = ctx.alloc_tmp(RegClass::I64, I32);
|
||||||
|
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se));
|
||||||
|
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst));
|
||||||
|
ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
|
||||||
|
} else {
|
||||||
|
ctx.emit(Inst::gen_move(dst, rhs, ty));
|
||||||
|
ctx.emit(Inst::cmove(size, cc, lhs, dst));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::IaddImm
|
Opcode::IaddImm
|
||||||
| Opcode::ImulImm
|
| Opcode::ImulImm
|
||||||
| Opcode::UdivImm
|
| Opcode::UdivImm
|
||||||
|
|||||||
Reference in New Issue
Block a user