machinst x64: revamp integer immediate emission;

In particular:

- try to optimize the integer immediate emission into a 32-bit emission
when the high bits are all zero, and stop relying on the caller of
`imm_r` to ensure this.
- rename `Inst::imm_r`/`Inst::Imm_R` to `Inst::imm`/`Inst::Imm`.
- generate a sign-extending mov of a 32-bit immediate into a 64-bit
register, whenever possible.
- fix a few places where the previous commit had introduced the
generation of zero constants with xor when calling `put_input_to_reg`,
thus clobbering the flags before they were read.
This commit is contained in:
Benjamin Bouvier
2020-09-08 18:23:11 +02:00
parent d9052d0a9c
commit 3849dc18b1
5 changed files with 229 additions and 172 deletions

View File

@@ -1001,6 +1001,14 @@ pub enum OperandSize {
} }
impl OperandSize { impl OperandSize {
pub(crate) fn from_bytes(num_bytes: u32) -> Self {
match num_bytes {
1 | 2 | 4 => OperandSize::Size32,
8 => OperandSize::Size64,
_ => unreachable!(),
}
}
pub(crate) fn to_bytes(&self) -> u8 { pub(crate) fn to_bytes(&self) -> u8 {
match self { match self {
Self::Size32 => 4, Self::Size32 => 4,

View File

@@ -791,7 +791,11 @@ pub(crate) fn emit(
// x % -1 = 0; put the result into the destination, $rdx. // x % -1 = 0; put the result into the destination, $rdx.
let done_label = sink.get_label(); let done_label = sink.get_label();
let inst = Inst::imm_r(*size == 8, 0, Writable::from_reg(regs::rdx())); let inst = Inst::imm(
OperandSize::from_bytes(*size as u32),
0,
Writable::from_reg(regs::rdx()),
);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
let inst = Inst::jmp_known(BranchTarget::Label(done_label)); let inst = Inst::jmp_known(BranchTarget::Label(done_label));
@@ -803,7 +807,7 @@ pub(crate) fn emit(
if *size == 8 { if *size == 8 {
let tmp = tmp.expect("temporary for i64 sdiv"); let tmp = tmp.expect("temporary for i64 sdiv");
let inst = Inst::imm_r(true, 0x8000000000000000, tmp); let inst = Inst::imm(OperandSize::Size64, 0x8000000000000000, tmp);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax()); let inst = Inst::cmp_rmi_r(8, RegMemImm::reg(tmp.to_reg()), regs::rax());
@@ -839,7 +843,7 @@ pub(crate) fn emit(
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
} else { } else {
// zero for unsigned opcodes. // zero for unsigned opcodes.
let inst = Inst::imm_r(true /* is_64 */, 0, Writable::from_reg(regs::rdx())); let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
} }
@@ -854,18 +858,30 @@ pub(crate) fn emit(
} }
} }
Inst::Imm_R { Inst::Imm {
dst_is_64, dst_is_64,
simm64, simm64,
dst, dst,
} => { } => {
let enc_dst = int_reg_enc(dst.to_reg()); let enc_dst = int_reg_enc(dst.to_reg());
if *dst_is_64 { if *dst_is_64 {
// FIXME JRS 2020Feb10: also use the 32-bit case here when if low32_will_sign_extend_to_64(*simm64) {
// possible // Sign-extended move imm32.
sink.put1(0x48 | ((enc_dst >> 3) & 1)); emit_std_enc_enc(
sink.put1(0xB8 | (enc_dst & 7)); sink,
sink.put8(*simm64); LegacyPrefixes::None,
0xC7,
1,
/* subopcode */ 0,
enc_dst,
RexFlags::set_w(),
);
sink.put4(*simm64 as u32);
} else {
sink.put1(0x48 | ((enc_dst >> 3) & 1));
sink.put1(0xB8 | (enc_dst & 7));
sink.put8(*simm64);
}
} else { } else {
if ((enc_dst >> 3) & 1) == 1 { if ((enc_dst >> 3) & 1) == 1 {
sink.put1(0x41); sink.put1(0x41);
@@ -2223,10 +2239,10 @@ pub(crate) fn emit(
// Otherwise, put INT_MAX. // Otherwise, put INT_MAX.
if *dst_size == OperandSize::Size64 { if *dst_size == OperandSize::Size64 {
let inst = Inst::imm_r(true, 0x7fffffffffffffff, *dst); let inst = Inst::imm(OperandSize::Size64, 0x7fffffffffffffff, *dst);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
} else { } else {
let inst = Inst::imm_r(false, 0x7fffffff, *dst); let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, *dst);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
} }
} else { } else {
@@ -2248,7 +2264,7 @@ pub(crate) fn emit(
match *src_size { match *src_size {
OperandSize::Size32 => { OperandSize::Size32 => {
let cst = Ieee32::pow2(output_bits - 1).neg().bits(); let cst = Ieee32::pow2(output_bits - 1).neg().bits();
let inst = Inst::imm32_r_unchecked(cst as u64, *tmp_gpr); let inst = Inst::imm(OperandSize::Size32, cst as u64, *tmp_gpr);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
} }
OperandSize::Size64 => { OperandSize::Size64 => {
@@ -2260,7 +2276,7 @@ pub(crate) fn emit(
} else { } else {
Ieee64::pow2(output_bits - 1).neg() Ieee64::pow2(output_bits - 1).neg()
}; };
let inst = Inst::imm_r(true, cst.bits(), *tmp_gpr); let inst = Inst::imm(OperandSize::Size64, cst.bits(), *tmp_gpr);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
} }
} }
@@ -2362,15 +2378,14 @@ pub(crate) fn emit(
let done = sink.get_label(); let done = sink.get_label();
if *src_size == OperandSize::Size64 { let cst = if *src_size == OperandSize::Size64 {
let cst = Ieee64::pow2(dst_size.to_bits() - 1).bits(); Ieee64::pow2(dst_size.to_bits() - 1).bits()
let inst = Inst::imm_r(true, cst, *tmp_gpr);
inst.emit(sink, flags, state);
} else { } else {
let cst = Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64; Ieee32::pow2(dst_size.to_bits() - 1).bits() as u64
let inst = Inst::imm32_r_unchecked(cst, *tmp_gpr); };
inst.emit(sink, flags, state);
} let inst = Inst::imm(*src_size, cst, *tmp_gpr);
inst.emit(sink, flags, state);
let inst = let inst =
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm); Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
@@ -2454,8 +2469,8 @@ pub(crate) fn emit(
if *is_saturating { if *is_saturating {
// The input was "large" (>= 2**(width -1)), so the only way to get an integer // The input was "large" (>= 2**(width -1)), so the only way to get an integer
// overflow is because the input was too large: saturate to the max value. // overflow is because the input was too large: saturate to the max value.
let inst = Inst::imm_r( let inst = Inst::imm(
true, OperandSize::Size64,
if *dst_size == OperandSize::Size64 { if *dst_size == OperandSize::Size64 {
u64::max_value() u64::max_value()
} else { } else {
@@ -2475,7 +2490,7 @@ pub(crate) fn emit(
sink.bind_label(next_is_large); sink.bind_label(next_is_large);
if *dst_size == OperandSize::Size64 { if *dst_size == OperandSize::Size64 {
let inst = Inst::imm_r(true, 1 << 63, *tmp_gpr); let inst = Inst::imm(OperandSize::Size64, 1 << 63, *tmp_gpr);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
let inst = Inst::alu_rmi_r( let inst = Inst::alu_rmi_r(

View File

@@ -1368,43 +1368,43 @@ fn test_x64_emit() {
// Imm_R // Imm_R
// //
insns.push(( insns.push((
Inst::imm_r(false, 1234567, w_r14), Inst::imm(OperandSize::Size32, 1234567, w_r14),
"41BE87D61200", "41BE87D61200",
"movl $1234567, %r14d", "movl $1234567, %r14d",
)); ));
insns.push(( insns.push((
Inst::imm_r(false, -126i64 as u64, w_r14), Inst::imm(OperandSize::Size32, -126i64 as u64, w_r14),
"41BE82FFFFFF", "41BE82FFFFFF",
"movl $-126, %r14d", "movl $-126, %r14d",
)); ));
insns.push(( insns.push((
Inst::imm_r(true, 1234567898765, w_r14), Inst::imm(OperandSize::Size64, 1234567898765, w_r14),
"49BE8D26FB711F010000", "49BE8D26FB711F010000",
"movabsq $1234567898765, %r14", "movabsq $1234567898765, %r14",
)); ));
insns.push(( insns.push((
Inst::imm_r(true, -126i64 as u64, w_r14), Inst::imm(OperandSize::Size64, -126i64 as u64, w_r14),
"49BE82FFFFFFFFFFFFFF", "49C7C682FFFFFF",
"movabsq $-126, %r14", "movabsq $-126, %r14",
)); ));
insns.push(( insns.push((
Inst::imm_r(false, 1234567, w_rcx), Inst::imm(OperandSize::Size32, 1234567, w_rcx),
"B987D61200", "B987D61200",
"movl $1234567, %ecx", "movl $1234567, %ecx",
)); ));
insns.push(( insns.push((
Inst::imm_r(false, -126i64 as u64, w_rcx), Inst::imm(OperandSize::Size32, -126i64 as u64, w_rcx),
"B982FFFFFF", "B982FFFFFF",
"movl $-126, %ecx", "movl $-126, %ecx",
)); ));
insns.push(( insns.push((
Inst::imm_r(true, 1234567898765, w_rsi), Inst::imm(OperandSize::Size64, 1234567898765, w_rsi),
"48BE8D26FB711F010000", "48BE8D26FB711F010000",
"movabsq $1234567898765, %rsi", "movabsq $1234567898765, %rsi",
)); ));
insns.push(( insns.push((
Inst::imm_r(true, -126i64 as u64, w_rbx), Inst::imm(OperandSize::Size64, -126i64 as u64, w_rbx),
"48BB82FFFFFFFFFFFFFF", "48C7C382FFFFFF",
"movabsq $-126, %rbx", "movabsq $-126, %rbx",
)); ));

View File

@@ -107,7 +107,7 @@ pub enum Inst {
/// Constant materialization: (imm32 imm64) reg. /// Constant materialization: (imm32 imm64) reg.
/// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32. /// Either: movl $imm32, %reg32 or movabsq $imm64, %reg32.
Imm_R { Imm {
dst_is_64: bool, dst_is_64: bool,
simm64: u64, simm64: u64,
dst: Writable<Reg>, dst: Writable<Reg>,
@@ -579,31 +579,18 @@ impl Inst {
Inst::SignExtendData { size } Inst::SignExtendData { size }
} }
pub(crate) fn imm_r(dst_is_64: bool, simm64: u64, dst: Writable<Reg>) -> Inst { pub(crate) fn imm(size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64);
if !dst_is_64 { // Try to generate a 32-bit immediate when the upper high bits are zeroed (which matches
debug_assert!( // the semantics of movl).
low32_will_sign_extend_to_64(simm64), let dst_is_64 = size == OperandSize::Size64 && simm64 > u32::max_value() as u64;
"{} won't sign-extend to 64 bits!", Inst::Imm {
simm64
);
}
Inst::Imm_R {
dst_is_64, dst_is_64,
simm64, simm64,
dst, dst,
} }
} }
pub(crate) fn imm32_r_unchecked(simm64: u64, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Imm_R {
dst_is_64: false,
simm64,
dst,
}
}
pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst { pub(crate) fn mov_r_r(is_64: bool, src: Reg, dst: Writable<Reg>) -> Inst {
debug_assert!(src.get_class() == RegClass::I64); debug_assert!(src.get_class() == RegClass::I64);
debug_assert!(dst.to_reg().get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64);
@@ -1424,7 +1411,7 @@ impl ShowWithRRU for Inst {
show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()), show_ireg_sized(dst.to_reg(), mb_rru, dst_size.to_bytes()),
), ),
Inst::Imm_R { Inst::Imm {
dst_is_64, dst_is_64,
simm64, simm64,
dst, dst,
@@ -1761,7 +1748,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_use(*dst); collector.add_use(*dst);
} }
Inst::Imm_R { dst, .. } => { Inst::Imm { dst, .. } => {
collector.add_def(*dst); collector.add_def(*dst);
} }
Inst::Mov_R_R { src, dst, .. } | Inst::XmmToGpr { src, dst, .. } => { Inst::Mov_R_R { src, dst, .. } | Inst::XmmToGpr { src, dst, .. } => {
@@ -2097,7 +2084,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
src.map_uses(mapper); src.map_uses(mapper);
map_use(mapper, dst); map_use(mapper, dst);
} }
Inst::Imm_R { ref mut dst, .. } => map_def(mapper, dst), Inst::Imm { ref mut dst, .. } => map_def(mapper, dst),
Inst::Mov_R_R { Inst::Mov_R_R {
ref mut src, ref mut src,
ref mut dst, ref mut dst,
@@ -2407,7 +2394,57 @@ impl MachInst for Inst {
mut alloc_tmp: F, mut alloc_tmp: F,
) -> SmallVec<[Self; 4]> { ) -> SmallVec<[Self; 4]> {
let mut ret = SmallVec::new(); let mut ret = SmallVec::new();
if ty.is_int() { if ty == types::F32 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(RegClass::I64, types::I32);
ret.push(Inst::imm(OperandSize::Size32, value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movd,
RegMem::reg(tmp.to_reg()),
OperandSize::Size32,
to_reg,
));
}
} else if ty == types::F64 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(RegClass::I64, types::I64);
ret.push(Inst::imm(OperandSize::Size64, value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movq,
RegMem::reg(tmp.to_reg()),
OperandSize::Size64,
to_reg,
));
}
} else {
// Must be an integer type.
debug_assert!(
ty == types::B1
|| ty == types::I8
|| ty == types::B8
|| ty == types::I16
|| ty == types::B16
|| ty == types::I32
|| ty == types::B32
|| ty == types::I64
|| ty == types::B64
|| ty == types::R32
|| ty == types::R64
);
if value == 0 { if value == 0 {
ret.push(Inst::alu_rmi_r( ret.push(Inst::alu_rmi_r(
ty == types::I64, ty == types::I64,
@@ -2416,42 +2453,11 @@ impl MachInst for Inst {
to_reg, to_reg,
)); ));
} else { } else {
let is_64 = ty == types::I64 && value > 0x7fffffff; ret.push(Inst::imm(
ret.push(Inst::imm_r(is_64, value, to_reg)); OperandSize::from_bytes(ty.bytes()),
} value,
} else if value == 0 { to_reg,
ret.push(Inst::xmm_rm_r( ));
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
match ty {
types::F32 => {
let tmp = alloc_tmp(RegClass::I64, types::I32);
ret.push(Inst::imm32_r_unchecked(value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movd,
RegMem::reg(tmp.to_reg()),
OperandSize::Size32,
to_reg,
));
}
types::F64 => {
let tmp = alloc_tmp(RegClass::I64, types::I64);
ret.push(Inst::imm_r(true, value, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movq,
RegMem::reg(tmp.to_reg()),
OperandSize::Size64,
to_reg,
));
}
_ => panic!("unexpected type {:?} in gen_constant", ty),
} }
} }
ret ret

View File

@@ -29,7 +29,7 @@ type Ctx<'a> = &'a mut dyn LowerCtx<I = Inst>;
//============================================================================= //=============================================================================
// Helpers for instruction lowering. // Helpers for instruction lowering.
fn is_int_ty(ty: Type) -> bool { fn is_int_or_ref_ty(ty: Type) -> bool {
match ty { match ty {
types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true, types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
types::R32 => panic!("shouldn't have 32-bits refs on x64"), types::R32 => panic!("shouldn't have 32-bits refs on x64"),
@@ -207,6 +207,10 @@ fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
} }
} }
/// Emits an int comparison instruction.
///
/// Note: make sure that there are no instructions modifying the flags between a call to this
/// function and the use of the flags!
fn emit_cmp(ctx: Ctx, insn: IRInst) { fn emit_cmp(ctx: Ctx, insn: IRInst) {
let ty = ctx.input_ty(insn, 0); let ty = ctx.input_ty(insn, 0);
@@ -253,6 +257,10 @@ enum FcmpCondResult {
InvertedEqualOrConditions(CC, CC), InvertedEqualOrConditions(CC, CC),
} }
/// Emits a float comparison instruction.
///
/// Note: make sure that there are no instructions modifying the flags between a call to this
/// function and the use of the flags!
fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult { fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
let (flip_operands, inverted_equal) = match cond_code { let (flip_operands, inverted_equal) = match cond_code {
FloatCC::LessThan FloatCC::LessThan
@@ -485,12 +493,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match op { match op {
Opcode::Iconst | Opcode::Bconst | Opcode::Null => { Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
let w64 = ctx let value = ctx
.get_constant(insn) .get_constant(insn)
.expect("constant value for iconst et al"); .expect("constant value for iconst et al");
let dst_is_64 = w64 > 0x7fffffff;
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::imm_r(dst_is_64, w64, dst)); for inst in Inst::gen_constant(dst, value, ty.unwrap(), |reg_class, ty| {
ctx.alloc_tmp(reg_class, ty)
}) {
ctx.emit(inst);
}
} }
Opcode::Iadd Opcode::Iadd
@@ -704,7 +715,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
let tmp = ctx.alloc_tmp(RegClass::I64, ty); let tmp = ctx.alloc_tmp(RegClass::I64, ty);
ctx.emit(Inst::imm_r(ty == types::I64, u64::max_value(), dst)); ctx.emit(Inst::imm(
OperandSize::from_bytes(ty.bytes()),
u64::max_value(),
dst,
));
ctx.emit(Inst::unary_rm_r( ctx.emit(Inst::unary_rm_r(
ty.bytes() as u8, ty.bytes() as u8,
@@ -720,7 +735,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
tmp, tmp,
)); ));
ctx.emit(Inst::imm_r(ty == types::I64, ty.bits() as u64 - 1, dst)); ctx.emit(Inst::imm(
OperandSize::from_bytes(ty.bytes()),
ty.bits() as u64 - 1,
dst,
));
ctx.emit(Inst::alu_rmi_r( ctx.emit(Inst::alu_rmi_r(
ty == types::I64, ty == types::I64,
@@ -745,7 +764,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
let tmp = ctx.alloc_tmp(RegClass::I64, ty); let tmp = ctx.alloc_tmp(RegClass::I64, ty);
ctx.emit(Inst::imm_r(false /* 64 bits */, ty.bits() as u64, tmp)); ctx.emit(Inst::imm(OperandSize::Size32, ty.bits() as u64, tmp));
ctx.emit(Inst::unary_rm_r( ctx.emit(Inst::unary_rm_r(
ty.bytes() as u8, ty.bytes() as u8,
@@ -797,7 +816,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
)); ));
// mov 0x7777_7777_7777_7777, cst // mov 0x7777_7777_7777_7777, cst
ctx.emit(Inst::imm_r(is_64, 0x7777777777777777, cst)); ctx.emit(Inst::imm(OperandSize::Size64, 0x7777777777777777, cst));
// andq cst, tmp1 // andq cst, tmp1
ctx.emit(Inst::alu_rmi_r( ctx.emit(Inst::alu_rmi_r(
@@ -881,7 +900,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
)); ));
// mov $0x0F0F_0F0F_0F0F_0F0F, cst // mov $0x0F0F_0F0F_0F0F_0F0F, cst
ctx.emit(Inst::imm_r(is_64, 0x0F0F0F0F0F0F0F0F, cst)); ctx.emit(Inst::imm(OperandSize::Size64, 0x0F0F0F0F0F0F0F0F, cst));
// and cst, dst // and cst, dst
ctx.emit(Inst::alu_rmi_r( ctx.emit(Inst::alu_rmi_r(
@@ -892,7 +911,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
)); ));
// mov $0x0101_0101_0101_0101, cst // mov $0x0101_0101_0101_0101, cst
ctx.emit(Inst::imm_r(is_64, 0x0101010101010101, cst)); ctx.emit(Inst::imm(OperandSize::Size64, 0x0101010101010101, cst));
// mul cst, dst // mul cst, dst
ctx.emit(Inst::alu_rmi_r( ctx.emit(Inst::alu_rmi_r(
@@ -2160,23 +2179,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
let cond_code = ctx.data(fcmp).fp_cond_code().unwrap(); let cond_code = ctx.data(fcmp).fp_cond_code().unwrap();
// we request inversion of Equal to NotEqual here: taking LHS if equal would mean // For equal, we flip the operands, because we can't test a conjunction of
// take it if both CC::NP and CC::Z are set, the conjunction of which can't be // CPU flags with a single cmove; see InvertedEqualOrConditions doc comment.
// modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the let (lhs_input, rhs_input) = match cond_code {
// select operation, and invert the equal to a not-equal here. FloatCC::Equal => (inputs[2], inputs[1]),
let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual); _ => (inputs[1], inputs[2]),
let (lhs_input, rhs_input) = match fcmp_results {
FcmpCondResult::InvertedEqualOrConditions(_, _) => (inputs[2], inputs[1]),
FcmpCondResult::Condition(_)
| FcmpCondResult::AndConditions(_, _)
| FcmpCondResult::OrConditions(_, _) => (inputs[1], inputs[2]),
}; };
let ty = ctx.output_ty(insn, 0); let ty = ctx.output_ty(insn, 0);
let rhs = put_input_in_reg(ctx, rhs_input); let rhs = put_input_in_reg(ctx, rhs_input);
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
let lhs = if is_int_ty(ty) && ty.bytes() < 4 { let lhs = if is_int_or_ref_ty(ty) && ty.bytes() < 4 {
// Special case: since the higher bits are undefined per CLIF semantics, we // Special case: since the higher bits are undefined per CLIF semantics, we
// can just apply a 32-bit cmove here. Force inputs into registers, to // can just apply a 32-bit cmove here. Force inputs into registers, to
// avoid partial spilling out-of-bounds with memory accesses, though. // avoid partial spilling out-of-bounds with memory accesses, though.
@@ -2186,11 +2199,22 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
input_to_reg_mem(ctx, lhs_input) input_to_reg_mem(ctx, lhs_input)
}; };
// We request inversion of Equal to NotEqual here: taking LHS if equal would mean
// take it if both CC::NP and CC::Z are set, the conjunction of which can't be
// modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
// select operation, and invert the equal to a not-equal here.
let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
if let FcmpCondResult::InvertedEqualOrConditions(_, _) = &fcmp_results {
// Keep this sync'd with the lowering of the select inputs above.
assert_eq!(cond_code, FloatCC::Equal);
}
ctx.emit(Inst::gen_move(dst, rhs, ty)); ctx.emit(Inst::gen_move(dst, rhs, ty));
match fcmp_results { match fcmp_results {
FcmpCondResult::Condition(cc) => { FcmpCondResult::Condition(cc) => {
if is_int_ty(ty) { if is_int_or_ref_ty(ty) {
let size = u8::max(ty.bytes() as u8, 4); let size = u8::max(ty.bytes() as u8, 4);
ctx.emit(Inst::cmove(size, cc, lhs, dst)); ctx.emit(Inst::cmove(size, cc, lhs, dst));
} else { } else {
@@ -2204,7 +2228,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
FcmpCondResult::InvertedEqualOrConditions(cc1, cc2) FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
| FcmpCondResult::OrConditions(cc1, cc2) => { | FcmpCondResult::OrConditions(cc1, cc2) => {
if is_int_ty(ty) { if is_int_or_ref_ty(ty) {
let size = u8::max(ty.bytes() as u8, 4); let size = u8::max(ty.bytes() as u8, 4);
ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst)); ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst));
ctx.emit(Inst::cmove(size, cc2, lhs, dst)); ctx.emit(Inst::cmove(size, cc2, lhs, dst));
@@ -2215,27 +2239,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} }
} }
} else { } else {
let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) { let ty = ty.unwrap();
emit_cmp(ctx, icmp);
let cond_code = ctx.data(icmp).cond_code().unwrap();
CC::from_intcc(cond_code)
} else {
// The input is a boolean value, compare it against zero.
let size = ctx.input_ty(insn, 0).bytes() as u8;
let test = put_input_in_reg(ctx, inputs[0]);
ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
CC::NZ
};
let rhs = put_input_in_reg(ctx, inputs[2]); let mut size = ty.bytes() as u8;
let dst = get_output_reg(ctx, outputs[0]); let lhs = if is_int_or_ref_ty(ty) {
let ty = ctx.output_ty(insn, 0); if size < 4 {
ctx.emit(Inst::gen_move(dst, rhs, ty));
if is_int_ty(ty) {
let mut size = ty.bytes() as u8;
let lhs = if size < 4 {
// Special case: since the higher bits are undefined per CLIF semantics, we // Special case: since the higher bits are undefined per CLIF semantics, we
// can just apply a 32-bit cmove here. Force inputs into registers, to // can just apply a 32-bit cmove here. Force inputs into registers, to
// avoid partial spilling out-of-bounds with memory accesses, though. // avoid partial spilling out-of-bounds with memory accesses, though.
@@ -2243,17 +2251,44 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMem::reg(put_input_in_reg(ctx, inputs[1])) RegMem::reg(put_input_in_reg(ctx, inputs[1]))
} else { } else {
input_to_reg_mem(ctx, inputs[1]) input_to_reg_mem(ctx, inputs[1])
}; }
} else {
input_to_reg_mem(ctx, inputs[1])
};
let rhs = put_input_in_reg(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]);
let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
emit_cmp(ctx, icmp);
let cond_code = ctx.data(icmp).cond_code().unwrap();
CC::from_intcc(cond_code)
} else {
// The input is a boolean value, compare it against zero.
let size = ctx.input_ty(insn, 0).bytes() as u8;
let test = put_input_in_reg(ctx, flag_input);
ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test));
CC::NZ
};
// This doesn't affect the flags.
ctx.emit(Inst::gen_move(dst, rhs, ty));
if is_int_or_ref_ty(ty) {
ctx.emit(Inst::cmove(size, cc, lhs, dst)); ctx.emit(Inst::cmove(size, cc, lhs, dst));
} else { } else {
debug_assert!(ty == types::F32 || ty == types::F64); debug_assert!(ty == types::F32 || ty == types::F64);
let lhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst)); ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
} }
} }
} }
Opcode::Selectif | Opcode::SelectifSpectreGuard => { Opcode::Selectif | Opcode::SelectifSpectreGuard => {
let lhs = input_to_reg_mem(ctx, inputs[1]);
let rhs = put_input_in_reg(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]);
let ty = ctx.output_ty(insn, 0);
// Verification ensures that the input is always a single-def ifcmp. // Verification ensures that the input is always a single-def ifcmp.
let cmp_insn = ctx let cmp_insn = ctx
.get_input(inputs[0].insn, inputs[0].input) .get_input(inputs[0].insn, inputs[0].input)
@@ -2265,13 +2300,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let cc = CC::from_intcc(ctx.data(insn).cond_code().unwrap()); let cc = CC::from_intcc(ctx.data(insn).cond_code().unwrap());
let lhs = input_to_reg_mem(ctx, inputs[1]); if is_int_or_ref_ty(ty) {
let rhs = put_input_in_reg(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]);
let ty = ctx.output_ty(insn, 0);
if is_int_ty(ty) {
let size = ty.bytes() as u8; let size = ty.bytes() as u8;
if size == 1 { if size == 1 {
// Sign-extend operands to 32, then do a cmove of size 4. // Sign-extend operands to 32, then do a cmove of size 4.
@@ -2331,7 +2360,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else { } else {
None None
}; };
ctx.emit(Inst::imm_r(true, 0, Writable::from_reg(regs::rdx()))); // TODO use xor
ctx.emit(Inst::imm(
OperandSize::Size32,
0,
Writable::from_reg(regs::rdx()),
));
ctx.emit(Inst::checked_div_or_rem_seq( ctx.emit(Inst::checked_div_or_rem_seq(
kind, kind,
size, size,
@@ -2343,30 +2377,24 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let divisor = input_to_reg_mem(ctx, inputs[1]); let divisor = input_to_reg_mem(ctx, inputs[1]);
// Fill in the high parts: // Fill in the high parts:
if input_ty == types::I8 { if kind.is_signed() {
if kind.is_signed() { // sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
// sign-extend the sign-bit of al into ah, for signed opcodes. // signed opcodes.
ctx.emit(Inst::sign_extend_data(1)); ctx.emit(Inst::sign_extend_data(size));
} else { } else if input_ty == types::I8 {
ctx.emit(Inst::movzx_rm_r( ctx.emit(Inst::movzx_rm_r(
ExtMode::BL, ExtMode::BL,
RegMem::reg(regs::rax()), RegMem::reg(regs::rax()),
Writable::from_reg(regs::rax()), Writable::from_reg(regs::rax()),
/* infallible */ None, /* infallible */ None,
)); ));
}
} else { } else {
if kind.is_signed() { // zero for unsigned opcodes.
// sign-extend the sign-bit of rax into rdx, for signed opcodes. ctx.emit(Inst::imm(
ctx.emit(Inst::sign_extend_data(size)); OperandSize::Size64,
} else { 0,
// zero for unsigned opcodes. Writable::from_reg(regs::rdx()),
ctx.emit(Inst::imm_r( ));
true, /* is_64 */
0,
Writable::from_reg(regs::rdx()),
));
}
} }
// Emit the actual idiv. // Emit the actual idiv.
@@ -2565,7 +2593,7 @@ impl LowerBackend for X64Backend {
} }
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
} }
} else if is_int_ty(src_ty) || is_bool_ty(src_ty) { } else if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
let src = put_input_in_reg( let src = put_input_in_reg(
ctx, ctx,
InsnInput { InsnInput {
@@ -2588,7 +2616,7 @@ impl LowerBackend for X64Backend {
Opcode::BrIcmp => { Opcode::BrIcmp => {
let src_ty = ctx.input_ty(branches[0], 0); let src_ty = ctx.input_ty(branches[0], 0);
if is_int_ty(src_ty) || is_bool_ty(src_ty) { if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) {
let lhs = put_input_in_reg( let lhs = put_input_in_reg(
ctx, ctx,
InsnInput { InsnInput {