Address review comments.

Benjamin Bouvier
2020-08-21 12:40:47 +02:00
parent ee76e01efc
commit 7c85654285
4 changed files with 300 additions and 263 deletions


@@ -43,7 +43,7 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
/// Does the given instruction have a side effect as per [has_side_effect], or is it a load
/// (excluding the get_pinned_reg opcode)?
pub fn has_side_effect_or_load_not_get_pinned_reg(func: &Function, inst: Inst) -> bool {
pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
let op = func.dfg[inst].opcode();
op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load())
}
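
As context for the rename: the predicate treats loads as having a lowering side effect while exempting `get_pinned_reg`, which only reads a fixed register. A minimal, self-contained sketch of its shape, with stub opcodes standing in for the real `Function`/`Inst` API (the stubs are assumptions, not Cranelift types):

```rust
#[derive(PartialEq)]
enum Opcode {
    GetPinnedReg,
    Load,
    Store,
    Iadd,
}

fn has_side_effect(op: &Opcode) -> bool {
    // Stub: in Cranelift this inspects traps, stores, calls, etc.
    matches!(op, Opcode::Store)
}

fn can_load(op: &Opcode) -> bool {
    matches!(op, Opcode::Load)
}

fn has_lowering_side_effect(op: &Opcode) -> bool {
    *op != Opcode::GetPinnedReg && (has_side_effect(op) || can_load(op))
}

fn main() {
    assert!(has_lowering_side_effect(&Opcode::Load));
    assert!(has_lowering_side_effect(&Opcode::Store));
    assert!(!has_lowering_side_effect(&Opcode::GetPinnedReg));
    assert!(!has_lowering_side_effect(&Opcode::Iadd));
}
```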


@@ -360,6 +360,13 @@ pub enum Inst {
JmpKnown { dst: BranchTarget },
/// One-way conditional branch: jcond cond target.
///
/// This instruction is useful when we have conditional jumps depending on more than two
/// conditions; see, for instance, the lowering of Brz/Brnz with Fcmp inputs.
///
/// A note of caution: in contexts where the branch target is another block, this has to be the
/// same successor as the one specified in the terminator branch of the current block.
/// Otherwise, this might confuse register allocation by creating new invisible edges.
JmpIf { cc: CC, taken: BranchTarget },
/// Two-way conditional branch: jcond cond target target.


@@ -124,29 +124,37 @@ struct InsnOutput {
output: usize,
}
/// Returns the source instruction if the given `input` is a result produced by an instruction
/// with opcode `op`.
// TODO investigate failures with checking against the result index.
fn matches_input<C: LowerCtx<I = Inst>>(
ctx: &mut C,
input: InsnInput,
op: Opcode,
) -> Option<IRInst> {
let inputs = ctx.get_input(input.insn, input.input);
if let Some((src_inst, _)) = inputs.inst {
inputs.inst.and_then(|(src_inst, _)| {
let data = ctx.data(src_inst);
if data.opcode() == op {
return Some(src_inst);
}
}
None
None
})
}
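
The `and_then` form above mirrors a standalone pattern: follow the def of an input back to its producer and accept it only if the opcode matches. A toy model, with hypothetical stand-in types rather than the real `LowerCtx` API:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum Opcode {
    Ishl,
    Iadd,
}

struct Producer {
    opcode: Opcode,
    id: u32,
}

fn matches_input(producer: Option<&Producer>, op: Opcode) -> Option<u32> {
    // Accept the producing instruction only if it has the requested opcode.
    producer.and_then(|p| if p.opcode == op { Some(p.id) } else { None })
}

fn main() {
    let shl = Producer { opcode: Opcode::Ishl, id: 7 };
    assert_eq!(matches_input(Some(&shl), Opcode::Ishl), Some(7));
    assert_eq!(matches_input(Some(&shl), Opcode::Iadd), None);
    assert_eq!(matches_input(None, Opcode::Ishl), None);
}
```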
fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg {
ctx.use_input_reg(input);
input.reg
}
/// Put the given input into a register, and mark it as used (side-effect).
fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg {
let inputs = ctx.get_input(spec.insn, spec.input);
ctx.use_input_reg(inputs);
inputs.reg
let input = ctx.get_input(spec.insn, spec.input);
lowerinput_to_reg(ctx, input)
}
/// An extension specification for `extend_input_to_reg`.
#[derive(Clone, Copy)]
enum ExtSpec {
ZeroExtendTo32,
ZeroExtendTo64,
@@ -163,6 +171,12 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
};
let input_size = ctx.input_ty(spec.insn, spec.input).bits();
let requested_ty = if requested_size == 32 {
types::I32
} else {
types::I64
};
let ext_mode = match (input_size, requested_size) {
(a, b) if a == b => return input_to_reg(ctx, spec),
(a, 32) if a == 1 || a == 8 => ExtMode::BL,
@@ -173,12 +187,6 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
_ => unreachable!(),
};
let requested_ty = if requested_size == 32 {
types::I32
} else {
types::I64
};
let src = input_to_reg_mem(ctx, spec);
let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
match ext_spec {
@@ -196,21 +204,26 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
dst.to_reg()
}
fn lowerinput_to_reg_mem(ctx: Ctx, input: LowerInput) -> RegMem {
// TODO handle memory.
RegMem::reg(lowerinput_to_reg(ctx, input))
}
/// Put the given input into a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
// TODO handle memory.
RegMem::reg(input_to_reg(ctx, spec))
let input = ctx.get_input(spec.insn, spec.input);
lowerinput_to_reg_mem(ctx, input)
}
/// Returns the immediate if the given input is a constant that can be properly sign-extended,
/// without any possible side effect.
fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
ctx.get_input(spec.insn, spec.input).constant.and_then(|x| {
fn lowerinput_to_sext_imm(input: LowerInput, input_ty: Type) -> Option<u32> {
input.constant.and_then(|x| {
// For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
// to 64 bits. For other sizes, it doesn't matter and we can just use the plain
// constant.
if ctx.input_ty(spec.insn, spec.input).bytes() != 8 || low32_will_sign_extend_to_64(x) {
if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) {
Some(x as u32)
} else {
None
@@ -218,6 +231,12 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
})
}
fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
let input = ctx.get_input(spec.insn, spec.input);
let input_ty = ctx.input_ty(spec.insn, spec.input);
lowerinput_to_sext_imm(input, input_ty)
}
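
The check delegated to `low32_will_sign_extend_to_64` above has a compact definition. A self-contained sketch consistent with its use here (the exact body in the backend may differ): a 64-bit constant can be encoded as a 32-bit immediate of a REX.W-prefixed instruction only if sign-extending its low 32 bits reproduces the value.

```rust
fn low32_will_sign_extend_to_64(x: u64) -> bool {
    let xs = x as i64;
    // Shift the low 32 bits up and arithmetically back down: if nothing
    // changes, the low half sign-extends to the full 64-bit value.
    xs == ((xs << 32) >> 32)
}

fn main() {
    assert!(low32_will_sign_extend_to_64(0x7fff_ffff)); // i32::MAX
    assert!(low32_will_sign_extend_to_64(u64::MAX)); // -1 as i64
    assert!(!low32_will_sign_extend_to_64(0x8000_0000)); // sign-extends to a different value
    assert!(!low32_will_sign_extend_to_64(0x1_0000_0000)); // needs the high half
}
```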
fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
ctx.get_input(spec.insn, spec.input).constant
}
@@ -225,9 +244,11 @@ fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
/// Put the given input into an immediate, a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
match input_to_sext_imm(ctx, spec) {
let input = ctx.get_input(spec.insn, spec.input);
let input_ty = ctx.input_ty(spec.insn, spec.input);
match lowerinput_to_sext_imm(input, input_ty) {
Some(x) => RegMemImm::imm(x),
None => match input_to_reg_mem(ctx, spec) {
None => match lowerinput_to_reg_mem(ctx, input) {
RegMem::Reg { reg } => RegMemImm::reg(reg),
RegMem::Mem { addr } => RegMemImm::mem(addr),
},
@@ -252,34 +273,88 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) {
ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
}
#[derive(PartialEq)]
enum FcmpOperands {
Swap,
DontSwap,
/// A specification for an fcmp emission.
enum FcmpSpec {
/// Normal flow.
Normal,
/// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that
/// happens with `InvertedEqualOrConditions`.
///
/// This is useful in contexts where it is hard or inefficient to produce a single instruction (or
/// sequence of instructions) that checks for an "AND" combination of condition codes; see, for
/// instance, the lowering of Select.
InvertEqual,
}
fn emit_fcmp(ctx: Ctx, insn: IRInst, swap_operands: FcmpOperands) {
/// This explains how to interpret the results of an fcmp instruction.
enum FcmpCondResult {
/// The given condition code must be set.
Condition(CC),
/// Both condition codes must be set.
AndConditions(CC, CC),
/// Either of the condition codes must be set.
OrConditions(CC, CC),
/// The associated spec was set to `FcmpSpec::InvertEqual`, and Equal has been inverted to
/// NotEqual. Either of the condition codes must be set, and the user must invert the meaning of
/// the condition code results accordingly. When the spec is set to `FcmpSpec::Normal`, this
/// case can't be reached.
InvertedEqualOrConditions(CC, CC),
}
fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
let (flip_operands, inverted_equal) = match cond_code {
FloatCC::LessThan
| FloatCC::LessThanOrEqual
| FloatCC::UnorderedOrGreaterThan
| FloatCC::UnorderedOrGreaterThanOrEqual => {
cond_code = cond_code.reverse();
(true, false)
}
FloatCC::Equal => {
let inverted_equal = match spec {
FcmpSpec::Normal => false,
FcmpSpec::InvertEqual => {
cond_code = FloatCC::NotEqual; // same as .inverse()
true
}
};
(false, inverted_equal)
}
_ => (false, false),
};
// After the reversals/inversions above, the remaining condition code maps to a CC that
// `from_floatcc` can construct and that a direct float comparison can set in the flags
// register; emit that comparison here.
let input_ty = ctx.input_ty(insn, 0);
let op = match input_ty {
let op = match ctx.input_ty(insn, 0) {
types::F32 => SseOpcode::Ucomiss,
types::F64 => SseOpcode::Ucomisd,
_ => panic!("Bad input type to Fcmp"),
};
let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
let (lhs, rhs) = if swap_operands == FcmpOperands::Swap {
(
input_to_reg(ctx, inputs[1]),
input_to_reg_mem(ctx, inputs[0]),
)
let (lhs_input, rhs_input) = if flip_operands {
(inputs[1], inputs[0])
} else {
(
input_to_reg(ctx, inputs[0]),
input_to_reg_mem(ctx, inputs[1]),
)
(inputs[0], inputs[1])
};
let lhs = input_to_reg(ctx, lhs_input);
let rhs = input_to_reg_mem(ctx, rhs_input);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
let cond_result = match cond_code {
FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
FloatCC::NotEqual if inverted_equal => {
FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ)
}
FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ),
_ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)),
};
cond_result
}
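
To make the mapping concrete: after `ucomiss`/`ucomisd`, x86 sets ZF/PF/CF to 111 for unordered, 000 for greater-than, 001 for less-than, and 100 for equal. The sketch below models the classification performed above, with plain strings standing in for the backend's CC types; it is illustrative only, not the actual API.

```rust
#[derive(Debug, PartialEq)]
enum CondResult {
    Condition(&'static str),
    AndConditions(&'static str, &'static str),
    OrConditions(&'static str, &'static str),
}

fn classify(float_cc: &str) -> CondResult {
    match float_cc {
        // Ordered-and-equal needs two bits: parity clear (ordered) AND zero set.
        "Equal" => CondResult::AndConditions("NP", "Z"),
        // Unordered-or-unequal is the complement: parity set OR zero clear.
        "NotEqual" => CondResult::OrConditions("P", "NZ"),
        // Greater-than maps to "above": CF = 0 and ZF = 0, which also
        // excludes unordered (unordered sets CF = 1).
        "GreaterThan" => CondResult::Condition("NBE"),
        "GreaterThanOrEqual" => CondResult::Condition("NB"),
        // The LessThan-family codes are handled by reversing the comparison
        // first, exactly as `emit_fcmp` flips its operands above.
        other => panic!("reverse operands before classifying {}", other),
    }
}

fn main() {
    assert_eq!(classify("Equal"), CondResult::AndConditions("NP", "Z"));
    assert_eq!(classify("NotEqual"), CondResult::OrConditions("P", "NZ"));
}
```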
fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature {
@@ -350,33 +425,31 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
/// Returns the shift input and amount if the given input is a shift by a constant value less
/// than or equal to 3. The goal is to embed it within an address mode.
fn matches_small_cst_shift<C: LowerCtx<I = Inst>>(
fn matches_small_constant_shift<C: LowerCtx<I = Inst>>(
ctx: &mut C,
spec: InsnInput,
) -> Option<(InsnInput, u8)> {
if let Some(shift) = matches_input(ctx, spec, Opcode::Ishl) {
if let Some(shift_amt) = input_to_imm(
matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
match input_to_imm(
ctx,
InsnInput {
insn: shift,
input: 1,
},
) {
if shift_amt <= 3 {
return Some((
InsnInput {
insn: shift,
input: 0,
},
shift_amt as u8,
));
}
Some(shift_amt) if shift_amt <= 3 => Some((
InsnInput {
insn: shift,
input: 0,
},
shift_amt as u8,
)),
_ => None,
}
}
None
})
}
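
The bound of 3 comes from x86 addressing: the SIB byte can scale an index register only by 1, 2, 4, or 8, i.e. by `1 << shift` for `shift <= 3`, so `base + (index << s)` folds into a single address mode. A standalone illustration:

```rust
fn shift_fits_in_amode(shift_amt: u64) -> Option<u8> {
    // Only scales 1, 2, 4, 8 are encodable in a SIB byte.
    if shift_amt <= 3 {
        Some(shift_amt as u8)
    } else {
        None
    }
}

fn main() {
    assert_eq!(shift_fits_in_amode(3).map(|s| 1u8 << s), Some(8));
    assert_eq!(shift_fits_in_amode(4), None); // scale 16 needs a separate shift
}
```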
fn lower_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
// We now have either an add that we must materialize, or some other input, plus the
// final offset.
if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
@@ -394,14 +467,16 @@ fn lower_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32)
// TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
// aren't happening in the wasm case. We could do better, given some range analysis.
let (base, index, shift) = if let Some((shift_input, shift_amt)) =
matches_small_cst_shift(ctx, add_inputs[0])
matches_small_constant_shift(ctx, add_inputs[0])
{
(
input_to_reg(ctx, add_inputs[1]),
input_to_reg(ctx, shift_input),
shift_amt,
)
} else if let Some((shift_input, shift_amt)) = matches_small_cst_shift(ctx, add_inputs[1]) {
} else if let Some((shift_input, shift_amt)) =
matches_small_constant_shift(ctx, add_inputs[1])
{
(
input_to_reg(ctx, add_inputs[0]),
input_to_reg(ctx, shift_input),
@@ -1027,15 +1102,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Fcmp => {
let condcode = inst_fp_condcode(ctx.data(insn));
let cond_code = inst_fp_condcode(ctx.data(insn));
let input_ty = ctx.input_ty(insn, 0);
if !input_ty.is_vector() {
let op = match input_ty {
types::F32 => SseOpcode::Ucomiss,
types::F64 => SseOpcode::Ucomisd,
_ => panic!("Bad input type to fcmp: {}", input_ty),
};
// Unordered is returned by setting ZF, PF, CF <- 111
// Greater than by ZF, PF, CF <- 000
// Less than by ZF, PF, CF <- 001
@@ -1051,71 +1120,35 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// set, then both the ZF and CF flag bits must also be set we can get away with using
// one setcc for most condition codes.
match condcode {
FloatCC::LessThan
| FloatCC::LessThanOrEqual
| FloatCC::UnorderedOrGreaterThan
| FloatCC::UnorderedOrGreaterThanOrEqual => {
// setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1,
// which doesn't exclude unorderedness. To get around this we can reverse the
// operands and the cc test to instead check that CF and ZF are 0, which also
// excludes unorderedness. Using similar logic we also reverse
// UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and ensure that ZF
// or CF is 1 to exclude orderedness.
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = input_to_reg(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs));
let condcode = condcode.reverse();
let cc = CC::from_floatcc(condcode);
let dst = output_to_reg(ctx, outputs[0]);
match emit_fcmp(ctx, insn, cond_code, FcmpSpec::Normal) {
FcmpCondResult::Condition(cc) => {
ctx.emit(Inst::setcc(cc, dst));
}
FloatCC::Equal => {
// Outlier case: equal means both the operands are ordered and equal; we cannot
// get around checking the parity bit to determine if the result was ordered.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
ctx.emit(Inst::setcc(CC::NP, tmp_gpr1));
ctx.emit(Inst::setcc(CC::Z, dst));
FcmpCondResult::AndConditions(cc1, cc2) => {
let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::setcc(cc1, tmp));
ctx.emit(Inst::setcc(cc2, dst));
ctx.emit(Inst::alu_rmi_r(
false,
AluRmiROpcode::And,
RegMemImm::reg(tmp_gpr1.to_reg()),
RegMemImm::reg(tmp.to_reg()),
dst,
));
}
FloatCC::NotEqual => {
// Outlier case: not equal means either the operands are unordered, or they're
// not the same value.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
ctx.emit(Inst::setcc(CC::P, tmp_gpr1));
ctx.emit(Inst::setcc(CC::NZ, dst));
FcmpCondResult::OrConditions(cc1, cc2) => {
let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::setcc(cc1, tmp));
ctx.emit(Inst::setcc(cc2, dst));
ctx.emit(Inst::alu_rmi_r(
false,
AluRmiROpcode::Or,
RegMemImm::reg(tmp_gpr1.to_reg()),
RegMemImm::reg(tmp.to_reg()),
dst,
));
}
_ => {
// For all remaining condition codes we can handle things with one check.
let lhs = input_to_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
let dst = output_to_reg(ctx, outputs[0]);
let cc = CC::from_floatcc(condcode);
ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
ctx.emit(Inst::setcc(cc, dst));
}
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
}
} else {
let op = match input_ty {
@@ -1126,7 +1159,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Since some packed comparisons are not available, some of the condition codes
// must be inverted, with a corresponding `flip` of the operands.
let (imm, flip) = match condcode {
let (imm, flip) = match cond_code {
FloatCC::GreaterThan => (FcmpImm::LessThan, true),
FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true),
FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true),
@@ -1134,9 +1167,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(FcmpImm::UnorderedOrGreaterThanOrEqual, true)
}
FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => {
panic!("unsupported float condition code: {}", condcode)
panic!("unsupported float condition code: {}", cond_code)
}
_ => (FcmpImm::from(condcode), false),
_ => (FcmpImm::from(cond_code), false),
};
// Determine the operands of the comparison, possibly by flipping them.
@@ -1225,35 +1258,77 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let srcloc = ctx.srcloc(insn);
let trap_code = inst_trapcode(ctx.data(insn)).unwrap();
let cc = if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
let condcode = inst_condcode(ctx.data(insn));
if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
let cond_code = inst_condcode(ctx.data(insn));
// The flags must not have been clobbered by any other instruction between the
// iadd_ifcout and this instruction, as verified by the CLIF validator; so we can
// simply use the flags here.
CC::from_intcc(condcode)
let cc = CC::from_intcc(cond_code);
ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc,
});
} else if op == Opcode::Trapif {
let condcode = inst_condcode(ctx.data(insn));
let cc = CC::from_intcc(condcode);
let cond_code = inst_condcode(ctx.data(insn));
let cc = CC::from_intcc(cond_code);
// Verification ensures that the input is always a single-def ifcmp.
let ifcmp_insn = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
emit_cmp(ctx, ifcmp_insn);
cc
let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
emit_cmp(ctx, ifcmp);
ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc,
});
} else {
let condcode = inst_fp_condcode(ctx.data(insn));
let cc = CC::from_floatcc(condcode);
let cond_code = inst_fp_condcode(ctx.data(insn));
// Verification ensures that the input is always a single-def ffcmp.
let ffcmp_insn = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
emit_fcmp(ctx, ffcmp_insn, FcmpOperands::DontSwap);
cc
};
let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc,
});
match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
FcmpCondResult::Condition(cc) => ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc,
}),
FcmpCondResult::AndConditions(cc1, cc2) => {
// A bit unfortunate, but materialize the flags in their own register, and
// check against this.
let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::setcc(cc1, tmp));
ctx.emit(Inst::setcc(cc2, tmp2));
ctx.emit(Inst::alu_rmi_r(
false, /* is_64 */
AluRmiROpcode::And,
RegMemImm::reg(tmp.to_reg()),
tmp2,
));
ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc: CC::NZ,
});
}
FcmpCondResult::OrConditions(cc1, cc2) => {
ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc: cc1,
});
ctx.emit_safepoint(Inst::TrapIf {
trap_code,
srcloc,
cc: cc2,
});
}
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
};
};
}
Opcode::F64const => {
@@ -1751,7 +1826,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload32
| Opcode::Sload32 => {
assert_eq!(inputs.len(), 1, "only one input for load operands");
lower_amode(ctx, inputs[0], offset as u32)
lower_to_amode(ctx, inputs[0], offset as u32)
}
Opcode::LoadComplex
@@ -1842,7 +1917,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let addr = match op {
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
assert_eq!(inputs.len(), 2, "only one input for store memory operands");
lower_amode(ctx, inputs[1], offset as u32)
lower_to_amode(ctx, inputs[1], offset as u32)
}
Opcode::StoreComplex
@@ -1899,11 +1974,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
None
};
// Make sure that both args are in virtual regs, since in effect we have to do a
// parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not
// guaranteed safe if either is in a real reg.
addr = ctx.ensure_in_vreg(addr, types::I64);
arg2 = ctx.ensure_in_vreg(arg2, types::I64);
// Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq`
// operates at whatever width is specified by `ty`, so there's no need to
// zero-extend `arg2` in the case of `ty` being I8/I16/I32.
@@ -1917,6 +1994,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
arg2,
types::I64,
));
// Now the AtomicRmwSeq (pseudo-) instruction itself
let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap());
ctx.emit(Inst::AtomicRmwSeq {
@@ -1924,6 +2002,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
op,
srcloc,
});
// And finally, copy the preordained AtomicRmwSeq output reg to its destination.
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
}
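
Why `ensure_in_vreg` matters here, as a toy model (register names below are illustrative, not the actual `AtomicRmwSeq` constraints): moving two values into two fixed real registers is a parallel copy, and a naive sequential copy is wrong if one source already occupies the other's destination. Routing through fresh virtual registers makes any emission order safe.

```rust
// Each move is (source, destination).
fn copy_via_temps(addr_src: &str, arg2_src: &str) -> Vec<(String, String)> {
    let mut moves = Vec::new();
    // Stage 1: both sources into fresh virtual temporaries. These cannot
    // clobber anything, even if a source is already one of the fixed regs.
    moves.push((addr_src.to_string(), "v_addr".to_string()));
    moves.push((arg2_src.to_string(), "v_arg2".to_string()));
    // Stage 2: temporaries into the fixed (hypothetical) input registers.
    moves.push(("v_addr".to_string(), "fixed_reg_a".to_string()));
    moves.push(("v_arg2".to_string(), "fixed_reg_b".to_string()));
    moves
}

fn main() {
    // Hazardous case: `addr` already lives in fixed_reg_b; copying
    // arg2 -> fixed_reg_b first would destroy it without the temporaries.
    for (src, dst) in copy_via_temps("fixed_reg_b", "rdx") {
        println!("mov {}, {}", dst, src);
    }
}
```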
@@ -1932,7 +2011,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// This is very similar to, but not identical to, the `AtomicRmw` case. As with
// `AtomicRmw`, there's no need to zero-extend narrow values here.
let dst = output_to_reg(ctx, outputs[0]);
let addr = input_to_reg(ctx, inputs[0]);
let addr = lower_to_amode(ctx, inputs[0], 0);
let expected = input_to_reg(ctx, inputs[1]);
let replacement = input_to_reg(ctx, inputs[2]);
let ty_access = ty.unwrap();
@@ -1943,6 +2022,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
None
};
// Move the expected value into %rax. Because there's only one fixed register on
// the input side, we don't have to use `ensure_in_vreg`, as is necessary in the
// `AtomicRmw` case.
@@ -1954,7 +2034,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::LockCmpxchg {
ty: ty_access,
src: replacement,
dst: Amode::imm_reg(0, addr).into(),
dst: addr.into(),
srcloc,
});
// And finally, copy the old value at the location to its destination reg.
@@ -1966,7 +2046,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
// need for any fence instructions.
let data = output_to_reg(ctx, outputs[0]);
let addr = input_to_reg(ctx, inputs[0]);
let addr = lower_to_amode(ctx, inputs[0], 0);
let ty_access = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty_access));
let memflags = ctx.memflags(insn).expect("memory flags");
@@ -1975,8 +2055,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
None
};
// For the amode, we could do better, but for now just use `0(addr)`.
let rm = RegMem::mem(Amode::imm_reg(0, addr));
let rm = RegMem::mem(addr);
if ty_access == types::I64 {
ctx.emit(Inst::mov64_rm_r(rm, data, srcloc));
} else {
@@ -1993,7 +2073,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::AtomicStore => {
// This is a normal store, followed by an `mfence` instruction.
let data = input_to_reg(ctx, inputs[0]);
let addr = input_to_reg(ctx, inputs[1]);
let addr = lower_to_amode(ctx, inputs[1], 0);
let ty_access = ctx.input_ty(insn, 0);
assert!(is_valid_atomic_transaction_ty(ty_access));
let memflags = ctx.memflags(insn).expect("memory flags");
@@ -2002,13 +2082,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
None
};
// For the amode, we could do better, but for now just use `0(addr)`.
ctx.emit(Inst::mov_r_m(
ty_access.bytes() as u8,
data,
Amode::imm_reg(0, addr),
srcloc,
));
ctx.emit(Inst::mov_r_m(ty_access.bytes() as u8, data, addr, srcloc));
ctx.emit(Inst::Fence {
kind: FenceKind::MFence,
});
@@ -2068,81 +2143,36 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
let cond_code = inst_fp_condcode(ctx.data(fcmp));
// See comments in the lowering of Fcmp.
let (cond_code, swap_op, was_equal) = match cond_code {
FloatCC::LessThan
| FloatCC::LessThanOrEqual
| FloatCC::UnorderedOrGreaterThan
| FloatCC::UnorderedOrGreaterThanOrEqual => {
(cond_code.reverse(), FcmpOperands::Swap, false)
}
FloatCC::Equal => {
// Additionally, we invert Equal to NotEqual too: taking LHS if equal would
// mean take it if both CC::NP and CC::Z are set, the conjunction of which
// can't be modeled with a single cmov instruction. Instead, we'll swap LHS
// and RHS in the select operation, and invert the equal to a not-equal
// here.
(FloatCC::NotEqual, FcmpOperands::DontSwap, true)
}
_ => (cond_code, FcmpOperands::DontSwap, false),
};
emit_fcmp(ctx, fcmp, swap_op);
// We request inversion of Equal to NotEqual here: taking LHS if equal would mean
// taking it if both CC::NP and CC::Z are set, the conjunction of which can't be
// modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
// select operation, and invert the equal to a not-equal here.
let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
let (lhs, rhs) = if was_equal {
// See comment above about inverting conditional code.
(
input_to_reg_mem(ctx, inputs[2]),
input_to_reg(ctx, inputs[1]),
)
} else {
(
input_to_reg_mem(ctx, inputs[1]),
input_to_reg(ctx, inputs[2]),
)
let (lhs_input, rhs_input) = match fcmp_results {
FcmpCondResult::InvertedEqualOrConditions(_, _) => (inputs[2], inputs[1]),
FcmpCondResult::Condition(_)
| FcmpCondResult::AndConditions(_, _)
| FcmpCondResult::OrConditions(_, _) => (inputs[1], inputs[2]),
};
let dst = output_to_reg(ctx, outputs[0]);
let ty = ctx.output_ty(insn, 0);
let lhs = if is_int_ty(ty) {
let size = ty.bytes() as u8;
if size == 1 {
// Sign-extend operands to 32, then do a cmove of size 4.
let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None));
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None));
RegMem::reg(lhs_se.to_reg())
} else {
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
let rhs = input_to_reg(ctx, rhs_input);
let dst = output_to_reg(ctx, outputs[0]);
let lhs = if is_int_ty(ty) && ty.bytes() < 4 {
// Special case: since the higher bits are undefined per CLIF semantics, we
// can just apply a 32-bit cmove here. Force the inputs into registers, though:
// a 32-bit cmove reading a spilled 8- or 16-bit slot could access out of bounds.
// Sign-extend operands to 32, then do a cmove of size 4.
RegMem::reg(input_to_reg(ctx, lhs_input))
} else {
debug_assert!(ty == types::F32 || ty == types::F64);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
input_to_reg_mem(ctx, lhs_input)
};
match cond_code {
FloatCC::Equal => {
// See comment above about inverting conditional code.
panic!("can't happen because of above guard");
}
ctx.emit(Inst::gen_move(dst, rhs, ty));
FloatCC::NotEqual => {
// Take lhs if not-equal, that is CC::P or CC:NZ.
if is_int_ty(ty) {
let size = u8::max(ty.bytes() as u8, 4);
ctx.emit(Inst::cmove(size, CC::P, lhs.clone(), dst));
ctx.emit(Inst::cmove(size, CC::NZ, lhs, dst));
} else {
ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::P, lhs.clone(), dst));
ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::NZ, lhs, dst));
}
}
_ => {
let cc = CC::from_floatcc(cond_code);
match fcmp_results {
FcmpCondResult::Condition(cc) => {
if is_int_ty(ty) {
let size = u8::max(ty.bytes() as u8, 4);
ctx.emit(Inst::cmove(size, cc, lhs, dst));
@@ -2150,6 +2180,22 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
}
}
FcmpCondResult::AndConditions(_, _) => {
unreachable!(
"can't AND with select; see above comment about inverting equal"
);
}
FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
| FcmpCondResult::OrConditions(cc1, cc2) => {
if is_int_ty(ty) {
let size = u8::max(ty.bytes() as u8, 4);
ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst));
ctx.emit(Inst::cmove(size, cc2, lhs, dst));
} else {
ctx.emit(Inst::xmm_cmove(ty == types::F64, cc1, lhs.clone(), dst));
ctx.emit(Inst::xmm_cmove(ty == types::F64, cc2, lhs, dst));
}
}
}
} else {
let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
@@ -2164,27 +2210,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
CC::NZ
};
let lhs = input_to_reg_mem(ctx, inputs[1]);
let rhs = input_to_reg(ctx, inputs[2]);
let dst = output_to_reg(ctx, outputs[0]);
let ty = ctx.output_ty(insn, 0);
ctx.emit(Inst::gen_move(dst, rhs, ty));
if is_int_ty(ty) {
let size = ty.bytes() as u8;
if size == 1 {
// Sign-extend operands to 32, then do a cmove of size 4.
let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32);
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None));
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None));
ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
let mut size = ty.bytes() as u8;
let lhs = if size < 4 {
// Special case: since the higher bits are undefined per CLIF semantics, we
// can just apply a 32-bit cmove here. Force the inputs into registers, though:
// a 32-bit cmove reading a spilled 8- or 16-bit slot could access out of bounds.
size = 4;
RegMem::reg(input_to_reg(ctx, inputs[1]))
} else {
ctx.emit(Inst::gen_move(dst, rhs, ty));
ctx.emit(Inst::cmove(size, cc, lhs, dst));
}
input_to_reg_mem(ctx, inputs[1])
};
ctx.emit(Inst::cmove(size, cc, lhs, dst));
} else {
debug_assert!(ty == types::F32 || ty == types::F64);
ctx.emit(Inst::gen_move(dst, rhs, ty));
let lhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
}
}
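
The `size < 4` special-casing in both branches relies on two facts: x86 `cmovcc` has no 8-bit form (only 16-, 32-, and 64-bit), and CLIF leaves the upper bits of narrow integer results undefined, so widening the cmove to 32 bits is both legal and free. A minimal sketch of the clamp:

```rust
fn cmove_size(ty_bytes: u8) -> u8 {
    // No 8-bit cmov exists; 32-bit is the cheapest legal width for i8/i16.
    u8::max(ty_bytes, 4)
}

fn main() {
    assert_eq!(cmove_size(1), 4); // i8  -> 32-bit cmove
    assert_eq!(cmove_size(2), 4); // i16 -> 32-bit cmove
    assert_eq!(cmove_size(4), 4); // i32
    assert_eq!(cmove_size(8), 8); // i64
}
```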
@@ -2464,47 +2510,29 @@ impl LowerBackend for X64Backend {
} else {
cond_code
};
let cc = CC::from_intcc(cond_code);
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
} else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
let cond_code = inst_fp_condcode(ctx.data(fcmp));
let cond_code = if op0 == Opcode::Brz {
cond_code.inverse()
} else {
cond_code
};
// See comments in the lowering of Fcmp.
let (cond_code, swap_op) = match cond_code {
FloatCC::LessThan
| FloatCC::LessThanOrEqual
| FloatCC::UnorderedOrGreaterThan
| FloatCC::UnorderedOrGreaterThanOrEqual => {
(cond_code.reverse(), FcmpOperands::Swap)
}
_ => (cond_code, FcmpOperands::DontSwap),
};
emit_fcmp(ctx, fcmp, swap_op);
match cond_code {
FloatCC::Equal => {
// Jump to taken if CC::NP and CC::Z, that is, jump to not-taken if
// CC::P or CC::NZ.
ctx.emit(Inst::jmp_if(CC::P, not_taken));
ctx.emit(Inst::jmp_cond(CC::NZ, not_taken, taken));
}
FloatCC::NotEqual => {
// Jump to taken if CC::P or CC::NZ.
ctx.emit(Inst::jmp_if(CC::P, taken));
ctx.emit(Inst::jmp_cond(CC::NZ, taken, not_taken));
}
_ => {
let cc = CC::from_floatcc(cond_code);
match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) {
FcmpCondResult::Condition(cc) => {
ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
}
FcmpCondResult::AndConditions(cc1, cc2) => {
ctx.emit(Inst::jmp_if(cc1.invert(), not_taken));
ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken));
}
FcmpCondResult::OrConditions(cc1, cc2) => {
ctx.emit(Inst::jmp_if(cc1, taken));
ctx.emit(Inst::jmp_cond(cc2, taken, not_taken));
}
FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
}
} else if is_int_ty(src_ty) || is_bool_ty(src_ty) {
let src = input_to_reg(


@@ -4,7 +4,7 @@
use crate::entity::SecondaryMap;
use crate::fx::{FxHashMap, FxHashSet};
use crate::inst_predicates::{has_side_effect_or_load_not_get_pinned_reg, is_constant_64bit};
use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
use crate::ir::instructions::BranchInfo;
use crate::ir::types::I64;
use crate::ir::{
@@ -372,7 +372,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
for bb in f.layout.blocks() {
cur_color += 1;
for inst in f.layout.block_insts(bb) {
let side_effect = has_side_effect_or_load_not_get_pinned_reg(f, inst);
let side_effect = has_lowering_side_effect(f, inst);
// Assign colors. A new color is chosen *after* any side-effecting instruction.
inst_colors[inst] = InstColor::new(cur_color);
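
The coloring scheme referenced here can be modeled standalone (a hypothetical helper, not the `Lower` API): every instruction receives the current color, and the color is bumped after each instruction with a lowering side effect. Two instructions sharing a color have no side effect between them, so a pure producer may safely be sunk to its consumer.

```rust
fn assign_colors(has_lowering_side_effect: &[bool]) -> Vec<u32> {
    let mut cur_color = 0u32;
    let mut colors = Vec::with_capacity(has_lowering_side_effect.len());
    for &effectful in has_lowering_side_effect {
        colors.push(cur_color);
        if effectful {
            // A new color is chosen *after* any side-effecting instruction.
            cur_color += 1;
        }
    }
    colors
}

fn main() {
    // iadd, iadd, store, iadd, iadd — only the store is effectful here:
    assert_eq!(
        assign_colors(&[false, false, true, false, false]),
        vec![0, 0, 0, 1, 1]
    );
}
```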
@@ -799,15 +799,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
ValueDef::Result(src_inst, result_idx) => {
debug!(" -> src inst {}", src_inst);
debug!(
" -> has side effect: {}",
has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst)
" -> has lowering side effect: {}",
has_lowering_side_effect(self.f, src_inst)
);
debug!(
" -> our color is {:?}, src inst is {:?}",
self.inst_color(at_inst),
self.inst_color(src_inst)
);
if !has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst)
if !has_lowering_side_effect(self.f, src_inst)
|| self.inst_color(at_inst) == self.inst_color(src_inst)
{
Some((src_inst, result_idx))
@@ -989,6 +989,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
fn use_input_reg(&mut self, input: LowerInput) {
debug!("use_input_reg: vreg {:?} is needed", input.reg);
// We may directly return a real (machine) register when we know that register holds the
// result of an opcode (e.g. GetPinnedReg).
if input.reg.is_virtual() {
self.vreg_needed[input.reg.get_index()] = true;
}