Address review comments.

Benjamin Bouvier
2020-08-21 12:40:47 +02:00
parent ee76e01efc
commit 7c85654285
4 changed files with 300 additions and 263 deletions


@@ -43,7 +43,7 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
 /// Does the given instruction have any side-effect as per [has_side_effect], or else is a load,
 /// but not the get_pinned_reg opcode?
-pub fn has_side_effect_or_load_not_get_pinned_reg(func: &Function, inst: Inst) -> bool {
+pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
     let op = func.dfg[inst].opcode();
     op != Opcode::GetPinnedReg && (has_side_effect(func, inst) || op.can_load())
 }


@@ -360,6 +360,13 @@ pub enum Inst {
     JmpKnown { dst: BranchTarget },

     /// One-way conditional branch: jcond cond target.
+    ///
+    /// This instruction is useful when we have conditional jumps depending on more than two
+    /// conditions; see for instance the lowering of Brz/Brnz with Fcmp inputs.
+    ///
+    /// A note of caution: in contexts where the branch target is another block, this has to be the
+    /// same successor as the one specified in the terminator branch of the current block.
+    /// Otherwise, this might confuse register allocation by creating new invisible edges.
     JmpIf { cc: CC, taken: BranchTarget },

     /// Two-way conditional branch: jcond cond target target.
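A minimal sketch of the pattern that note is about, modeled on the Brz/Brnz-with-Fcmp lowering later in this commit (the `ctx`, `taken`, and `not_taken` bindings are assumed to be in scope as in that lowering; this is an illustration, not part of the patch):

    // An Fcmp-based Equal branch needs two flag checks (CC::NP and CC::Z).
    // Jump away to `not_taken` if either check fails, then emit the block's
    // real two-way terminator. Both instructions target `not_taken`, a
    // successor the terminator already names, so no new CFG edge appears
    // behind register allocation's back.
    ctx.emit(Inst::jmp_if(CC::P, not_taken));
    ctx.emit(Inst::jmp_cond(CC::NZ, not_taken, taken));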


@@ -124,29 +124,37 @@ struct InsnOutput {
     output: usize,
 }

+/// Returns whether the given `input` is a result produced by an instruction with opcode `op`.
+// TODO investigate failures with checking against the result index.
 fn matches_input<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     input: InsnInput,
     op: Opcode,
 ) -> Option<IRInst> {
     let inputs = ctx.get_input(input.insn, input.input);
-    if let Some((src_inst, _)) = inputs.inst {
+    inputs.inst.and_then(|(src_inst, _)| {
         let data = ctx.data(src_inst);
         if data.opcode() == op {
             return Some(src_inst);
         }
-    }
-    None
+        None
+    })
 }

+fn lowerinput_to_reg(ctx: Ctx, input: LowerInput) -> Reg {
+    ctx.use_input_reg(input);
+    input.reg
+}
+
 /// Put the given input into a register, and mark it as used (side-effect).
 fn input_to_reg(ctx: Ctx, spec: InsnInput) -> Reg {
-    let inputs = ctx.get_input(spec.insn, spec.input);
-    ctx.use_input_reg(inputs);
-    inputs.reg
+    let input = ctx.get_input(spec.insn, spec.input);
+    lowerinput_to_reg(ctx, input)
 }

 /// An extension specification for `extend_input_to_reg`.
+#[derive(Clone, Copy)]
 enum ExtSpec {
     ZeroExtendTo32,
     ZeroExtendTo64,
@@ -163,6 +171,12 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
     };
     let input_size = ctx.input_ty(spec.insn, spec.input).bits();

+    let requested_ty = if requested_size == 32 {
+        types::I32
+    } else {
+        types::I64
+    };
+
     let ext_mode = match (input_size, requested_size) {
         (a, b) if a == b => return input_to_reg(ctx, spec),
         (a, 32) if a == 1 || a == 8 => ExtMode::BL,
@@ -173,12 +187,6 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
         _ => unreachable!(),
     };

-    let requested_ty = if requested_size == 32 {
-        types::I32
-    } else {
-        types::I64
-    };
-
     let src = input_to_reg_mem(ctx, spec);
     let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
     match ext_spec {
@@ -196,21 +204,26 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
     dst.to_reg()
 }

+fn lowerinput_to_reg_mem(ctx: Ctx, input: LowerInput) -> RegMem {
+    // TODO handle memory.
+    RegMem::reg(lowerinput_to_reg(ctx, input))
+}
+
 /// Put the given input into a register or a memory operand.
 /// Effectful: may mark the given input as used, when returning the register form.
 fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem {
-    // TODO handle memory.
-    RegMem::reg(input_to_reg(ctx, spec))
+    let input = ctx.get_input(spec.insn, spec.input);
+    lowerinput_to_reg_mem(ctx, input)
 }

 /// Returns whether the given input is an immediate that can be properly sign-extended, without any
 /// possible side-effect.
-fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
-    ctx.get_input(spec.insn, spec.input).constant.and_then(|x| {
+fn lowerinput_to_sext_imm(input: LowerInput, input_ty: Type) -> Option<u32> {
+    input.constant.and_then(|x| {
         // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
         // to 64 bits. For other sizes, it doesn't matter and we can just use the plain
         // constant.
-        if ctx.input_ty(spec.insn, spec.input).bytes() != 8 || low32_will_sign_extend_to_64(x) {
+        if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) {
             Some(x as u32)
         } else {
             None
@@ -218,6 +231,12 @@ fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
     })
 }

+fn input_to_sext_imm(ctx: Ctx, spec: InsnInput) -> Option<u32> {
+    let input = ctx.get_input(spec.insn, spec.input);
+    let input_ty = ctx.input_ty(spec.insn, spec.input);
+    lowerinput_to_sext_imm(input, input_ty)
+}
+
 fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
     ctx.get_input(spec.insn, spec.input).constant
 }
@@ -225,9 +244,11 @@ fn input_to_imm(ctx: Ctx, spec: InsnInput) -> Option<u64> {
 /// Put the given input into an immediate, a register or a memory operand.
 /// Effectful: may mark the given input as used, when returning the register form.
 fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm {
-    match input_to_sext_imm(ctx, spec) {
+    let input = ctx.get_input(spec.insn, spec.input);
+    let input_ty = ctx.input_ty(spec.insn, spec.input);
+    match lowerinput_to_sext_imm(input, input_ty) {
         Some(x) => RegMemImm::imm(x),
-        None => match input_to_reg_mem(ctx, spec) {
+        None => match lowerinput_to_reg_mem(ctx, input) {
             RegMem::Reg { reg } => RegMemImm::reg(reg),
             RegMem::Mem { addr } => RegMemImm::mem(addr),
         },
@@ -252,34 +273,88 @@ fn emit_cmp(ctx: Ctx, insn: IRInst) {
     ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs));
 }

-#[derive(PartialEq)]
-enum FcmpOperands {
-    Swap,
-    DontSwap,
+/// A specification for an fcmp emission.
+enum FcmpSpec {
+    /// Normal flow.
+    Normal,
+
+    /// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that
+    /// happens with `InvertedEqualOrConditions`.
+    ///
+    /// This is useful in contexts where it is hard/inefficient to produce a single instruction (or
+    /// sequence of instructions) that checks for an "AND" combination of condition codes; see for
+    /// instance the lowering of Select.
+    InvertEqual,
 }

-fn emit_fcmp(ctx: Ctx, insn: IRInst, swap_operands: FcmpOperands) {
+/// This explains how to interpret the results of an fcmp instruction.
+enum FcmpCondResult {
+    /// The given condition code must be set.
+    Condition(CC),
+
+    /// Both condition codes must be set.
+    AndConditions(CC, CC),
+
+    /// Either of the condition codes must be set.
+    OrConditions(CC, CC),
+
+    /// The associated spec was set to `FcmpSpec::InvertEqual`, and Equal has been inverted. Either
+    /// of the condition codes must be set, and the user must invert the meaning when analyzing the
+    /// condition code results. When the spec is set to `FcmpSpec::Normal`, this case can't be
+    /// reached.
+    InvertedEqualOrConditions(CC, CC),
+}
+
+fn emit_fcmp(ctx: Ctx, insn: IRInst, mut cond_code: FloatCC, spec: FcmpSpec) -> FcmpCondResult {
+    let (flip_operands, inverted_equal) = match cond_code {
+        FloatCC::LessThan
+        | FloatCC::LessThanOrEqual
+        | FloatCC::UnorderedOrGreaterThan
+        | FloatCC::UnorderedOrGreaterThanOrEqual => {
+            cond_code = cond_code.reverse();
+            (true, false)
+        }
+        FloatCC::Equal => {
+            let inverted_equal = match spec {
+                FcmpSpec::Normal => false,
+                FcmpSpec::InvertEqual => {
+                    cond_code = FloatCC::NotEqual; // same as .inverse()
+                    true
+                }
+            };
+            (false, inverted_equal)
+        }
+        _ => (false, false),
+    };
+
     // The only valid CC constructed with `from_floatcc` can be put in the flag
     // register with a direct float comparison; do this here.
-    let input_ty = ctx.input_ty(insn, 0);
-    let op = match input_ty {
+    let op = match ctx.input_ty(insn, 0) {
         types::F32 => SseOpcode::Ucomiss,
         types::F64 => SseOpcode::Ucomisd,
         _ => panic!("Bad input type to Fcmp"),
     };

     let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
-    let (lhs, rhs) = if swap_operands == FcmpOperands::Swap {
-        (
-            input_to_reg(ctx, inputs[1]),
-            input_to_reg_mem(ctx, inputs[0]),
-        )
+    let (lhs_input, rhs_input) = if flip_operands {
+        (inputs[1], inputs[0])
     } else {
-        (
-            input_to_reg(ctx, inputs[0]),
-            input_to_reg_mem(ctx, inputs[1]),
-        )
+        (inputs[0], inputs[1])
     };
+
+    let lhs = input_to_reg(ctx, lhs_input);
+    let rhs = input_to_reg_mem(ctx, rhs_input);
     ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
+
+    let cond_result = match cond_code {
+        FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z),
+        FloatCC::NotEqual if inverted_equal => {
+            FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ)
+        }
+        FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ),
+        _ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)),
+    };
+
+    cond_result
 }

 fn make_libcall_sig(ctx: Ctx, insn: IRInst, call_conv: CallConv, ptr_ty: Type) -> Signature {
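For readers following the diff: the intended way for callers to consume `FcmpCondResult` is spelled out by the Fcmp lowering further down; a condensed, hedged sketch of that pattern (helper names as used in this file; `emit_fcmp_to_bool` and `dst` are hypothetical, standing in for the actual lowering code) is:

    // Sketch only: turn an fcmp outcome into a boolean in `dst`.
    fn emit_fcmp_to_bool(ctx: Ctx, insn: IRInst, cond_code: FloatCC, dst: Writable<Reg>) {
        match emit_fcmp(ctx, insn, cond_code, FcmpSpec::Normal) {
            // One condition code suffices (e.g. ordered GreaterThan).
            FcmpCondResult::Condition(cc) => ctx.emit(Inst::setcc(cc, dst)),
            // Equal: both NP and Z must hold, so AND two setcc results.
            FcmpCondResult::AndConditions(cc1, cc2) => {
                let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
                ctx.emit(Inst::setcc(cc1, tmp));
                ctx.emit(Inst::setcc(cc2, dst));
                ctx.emit(Inst::alu_rmi_r(false, AluRmiROpcode::And, RegMemImm::reg(tmp.to_reg()), dst));
            }
            // NotEqual: either P or NZ suffices, so OR the two.
            FcmpCondResult::OrConditions(cc1, cc2) => {
                let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
                ctx.emit(Inst::setcc(cc1, tmp));
                ctx.emit(Inst::setcc(cc2, dst));
                ctx.emit(Inst::alu_rmi_r(false, AluRmiROpcode::Or, RegMemImm::reg(tmp.to_reg()), dst));
            }
            // Only produced when FcmpSpec::InvertEqual was requested.
            FcmpCondResult::InvertedEqualOrConditions(..) => unreachable!(),
        }
    }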
@@ -350,33 +425,31 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
 /// Returns whether the given input is a shift by a constant value less than or equal to 3.
 /// The goal is to embed it within an address mode.
-fn matches_small_cst_shift<C: LowerCtx<I = Inst>>(
+fn matches_small_constant_shift<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     spec: InsnInput,
 ) -> Option<(InsnInput, u8)> {
-    if let Some(shift) = matches_input(ctx, spec, Opcode::Ishl) {
-        if let Some(shift_amt) = input_to_imm(
+    matches_input(ctx, spec, Opcode::Ishl).and_then(|shift| {
+        match input_to_imm(
             ctx,
             InsnInput {
                 insn: shift,
                 input: 1,
             },
         ) {
-            if shift_amt <= 3 {
-                return Some((
-                    InsnInput {
-                        insn: shift,
-                        input: 0,
-                    },
-                    shift_amt as u8,
-                ));
-            }
+            Some(shift_amt) if shift_amt <= 3 => Some((
+                InsnInput {
+                    insn: shift,
+                    input: 0,
+                },
+                shift_amt as u8,
+            )),
+            _ => None,
         }
-    }
-    None
+    })
 }

-fn lower_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
+fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32) -> Amode {
     // We now either have an add that we must materialize, or some other input; as well as the
     // final offset.
     if let Some(add) = matches_input(ctx, spec, Opcode::Iadd) {
@@ -394,14 +467,16 @@ fn lower_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: u32)
         // TODO heap_addr legalization generates a uext64 *after* the shift, so these optimizations
         // aren't happening in the wasm case. We could do better, given some range analysis.
         let (base, index, shift) = if let Some((shift_input, shift_amt)) =
-            matches_small_cst_shift(ctx, add_inputs[0])
+            matches_small_constant_shift(ctx, add_inputs[0])
         {
             (
                 input_to_reg(ctx, add_inputs[1]),
                 input_to_reg(ctx, shift_input),
                 shift_amt,
             )
-        } else if let Some((shift_input, shift_amt)) = matches_small_cst_shift(ctx, add_inputs[1]) {
+        } else if let Some((shift_input, shift_amt)) =
+            matches_small_constant_shift(ctx, add_inputs[1])
+        {
             (
                 input_to_reg(ctx, add_inputs[0]),
                 input_to_reg(ctx, shift_input),
@@ -1027,15 +1102,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }

         Opcode::Fcmp => {
-            let condcode = inst_fp_condcode(ctx.data(insn));
+            let cond_code = inst_fp_condcode(ctx.data(insn));
             let input_ty = ctx.input_ty(insn, 0);
             if !input_ty.is_vector() {
-                let op = match input_ty {
-                    types::F32 => SseOpcode::Ucomiss,
-                    types::F64 => SseOpcode::Ucomisd,
-                    _ => panic!("Bad input type to fcmp: {}", input_ty),
-                };
-
                 // Unordered is returned by setting ZF, PF, CF <- 111
                 // Greater than by ZF, PF, CF <- 000
                 // Less than by ZF, PF, CF <- 001
@@ -1051,71 +1120,35 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 // set, then both the ZF and CF flag bits must also be set, so we can get away with using
                 // one setcc for most condition codes.
-                match condcode {
-                    FloatCC::LessThan
-                    | FloatCC::LessThanOrEqual
-                    | FloatCC::UnorderedOrGreaterThan
-                    | FloatCC::UnorderedOrGreaterThanOrEqual => {
-                        // setb and setbe for ordered LessThan and LessThanOrEqual check if CF = 1,
-                        // which doesn't exclude unorderedness. To get around this we can reverse the
-                        // operands and the cc test to instead check that CF and ZF are 0, which also
-                        // excludes unorderedness. Using similar logic we also reverse
-                        // UnorderedOrGreaterThan and UnorderedOrGreaterThanOrEqual and ensure that ZF
-                        // or CF is 1 to exclude orderedness.
-                        let lhs = input_to_reg_mem(ctx, inputs[0]);
-                        let rhs = input_to_reg(ctx, inputs[1]);
-                        let dst = output_to_reg(ctx, outputs[0]);
-                        ctx.emit(Inst::xmm_cmp_rm_r(op, lhs, rhs));
-                        let condcode = condcode.reverse();
-                        let cc = CC::from_floatcc(condcode);
-                        ctx.emit(Inst::setcc(cc, dst));
-                    }
-
-                    FloatCC::Equal => {
-                        // Outlier case: equal means both the operands are ordered and equal; we cannot
-                        // get around checking the parity bit to determine if the result was ordered.
-                        let lhs = input_to_reg(ctx, inputs[0]);
-                        let rhs = input_to_reg_mem(ctx, inputs[1]);
-                        let dst = output_to_reg(ctx, outputs[0]);
-                        let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I32);
-                        ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
-                        ctx.emit(Inst::setcc(CC::NP, tmp_gpr1));
-                        ctx.emit(Inst::setcc(CC::Z, dst));
-                        ctx.emit(Inst::alu_rmi_r(
-                            false,
-                            AluRmiROpcode::And,
-                            RegMemImm::reg(tmp_gpr1.to_reg()),
-                            dst,
-                        ));
-                    }
-
-                    FloatCC::NotEqual => {
-                        // Outlier case: not equal means either the operands are unordered, or they're
-                        // not the same value.
-                        let lhs = input_to_reg(ctx, inputs[0]);
-                        let rhs = input_to_reg_mem(ctx, inputs[1]);
-                        let dst = output_to_reg(ctx, outputs[0]);
-                        let tmp_gpr1 = ctx.alloc_tmp(RegClass::I64, types::I32);
-                        ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
-                        ctx.emit(Inst::setcc(CC::P, tmp_gpr1));
-                        ctx.emit(Inst::setcc(CC::NZ, dst));
-                        ctx.emit(Inst::alu_rmi_r(
-                            false,
-                            AluRmiROpcode::Or,
-                            RegMemImm::reg(tmp_gpr1.to_reg()),
-                            dst,
-                        ));
-                    }
-
-                    _ => {
-                        // For all remaining condition codes we can handle things with one check.
-                        let lhs = input_to_reg(ctx, inputs[0]);
-                        let rhs = input_to_reg_mem(ctx, inputs[1]);
-                        let dst = output_to_reg(ctx, outputs[0]);
-                        let cc = CC::from_floatcc(condcode);
-                        ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs));
-                        ctx.emit(Inst::setcc(cc, dst));
-                    }
+                let dst = output_to_reg(ctx, outputs[0]);
+
+                match emit_fcmp(ctx, insn, cond_code, FcmpSpec::Normal) {
+                    FcmpCondResult::Condition(cc) => {
+                        ctx.emit(Inst::setcc(cc, dst));
+                    }
+                    FcmpCondResult::AndConditions(cc1, cc2) => {
+                        let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+                        ctx.emit(Inst::setcc(cc1, tmp));
+                        ctx.emit(Inst::setcc(cc2, dst));
+                        ctx.emit(Inst::alu_rmi_r(
+                            false,
+                            AluRmiROpcode::And,
+                            RegMemImm::reg(tmp.to_reg()),
+                            dst,
+                        ));
+                    }
+                    FcmpCondResult::OrConditions(cc1, cc2) => {
+                        let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+                        ctx.emit(Inst::setcc(cc1, tmp));
+                        ctx.emit(Inst::setcc(cc2, dst));
+                        ctx.emit(Inst::alu_rmi_r(
+                            false,
+                            AluRmiROpcode::Or,
+                            RegMemImm::reg(tmp.to_reg()),
+                            dst,
+                        ));
+                    }
+                    FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
                 }
             } else {
                 let op = match input_ty {
@@ -1126,7 +1159,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 // Since some packed comparisons are not available, some of the condition codes
                 // must be inverted, with a corresponding `flip` of the operands.
-                let (imm, flip) = match condcode {
+                let (imm, flip) = match cond_code {
                     FloatCC::GreaterThan => (FcmpImm::LessThan, true),
                     FloatCC::GreaterThanOrEqual => (FcmpImm::LessThanOrEqual, true),
                     FloatCC::UnorderedOrLessThan => (FcmpImm::UnorderedOrGreaterThan, true),
@@ -1134,9 +1167,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         (FcmpImm::UnorderedOrGreaterThanOrEqual, true)
                     }
                     FloatCC::OrderedNotEqual | FloatCC::UnorderedOrEqual => {
-                        panic!("unsupported float condition code: {}", condcode)
+                        panic!("unsupported float condition code: {}", cond_code)
                     }
-                    _ => (FcmpImm::from(condcode), false),
+                    _ => (FcmpImm::from(cond_code), false),
                 };

                 // Determine the operands of the comparison, possibly by flipping them.
@@ -1225,35 +1258,77 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let srcloc = ctx.srcloc(insn);
             let trap_code = inst_trapcode(ctx.data(insn)).unwrap();

-            let cc = if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
-                let condcode = inst_condcode(ctx.data(insn));
+            if matches_input(ctx, inputs[0], Opcode::IaddIfcout).is_some() {
+                let cond_code = inst_condcode(ctx.data(insn));
                 // The flags must not have been clobbered by any other instruction between the
                 // iadd_ifcout and this instruction, as verified by the CLIF validator; so we can
                 // simply use the flags here.
-                CC::from_intcc(condcode)
+                let cc = CC::from_intcc(cond_code);
+                ctx.emit_safepoint(Inst::TrapIf {
+                    trap_code,
+                    srcloc,
+                    cc,
+                });
             } else if op == Opcode::Trapif {
-                let condcode = inst_condcode(ctx.data(insn));
-                let cc = CC::from_intcc(condcode);
+                let cond_code = inst_condcode(ctx.data(insn));
+                let cc = CC::from_intcc(cond_code);

                 // Verification ensures that the input is always a single-def ifcmp.
-                let ifcmp_insn = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
-                emit_cmp(ctx, ifcmp_insn);
-                cc
+                let ifcmp = matches_input(ctx, inputs[0], Opcode::Ifcmp).unwrap();
+                emit_cmp(ctx, ifcmp);
+
+                ctx.emit_safepoint(Inst::TrapIf {
+                    trap_code,
+                    srcloc,
+                    cc,
+                });
             } else {
-                let condcode = inst_fp_condcode(ctx.data(insn));
-                let cc = CC::from_floatcc(condcode);
+                let cond_code = inst_fp_condcode(ctx.data(insn));

                 // Verification ensures that the input is always a single-def ffcmp.
-                let ffcmp_insn = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
-                emit_fcmp(ctx, ffcmp_insn, FcmpOperands::DontSwap);
-                cc
-            };
-
-            ctx.emit_safepoint(Inst::TrapIf {
-                trap_code,
-                srcloc,
-                cc,
-            });
+                let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap();
+
+                match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) {
+                    FcmpCondResult::Condition(cc) => ctx.emit_safepoint(Inst::TrapIf {
+                        trap_code,
+                        srcloc,
+                        cc,
+                    }),
+                    FcmpCondResult::AndConditions(cc1, cc2) => {
+                        // A bit unfortunate, but materialize the flags in their own register, and
+                        // check against this.
+                        let tmp = ctx.alloc_tmp(RegClass::I64, types::I32);
+                        let tmp2 = ctx.alloc_tmp(RegClass::I64, types::I32);
+                        ctx.emit(Inst::setcc(cc1, tmp));
+                        ctx.emit(Inst::setcc(cc2, tmp2));
+                        ctx.emit(Inst::alu_rmi_r(
+                            false, /* is_64 */
+                            AluRmiROpcode::And,
+                            RegMemImm::reg(tmp.to_reg()),
+                            tmp2,
+                        ));
+                        ctx.emit_safepoint(Inst::TrapIf {
+                            trap_code,
+                            srcloc,
+                            cc: CC::NZ,
+                        });
+                    }
+                    FcmpCondResult::OrConditions(cc1, cc2) => {
+                        ctx.emit_safepoint(Inst::TrapIf {
+                            trap_code,
+                            srcloc,
+                            cc: cc1,
+                        });
+                        ctx.emit_safepoint(Inst::TrapIf {
+                            trap_code,
+                            srcloc,
+                            cc: cc2,
+                        });
+                    }
+                    FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
+                };
+            };
         }
@@ -1751,7 +1826,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 | Opcode::Uload32
                 | Opcode::Sload32 => {
                     assert_eq!(inputs.len(), 1, "only one input for load operands");
-                    lower_amode(ctx, inputs[0], offset as u32)
+                    lower_to_amode(ctx, inputs[0], offset as u32)
                 }

                 Opcode::LoadComplex
@@ -1842,7 +1917,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let addr = match op {
                 Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
                     assert_eq!(inputs.len(), 2, "only one input for store memory operands");
-                    lower_amode(ctx, inputs[1], offset as u32)
+                    lower_to_amode(ctx, inputs[1], offset as u32)
                 }

                 Opcode::StoreComplex
@@ -1899,11 +1974,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             } else {
                 None
             };
+
             // Make sure that both args are in virtual regs, since in effect we have to do a
             // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not
             // guaranteed safe if either is in a real reg.
             addr = ctx.ensure_in_vreg(addr, types::I64);
             arg2 = ctx.ensure_in_vreg(arg2, types::I64);
+
             // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq`
             // operates at whatever width is specified by `ty`, so there's no need to
             // zero-extend `arg2` in the case of `ty` being I8/I16/I32.
@@ -1917,6 +1994,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 arg2,
                 types::I64,
             ));
+
             // Now the AtomicRmwSeq (pseudo-) instruction itself
             let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap());
             ctx.emit(Inst::AtomicRmwSeq {
@@ -1924,6 +2002,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 op,
                 srcloc,
             });
+
             // And finally, copy the preordained AtomicRmwSeq output reg to its destination.
             ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
         }
@@ -1932,7 +2011,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // This is very similar to, but not identical to, the `AtomicRmw` case. As with
             // `AtomicRmw`, there's no need to zero-extend narrow values here.
             let dst = output_to_reg(ctx, outputs[0]);
-            let addr = input_to_reg(ctx, inputs[0]);
+            let addr = lower_to_amode(ctx, inputs[0], 0);
             let expected = input_to_reg(ctx, inputs[1]);
             let replacement = input_to_reg(ctx, inputs[2]);
             let ty_access = ty.unwrap();
@@ -1943,6 +2022,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             } else {
                 None
             };
+
             // Move the expected value into %rax. Because there's only one fixed register on
             // the input side, we don't have to use `ensure_in_vreg`, as is necessary in the
             // `AtomicRmw` case.
@@ -1954,7 +2034,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::LockCmpxchg {
                 ty: ty_access,
                 src: replacement,
-                dst: Amode::imm_reg(0, addr).into(),
+                dst: addr.into(),
                 srcloc,
             });

             // And finally, copy the old value at the location to its destination reg.
@@ -1966,7 +2046,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
             // need for any fence instructions.
             let data = output_to_reg(ctx, outputs[0]);
-            let addr = input_to_reg(ctx, inputs[0]);
+            let addr = lower_to_amode(ctx, inputs[0], 0);
             let ty_access = ty.unwrap();
             assert!(is_valid_atomic_transaction_ty(ty_access));
             let memflags = ctx.memflags(insn).expect("memory flags");
@@ -1975,8 +2055,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             } else {
                 None
             };
-            // For the amode, we could do better, but for now just use `0(addr)`.
-            let rm = RegMem::mem(Amode::imm_reg(0, addr));
+
+            let rm = RegMem::mem(addr);
             if ty_access == types::I64 {
                 ctx.emit(Inst::mov64_rm_r(rm, data, srcloc));
             } else {
@@ -1993,7 +2073,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::AtomicStore => {
             // This is a normal store, followed by an `mfence` instruction.
             let data = input_to_reg(ctx, inputs[0]);
-            let addr = input_to_reg(ctx, inputs[1]);
+            let addr = lower_to_amode(ctx, inputs[1], 0);
             let ty_access = ctx.input_ty(insn, 0);
             assert!(is_valid_atomic_transaction_ty(ty_access));
             let memflags = ctx.memflags(insn).expect("memory flags");
@@ -2002,13 +2082,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             } else {
                 None
             };
-            // For the amode, we could do better, but for now just use `0(addr)`.
-            ctx.emit(Inst::mov_r_m(
-                ty_access.bytes() as u8,
-                data,
-                Amode::imm_reg(0, addr),
-                srcloc,
-            ));
+
+            ctx.emit(Inst::mov_r_m(ty_access.bytes() as u8, data, addr, srcloc));
             ctx.emit(Inst::Fence {
                 kind: FenceKind::MFence,
             });
@@ -2068,81 +2143,36 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
                 let cond_code = inst_fp_condcode(ctx.data(fcmp));

-                // See comments in the lowering of Fcmp.
-                let (cond_code, swap_op, was_equal) = match cond_code {
-                    FloatCC::LessThan
-                    | FloatCC::LessThanOrEqual
-                    | FloatCC::UnorderedOrGreaterThan
-                    | FloatCC::UnorderedOrGreaterThanOrEqual => {
-                        (cond_code.reverse(), FcmpOperands::Swap, false)
-                    }
-                    FloatCC::Equal => {
-                        // Additionally, we invert Equal to NotEqual too: taking LHS if equal would
-                        // mean take it if both CC::NP and CC::Z are set, the conjunction of which
-                        // can't be modeled with a single cmov instruction. Instead, we'll swap LHS
-                        // and RHS in the select operation, and invert the equal to a not-equal
-                        // here.
-                        (FloatCC::NotEqual, FcmpOperands::DontSwap, true)
-                    }
-                    _ => (cond_code, FcmpOperands::DontSwap, false),
-                };
-
-                emit_fcmp(ctx, fcmp, swap_op);
-
-                let (lhs, rhs) = if was_equal {
-                    // See comment above about inverting conditional code.
-                    (
-                        input_to_reg_mem(ctx, inputs[2]),
-                        input_to_reg(ctx, inputs[1]),
-                    )
-                } else {
-                    (
-                        input_to_reg_mem(ctx, inputs[1]),
-                        input_to_reg(ctx, inputs[2]),
-                    )
-                };
-
-                let dst = output_to_reg(ctx, outputs[0]);
-                let ty = ctx.output_ty(insn, 0);
-
-                let lhs = if is_int_ty(ty) {
-                    let size = ty.bytes() as u8;
-                    if size == 1 {
-                        // Sign-extend operands to 32, then do a cmove of size 4.
-                        let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32);
-                        ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None));
-                        ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None));
-                        RegMem::reg(lhs_se.to_reg())
-                    } else {
-                        ctx.emit(Inst::gen_move(dst, rhs, ty));
-                        lhs
-                    }
-                } else {
-                    debug_assert!(ty == types::F32 || ty == types::F64);
-                    ctx.emit(Inst::gen_move(dst, rhs, ty));
-                    lhs
-                };
-
-                match cond_code {
-                    FloatCC::Equal => {
-                        // See comment above about inverting conditional code.
-                        panic!("can't happen because of above guard");
-                    }
-                    FloatCC::NotEqual => {
-                        // Take lhs if not-equal, that is CC::P or CC::NZ.
-                        if is_int_ty(ty) {
-                            let size = u8::max(ty.bytes() as u8, 4);
-                            ctx.emit(Inst::cmove(size, CC::P, lhs.clone(), dst));
-                            ctx.emit(Inst::cmove(size, CC::NZ, lhs, dst));
-                        } else {
-                            ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::P, lhs.clone(), dst));
-                            ctx.emit(Inst::xmm_cmove(ty == types::F64, CC::NZ, lhs, dst));
-                        }
-                    }
-                    _ => {
-                        let cc = CC::from_floatcc(cond_code);
+                // We request inversion of Equal to NotEqual here: taking LHS if equal would mean
+                // taking it if both CC::NP and CC::Z are set, the conjunction of which can't be
+                // modeled with a single cmov instruction. Instead, we'll swap LHS and RHS in the
+                // select operation, and invert the equal to a not-equal here.
+                let fcmp_results = emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::InvertEqual);
+
+                let (lhs_input, rhs_input) = match fcmp_results {
+                    FcmpCondResult::InvertedEqualOrConditions(_, _) => (inputs[2], inputs[1]),
+                    FcmpCondResult::Condition(_)
+                    | FcmpCondResult::AndConditions(_, _)
+                    | FcmpCondResult::OrConditions(_, _) => (inputs[1], inputs[2]),
+                };
+
+                let ty = ctx.output_ty(insn, 0);
+                let rhs = input_to_reg(ctx, rhs_input);
+                let dst = output_to_reg(ctx, outputs[0]);
+
+                let lhs = if is_int_ty(ty) && ty.bytes() < 4 {
+                    // Special case: since the higher bits are undefined per CLIF semantics, we
+                    // can just apply a 32-bit cmove here. Force inputs into registers, to
+                    // avoid partial spilling out-of-bounds with memory accesses, though.
+                    RegMem::reg(input_to_reg(ctx, lhs_input))
+                } else {
+                    input_to_reg_mem(ctx, lhs_input)
+                };
+
+                ctx.emit(Inst::gen_move(dst, rhs, ty));
+
+                match fcmp_results {
+                    FcmpCondResult::Condition(cc) => {
                         if is_int_ty(ty) {
                             let size = u8::max(ty.bytes() as u8, 4);
                             ctx.emit(Inst::cmove(size, cc, lhs, dst));
@@ -2150,6 +2180,22 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                             ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
                         }
                     }
+                    FcmpCondResult::AndConditions(_, _) => {
+                        unreachable!(
+                            "can't AND with select; see above comment about inverting equal"
+                        );
+                    }
+                    FcmpCondResult::InvertedEqualOrConditions(cc1, cc2)
+                    | FcmpCondResult::OrConditions(cc1, cc2) => {
+                        if is_int_ty(ty) {
+                            let size = u8::max(ty.bytes() as u8, 4);
+                            ctx.emit(Inst::cmove(size, cc1, lhs.clone(), dst));
+                            ctx.emit(Inst::cmove(size, cc2, lhs, dst));
+                        } else {
+                            ctx.emit(Inst::xmm_cmove(ty == types::F64, cc1, lhs.clone(), dst));
+                            ctx.emit(Inst::xmm_cmove(ty == types::F64, cc2, lhs, dst));
+                        }
+                    }
                 }
             } else {
                 let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
@@ -2164,27 +2210,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     CC::NZ
                 };

-                let lhs = input_to_reg_mem(ctx, inputs[1]);
                 let rhs = input_to_reg(ctx, inputs[2]);
                 let dst = output_to_reg(ctx, outputs[0]);
                 let ty = ctx.output_ty(insn, 0);

+                ctx.emit(Inst::gen_move(dst, rhs, ty));
+
                 if is_int_ty(ty) {
-                    let size = ty.bytes() as u8;
-                    if size == 1 {
-                        // Sign-extend operands to 32, then do a cmove of size 4.
-                        let lhs_se = ctx.alloc_tmp(RegClass::I64, types::I32);
-                        ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None));
-                        ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None));
-                        ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
-                    } else {
-                        ctx.emit(Inst::gen_move(dst, rhs, ty));
-                        ctx.emit(Inst::cmove(size, cc, lhs, dst));
-                    }
+                    let mut size = ty.bytes() as u8;
+                    let lhs = if size < 4 {
+                        // Special case: since the higher bits are undefined per CLIF semantics, we
+                        // can just apply a 32-bit cmove here. Force inputs into registers, to
+                        // avoid partial spilling out-of-bounds with memory accesses, though.
+                        size = 4;
+                        RegMem::reg(input_to_reg(ctx, inputs[1]))
+                    } else {
+                        input_to_reg_mem(ctx, inputs[1])
+                    };
+                    ctx.emit(Inst::cmove(size, cc, lhs, dst));
                 } else {
                     debug_assert!(ty == types::F32 || ty == types::F64);
-                    ctx.emit(Inst::gen_move(dst, rhs, ty));
+                    let lhs = input_to_reg_mem(ctx, inputs[1]);
                     ctx.emit(Inst::xmm_cmove(ty == types::F64, cc, lhs, dst));
                 }
             }
@@ -2464,47 +2510,29 @@ impl LowerBackend for X64Backend {
                 } else {
                     cond_code
                 };
                 let cc = CC::from_intcc(cond_code);
                 ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
             } else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) {
                 let cond_code = inst_fp_condcode(ctx.data(fcmp));
                 let cond_code = if op0 == Opcode::Brz {
                     cond_code.inverse()
                 } else {
                     cond_code
                 };

-                // See comments in the lowering of Fcmp.
-                let (cond_code, swap_op) = match cond_code {
-                    FloatCC::LessThan
-                    | FloatCC::LessThanOrEqual
-                    | FloatCC::UnorderedOrGreaterThan
-                    | FloatCC::UnorderedOrGreaterThanOrEqual => {
-                        (cond_code.reverse(), FcmpOperands::Swap)
-                    }
-                    _ => (cond_code, FcmpOperands::DontSwap),
-                };
-
-                emit_fcmp(ctx, fcmp, swap_op);
-
-                match cond_code {
-                    FloatCC::Equal => {
-                        // Jump to taken if CC::NP and CC::Z, that is, jump to not-taken if
-                        // CC::P or CC::NZ.
-                        ctx.emit(Inst::jmp_if(CC::P, not_taken));
-                        ctx.emit(Inst::jmp_cond(CC::NZ, not_taken, taken));
-                    }
-                    FloatCC::NotEqual => {
-                        // Jump to taken if CC::P or CC::NZ.
-                        ctx.emit(Inst::jmp_if(CC::P, taken));
-                        ctx.emit(Inst::jmp_cond(CC::NZ, taken, not_taken));
-                    }
-                    _ => {
-                        let cc = CC::from_floatcc(cond_code);
+                match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) {
+                    FcmpCondResult::Condition(cc) => {
                         ctx.emit(Inst::jmp_cond(cc, taken, not_taken));
                     }
+                    FcmpCondResult::AndConditions(cc1, cc2) => {
+                        ctx.emit(Inst::jmp_if(cc1.invert(), not_taken));
+                        ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken));
+                    }
+                    FcmpCondResult::OrConditions(cc1, cc2) => {
+                        ctx.emit(Inst::jmp_if(cc1, taken));
+                        ctx.emit(Inst::jmp_cond(cc2, taken, not_taken));
+                    }
+                    FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(),
                 }
             } else if is_int_ty(src_ty) || is_bool_ty(src_ty) {
                 let src = input_to_reg(


@@ -4,7 +4,7 @@
 use crate::entity::SecondaryMap;
 use crate::fx::{FxHashMap, FxHashSet};
-use crate::inst_predicates::{has_side_effect_or_load_not_get_pinned_reg, is_constant_64bit};
+use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
 use crate::ir::instructions::BranchInfo;
 use crate::ir::types::I64;
 use crate::ir::{
@@ -372,7 +372,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         for bb in f.layout.blocks() {
             cur_color += 1;
             for inst in f.layout.block_insts(bb) {
-                let side_effect = has_side_effect_or_load_not_get_pinned_reg(f, inst);
+                let side_effect = has_lowering_side_effect(f, inst);

                 // Assign colors. A new color is chosen *after* any side-effecting instruction.
                 inst_colors[inst] = InstColor::new(cur_color);
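For context, a rough paraphrase of the coloring scheme this hunk touches (not code added by this commit; `cur_color`, `inst_colors`, and `InstColor` are the names used above, and the exact increment site may differ in the real loop):

    // Every instruction receives the current color; the color advances only
    // *after* an instruction with a lowering side effect. Two instructions
    // that share a color therefore have no side-effecting instruction between
    // them, which is what later makes it safe to sink a producer to its use.
    for inst in f.layout.block_insts(bb) {
        inst_colors[inst] = InstColor::new(cur_color);
        if has_lowering_side_effect(f, inst) {
            cur_color += 1;
        }
    }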
@@ -799,15 +799,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             ValueDef::Result(src_inst, result_idx) => {
                 debug!(" -> src inst {}", src_inst);
                 debug!(
-                    " -> has side effect: {}",
-                    has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst)
+                    " -> has lowering side effect: {}",
+                    has_lowering_side_effect(self.f, src_inst)
                 );
                 debug!(
                     " -> our color is {:?}, src inst is {:?}",
                     self.inst_color(at_inst),
                     self.inst_color(src_inst)
                 );
-                if !has_side_effect_or_load_not_get_pinned_reg(self.f, src_inst)
+                if !has_lowering_side_effect(self.f, src_inst)
                     || self.inst_color(at_inst) == self.inst_color(src_inst)
                 {
                     Some((src_inst, result_idx))
@@ -989,6 +989,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
     fn use_input_reg(&mut self, input: LowerInput) {
         debug!("use_input_reg: vreg {:?} is needed", input.reg);
+        // We may directly return a real (machine) register when we know that register holds the
+        // result of an opcode (e.g. GetPinnedReg).
         if input.reg.is_virtual() {
             self.vreg_needed[input.reg.get_index()] = true;
         }