x64: Migrate selectif and selectif_spectre_guard to ISLE (#4619)

https://github.com/bytecodealliance/wasmtime/pull/4619
This commit is contained in:
Trevor Elliott
2022-08-05 09:36:11 -07:00
committed by GitHub
parent 1ce9e8aa5f
commit 0c2a48f682
4 changed files with 26 additions and 231 deletions

View File

@@ -3140,7 +3140,6 @@
(ConsumesFlags.ConsumesFlagsSideEffect
(MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets)))))
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
@@ -3157,6 +3156,19 @@
(rule (lower_icmp_bool (IcmpCondResult.Condition producer cc))
(with_flags producer (x64_setcc cc)))
;; Emit a conditional move based on the result of an icmp.
(decl select_icmp (IcmpCondResult Value Value) ValueRegs)
;; Ensure that we put the `x` argument into a register for single-register
;; gpr-typed arguments, as we rely on this for the legalization of heap_addr and
;; loading easily computed constants (like 0) from memory is too expensive.
(rule (select_icmp (IcmpCondResult.Condition producer cc) x @ (value_type (is_gpr_type (is_single_register_type ty))) y)
(with_flags producer (cmove ty cc (put_in_gpr x) y)))
;; Otherwise, fall back on the behavior of `cmove_from_values`.
(rule (select_icmp (IcmpCondResult.Condition producer cc) x @ (value_type ty) y)
(with_flags producer (cmove_from_values ty cc x y)))
(decl emit_cmp (IntCC Value Value) IcmpCondResult)
;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on

View File

@@ -645,21 +645,6 @@ impl Inst {
}
}
pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
src.assert_regclass_is(RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
let src = XmmMem::new(src).unwrap();
let dst = WritableXmm::from_writable_reg(dst).unwrap();
Inst::XmmCmove {
ty,
cc,
consequent: src,
alternative: dst.to_reg(),
dst,
}
}
pub(crate) fn push64(src: RegMemImm) -> Inst {
src.assert_regclass_is(RegClass::Int);
let src = GprMemImm::new(src).unwrap();

View File

@@ -2945,3 +2945,11 @@
(rule (lower_branch (br_table idx @ (value_type ty) _ _) (jump_table_targets default_target jt_targets))
(side_effect (jmp_table_seq ty idx default_target jt_targets)))
;; Rules for `selectif` and `selectif_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (selectif cc (ifcmp a b) x y))
(select_icmp (emit_cmp cc a b) x y))
(rule (lower (selectif_spectre_guard cc (ifcmp a b) x y))
(select_icmp (emit_cmp cc a b) x y))

View File

@@ -5,8 +5,7 @@ pub(super) mod isle;
use crate::data_value::DataValue;
use crate::ir::{
condcodes::{FloatCC, IntCC},
types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type,
condcodes::FloatCC, types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type,
};
use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*;
@@ -217,40 +216,11 @@ fn extend_input_to_reg<C: LowerCtx<I = Inst>>(
dst.to_reg()
}
/// Returns whether the given input is an immediate that can be properly sign-extended, without any
/// possible side-effect.
fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option<u32> {
input.constant.and_then(|x| {
// For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
// to 64 bits. For other sizes, it doesn't matter and we can just use the plain
// constant.
if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) {
Some(x as u32)
} else {
None
}
})
}
fn input_to_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> Option<u64> {
ctx.get_input_as_source_or_const(spec.insn, spec.input)
.constant
}
/// Put the given input into an immediate, a register or a memory operand.
/// Effectful: may mark the given input as used, when returning the register form.
fn input_to_reg_mem_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegMemImm {
let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
let input_ty = ctx.input_ty(spec.insn, spec.input);
match non_reg_input_to_sext_imm(input, input_ty) {
Some(x) => RegMemImm::imm(x),
None => match input_to_reg_mem(ctx, spec) {
RegMem::Reg { reg } => RegMemImm::reg(reg),
RegMem::Mem { addr } => RegMemImm::mem(addr),
},
}
}
/// Emit an instruction to insert a value `src` into a lane of `dst`.
fn emit_insert_lane<C: LowerCtx<I = Inst>>(
ctx: &mut C,
@@ -357,119 +327,6 @@ fn emit_extract_lane<C: LowerCtx<I = Inst>>(
}
}
/// Emits an int comparison instruction.
///
/// Note: make sure that there are no instructions modifying the flags between a call to this
/// function and the use of the flags!
///
/// Takes the condition code that will be tested, and returns
/// the condition code that should be used. This allows us to
/// synthesize comparisons out of multiple instructions for
/// special cases (e.g., 128-bit integers).
fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntCC {
let ty = ctx.input_ty(insn, 0);
let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
if ty == types::I128 {
// We need to compare both halves and combine the results appropriately.
let cmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let cmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let lhs = put_input_in_regs(ctx, inputs[0]);
let lhs_lo = lhs.regs()[0];
let lhs_hi = lhs.regs()[1];
let rhs = put_input_in_regs(ctx, inputs[1]);
let rhs_lo = RegMemImm::reg(rhs.regs()[0]);
let rhs_hi = RegMemImm::reg(rhs.regs()[1]);
match cc {
IntCC::Equal => {
ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi));
ctx.emit(Inst::setcc(CC::Z, cmp1));
ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo));
ctx.emit(Inst::setcc(CC::Z, cmp2));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::reg(cmp1.to_reg()),
cmp2,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(1),
cmp2,
));
IntCC::NotEqual
}
IntCC::NotEqual => {
ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi));
ctx.emit(Inst::setcc(CC::NZ, cmp1));
ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo));
ctx.emit(Inst::setcc(CC::NZ, cmp2));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(cmp1.to_reg()),
cmp2,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(1),
cmp2,
));
IntCC::NotEqual
}
IntCC::SignedLessThan
| IntCC::SignedLessThanOrEqual
| IntCC::SignedGreaterThan
| IntCC::SignedGreaterThanOrEqual
| IntCC::UnsignedLessThan
| IntCC::UnsignedLessThanOrEqual
| IntCC::UnsignedGreaterThan
| IntCC::UnsignedGreaterThanOrEqual => {
// Result = (lhs_hi <> rhs_hi) ||
// (lhs_hi == rhs_hi && lhs_lo <> rhs_lo)
let cmp3 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi));
ctx.emit(Inst::setcc(CC::from_intcc(cc.without_equal()), cmp1));
ctx.emit(Inst::setcc(CC::Z, cmp2));
ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo));
ctx.emit(Inst::setcc(CC::from_intcc(cc.unsigned()), cmp3));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::reg(cmp2.to_reg()),
cmp3,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(cmp1.to_reg()),
cmp3,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(1),
cmp3,
));
IntCC::NotEqual
}
_ => panic!("Unhandled IntCC in I128 comparison: {:?}", cc),
}
} else {
// TODO Try to commute the operands (and invert the condition) if one is an immediate.
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
// Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
// us dst - src at the machine instruction level, so invert operands.
ctx.emit(Inst::cmp_rmi_r(OperandSize::from_ty(ty), rhs, lhs));
cc
}
}
fn emit_vm_call<C: LowerCtx<I = Inst>>(
ctx: &mut C,
flags: &Flags,
@@ -632,37 +489,6 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i
Amode::imm_reg(offset as u32, input).with_flags(flags)
}
fn emit_moves<C: LowerCtx<I = Inst>>(
ctx: &mut C,
dst: ValueRegs<Writable<Reg>>,
src: ValueRegs<Reg>,
ty: Type,
) {
let (_, tys) = Inst::rc_for_type(ty).unwrap();
for ((dst, src), ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) {
ctx.emit(Inst::gen_move(*dst, *src, *ty));
}
}
fn emit_cmoves<C: LowerCtx<I = Inst>>(
ctx: &mut C,
size: u8,
cc: CC,
src: ValueRegs<Reg>,
dst: ValueRegs<Writable<Reg>>,
) {
let size = size / src.len() as u8;
let size = u8::max(size, 4); // at least 32 bits
for (dst, src) in dst.regs().iter().zip(src.regs().iter()) {
ctx.emit(Inst::cmove(
OperandSize::from_bytes(size.into()),
cc,
RegMem::reg(*src),
*dst,
));
}
}
//=============================================================================
// Top-level instruction lowering entry point, for one instruction.
@@ -797,7 +623,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Trapff
| Opcode::GetFramePointer
| Opcode::GetStackPointer
| Opcode::GetReturnAddress => {
| Opcode::GetReturnAddress
| Opcode::Select
| Opcode::Selectif
| Opcode::SelectifSpectreGuard => {
implemented_in_isle(ctx);
}
@@ -1881,45 +1710,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(inst);
}
Opcode::Select => {
implemented_in_isle(ctx);
}
Opcode::Selectif | Opcode::SelectifSpectreGuard => {
let lhs = put_input_in_regs(ctx, inputs[1]);
let rhs = put_input_in_regs(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]);
let ty = ctx.output_ty(insn, 0);
// Verification ensures that the input is always a single-def ifcmp.
let cmp_insn = ctx
.get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
.inst
.as_inst()
.unwrap()
.0;
debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
let cond_code = ctx.data(insn).cond_code().unwrap();
let cond_code = emit_cmp(ctx, cmp_insn, cond_code);
let cc = CC::from_intcc(cond_code);
if is_int_or_ref_ty(ty) || ty == types::I128 {
let size = ty.bytes() as u8;
emit_moves(ctx, dst, rhs, ty);
emit_cmoves(ctx, size, cc, lhs, dst);
} else {
debug_assert!(ty == types::F32 || ty == types::F64);
emit_moves(ctx, dst, rhs, ty);
ctx.emit(Inst::xmm_cmove(
ty,
cc,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
));
}
}
Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
let kind = match op {
Opcode::Udiv => DivOrRemKind::UnsignedDiv,