x64: Migrate selectif and selectif_spectre_guard to ISLE (#4619)

https://github.com/bytecodealliance/wasmtime/pull/4619
2022-08-05 09:36:11 -07:00
parent 1ce9e8aa5f
commit 0c2a48f682
4 changed files with 26 additions and 231 deletions
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -3140,7 +3140,6 @@
          (ConsumesFlags.ConsumesFlagsSideEffect
            (MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets)))))

-
 ;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
@@ -3157,6 +3156,19 @@
 (rule (lower_icmp_bool (IcmpCondResult.Condition producer cc))
      (with_flags producer (x64_setcc cc)))

+;; Emit a conditional move based on the result of an icmp.
+(decl select_icmp (IcmpCondResult Value Value) ValueRegs)
+
+;; Ensure that we put the `x` argument into a register for single-register
+;; gpr-typed arguments, as we rely on this for the legalization of heap_addr and
+;; loading easily computed constants (like 0) from memory is too expensive.
+(rule (select_icmp (IcmpCondResult.Condition producer cc) x @ (value_type (is_gpr_type (is_single_register_type ty))) y)
+      (with_flags producer (cmove ty cc (put_in_gpr x) y)))
+
+;; Otherwise, fall back on the behavior of `cmove_from_values`.
+(rule (select_icmp (IcmpCondResult.Condition producer cc) x @ (value_type ty) y)
+      (with_flags producer (cmove_from_values ty cc x y)))
+
 (decl emit_cmp (IntCC Value Value) IcmpCondResult)

 ;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -645,21 +645,6 @@ impl Inst {
        }
    }

-    pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable<Reg>) -> Inst {
-        debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector());
-        src.assert_regclass_is(RegClass::Float);
-        debug_assert!(dst.to_reg().class() == RegClass::Float);
-        let src = XmmMem::new(src).unwrap();
-        let dst = WritableXmm::from_writable_reg(dst).unwrap();
-        Inst::XmmCmove {
-            ty,
-            cc,
-            consequent: src,
-            alternative: dst.to_reg(),
-            dst,
-        }
-    }
-
    pub(crate) fn push64(src: RegMemImm) -> Inst {
        src.assert_regclass_is(RegClass::Int);
        let src = GprMemImm::new(src).unwrap();
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2945,3 +2945,11 @@

 (rule (lower_branch (br_table idx @ (value_type ty) _ _) (jump_table_targets default_target jt_targets))
      (side_effect (jmp_table_seq ty idx default_target jt_targets)))
+
+;; Rules for `selectif` and `selectif_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (selectif cc (ifcmp a b) x y))
+      (select_icmp (emit_cmp cc a b) x y))
+
+(rule (lower (selectif_spectre_guard cc (ifcmp a b) x y))
+      (select_icmp (emit_cmp cc a b) x y))
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -5,8 +5,7 @@ pub(super) mod isle;

 use crate::data_value::DataValue;
 use crate::ir::{
-    condcodes::{FloatCC, IntCC},
-    types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type,
+    condcodes::FloatCC, types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type,
 };
 use crate::isa::x64::abi::*;
 use crate::isa::x64::inst::args::*;
@@ -217,40 +216,11 @@ fn extend_input_to_reg<C: LowerCtx<I = Inst>>(
    dst.to_reg()
 }

-/// Returns whether the given input is an immediate that can be properly sign-extended, without any
-/// possible side-effect.
-fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option<u32> {
-    input.constant.and_then(|x| {
-        // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend
-        // to 64 bits. For other sizes, it doesn't matter and we can just use the plain
-        // constant.
-        if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) {
-            Some(x as u32)
-        } else {
-            None
-        }
-    })
-}
-
 fn input_to_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> Option<u64> {
    ctx.get_input_as_source_or_const(spec.insn, spec.input)
        .constant
 }

-/// Put the given input into an immediate, a register or a memory operand.
-/// Effectful: may mark the given input as used, when returning the register form.
-fn input_to_reg_mem_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegMemImm {
-    let input = ctx.get_input_as_source_or_const(spec.insn, spec.input);
-    let input_ty = ctx.input_ty(spec.insn, spec.input);
-    match non_reg_input_to_sext_imm(input, input_ty) {
-        Some(x) => RegMemImm::imm(x),
-        None => match input_to_reg_mem(ctx, spec) {
-            RegMem::Reg { reg } => RegMemImm::reg(reg),
-            RegMem::Mem { addr } => RegMemImm::mem(addr),
-        },
-    }
-}
-
 /// Emit an instruction to insert a value `src` into a lane of `dst`.
 fn emit_insert_lane<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
@@ -357,119 +327,6 @@ fn emit_extract_lane<C: LowerCtx<I = Inst>>(
    }
 }

-/// Emits an int comparison instruction.
-///
-/// Note: make sure that there are no instructions modifying the flags between a call to this
-/// function and the use of the flags!
-///
-/// Takes the condition code that will be tested, and returns
-/// the condition code that should be used. This allows us to
-/// synthesize comparisons out of multiple instructions for
-/// special cases (e.g., 128-bit integers).
-fn emit_cmp<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntCC {
-    let ty = ctx.input_ty(insn, 0);
-
-    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
-
-    if ty == types::I128 {
-        // We need to compare both halves and combine the results appropriately.
-        let cmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-        let cmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-        let lhs = put_input_in_regs(ctx, inputs[0]);
-        let lhs_lo = lhs.regs()[0];
-        let lhs_hi = lhs.regs()[1];
-        let rhs = put_input_in_regs(ctx, inputs[1]);
-        let rhs_lo = RegMemImm::reg(rhs.regs()[0]);
-        let rhs_hi = RegMemImm::reg(rhs.regs()[1]);
-        match cc {
-            IntCC::Equal => {
-                ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi));
-                ctx.emit(Inst::setcc(CC::Z, cmp1));
-                ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo));
-                ctx.emit(Inst::setcc(CC::Z, cmp2));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::And,
-                    RegMemImm::reg(cmp1.to_reg()),
-                    cmp2,
-                ));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::And,
-                    RegMemImm::imm(1),
-                    cmp2,
-                ));
-                IntCC::NotEqual
-            }
-            IntCC::NotEqual => {
-                ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi));
-                ctx.emit(Inst::setcc(CC::NZ, cmp1));
-                ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo));
-                ctx.emit(Inst::setcc(CC::NZ, cmp2));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::Or,
-                    RegMemImm::reg(cmp1.to_reg()),
-                    cmp2,
-                ));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::And,
-                    RegMemImm::imm(1),
-                    cmp2,
-                ));
-                IntCC::NotEqual
-            }
-            IntCC::SignedLessThan
-            | IntCC::SignedLessThanOrEqual
-            | IntCC::SignedGreaterThan
-            | IntCC::SignedGreaterThanOrEqual
-            | IntCC::UnsignedLessThan
-            | IntCC::UnsignedLessThanOrEqual
-            | IntCC::UnsignedGreaterThan
-            | IntCC::UnsignedGreaterThanOrEqual => {
-                // Result = (lhs_hi <> rhs_hi) ||
-                //          (lhs_hi == rhs_hi && lhs_lo <> rhs_lo)
-                let cmp3 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi));
-                ctx.emit(Inst::setcc(CC::from_intcc(cc.without_equal()), cmp1));
-                ctx.emit(Inst::setcc(CC::Z, cmp2));
-                ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo));
-                ctx.emit(Inst::setcc(CC::from_intcc(cc.unsigned()), cmp3));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::And,
-                    RegMemImm::reg(cmp2.to_reg()),
-                    cmp3,
-                ));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::Or,
-                    RegMemImm::reg(cmp1.to_reg()),
-                    cmp3,
-                ));
-                ctx.emit(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::And,
-                    RegMemImm::imm(1),
-                    cmp3,
-                ));
-                IntCC::NotEqual
-            }
-            _ => panic!("Unhandled IntCC in I128 comparison: {:?}", cc),
-        }
-    } else {
-        // TODO Try to commute the operands (and invert the condition) if one is an immediate.
-        let lhs = put_input_in_reg(ctx, inputs[0]);
-        let rhs = input_to_reg_mem_imm(ctx, inputs[1]);
-
-        // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives
-        // us dst - src at the machine instruction level, so invert operands.
-        ctx.emit(Inst::cmp_rmi_r(OperandSize::from_ty(ty), rhs, lhs));
-        cc
-    }
-}
-
 fn emit_vm_call<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    flags: &Flags,
@@ -632,37 +489,6 @@ fn lower_to_amode<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput, offset: i
    Amode::imm_reg(offset as u32, input).with_flags(flags)
 }

-fn emit_moves<C: LowerCtx<I = Inst>>(
-    ctx: &mut C,
-    dst: ValueRegs<Writable<Reg>>,
-    src: ValueRegs<Reg>,
-    ty: Type,
-) {
-    let (_, tys) = Inst::rc_for_type(ty).unwrap();
-    for ((dst, src), ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) {
-        ctx.emit(Inst::gen_move(*dst, *src, *ty));
-    }
-}
-
-fn emit_cmoves<C: LowerCtx<I = Inst>>(
-    ctx: &mut C,
-    size: u8,
-    cc: CC,
-    src: ValueRegs<Reg>,
-    dst: ValueRegs<Writable<Reg>>,
-) {
-    let size = size / src.len() as u8;
-    let size = u8::max(size, 4); // at least 32 bits
-    for (dst, src) in dst.regs().iter().zip(src.regs().iter()) {
-        ctx.emit(Inst::cmove(
-            OperandSize::from_bytes(size.into()),
-            cc,
-            RegMem::reg(*src),
-            *dst,
-        ));
-    }
-}
-
 //=============================================================================
 // Top-level instruction lowering entry point, for one instruction.

@@ -797,7 +623,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        | Opcode::Trapff
        | Opcode::GetFramePointer
        | Opcode::GetStackPointer
-        | Opcode::GetReturnAddress => {
+        | Opcode::GetReturnAddress
+        | Opcode::Select
+        | Opcode::Selectif
+        | Opcode::SelectifSpectreGuard => {
            implemented_in_isle(ctx);
        }

@@ -1881,45 +1710,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(inst);
        }

-        Opcode::Select => {
-            implemented_in_isle(ctx);
-        }
-
-        Opcode::Selectif | Opcode::SelectifSpectreGuard => {
-            let lhs = put_input_in_regs(ctx, inputs[1]);
-            let rhs = put_input_in_regs(ctx, inputs[2]);
-            let dst = get_output_reg(ctx, outputs[0]);
-            let ty = ctx.output_ty(insn, 0);
-
-            // Verification ensures that the input is always a single-def ifcmp.
-            let cmp_insn = ctx
-                .get_input_as_source_or_const(inputs[0].insn, inputs[0].input)
-                .inst
-                .as_inst()
-                .unwrap()
-                .0;
-            debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
-            let cond_code = ctx.data(insn).cond_code().unwrap();
-            let cond_code = emit_cmp(ctx, cmp_insn, cond_code);
-
-            let cc = CC::from_intcc(cond_code);
-
-            if is_int_or_ref_ty(ty) || ty == types::I128 {
-                let size = ty.bytes() as u8;
-                emit_moves(ctx, dst, rhs, ty);
-                emit_cmoves(ctx, size, cc, lhs, dst);
-            } else {
-                debug_assert!(ty == types::F32 || ty == types::F64);
-                emit_moves(ctx, dst, rhs, ty);
-                ctx.emit(Inst::xmm_cmove(
-                    ty,
-                    cc,
-                    RegMem::reg(lhs.only_reg().unwrap()),
-                    dst.only_reg().unwrap(),
-                ));
-            }
-        }
-
        Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => {
            let kind = match op {
                Opcode::Udiv => DivOrRemKind::UnsignedDiv,