Add a DynRex recipe type for x86, decreasing the number of recipes (#1298)

This patch adds a third mode for templates: REX inference is requestable at template instantiation time. This reduces the number of recipes by removing rex()/nonrex() redundancy for many instructions.
2019-12-19 15:49:34 -07:00
parent b486289ab8
commit cf9e762f16
13 changed files with 875 additions and 514 deletions
--- a/cranelift/codegen/meta/src/cdsl/recipes.rs
+++ b/cranelift/codegen/meta/src/cdsl/recipes.rs
@@ -172,7 +172,7 @@ pub(crate) struct EncodingRecipeBuilder {
    pub base_size: u64,
    pub operands_in: Option<Vec<OperandConstraint>>,
    pub operands_out: Option<Vec<OperandConstraint>>,
-    compute_size: Option<&'static str>,
+    pub compute_size: Option<&'static str>,
    pub branch_range: Option<BranchRange>,
    pub emit: Option<String>,
    clobbers_flags: Option<bool>,
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -140,32 +140,59 @@ impl PerCpuModeEncodings {
        self.enc64.push(encoding);
    }

+    /// Adds I32/I64 encodings as appropriate for a typed instruction.
+    /// The REX prefix is always inferred at runtime.
+    ///
    /// Add encodings for `inst.i32` to X86_32.
-    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
+
+        // I32 on x86: no REX prefix.
+        self.enc32(inst.bind(I32), template.infer_rex());
+
+        // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(I32), template.infer_rex());
+
+        // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(I64), template.infer_rex().w());
+    }
+
+    /// Adds I32/I64 encodings as appropriate for a typed instruction.
+    /// All variants of REX prefix are explicitly emitted, not inferred.
+    ///
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());

-        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
-        // reg-alloc would never use r8 and up.
+        // REX-less encoding must come after REX encoding so we don't use it by default.
+        // Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

-    /// Add encodings for `inst.b32` to X86_32.
-    /// Add encodings for `inst.b32` to X86_64 with and without REX.
-    /// Add encodings for `inst.b64` to X86_64 with a REX.W prefix.
+    /// Adds B32/B64 encodings as appropriate for a typed instruction.
+    /// The REX prefix is always inferred at runtime.
+    ///
+    /// Adds encoding for `inst.b32` to X86_32.
+    /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX.
+    /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix.
    fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
-        self.enc32(inst.bind(B32), template.nonrex());

-        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
-        // reg-alloc would never use r8 and up.
-        self.enc64(inst.bind(B32), template.rex());
-        self.enc64(inst.bind(B32), template.nonrex());
-        self.enc64(inst.bind(B64), template.rex().w());
+        // B32 on x86: no REX prefix.
+        self.enc32(inst.bind(B32), template.infer_rex());
+
+        // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(B32), template.infer_rex());
+
+        // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
+        self.enc64(inst.bind(B64), template.infer_rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
@@ -994,8 +1021,8 @@ pub(crate) fn define(
        e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
    }

-    e.enc_i32_i64(spill, rec_spillSib32.opcodes(&MOV_STORE));
-    e.enc_i32_i64(regspill, rec_regspill32.opcodes(&MOV_STORE));
+    e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE));
+    e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE));
    e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE));

@@ -1020,8 +1047,8 @@ pub(crate) fn define(
        e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE));
    }

-    e.enc_i32_i64(fill, rec_fillSib32.opcodes(&MOV_LOAD));
-    e.enc_i32_i64(regfill, rec_regfill32.opcodes(&MOV_LOAD));
+    e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD));
+    e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD));
    e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD));
    e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD));

--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
--- a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs
+++ b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs
@@ -57,6 +57,24 @@ impl EncodingBits {
        new
    }

+    /// Returns a copy of the EncodingBits with the RRR bits set.
+    #[inline]
+    pub fn with_rrr(self, rrr: u8) -> Self {
+        debug_assert_eq!(u8::from(self.rrr()), 0);
+        let mut enc = self.clone();
+        enc.write(RRR, rrr.into());
+        enc
+    }
+
+    /// Returns a copy of the EncodingBits with the REX.W bit set.
+    #[inline]
+    pub fn with_rex_w(self) -> Self {
+        debug_assert_eq!(self.rex_w(), 0);
+        let mut enc = self.clone();
+        enc.write(REX_W, 1);
+        enc
+    }
+
    /// Returns the raw bits.
    #[inline]
    pub fn bits(self) -> u16 {
--- a/cranelift/codegen/src/isa/x86/binemit.rs
+++ b/cranelift/codegen/src/isa/x86/binemit.rs
@@ -61,6 +61,12 @@ fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
    BASE_REX | b | (x << 1) | (r << 2)
 }

+/// Determines whether a REX prefix should be emitted.
+#[inline]
+fn needs_rex(bits: u16, rex: u8) -> bool {
+    rex != BASE_REX || u8::from(EncodingBits::from(bits).rex_w()) == 1
+}
+
 // Emit a REX prefix.
 //
 // The R, X, and B bits are computed from registers using the functions above. The W bit is
@@ -80,11 +86,20 @@ fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {

 // Emit a single-byte opcode with REX prefix.
 fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for Op1*");
+    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
    rex_prefix(bits, rex, sink);
    sink.put1(bits as u8);
 }

+/// Emit a single-byte opcode with inferred REX prefix.
+fn put_dynrexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*");
+    if needs_rex(bits, rex) {
+        rex_prefix(bits, rex, sink);
+    }
+    sink.put1(bits as u8);
+}
+
 // Emit two-byte opcode: 0F XX
 fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
@@ -101,6 +116,20 @@ fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    sink.put1(bits as u8);
 }

+/// Emit two-byte opcode: 0F XX with inferred REX prefix.
+fn put_dynrexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(
+        bits & 0x0f00,
+        0x0400,
+        "Invalid encoding bits for DynRexOp2*"
+    );
+    if needs_rex(bits, rex) {
+        rex_prefix(bits, rex, sink);
+    }
+    sink.put1(0x0f);
+    sink.put1(bits as u8);
+}
+
 // Emit single-byte opcode with mandatory prefix.
 fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
@@ -112,7 +141,7 @@ fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {

 // Emit single-byte opcode with mandatory prefix and REX.
 fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
+    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
    let enc = EncodingBits::from(bits);
    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
    rex_prefix(bits, rex, sink);
@@ -131,7 +160,7 @@ fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {

 // Emit two-byte opcode (0F XX) with mandatory prefix and REX.
 fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
+    debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*");
    let enc = EncodingBits::from(bits);
    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
    rex_prefix(bits, rex, sink);
@@ -139,6 +168,22 @@ fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    sink.put1(bits as u8);
 }

+/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX.
+fn put_dynrexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(
+        bits & 0x0c00,
+        0x0400,
+        "Invalid encoding bits for DynRexMp2*"
+    );
+    let enc = EncodingBits::from(bits);
+    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
+    if needs_rex(bits, rex) {
+        rex_prefix(bits, rex, sink);
+    }
+    sink.put1(0x0f);
+    sink.put1(bits as u8);
+}
+
 // Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
 fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
@@ -152,7 +197,7 @@ fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {

 // Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
 fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for Mp3*");
+    debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
    let enc = EncodingBits::from(bits);
    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
    rex_prefix(bits, rex, sink);
--- a/cranelift/codegen/src/isa/x86/enc_tables.rs
+++ b/cranelift/codegen/src/isa/x86/enc_tables.rs
@@ -16,9 +16,20 @@ use crate::isa::{self, TargetIsa};
 use crate::predicates;
 use crate::regalloc::RegDiversions;

+use cranelift_codegen_shared::isa::x86::EncodingBits;
+
 include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
 include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));

+/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB).
+///
+/// Normal x86 instructions have only 3 bits for encoding a register.
+/// The REX prefix adds REX.R, REX.X, and REX.B bits, interpreted as fourth bits.
+pub fn is_extended_reg(reg: RegUnit) -> bool {
+    // Extended registers have the fourth bit set.
+    reg as u8 & 0b1000 != 0
+}
+
 pub fn needs_sib_byte(reg: RegUnit) -> bool {
    reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
 }
@@ -29,74 +40,179 @@ pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
    needs_sib_byte(reg) || needs_offset(reg)
 }

-fn additional_size_if(
+fn test_input(
    op_index: usize,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
    condition_func: fn(RegUnit) -> bool,
-) -> u8 {
-    let addr_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
-    if condition_func(addr_reg) {
-        1
-    } else {
-        0
-    }
+) -> bool {
+    let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
+    condition_func(in_reg)
 }

-fn size_plus_maybe_offset_for_in_reg_0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
+fn test_result(
+    result_index: usize,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
-) -> u8 {
-    sizing.base_size + additional_size_if(0, inst, divert, func, needs_offset)
+    condition_func: fn(RegUnit) -> bool,
+) -> bool {
+    let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations);
+    condition_func(out_reg)
 }
-fn size_plus_maybe_offset_for_in_reg_1(
+
+fn size_plus_maybe_offset_for_inreg_0(
    sizing: &RecipeSizing,
    _enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
 ) -> u8 {
-    sizing.base_size + additional_size_if(1, inst, divert, func, needs_offset)
+    let needs_offset = test_input(0, inst, divert, func, needs_offset);
+    sizing.base_size + if needs_offset { 1 } else { 0 }
 }
-fn size_plus_maybe_sib_for_in_reg_0(
+fn size_plus_maybe_offset_for_inreg_1(
    sizing: &RecipeSizing,
    _enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
 ) -> u8 {
-    sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte)
+    let needs_offset = test_input(1, inst, divert, func, needs_offset);
+    sizing.base_size + if needs_offset { 1 } else { 0 }
 }
-fn size_plus_maybe_sib_for_in_reg_1(
+fn size_plus_maybe_sib_for_inreg_0(
    sizing: &RecipeSizing,
    _enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
 ) -> u8 {
-    sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte)
+    let needs_sib = test_input(0, inst, divert, func, needs_sib_byte);
+    sizing.base_size + if needs_sib { 1 } else { 0 }
 }
-fn size_plus_maybe_sib_or_offset_for_in_reg_0(
+fn size_plus_maybe_sib_for_inreg_1(
    sizing: &RecipeSizing,
    _enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
 ) -> u8 {
-    sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte_or_offset)
+    let needs_sib = test_input(1, inst, divert, func, needs_sib_byte);
+    sizing.base_size + if needs_sib { 1 } else { 0 }
 }
-fn size_plus_maybe_sib_or_offset_for_in_reg_1(
+fn size_plus_maybe_sib_or_offset_for_inreg_0(
    sizing: &RecipeSizing,
    _enc: Encoding,
    inst: Inst,
    divert: &RegDiversions,
    func: &Function,
 ) -> u8 {
-    sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte_or_offset)
+    let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset);
+    sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
+}
+fn size_plus_maybe_sib_or_offset_for_inreg_1(
+    sizing: &RecipeSizing,
+    _enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset);
+    sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg.
+///
+/// A REX prefix is known to be emitted if either:
+///  1. The EncodingBits specify that REX.W is to be set.
+///  2. Registers are used that require REX.R or REX.B bits for encoding.
+fn size_with_inferred_rex_for_inreg0(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(0, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on the second operand.
+fn size_with_inferred_rex_for_inreg1(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(1, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on the third operand.
+fn size_with_inferred_rex_for_inreg2(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(2, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers.
+///
+/// A REX prefix is known to be emitted if either:
+///  1. The EncodingBits specify that REX.W is to be set.
+///  2. Registers are used that require REX.R or REX.B bits for encoding.
+fn size_with_inferred_rex_for_inreg0_inreg1(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(0, inst, divert, func, is_extended_reg)
+        || test_input(1, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, based on a single
+/// input register and a single output register.
+fn size_with_inferred_rex_for_inreg0_outreg0(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(0, inst, divert, func, is_extended_reg)
+        || test_result(0, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
+/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
+///
+/// CMOV uses 3 inputs, with the REX inferred from reg1 and reg2.
+fn size_with_inferred_rex_for_cmov(
+    sizing: &RecipeSizing,
+    enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0)
+        || test_input(1, inst, divert, func, is_extended_reg)
+        || test_input(2, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
 }

 /// If the value's definition is a constant immediate, returns its unpacked value, or None