x64: Migrate br_table to ISLE (#4615)
https://github.com/bytecodealliance/wasmtime/pull/4615
This commit is contained in:
@@ -366,7 +366,7 @@
|
|||||||
;; The generated code sequence is described in the emit function's match
|
;; The generated code sequence is described in the emit function's match
|
||||||
;; arm for this instruction.
|
;; arm for this instruction.
|
||||||
;;
|
;;
|
||||||
;; See comment in lowering about the temporaries' signedness.
|
;; See comment on jmp_table_seq below about the temporaries' signedness.
|
||||||
(JmpTableSeq (idx Reg)
|
(JmpTableSeq (idx Reg)
|
||||||
(tmp1 WritableReg)
|
(tmp1 WritableReg)
|
||||||
(tmp2 WritableReg)
|
(tmp2 WritableReg)
|
||||||
@@ -517,6 +517,10 @@
|
|||||||
|
|
||||||
(type MachLabelSlice extern (enum))
|
(type MachLabelSlice extern (enum))
|
||||||
|
|
||||||
|
;; The size of the jump table.
|
||||||
|
(decl jump_table_size (BoxVecMachLabel) u32)
|
||||||
|
(extern constructor jump_table_size jump_table_size)
|
||||||
|
|
||||||
;; Extract the target from a MachLabelSlice with exactly one target.
|
;; Extract the target from a MachLabelSlice with exactly one target.
|
||||||
(decl single_target (MachLabel) MachLabelSlice)
|
(decl single_target (MachLabel) MachLabelSlice)
|
||||||
(extern extractor single_target single_target)
|
(extern extractor single_target single_target)
|
||||||
@@ -525,6 +529,10 @@
|
|||||||
(decl two_targets (MachLabel MachLabel) MachLabelSlice)
|
(decl two_targets (MachLabel MachLabel) MachLabelSlice)
|
||||||
(extern extractor two_targets two_targets)
|
(extern extractor two_targets two_targets)
|
||||||
|
|
||||||
|
;; Extract the default target and jump table from a MachLabelSlice.
|
||||||
|
(decl jump_table_targets (MachLabel BoxVecMachLabel) MachLabelSlice)
|
||||||
|
(extern extractor jump_table_targets jump_table_targets)
|
||||||
|
|
||||||
;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
|
;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
|
||||||
(decl operand_size_of_type_32_64 (Type) OperandSize)
|
(decl operand_size_of_type_32_64 (Type) OperandSize)
|
||||||
(extern constructor operand_size_of_type_32_64 operand_size_of_type_32_64)
|
(extern constructor operand_size_of_type_32_64 operand_size_of_type_32_64)
|
||||||
@@ -3094,6 +3102,45 @@
|
|||||||
(jmp_if cc1 taken)
|
(jmp_if cc1 taken)
|
||||||
(jmp_cond cc2 taken not_taken))))
|
(jmp_cond cc2 taken not_taken))))
|
||||||
|
|
||||||
|
;; Emit the compound instruction that does:
|
||||||
|
;;
|
||||||
|
;; lea $jt, %rA
|
||||||
|
;; movsbl [%rA, %rIndex, 2], %rB
|
||||||
|
;; add %rB, %rA
|
||||||
|
;; j *%rA
|
||||||
|
;; [jt entries]
|
||||||
|
;;
|
||||||
|
;; This must be *one* instruction in the vcode because we cannot allow regalloc
|
||||||
|
;; to insert any spills/fills in the middle of the sequence; otherwise, the
|
||||||
|
;; lea PC-rel offset to the jumptable would be incorrect. (The alternative
|
||||||
|
;; is to introduce a relocation pass for inlined jumptables, which is much
|
||||||
|
;; worse.)
|
||||||
|
(decl jmp_table_seq (Type Gpr MachLabel BoxVecMachLabel) SideEffectNoResult)
|
||||||
|
(rule (jmp_table_seq ty idx default_target jt_targets)
|
||||||
|
(let (;; This temporary is used as a signed integer of 64-bits (to hold
|
||||||
|
;; addresses).
|
||||||
|
(tmp1 WritableGpr (temp_writable_gpr))
|
||||||
|
|
||||||
|
;; Put a zero in tmp1. This is needed for Spectre mitigations (a
|
||||||
|
;; CMOV that zeroes the index on misspeculation).
|
||||||
|
(_ Unit (emit (MInst.Imm (OperandSize.Size32) 0 tmp1)))
|
||||||
|
|
||||||
|
;; This temporary is used as a signed integer of 32-bits (for the
|
||||||
|
;; wasm-table index) and then 64-bits (address addend). The small
|
||||||
|
;; lie about the I64 type is benign, since the temporary is dead
|
||||||
|
;; after this instruction (and its Cranelift type is thus unused).
|
||||||
|
(tmp2 WritableGpr (temp_writable_gpr))
|
||||||
|
|
||||||
|
(size OperandSize (raw_operand_size_of_type ty))
|
||||||
|
|
||||||
|
(jt_size u32 (jump_table_size jt_targets)))
|
||||||
|
|
||||||
|
(with_flags_side_effect
|
||||||
|
(x64_cmp size (RegMemImm.Imm jt_size) idx)
|
||||||
|
(ConsumesFlags.ConsumesFlagsSideEffect
|
||||||
|
(MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets)))))
|
||||||
|
|
||||||
|
|
||||||
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
|
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
|
||||||
|
|||||||
@@ -2940,3 +2940,8 @@
|
|||||||
|
|
||||||
(rule (lower_branch (br_icmp cc a b _ _) (two_targets taken not_taken))
|
(rule (lower_branch (br_icmp cc a b _ _) (two_targets taken not_taken))
|
||||||
(side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken)))
|
(side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken)))
|
||||||
|
|
||||||
|
;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower_branch (br_table idx @ (value_type ty) _ _) (jump_table_targets default_target jt_targets))
|
||||||
|
(side_effect (jmp_table_seq ty idx default_target jt_targets)))
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ use crate::machinst::lower::*;
|
|||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::result::CodegenResult;
|
use crate::result::CodegenResult;
|
||||||
use crate::settings::{Flags, TlsModel};
|
use crate::settings::{Flags, TlsModel};
|
||||||
use alloc::boxed::Box;
|
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
use target_lexicon::Triple;
|
use target_lexicon::Triple;
|
||||||
@@ -171,6 +170,7 @@ fn input_to_reg_mem<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegM
|
|||||||
/// An extension specification for `extend_input_to_reg`.
|
/// An extension specification for `extend_input_to_reg`.
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
enum ExtSpec {
|
enum ExtSpec {
|
||||||
|
#[allow(dead_code)]
|
||||||
ZeroExtendTo32,
|
ZeroExtendTo32,
|
||||||
ZeroExtendTo64,
|
ZeroExtendTo64,
|
||||||
SignExtendTo32,
|
SignExtendTo32,
|
||||||
@@ -2730,6 +2730,10 @@ impl LowerBackend for X64Backend {
|
|||||||
// trap. These conditions are verified by `is_ebb_basic()` during the
|
// trap. These conditions are verified by `is_ebb_basic()` during the
|
||||||
// verifier pass.
|
// verifier pass.
|
||||||
assert!(branches.len() <= 2);
|
assert!(branches.len() <= 2);
|
||||||
|
if branches.len() == 2 {
|
||||||
|
let op1 = ctx.data(branches[1]).opcode();
|
||||||
|
assert!(op1 == Opcode::Jump);
|
||||||
|
}
|
||||||
|
|
||||||
if let Ok(()) = isle::lower_branch(
|
if let Ok(()) = isle::lower_branch(
|
||||||
ctx,
|
ctx,
|
||||||
@@ -2742,96 +2746,10 @@ impl LowerBackend for X64Backend {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let implemented_in_isle = |ctx: &mut C| {
|
unreachable!(
|
||||||
unreachable!(
|
"implemented in ISLE: branch = `{}`",
|
||||||
"branch implemented in ISLE: inst = `{}`",
|
ctx.dfg().display_inst(branches[0]),
|
||||||
ctx.dfg().display_inst(branches[0])
|
);
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
if branches.len() == 2 {
|
|
||||||
implemented_in_isle(ctx)
|
|
||||||
} else {
|
|
||||||
assert_eq!(branches.len(), 1);
|
|
||||||
|
|
||||||
// Must be an unconditional branch or trap.
|
|
||||||
let op = ctx.data(branches[0]).opcode();
|
|
||||||
match op {
|
|
||||||
Opcode::Jump => implemented_in_isle(ctx),
|
|
||||||
|
|
||||||
Opcode::BrTable => {
|
|
||||||
let jt_size = targets.len() - 1;
|
|
||||||
assert!(jt_size <= u32::MAX as usize);
|
|
||||||
let jt_size = jt_size as u32;
|
|
||||||
|
|
||||||
let ty = ctx.input_ty(branches[0], 0);
|
|
||||||
let idx = extend_input_to_reg(
|
|
||||||
ctx,
|
|
||||||
InsnInput {
|
|
||||||
insn: branches[0],
|
|
||||||
input: 0,
|
|
||||||
},
|
|
||||||
ExtSpec::ZeroExtendTo32,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Emit the compound instruction that does:
|
|
||||||
//
|
|
||||||
// lea $jt, %rA
|
|
||||||
// movsbl [%rA, %rIndex, 2], %rB
|
|
||||||
// add %rB, %rA
|
|
||||||
// j *%rA
|
|
||||||
// [jt entries]
|
|
||||||
//
|
|
||||||
// This must be *one* instruction in the vcode because we cannot allow regalloc
|
|
||||||
// to insert any spills/fills in the middle of the sequence; otherwise, the
|
|
||||||
// lea PC-rel offset to the jumptable would be incorrect. (The alternative
|
|
||||||
// is to introduce a relocation pass for inlined jumptables, which is much
|
|
||||||
// worse.)
|
|
||||||
|
|
||||||
// This temporary is used as a signed integer of 64-bits (to hold addresses).
|
|
||||||
let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
|
||||||
// This temporary is used as a signed integer of 32-bits (for the wasm-table
|
|
||||||
// index) and then 64-bits (address addend). The small lie about the I64 type
|
|
||||||
// is benign, since the temporary is dead after this instruction (and its
|
|
||||||
// Cranelift type is thus unused).
|
|
||||||
let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
|
||||||
|
|
||||||
// Put a zero in tmp1. This is needed for Spectre
|
|
||||||
// mitigations (a CMOV that zeroes the index on
|
|
||||||
// misspeculation).
|
|
||||||
let inst = Inst::imm(OperandSize::Size64, 0, tmp1);
|
|
||||||
ctx.emit(inst);
|
|
||||||
|
|
||||||
// Bounds-check (compute flags from idx - jt_size)
|
|
||||||
// and branch to default. We only support
|
|
||||||
// u32::MAX entries, but we compare the full 64
|
|
||||||
// bit register when doing the bounds check.
|
|
||||||
let cmp_size = if ty == types::I64 {
|
|
||||||
OperandSize::Size64
|
|
||||||
} else {
|
|
||||||
OperandSize::Size32
|
|
||||||
};
|
|
||||||
ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx));
|
|
||||||
|
|
||||||
let default_target = targets[0];
|
|
||||||
|
|
||||||
let jt_targets: Box<SmallVec<[MachLabel; 4]>> =
|
|
||||||
Box::new(targets.iter().skip(1).cloned().collect());
|
|
||||||
|
|
||||||
ctx.emit(Inst::JmpTableSeq {
|
|
||||||
idx,
|
|
||||||
tmp1,
|
|
||||||
tmp2,
|
|
||||||
default_target,
|
|
||||||
targets: jt_targets,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => panic!("Unknown branch type {:?}", op),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||||
|
|||||||
@@ -746,6 +746,25 @@ where
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn jump_table_targets(
|
||||||
|
&mut self,
|
||||||
|
targets: &MachLabelSlice,
|
||||||
|
) -> Option<(MachLabel, BoxVecMachLabel)> {
|
||||||
|
if targets.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let default_label = targets[0];
|
||||||
|
let jt_targets = Box::new(SmallVec::from(&targets[1..]));
|
||||||
|
Some((default_label, jt_targets))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 {
|
||||||
|
targets.len() as u32
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C> IsleContext<'_, C, Flags, IsaFlags, 6>
|
impl<C> IsleContext<'_, C, Flags, IsaFlags, 6>
|
||||||
|
|||||||
@@ -185,3 +185,41 @@ block2:
|
|||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
|
|
||||||
|
function %f5(i32) -> b1 {
|
||||||
|
jt0 = jump_table [block1, block2]
|
||||||
|
|
||||||
|
block0(v0: i32):
|
||||||
|
br_table v0, block1, jt0
|
||||||
|
|
||||||
|
block1:
|
||||||
|
v1 = bconst.b1 true
|
||||||
|
return v1
|
||||||
|
|
||||||
|
block2:
|
||||||
|
v2 = bconst.b1 false
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; movl $0, %r8d
|
||||||
|
; cmpl $2, %edi
|
||||||
|
; br_table %rdi
|
||||||
|
; block1:
|
||||||
|
; jmp label3
|
||||||
|
; block2:
|
||||||
|
; jmp label3
|
||||||
|
; block3:
|
||||||
|
; movl $1, %eax
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
; block4:
|
||||||
|
; xorl %eax, %eax, %eax
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user