x64: Migrate br_table to ISLE (#4615)
https://github.com/bytecodealliance/wasmtime/pull/4615
This commit is contained in:
@@ -366,7 +366,7 @@
|
||||
;; The generated code sequence is described in the emit's function match
|
||||
;; arm for this instruction.
|
||||
;;
|
||||
;; See comment in lowering about the temporaries signedness.
|
||||
;; See comment on jmp_table_seq below about the temporaries signedness.
|
||||
(JmpTableSeq (idx Reg)
|
||||
(tmp1 WritableReg)
|
||||
(tmp2 WritableReg)
|
||||
@@ -517,6 +517,10 @@
|
||||
|
||||
(type MachLabelSlice extern (enum))
|
||||
|
||||
;; The size of the jump table.
|
||||
(decl jump_table_size (BoxVecMachLabel) u32)
|
||||
(extern constructor jump_table_size jump_table_size)
|
||||
|
||||
;; Extract a the target from a MachLabelSlice with exactly one target.
|
||||
(decl single_target (MachLabel) MachLabelSlice)
|
||||
(extern extractor single_target single_target)
|
||||
@@ -525,6 +529,10 @@
|
||||
(decl two_targets (MachLabel MachLabel) MachLabelSlice)
|
||||
(extern extractor two_targets two_targets)
|
||||
|
||||
;; Extract the default target and jump table from a MachLabelSlice.
|
||||
(decl jump_table_targets (MachLabel BoxVecMachLabel) MachLabelSlice)
|
||||
(extern extractor jump_table_targets jump_table_targets)
|
||||
|
||||
;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
|
||||
(decl operand_size_of_type_32_64 (Type) OperandSize)
|
||||
(extern constructor operand_size_of_type_32_64 operand_size_of_type_32_64)
|
||||
@@ -3094,6 +3102,45 @@
|
||||
(jmp_if cc1 taken)
|
||||
(jmp_cond cc2 taken not_taken))))
|
||||
|
||||
;; Emit the compound instruction that does:
|
||||
;;
|
||||
;; lea $jt, %rA
|
||||
;; movsbl [%rA, %rIndex, 2], %rB
|
||||
;; add %rB, %rA
|
||||
;; j *%rA
|
||||
;; [jt entries]
|
||||
;;
|
||||
;; This must be *one* instruction in the vcode because we cannot allow regalloc
|
||||
;; to insert any spills/fills in the middle of the sequence; otherwise, the
|
||||
;; lea PC-rel offset to the jumptable would be incorrect. (The alternative
|
||||
;; is to introduce a relocation pass for inlined jumptables, which is much
|
||||
;; worse.)
|
||||
(decl jmp_table_seq (Type Gpr MachLabel BoxVecMachLabel) SideEffectNoResult)
|
||||
(rule (jmp_table_seq ty idx default_target jt_targets)
|
||||
(let (;; This temporary is used as a signed integer of 64-bits (to hold
|
||||
;; addresses).
|
||||
(tmp1 WritableGpr (temp_writable_gpr))
|
||||
|
||||
;; Put a zero in tmp1. This is needed for Spectre mitigations (a
|
||||
;; CMOV that zeroes the index on misspeculation).
|
||||
(_ Unit (emit (MInst.Imm (OperandSize.Size32) 0 tmp1)))
|
||||
|
||||
;; This temporary is used as a signed integer of 32-bits (for the
|
||||
;; wasm-table index) and then 64-bits (address addend). The small
|
||||
;; lie about the I64 type is benign, since the temporary is dead
|
||||
;; after this instruction (and its Cranelift type is thus unused).
|
||||
(tmp2 WritableGpr (temp_writable_gpr))
|
||||
|
||||
(size OperandSize (raw_operand_size_of_type ty))
|
||||
|
||||
(jt_size u32 (jump_table_size jt_targets)))
|
||||
|
||||
(with_flags_side_effect
|
||||
(x64_cmp size (RegMemImm.Imm jt_size) idx)
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect
|
||||
(MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets)))))
|
||||
|
||||
|
||||
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
|
||||
|
||||
@@ -2940,3 +2940,8 @@
|
||||
|
||||
(rule (lower_branch (br_icmp cc a b _ _) (two_targets taken not_taken))
|
||||
(side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken)))
|
||||
|
||||
;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower_branch (br_table idx @ (value_type ty) _ _) (jump_table_targets default_target jt_targets))
|
||||
(side_effect (jmp_table_seq ty idx default_target jt_targets)))
|
||||
|
||||
@@ -16,7 +16,6 @@ use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{Flags, TlsModel};
|
||||
use alloc::boxed::Box;
|
||||
use smallvec::SmallVec;
|
||||
use std::convert::TryFrom;
|
||||
use target_lexicon::Triple;
|
||||
@@ -171,6 +170,7 @@ fn input_to_reg_mem<C: LowerCtx<I = Inst>>(ctx: &mut C, spec: InsnInput) -> RegM
|
||||
/// An extension specification for `extend_input_to_reg`.
|
||||
#[derive(Clone, Copy)]
|
||||
enum ExtSpec {
|
||||
#[allow(dead_code)]
|
||||
ZeroExtendTo32,
|
||||
ZeroExtendTo64,
|
||||
SignExtendTo32,
|
||||
@@ -2730,6 +2730,10 @@ impl LowerBackend for X64Backend {
|
||||
// trap. These conditions are verified by `is_ebb_basic()` during the
|
||||
// verifier pass.
|
||||
assert!(branches.len() <= 2);
|
||||
if branches.len() == 2 {
|
||||
let op1 = ctx.data(branches[1]).opcode();
|
||||
assert!(op1 == Opcode::Jump);
|
||||
}
|
||||
|
||||
if let Ok(()) = isle::lower_branch(
|
||||
ctx,
|
||||
@@ -2742,96 +2746,10 @@ impl LowerBackend for X64Backend {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let implemented_in_isle = |ctx: &mut C| {
|
||||
unreachable!(
|
||||
"branch implemented in ISLE: inst = `{}`",
|
||||
ctx.dfg().display_inst(branches[0])
|
||||
)
|
||||
};
|
||||
|
||||
if branches.len() == 2 {
|
||||
implemented_in_isle(ctx)
|
||||
} else {
|
||||
assert_eq!(branches.len(), 1);
|
||||
|
||||
// Must be an unconditional branch or trap.
|
||||
let op = ctx.data(branches[0]).opcode();
|
||||
match op {
|
||||
Opcode::Jump => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::BrTable => {
|
||||
let jt_size = targets.len() - 1;
|
||||
assert!(jt_size <= u32::MAX as usize);
|
||||
let jt_size = jt_size as u32;
|
||||
|
||||
let ty = ctx.input_ty(branches[0], 0);
|
||||
let idx = extend_input_to_reg(
|
||||
ctx,
|
||||
InsnInput {
|
||||
insn: branches[0],
|
||||
input: 0,
|
||||
},
|
||||
ExtSpec::ZeroExtendTo32,
|
||||
"implemented in ISLE: branch = `{}`",
|
||||
ctx.dfg().display_inst(branches[0]),
|
||||
);
|
||||
|
||||
// Emit the compound instruction that does:
|
||||
//
|
||||
// lea $jt, %rA
|
||||
// movsbl [%rA, %rIndex, 2], %rB
|
||||
// add %rB, %rA
|
||||
// j *%rA
|
||||
// [jt entries]
|
||||
//
|
||||
// This must be *one* instruction in the vcode because we cannot allow regalloc
|
||||
// to insert any spills/fills in the middle of the sequence; otherwise, the
|
||||
// lea PC-rel offset to the jumptable would be incorrect. (The alternative
|
||||
// is to introduce a relocation pass for inlined jumptables, which is much
|
||||
// worse.)
|
||||
|
||||
// This temporary is used as a signed integer of 64-bits (to hold addresses).
|
||||
let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
||||
// This temporary is used as a signed integer of 32-bits (for the wasm-table
|
||||
// index) and then 64-bits (address addend). The small lie about the I64 type
|
||||
// is benign, since the temporary is dead after this instruction (and its
|
||||
// Cranelift type is thus unused).
|
||||
let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
||||
|
||||
// Put a zero in tmp1. This is needed for Spectre
|
||||
// mitigations (a CMOV that zeroes the index on
|
||||
// misspeculation).
|
||||
let inst = Inst::imm(OperandSize::Size64, 0, tmp1);
|
||||
ctx.emit(inst);
|
||||
|
||||
// Bounds-check (compute flags from idx - jt_size)
|
||||
// and branch to default. We only support
|
||||
// u32::MAX entries, but we compare the full 64
|
||||
// bit register when doing the bounds check.
|
||||
let cmp_size = if ty == types::I64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
};
|
||||
ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx));
|
||||
|
||||
let default_target = targets[0];
|
||||
|
||||
let jt_targets: Box<SmallVec<[MachLabel; 4]>> =
|
||||
Box::new(targets.iter().skip(1).cloned().collect());
|
||||
|
||||
ctx.emit(Inst::JmpTableSeq {
|
||||
idx,
|
||||
tmp1,
|
||||
tmp2,
|
||||
default_target,
|
||||
targets: jt_targets,
|
||||
});
|
||||
}
|
||||
|
||||
_ => panic!("Unknown branch type {:?}", op),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn maybe_pinned_reg(&self) -> Option<Reg> {
|
||||
|
||||
@@ -746,6 +746,25 @@ where
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn jump_table_targets(
|
||||
&mut self,
|
||||
targets: &MachLabelSlice,
|
||||
) -> Option<(MachLabel, BoxVecMachLabel)> {
|
||||
if targets.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let default_label = targets[0];
|
||||
let jt_targets = Box::new(SmallVec::from(&targets[1..]));
|
||||
Some((default_label, jt_targets))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 {
|
||||
targets.len() as u32
|
||||
}
|
||||
}
|
||||
|
||||
impl<C> IsleContext<'_, C, Flags, IsaFlags, 6>
|
||||
|
||||
@@ -185,3 +185,41 @@ block2:
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
|
||||
function %f5(i32) -> b1 {
|
||||
jt0 = jump_table [block1, block2]
|
||||
|
||||
block0(v0: i32):
|
||||
br_table v0, block1, jt0
|
||||
|
||||
block1:
|
||||
v1 = bconst.b1 true
|
||||
return v1
|
||||
|
||||
block2:
|
||||
v2 = bconst.b1 false
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; movl $0, %r8d
|
||||
; cmpl $2, %edi
|
||||
; br_table %rdi
|
||||
; block1:
|
||||
; jmp label3
|
||||
; block2:
|
||||
; jmp label3
|
||||
; block3:
|
||||
; movl $1, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
; block4:
|
||||
; xorl %eax, %eax, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
|
||||
Reference in New Issue
Block a user