diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index c55d0310af..b7d22deb7e 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -366,7 +366,7 @@ ;; The generated code sequence is described in the emit's function match ;; arm for this instruction. ;; - ;; See comment in lowering about the temporaries signedness. + ;; See comment on jmp_table_seq below about the temporaries signedness. (JmpTableSeq (idx Reg) (tmp1 WritableReg) (tmp2 WritableReg) @@ -517,6 +517,10 @@ (type MachLabelSlice extern (enum)) +;; The size of the jump table. +(decl jump_table_size (BoxVecMachLabel) u32) +(extern constructor jump_table_size jump_table_size) + ;; Extract a the target from a MachLabelSlice with exactly one target. (decl single_target (MachLabel) MachLabelSlice) (extern extractor single_target single_target) @@ -525,6 +529,10 @@ (decl two_targets (MachLabel MachLabel) MachLabelSlice) (extern extractor two_targets two_targets) +;; Extract the default target and jump table from a MachLabelSlice. +(decl jump_table_targets (MachLabel BoxVecMachLabel) MachLabelSlice) +(extern extractor jump_table_targets jump_table_targets) + ;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits. (decl operand_size_of_type_32_64 (Type) OperandSize) (extern constructor operand_size_of_type_32_64 operand_size_of_type_32_64) @@ -3094,6 +3102,45 @@ (jmp_if cc1 taken) (jmp_cond cc2 taken not_taken)))) +;; Emit the compound instruction that does: +;; +;; lea $jt, %rA +;; movsbl [%rA, %rIndex, 2], %rB +;; add %rB, %rA +;; j *%rA +;; [jt entries] +;; +;; This must be *one* instruction in the vcode because we cannot allow regalloc +;; to insert any spills/fills in the middle of the sequence; otherwise, the +;; lea PC-rel offset to the jumptable would be incorrect. (The alternative +;; is to introduce a relocation pass for inlined jumptables, which is much +;; worse.) +(decl jmp_table_seq (Type Gpr MachLabel BoxVecMachLabel) SideEffectNoResult) +(rule (jmp_table_seq ty idx default_target jt_targets) + (let (;; This temporary is used as a signed integer of 64-bits (to hold + ;; addresses). + (tmp1 WritableGpr (temp_writable_gpr)) + + ;; Put a zero in tmp1. This is needed for Spectre mitigations (a + ;; CMOV that zeroes the index on misspeculation). + (_ Unit (emit (MInst.Imm (OperandSize.Size32) 0 tmp1))) + + ;; This temporary is used as a signed integer of 32-bits (for the + ;; wasm-table index) and then 64-bits (address addend). The small + ;; lie about the I64 type is benign, since the temporary is dead + ;; after this instruction (and its Cranelift type is thus unused). + (tmp2 WritableGpr (temp_writable_gpr)) + + (size OperandSize (raw_operand_size_of_type ty)) + + (jt_size u32 (jump_table_size jt_targets))) + + (with_flags_side_effect + (x64_cmp size (RegMemImm.Imm jt_size) idx) + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets))))) + + ;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC)))) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index cfd0ab619f..422f62b5bf 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2940,3 +2940,8 @@ (rule (lower_branch (br_icmp cc a b _ _) (two_targets taken not_taken)) (side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken))) + +;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower_branch (br_table idx @ (value_type ty) _ _) (jump_table_targets default_target jt_targets)) + (side_effect (jmp_table_seq ty idx default_target jt_targets))) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 943fa1e16c..225a62c081 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -16,7 +16,6 @@ use crate::machinst::lower::*; use crate::machinst::*; use crate::result::CodegenResult; use crate::settings::{Flags, TlsModel}; -use alloc::boxed::Box; use smallvec::SmallVec; use std::convert::TryFrom; use target_lexicon::Triple; @@ -171,6 +170,7 @@ fn input_to_reg_mem>(ctx: &mut C, spec: InsnInput) -> RegM /// An extension specification for `extend_input_to_reg`. #[derive(Clone, Copy)] enum ExtSpec { + #[allow(dead_code)] ZeroExtendTo32, ZeroExtendTo64, SignExtendTo32, @@ -2730,6 +2730,10 @@ impl LowerBackend for X64Backend { // trap. These conditions are verified by `is_ebb_basic()` during the // verifier pass. assert!(branches.len() <= 2); + if branches.len() == 2 { + let op1 = ctx.data(branches[1]).opcode(); + assert!(op1 == Opcode::Jump); + } if let Ok(()) = isle::lower_branch( ctx, @@ -2742,96 +2746,10 @@ impl LowerBackend for X64Backend { return Ok(()); } - let implemented_in_isle = |ctx: &mut C| { - unreachable!( - "branch implemented in ISLE: inst = `{}`", - ctx.dfg().display_inst(branches[0]) - ) - }; - - if branches.len() == 2 { - implemented_in_isle(ctx) - } else { - assert_eq!(branches.len(), 1); - - // Must be an unconditional branch or trap. - let op = ctx.data(branches[0]).opcode(); - match op { - Opcode::Jump => implemented_in_isle(ctx), - - Opcode::BrTable => { - let jt_size = targets.len() - 1; - assert!(jt_size <= u32::MAX as usize); - let jt_size = jt_size as u32; - - let ty = ctx.input_ty(branches[0], 0); - let idx = extend_input_to_reg( - ctx, - InsnInput { - insn: branches[0], - input: 0, - }, - ExtSpec::ZeroExtendTo32, - ); - - // Emit the compound instruction that does: - // - // lea $jt, %rA - // movsbl [%rA, %rIndex, 2], %rB - // add %rB, %rA - // j *%rA - // [jt entries] - // - // This must be *one* instruction in the vcode because we cannot allow regalloc - // to insert any spills/fills in the middle of the sequence; otherwise, the - // lea PC-rel offset to the jumptable would be incorrect. (The alternative - // is to introduce a relocation pass for inlined jumptables, which is much - // worse.) - - // This temporary is used as a signed integer of 64-bits (to hold addresses). - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - // This temporary is used as a signed integer of 32-bits (for the wasm-table - // index) and then 64-bits (address addend). The small lie about the I64 type - // is benign, since the temporary is dead after this instruction (and its - // Cranelift type is thus unused). - let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - - // Put a zero in tmp1. This is needed for Spectre - // mitigations (a CMOV that zeroes the index on - // misspeculation). - let inst = Inst::imm(OperandSize::Size64, 0, tmp1); - ctx.emit(inst); - - // Bounds-check (compute flags from idx - jt_size) - // and branch to default. We only support - // u32::MAX entries, but we compare the full 64 - // bit register when doing the bounds check. - let cmp_size = if ty == types::I64 { - OperandSize::Size64 - } else { - OperandSize::Size32 - }; - ctx.emit(Inst::cmp_rmi_r(cmp_size, RegMemImm::imm(jt_size), idx)); - - let default_target = targets[0]; - - let jt_targets: Box> = - Box::new(targets.iter().skip(1).cloned().collect()); - - ctx.emit(Inst::JmpTableSeq { - idx, - tmp1, - tmp2, - default_target, - targets: jt_targets, - }); - } - - _ => panic!("Unknown branch type {:?}", op), - } - } - - Ok(()) + unreachable!( + "implemented in ISLE: branch = `{}`", + ctx.dfg().display_inst(branches[0]), + ); } fn maybe_pinned_reg(&self) -> Option { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index f234717a20..be1b51e425 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -746,6 +746,25 @@ where None } } + + #[inline] + fn jump_table_targets( + &mut self, + targets: &MachLabelSlice, + ) -> Option<(MachLabel, BoxVecMachLabel)> { + if targets.is_empty() { + return None; + } + + let default_label = targets[0]; + let jt_targets = Box::new(SmallVec::from(&targets[1..])); + Some((default_label, jt_targets)) + } + + #[inline] + fn jump_table_size(&mut self, targets: &BoxVecMachLabel) -> u32 { + targets.len() as u32 + } } impl IsleContext<'_, C, Flags, IsaFlags, 6> diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 231dfdca0d..4b4a587b6b 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -185,3 +185,41 @@ block2: ; movq %rbp, %rsp ; popq %rbp ; ret + + +function %f5(i32) -> b1 { + jt0 = jump_table [block1, block2] + +block0(v0: i32): + br_table v0, block1, jt0 + +block1: + v1 = bconst.b1 true + return v1 + +block2: + v2 = bconst.b1 false + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $0, %r8d +; cmpl $2, %edi +; br_table %rdi +; block1: +; jmp label3 +; block2: +; jmp label3 +; block3: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block4: +; xorl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +