From 9e9e0431749861a9e0258b38b927bf8a669d81d9 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 25 Jul 2022 14:28:52 -0700 Subject: [PATCH] x64: Migrate the return and fallthrough_return lowerings to ISLE (#4518) https://github.com/bytecodealliance/wasmtime/pull/4518 --- cranelift/codegen/src/isa/x64/inst.isle | 26 +++++++++++++++++++++ cranelift/codegen/src/isa/x64/lower.isle | 16 +++++++++++++ cranelift/codegen/src/isa/x64/lower.rs | 23 +++--------------- cranelift/codegen/src/isa/x64/lower/isle.rs | 8 ++++++- cranelift/codegen/src/machinst/isle.rs | 17 ++++++++++++++ cranelift/codegen/src/prelude.isle | 16 +++++++++++++ 6 files changed, 85 insertions(+), 21 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 94f4e0e752..58396b569a 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1486,8 +1486,34 @@ r (OperandSize.Size32))) + ;;;; Helpers for Emitting Loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Generate a move between two registers. +(decl gen_move (Type WritableReg Reg) MInst) +(extern constructor gen_move gen_move) + +;; Copy a return value to a set of registers. +(decl copy_to_regs (WritableValueRegs Value) Unit) +(rule (copy_to_regs dsts val @ (value_type ty)) + (let ((srcs ValueRegs (put_in_regs val))) + (copy_to_regs_range ty (value_regs_range srcs) dsts srcs))) + +;; Helper for `copy_to_regs` that uses a range to index into the reg/value +;; vectors. Fails for the empty range. +(decl copy_to_regs_range (Type Range WritableValueRegs ValueRegs) Unit) + +(rule (copy_to_regs_range ty (range_singleton idx) dsts srcs) + (let ((dst WritableReg (writable_regs_get dsts idx)) + (src Reg (value_regs_get srcs idx))) + (emit (gen_move ty dst src)))) + +(rule (copy_to_regs_range ty (range_unwrap head tail) dsts srcs) + (let ((dst WritableReg (writable_regs_get dsts head)) + (src Reg (value_regs_get srcs head)) + (_ Unit (emit (gen_move ty dst src)))) + (copy_to_regs_range ty tail dsts srcs))) + ;; Helper for constructing a LoadExtName instruction. (decl load_ext_name (ExternalName i64) Reg) (rule (load_ext_name extname offset) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index d76dae5cd9..4c388c533c 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1443,6 +1443,22 @@ (rule (lower (resumable_trap code)) (side_effect (x64_ud2 code))) +;;;; Rules for `return` and `fallthrough_return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; N.B.: the Ret itself is generated by the ABI. +(rule (lower (return args)) + (lower_return (range 0 (value_slice_len args)) args)) + +(rule (lower (fallthrough_return args)) + (lower_return (range 0 (value_slice_len args)) args)) + +(decl lower_return (Range ValueSlice) InstOutput) +(rule (lower_return (range_empty) _) (output_none)) +(rule (lower_return (range_unwrap head tail) args) + (let ((_ Unit (copy_to_regs (retval head) (value_slice_get args head)))) + (lower_return tail args))) + + ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index fc31196042..8caba83e9e 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -923,29 +923,12 @@ fn lower_insn_to_regs>( | Opcode::AtomicStore | Opcode::Fence | Opcode::FuncAddr - | Opcode::SymbolValue => { + | Opcode::SymbolValue + | Opcode::FallthroughReturn + | Opcode::Return => { implemented_in_isle(ctx); } - Opcode::FallthroughReturn | Opcode::Return => { - for i in 0..ctx.num_inputs(insn) { - let src_reg = put_input_in_regs(ctx, inputs[i]); - let retval_reg = ctx.retval(i); - let ty = ctx.input_ty(insn, i); - assert!(src_reg.len() == retval_reg.len()); - let (_, tys) = Inst::rc_for_type(ty)?; - for ((&src, &dst), &ty) in src_reg - .regs() - .iter() - .zip(retval_reg.regs().iter()) - .zip(tys.iter()) - { - ctx.emit(Inst::gen_move(dst, src, ty)); - } - } - // N.B.: the Ret itself is generated by the ABI. - } - Opcode::Call | Opcode::CallIndirect => { let caller_conv = ctx.abi().call_conv(); let (mut abi, inputs) = match op { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 9b068b1eba..9166e7b90c 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -26,7 +26,8 @@ use crate::{ }, }, machinst::{ - isle::*, InsnInput, InsnOutput, LowerCtx, MachAtomicRmwOp, VCodeConstant, VCodeConstantData, + isle::*, InsnInput, InsnOutput, LowerCtx, MachAtomicRmwOp, MachInst, VCodeConstant, + VCodeConstantData, }, }; use std::boxed::Box; @@ -573,6 +574,11 @@ where fn atomic_rmw_op_to_mach_atomic_rmw_op(&mut self, op: &AtomicRmwOp) -> MachAtomicRmwOp { MachAtomicRmwOp::from(*op) } + + #[inline] + fn gen_move(&mut self, ty: Type, dst: WritableReg, src: Reg) -> MInst { + MInst::gen_move(dst, src, ty) + } } // Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 1ca00533c6..dc09b8c7c1 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -163,6 +163,11 @@ macro_rules! isle_prelude_methods { regs.regs()[i] } + #[inline] + fn value_regs_len(&mut self, regs: ValueRegs) -> usize { + regs.regs().len() + } + #[inline] fn u8_as_u32(&mut self, x: u8) -> Option { Some(x.into()) @@ -736,6 +741,14 @@ macro_rules! isle_prelude_methods { } } + fn range_singleton(&mut self, r: Range) -> Option { + if r.0 + 1 == r.1 { + Some(r.0) + } else { + None + } + } + fn range_unwrap(&mut self, r: Range) -> Option<(usize, Range)> { if r.0 < r.1 { Some((r.0, (r.0 + 1, r.1))) @@ -752,6 +765,10 @@ macro_rules! isle_prelude_methods { regs.only_reg() } + fn writable_regs_get(&mut self, regs: WritableValueRegs, idx: usize) -> WritableReg { + regs.regs()[idx] + } + fn abi_copy_to_arg_order(&mut self, abi: &ABISig, idx: usize) -> usize { abi.copy_to_arg_order(idx) } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index a18828ca38..1998c31925 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -171,6 +171,14 @@ (decl value_regs_get (ValueRegs usize) Reg) (extern constructor value_regs_get value_regs_get) +;; Get the number of registers in a `ValueRegs`. +(decl value_regs_len (ValueRegs) usize) +(extern constructor value_regs_len value_regs_len) + +;; Get a range for the number of regs in a `ValueRegs`. +(decl value_regs_range (ValueRegs) Range) +(rule (value_regs_range regs) (range 0 (value_regs_len regs))) + ;; Put the value into one or more registers and return the first register. ;; ;; Unlike `put_in_reg`, this does not assert that the value fits in a single @@ -707,6 +715,10 @@ (decl range_empty () Range) (extern extractor range_empty range_empty) +;; Extractor to test whether a range has a single element in it +(decl range_singleton (usize) Range) +(extern extractor range_singleton range_singleton) + ;; Extractor to return the first value in the range, and a sub-range ;; containing the remaining values. (decl range_unwrap (usize Range) Range) @@ -723,6 +735,10 @@ (decl only_writable_reg (WritableReg) WritableValueRegs) (extern extractor only_writable_reg only_writable_reg) +;; Get the `n`th register inside a `WritableValueRegs`. +(decl writable_regs_get (WritableValueRegs usize) WritableReg) +(extern constructor writable_regs_get writable_regs_get) + ;;;; Helpers for generating calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Type to hold information about a function call signature.