diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 405caffac6..724c1529f7 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1196,6 +1196,10 @@ (decl temp_writable_xmm () WritableXmm) (extern constructor temp_writable_xmm temp_writable_xmm) +;; Fetch the special pinned register. +(decl pinned_writable_gpr () WritableGpr) +(extern constructor pinned_writable_gpr pinned_writable_gpr) + ;; Construct a new `XmmMem` from the given `RegMem`. ;; ;; Asserts that the `RegMem`'s register, if any, is an XMM register. @@ -3606,6 +3610,17 @@ (_ Unit (emit_div_or_rem kind ty dst a b))) dst)) +;;;; Pinned Register ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl read_pinned_gpr () Gpr) +(rule (read_pinned_gpr) + (pinned_writable_gpr)) + +(decl write_pinned_gpr (Gpr) SideEffectNoResult) +(rule (write_pinned_gpr val) + (let ((dst WritableGpr (pinned_writable_gpr))) + (SideEffectNoResult.Inst (gen_move $I64 dst val)))) + ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (convert Gpr InstOutput output_gpr) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 98fffdb53f..72fadf17c8 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -3485,3 +3485,28 @@ (let ((res ValueRegs (mul_hi $I64 $true a b)) (hi Gpr (value_regs_get_gpr res 1))) hi)) + +;; Rules for `get_pinned_reg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (get_pinned_reg)) + (read_pinned_gpr)) + +;; Rules for `set_pinned_reg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (set_pinned_reg a @ (value_type ty))) + (side_effect (write_pinned_gpr a))) + +;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (vconst const))) + ;; TODO use Inst::gen_constant() instead. + (x64_xmm_load_const ty (const_to_vconst const))) + +;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; A raw_bitcast is just a mechanism for correcting the type of V128 values (see +;; https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR +;; instruction should emit no machine code but a move is necessary to give the +;; register allocator a definition for the output virtual register. +(rule (lower (raw_bitcast val)) + (put_in_regs val)) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index c074521c48..248a1858e8 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -580,55 +580,17 @@ fn lower_insn_to_regs( | Opcode::Sdiv | Opcode::Srem | Opcode::Umulhi - | Opcode::Smulhi => { + | Opcode::Smulhi + | Opcode::GetPinnedReg + | Opcode::SetPinnedReg + | Opcode::Vconst + | Opcode::RawBitcast + | Opcode::Insertlane => { implemented_in_isle(ctx); } Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), - Opcode::GetPinnedReg => { - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::gen_move(dst, regs::pinned_reg(), types::I64)); - } - - Opcode::SetPinnedReg => { - let src = put_input_in_reg(ctx, inputs[0]); - ctx.emit(Inst::gen_move( - Writable::from_reg(regs::pinned_reg()), - src, - types::I64, - )); - } - - Opcode::Vconst => { - let used_constant = if let &InstructionData::UnaryConst { - constant_handle, .. - } = ctx.data(insn) - { - ctx.use_constant(VCodeConstantData::Pool( - constant_handle, - ctx.get_constant_data(constant_handle).clone(), - )) - } else { - unreachable!("vconst should always have unary_const format") - }; - // TODO use Inst::gen_constant() instead. - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let ty = ty.unwrap(); - ctx.emit(Inst::xmm_load_const(used_constant, dst, ty)); - } - - Opcode::RawBitcast => { - // A raw_bitcast is just a mechanism for correcting the type of V128 values (see - // https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR - // instruction should emit no machine code but a move is necessary to give the register - // allocator a definition for the output virtual register. - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let ty = ty.unwrap(); - ctx.emit(Inst::gen_move(dst, src, ty)); - } - Opcode::Shuffle => { let ty = ty.unwrap(); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); @@ -756,14 +718,6 @@ fn lower_insn_to_regs( )); } - Opcode::Insertlane => { - unreachable!( - "implemented in ISLE: inst = `{}`, type = `{:?}`", - ctx.dfg().display_inst(insn), - ty - ); - } - Opcode::Extractlane => { // The instruction format maps to variables like: %dst = extractlane %src, %lane let ty = ty.unwrap(); diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 504fd18bf4..b4cdd3c708 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -849,6 +849,11 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { .use_constant(VCodeConstantData::WellKnown(&UMAX_MASK)) } + #[inline] + fn pinned_writable_gpr(&mut self) -> WritableGpr { + Writable::from_reg(Gpr::new(regs::pinned_reg()).unwrap()) + } + fn emit_div_or_rem( &mut self, kind: &DivOrRemKind, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 257df85de0..efa8ae608d 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -774,6 +774,14 @@ macro_rules! isle_prelude_methods { self.lower_ctx.use_constant(data) } + #[inline] + fn const_to_vconst(&mut self, constant: Constant) -> VCodeConstant { + self.lower_ctx.use_constant(VCodeConstantData::Pool( + constant, + self.lower_ctx.get_constant_data(constant).clone(), + )) + } + fn range(&mut self, start: usize, end: usize) -> Range { (start, end) } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index e75287f9a5..890c597ef1 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -559,6 +559,10 @@ (decl emit_u64_le_const (u64) VCodeConstant) (extern constructor emit_u64_le_const emit_u64_le_const) +;; Fetch the VCodeConstant associated with a Constant. +(decl const_to_vconst (Constant) VCodeConstant) +(extern constructor const_to_vconst const_to_vconst) + ;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; (type SideEffectNoResult (enum @@ -804,7 +808,6 @@ (decl u64_from_constant (u64) Constant) (extern extractor u64_from_constant u64_from_constant) - ;;;; Helpers for tail recursion loops ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; A range of integers to loop through. diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 7433faab5a..4b72f66ddd 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -141,11 +141,11 @@ block0: ; movq %rsp, %rbp ; block0: ; load_const VCodeConstant(0), %xmm0 -; load_const VCodeConstant(0), %xmm5 -; load_const VCodeConstant(0), %xmm4 -; pand %xmm5, %xmm0, %xmm5 -; pandn %xmm0, %xmm4, %xmm0 -; por %xmm0, %xmm5, %xmm0 +; load_const VCodeConstant(0), %xmm2 +; load_const VCodeConstant(0), %xmm6 +; pand %xmm2, %xmm0, %xmm2 +; pandn %xmm0, %xmm6, %xmm0 +; por %xmm0, %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -207,12 +207,12 @@ block0(v0: i32): ; block0: ; load_const VCodeConstant(1), %xmm0 ; andq %rdi, $7, %rdi -; movd %edi, %xmm7 -; psllw %xmm0, %xmm7, %xmm0 +; movd %edi, %xmm6 +; psllw %xmm0, %xmm6, %xmm0 ; lea const(VCodeConstant(0)), %rax ; shlq $4, %rdi, %rdi -; movdqu 0(%rax,%rdi,1), %xmm15 -; pand %xmm0, %xmm15, %xmm0 +; movdqu 0(%rax,%rdi,1), %xmm14 +; pand %xmm0, %xmm14, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -229,14 +229,14 @@ block0: ; movq %rsp, %rbp ; block0: ; load_const VCodeConstant(1), %xmm0 -; movl $1, %r11d -; andq %r11, $7, %r11 -; movd %r11d, %xmm7 -; psrlw %xmm0, %xmm7, %xmm0 -; lea const(VCodeConstant(0)), %rax -; shlq $4, %r11, %r11 -; movdqu 0(%rax,%r11,1), %xmm15 -; pand %xmm0, %xmm15, %xmm0 +; movl $1, %r10d +; andq %r10, $7, %r10 +; movd %r10d, %xmm6 +; psrlw %xmm0, %xmm6, %xmm0 +; lea const(VCodeConstant(0)), %rdi +; shlq $4, %r10, %r10 +; movdqu 0(%rdi,%r10,1), %xmm14 +; pand %xmm0, %xmm14, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -251,16 +251,16 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_const VCodeConstant(0), %xmm10 +; load_const VCodeConstant(0), %xmm9 ; andq %rdi, $7, %rdi -; movdqa %xmm10, %xmm0 -; punpcklbw %xmm0, %xmm10, %xmm0 -; punpckhbw %xmm10, %xmm10, %xmm10 +; movdqa %xmm9, %xmm0 +; punpcklbw %xmm0, %xmm9, %xmm0 +; punpckhbw %xmm9, %xmm9, %xmm9 ; addl %edi, $8, %edi -; movd %edi, %xmm13 -; psraw %xmm0, %xmm13, %xmm0 -; psraw %xmm10, %xmm13, %xmm10 -; packsswb %xmm0, %xmm10, %xmm0 +; movd %edi, %xmm12 +; psraw %xmm0, %xmm12, %xmm0 +; psraw %xmm9, %xmm12, %xmm9 +; packsswb %xmm0, %xmm9, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index d894de3163..98a04dac05 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -15,13 +15,13 @@ block0: ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_const VCodeConstant(3), %xmm1 +; load_const VCodeConstant(3), %xmm6 ; load_const VCodeConstant(2), %xmm0 -; load_const VCodeConstant(0), %xmm9 -; pshufb %xmm1, %xmm9, %xmm1 -; load_const VCodeConstant(1), %xmm12 -; pshufb %xmm0, %xmm12, %xmm0 -; orps %xmm0, %xmm1, %xmm0 +; load_const VCodeConstant(0), %xmm7 +; pshufb %xmm6, %xmm7, %xmm6 +; load_const VCodeConstant(1), %xmm10 +; pshufb %xmm0, %xmm10, %xmm0 +; orps %xmm0, %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -37,8 +37,8 @@ block0: ; movq %rsp, %rbp ; block0: ; load_const VCodeConstant(1), %xmm0 -; load_const VCodeConstant(0), %xmm5 -; pshufb %xmm0, %xmm5, %xmm0 +; load_const VCodeConstant(0), %xmm4 +; pshufb %xmm0, %xmm4, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -55,10 +55,10 @@ block0: ; movq %rsp, %rbp ; block0: ; load_const VCodeConstant(1), %xmm0 -; load_const VCodeConstant(1), %xmm2 -; load_const VCodeConstant(0), %xmm7 -; paddusb %xmm2, %xmm7, %xmm2 -; pshufb %xmm0, %xmm2, %xmm0 +; load_const VCodeConstant(1), %xmm5 +; load_const VCodeConstant(0), %xmm6 +; paddusb %xmm5, %xmm6, %xmm5 +; pshufb %xmm0, %xmm5, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret