From b519c975cb4949bf01733b576504742e5b6d40a5 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Mon, 18 Jul 2022 14:26:23 -0700 Subject: [PATCH] x64: Port fdemote and fvdemote to ISLE (#4449) https://github.com/bytecodealliance/wasmtime/pull/4449 --- cranelift/codegen/src/isa/x64/inst.isle | 14 +++ cranelift/codegen/src/isa/x64/lower.isle | 8 ++ cranelift/codegen/src/isa/x64/lower.rs | 45 ++-------- .../filetests/runtests/conversions.clif | 85 +++++++++++++++++++ 4 files changed, 115 insertions(+), 37 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/conversions.clif diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 0df0f53c5b..cd09599d91 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -2841,6 +2841,13 @@ (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtss2sd) x dst)))) dst)) +;; Helper for creating `cvtsd2ss` instructions. +(decl x64_cvtsd2ss (Xmm) Xmm) +(rule (x64_cvtsd2ss x) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtsd2ss) x dst)))) + dst)) + ;; Helper for creating `cvtps2pd` instructions. (decl x64_cvtps2pd (Xmm) Xmm) (rule (x64_cvtps2pd x) @@ -2848,6 +2855,13 @@ (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtps2pd) x dst)))) dst)) +;; Helper for creating `cvtpd2ps` instructions. +(decl x64_cvtpd2ps (Xmm) Xmm) +(rule (x64_cvtpd2ps x) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Cvtpd2ps) x dst)))) + dst)) + ;; Helpers for creating `pcmpeq*` instructions. 
(decl x64_pcmpeq (Type Xmm XmmMem) Xmm) (rule (x64_pcmpeq $I8X16 x y) (x64_pcmpeqb x y)) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 848794f85e..1dbeb12af5 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2351,6 +2351,14 @@ (rule (lower (has_type $F64X2 (fvpromote_low x))) (x64_cvtps2pd (put_in_xmm x))) +;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $F32 (fdemote x))) + (x64_cvtsd2ss x)) + +;; Rules for `fvdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower (has_type $F32X4 (fvdemote x))) + (x64_cvtpd2ps x)) + ;; Rules for `fmin` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F32 (fmin x y))) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 07bf0c6e74..f4a6468454 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -893,13 +893,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( | Opcode::Fmin | Opcode::Fmax | Opcode::FminPseudo - | Opcode::FmaxPseudo => implemented_in_isle(ctx), - - Opcode::Icmp => { - implemented_in_isle(ctx); - } - - Opcode::Fcmp => { + | Opcode::FmaxPseudo + | Opcode::Sqrt + | Opcode::Fpromote + | Opcode::FvpromoteLow + | Opcode::Fdemote + | Opcode::Fvdemote + | Opcode::Icmp + | Opcode::Fcmp => { implemented_in_isle(ctx); } @@ -1020,36 +1021,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( }; } - Opcode::Sqrt => { - implemented_in_isle(ctx); - } - - Opcode::Fpromote => { - implemented_in_isle(ctx); - } - - Opcode::FvpromoteLow => { - implemented_in_isle(ctx); - } - - Opcode::Fdemote => { - // We can't guarantee the RHS (if a load) is 128-bit aligned, so we - // must avoid merging a load here. 
- let src = RegMem::reg(put_input_in_reg(ctx, inputs[0])); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::xmm_unary_rm_r(SseOpcode::Cvtsd2ss, src, dst)); - } - - Opcode::Fvdemote => { - let src = RegMem::reg(put_input_in_reg(ctx, inputs[0])); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::xmm_unary_rm_r( - SseOpcode::Cvtpd2ps, - RegMem::from(src), - dst, - )); - } - Opcode::FcvtFromSint => { let output_ty = ty.unwrap(); if !output_ty.is_vector() { diff --git a/cranelift/filetests/filetests/runtests/conversions.clif b/cranelift/filetests/filetests/runtests/conversions.clif new file mode 100644 index 0000000000..42becd8b00 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/conversions.clif @@ -0,0 +1,85 @@ +test run + +target x86_64 +target aarch64 + +function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 + +block0(v0: i64, v1: i64, v2: f32): + v3 = heap_addr.i64 heap0, v1, 4 + store.f32 v2, v3 + v4 = load.f32 v3 + v5 = fpromote.f64 v4 + return v5 +} + +; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8 +; run: %fpromote_f32_f64(0, 0x0.0) == 0x0.0 +; run: %fpromote_f32_f64(1, 0x0.1) == 0x0.1 +; run: %fpromote_f32_f64(2, 0x0.2) == 0x0.2 +; run: %fpromote_f32_f64(3, 0x3.2) == 0x3.2 +; run: %fpromote_f32_f64(0xc, 0x3.2) == 0x3.2 + +function %fdemote_test(i64 vmctx, i64, f64) -> f32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 + +block0(v0: i64, v1: i64, v2: f64): + v3 = heap_addr.i64 heap0, v1, 8 + store.f64 v2, v3 + v4 = load.f64 v3 + v5 = fdemote.f32 v4 + return v5 +} + +; heap: static, size=0x10, ptr=vmctx+0, bound=vmctx+8 +; run: %fdemote_test(0, 0x0.0) == 0x0.0 +; run: %fdemote_test(1, 0x0.1) == 0x0.1 +; run: %fdemote_test(2, 0x0.2) == 0x0.2 +; run: %fdemote_test(3, 0x3.2) 
== 0x3.2 +; run: %fdemote_test(0x8, 0x3.2) == 0x3.2 + +function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 + +block0(v0: i64, v1: i64, v2: f64x2): + v3 = heap_addr.i64 heap0, v1, 16 + store.f64x2 v2, v3 + v4 = load.f64x2 v3 + v5 = fvdemote v4 + return v5 +} + +; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8 +; run: %fvdemote_test(0, [0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0] +; run: %fvdemote_test(1, [0x0.1 0x0.2]) == [0x0.1 0x0.2 0x0.0 0x0.0] +; run: %fvdemote_test(2, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] +; run: %fvdemote_test(8, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] +; run: %fvdemote_test(16, [0x2.1 0x1.2]) == [0x2.1 0x1.2 0x0.0 0x0.0] + + +function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+0 + heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 + +block0(v0: i64, v1: i64, v2: f32x4): + v3 = heap_addr.i64 heap0, v1, 16 + store.f32x4 v2, v3 + v4 = load.f32x4 v3 + v5 = fvpromote_low v4 + return v5 +} + +; heap: static, size=0x20, ptr=vmctx+0, bound=vmctx+8 +; run: %fvpromote_low_test(0, [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0] +; run: %fvpromote_low_test(1, [0x0.1 0x0.2 0x0.0 0x0.0]) == [0x0.1 0x0.2] +; run: %fvpromote_low_test(2, [0x2.1 0x1.2 0x0.0 0x0.0]) == [0x2.1 0x1.2] +; run: %fvpromote_low_test(5, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0] +; run: %fvpromote_low_test(16, [0x0.0 0x0.0 0x2.1 0x1.2]) == [0x0.0 0x0.0]