From 043571fee05ea4cb21e6568cb90e3b81ced29d8c Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Mon, 1 Jun 2020 00:39:31 -0700 Subject: [PATCH] Adds f32.mul, f32.div for vcode backend for x64. Adds support for lowering the clif instructions Fdiv and Fmul in the new vcode backend. Also adds the encoding and an emit test for sqrtss, fixes the misspelled sqrtss mnemonic in the SseOpcode Debug output, and removes a redundant to_string() func from the SseOpcode enum. --- cranelift/codegen/src/isa/x64/inst/args.rs | 12 +-------- cranelift/codegen/src/isa/x64/inst/emit.rs | 3 +++ .../codegen/src/isa/x64/inst/emit_tests.rs | 26 ++++++++++++++----- cranelift/codegen/src/isa/x64/lower.rs | 14 +++++----- 4 files changed, 31 insertions(+), 24 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 10541c86f3..fe65145c50 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -291,16 +291,6 @@ impl SseOpcode { SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41, } } - - pub(crate) fn to_string(&self) -> String { - match self { - SseOpcode::Addss => "addss".to_string(), - SseOpcode::Subss => "subss".to_string(), - SseOpcode::Movss => "movss".to_string(), - SseOpcode::Movsd => "movsd".to_string(), - _ => "unimplemented sse_op".to_string(), - } - } } impl fmt::Debug for SseOpcode { @@ -332,7 +322,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Roundss => "roundss", SseOpcode::Roundsd => "roundsd", SseOpcode::Rsqrtss => "rsqrtss", - SseOpcode::Sqrtss => "srtqss", + SseOpcode::Sqrtss => "sqrtss", SseOpcode::Sqrtsd => "sqrtsd", SseOpcode::Subss => "subss", SseOpcode::Subsd => "subsd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 22a750111f..efd6386de2 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1059,6 +1059,9 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { 
let opcode = match op { SseOpcode::Addss => 0x0F58, SseOpcode::Subss => 0x0F5C, + SseOpcode::Mulss => 0x0F59, + SseOpcode::Divss => 0x0F5E, + SseOpcode::Sqrtss => 0x0F51, _ => unimplemented!("XMM_RM_R opcode"), }; diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index e1a91633c1..670621c3de 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -72,11 +72,11 @@ fn test_x64_emit() { let w_xmm1 = Writable::<Reg>::from_reg(xmm1); let w_xmm2 = Writable::<Reg>::from_reg(xmm2); let w_xmm3 = Writable::<Reg>::from_reg(xmm3); - let _w_xmm4 = Writable::<Reg>::from_reg(xmm4); + let w_xmm4 = Writable::<Reg>::from_reg(xmm4); let _w_xmm5 = Writable::<Reg>::from_reg(xmm5); let _w_xmm6 = Writable::<Reg>::from_reg(xmm6); - let _w_xmm7 = Writable::<Reg>::from_reg(xmm7); - let _w_xmm8 = Writable::<Reg>::from_reg(xmm8); + let w_xmm7 = Writable::<Reg>::from_reg(xmm7); + let w_xmm8 = Writable::<Reg>::from_reg(xmm8); let _w_xmm9 = Writable::<Reg>::from_reg(xmm9); let w_xmm10 = Writable::<Reg>::from_reg(xmm10); let _w_xmm11 = Writable::<Reg>::from_reg(xmm11); @@ -2324,19 +2324,16 @@ fn test_x64_emit() { "F30F5CC8", "subss %xmm0, %xmm1", )); - insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), "F3450F58EB", "addss %xmm11, %xmm13", )); - insns.push(( Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), "F3410F5CCC", "subss %xmm12, %xmm1", )); - insns.push(( Inst::xmm_rm_r( SseOpcode::Addss, @@ -2346,7 +2343,6 @@ fn test_x64_emit() { "F3410F5844927B", "addss 123(%r10,%rdx,4), %xmm0", )); - insns.push(( Inst::xmm_rm_r( SseOpcode::Subss, @@ -2356,6 +2352,22 @@ fn test_x64_emit() { "F3450F5C94C241010000", "subss 321(%r10,%rax,8), %xmm10", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4), + "F30F59E5", + "mulss %xmm5, %xmm4", + )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7), + "F3410F5EF8", + "divss %xmm8, %xmm7", + )); + + insns.push(( + 
Inst::xmm_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8), + "F3440F51C7", + "sqrtss %xmm7, %xmm8", + )); // ======================================================== // XMM_R_R diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 6aa1e3df4a..65c0684077 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -194,16 +194,19 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { // N.B.: the Ret itself is generated by the ABI. } - Opcode::Fadd | Opcode::Fsub => { + Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => { let dst = output_to_reg(ctx, inst, 0); let lhs = input_to_reg(ctx, inst, 0); let rhs = input_to_reg(ctx, inst, 1); let is_64 = flt_ty_is_64(ty.unwrap()); if !is_64 { - let sse_op = if op == Opcode::Fadd { - SseOpcode::Addss - } else { - SseOpcode::Subss + let sse_op = match op { + Opcode::Fadd => SseOpcode::Addss, + Opcode::Fsub => SseOpcode::Subss, + Opcode::Fmul => SseOpcode::Mulss, + Opcode::Fdiv => SseOpcode::Divss, + // TODO Fmax, Fmin. + _ => unimplemented!(), }; ctx.emit(Inst::xmm_r_r(SseOpcode::Movss, lhs, dst)); ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst)); @@ -241,7 +244,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) { | Opcode::SshrImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } - _ => unimplemented!("unimplemented lowering for opcode {:?}", op), } }