Adds f32.mul, f32.div for vcode backend for x64.

Adds support for lowering the clif instructions Fmul and Fdiv
in the new vcode backend. Also adds the encoding and an emit
test for sqrtss, and removes the redundant to_string() function
from the SseOpcode enum.
This commit is contained in:
Johnnie Birch
2020-06-01 00:39:31 -07:00
parent 4f72a7483b
commit 043571fee0
4 changed files with 31 additions and 24 deletions

View File

@@ -291,16 +291,6 @@ impl SseOpcode {
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41, SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
} }
} }
pub(crate) fn to_string(&self) -> String {
match self {
SseOpcode::Addss => "addss".to_string(),
SseOpcode::Subss => "subss".to_string(),
SseOpcode::Movss => "movss".to_string(),
SseOpcode::Movsd => "movsd".to_string(),
_ => "unimplemented sse_op".to_string(),
}
}
} }
impl fmt::Debug for SseOpcode { impl fmt::Debug for SseOpcode {
@@ -332,7 +322,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Roundss => "roundss", SseOpcode::Roundss => "roundss",
SseOpcode::Roundsd => "roundsd", SseOpcode::Roundsd => "roundsd",
SseOpcode::Rsqrtss => "rsqrtss", SseOpcode::Rsqrtss => "rsqrtss",
SseOpcode::Sqrtss => "srtqss", SseOpcode::Sqrtss => "sqrtss",
SseOpcode::Sqrtsd => "sqrtsd", SseOpcode::Sqrtsd => "sqrtsd",
SseOpcode::Subss => "subss", SseOpcode::Subss => "subss",
SseOpcode::Subsd => "subsd", SseOpcode::Subsd => "subsd",

View File

@@ -1059,6 +1059,9 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
let opcode = match op { let opcode = match op {
SseOpcode::Addss => 0x0F58, SseOpcode::Addss => 0x0F58,
SseOpcode::Subss => 0x0F5C, SseOpcode::Subss => 0x0F5C,
SseOpcode::Mulss => 0x0F59,
SseOpcode::Divss => 0x0F5E,
SseOpcode::Sqrtss => 0x0F51,
_ => unimplemented!("XMM_RM_R opcode"), _ => unimplemented!("XMM_RM_R opcode"),
}; };

View File

@@ -72,11 +72,11 @@ fn test_x64_emit() {
let w_xmm1 = Writable::<Reg>::from_reg(xmm1); let w_xmm1 = Writable::<Reg>::from_reg(xmm1);
let w_xmm2 = Writable::<Reg>::from_reg(xmm2); let w_xmm2 = Writable::<Reg>::from_reg(xmm2);
let w_xmm3 = Writable::<Reg>::from_reg(xmm3); let w_xmm3 = Writable::<Reg>::from_reg(xmm3);
let _w_xmm4 = Writable::<Reg>::from_reg(xmm4); let w_xmm4 = Writable::<Reg>::from_reg(xmm4);
let _w_xmm5 = Writable::<Reg>::from_reg(xmm5); let _w_xmm5 = Writable::<Reg>::from_reg(xmm5);
let _w_xmm6 = Writable::<Reg>::from_reg(xmm6); let _w_xmm6 = Writable::<Reg>::from_reg(xmm6);
let _w_xmm7 = Writable::<Reg>::from_reg(xmm7); let w_xmm7 = Writable::<Reg>::from_reg(xmm7);
let _w_xmm8 = Writable::<Reg>::from_reg(xmm8); let w_xmm8 = Writable::<Reg>::from_reg(xmm8);
let _w_xmm9 = Writable::<Reg>::from_reg(xmm9); let _w_xmm9 = Writable::<Reg>::from_reg(xmm9);
let w_xmm10 = Writable::<Reg>::from_reg(xmm10); let w_xmm10 = Writable::<Reg>::from_reg(xmm10);
let _w_xmm11 = Writable::<Reg>::from_reg(xmm11); let _w_xmm11 = Writable::<Reg>::from_reg(xmm11);
@@ -2324,19 +2324,16 @@ fn test_x64_emit() {
"F30F5CC8", "F30F5CC8",
"subss %xmm0, %xmm1", "subss %xmm0, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
"F3450F58EB", "F3450F58EB",
"addss %xmm11, %xmm13", "addss %xmm11, %xmm13",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
"F3410F5CCC", "F3410F5CCC",
"subss %xmm12, %xmm1", "subss %xmm12, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r( Inst::xmm_rm_r(
SseOpcode::Addss, SseOpcode::Addss,
@@ -2346,7 +2343,6 @@ fn test_x64_emit() {
"F3410F5844927B", "F3410F5844927B",
"addss 123(%r10,%rdx,4), %xmm0", "addss 123(%r10,%rdx,4), %xmm0",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r( Inst::xmm_rm_r(
SseOpcode::Subss, SseOpcode::Subss,
@@ -2356,6 +2352,22 @@ fn test_x64_emit() {
"F3450F5C94C241010000", "F3450F5C94C241010000",
"subss 321(%r10,%rax,8), %xmm10", "subss 321(%r10,%rax,8), %xmm10",
)); ));
insns.push((
Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4),
"F30F59E5",
"mulss %xmm5, %xmm4",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7),
"F3410F5EF8",
"divss %xmm8, %xmm7",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8),
"F3440F51C7",
"sqrtss %xmm7, %xmm8",
));
// ======================================================== // ========================================================
// XMM_R_R // XMM_R_R

View File

@@ -194,16 +194,19 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
// N.B.: the Ret itself is generated by the ABI. // N.B.: the Ret itself is generated by the ABI.
} }
Opcode::Fadd | Opcode::Fsub => { Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
let dst = output_to_reg(ctx, inst, 0); let dst = output_to_reg(ctx, inst, 0);
let lhs = input_to_reg(ctx, inst, 0); let lhs = input_to_reg(ctx, inst, 0);
let rhs = input_to_reg(ctx, inst, 1); let rhs = input_to_reg(ctx, inst, 1);
let is_64 = flt_ty_is_64(ty.unwrap()); let is_64 = flt_ty_is_64(ty.unwrap());
if !is_64 { if !is_64 {
let sse_op = if op == Opcode::Fadd { let sse_op = match op {
SseOpcode::Addss Opcode::Fadd => SseOpcode::Addss,
} else { Opcode::Fsub => SseOpcode::Subss,
SseOpcode::Subss Opcode::Fmul => SseOpcode::Mulss,
Opcode::Fdiv => SseOpcode::Divss,
// TODO Fmax, Fmin.
_ => unimplemented!(),
}; };
ctx.emit(Inst::xmm_r_r(SseOpcode::Movss, lhs, dst)); ctx.emit(Inst::xmm_r_r(SseOpcode::Movss, lhs, dst));
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst)); ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
@@ -241,7 +244,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
| Opcode::SshrImm => { | Opcode::SshrImm => {
panic!("ALU+imm and ALU+carry ops should not appear here!"); panic!("ALU+imm and ALU+carry ops should not appear here!");
} }
_ => unimplemented!("unimplemented lowering for opcode {:?}", op), _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
} }
} }