Adds f32.mul and f32.div to the x64 vcode backend.
Adds support for lowering the CLIF instructions Fdiv and Fmul in the new vcode backend. Also adds lowering and a test for sqrtss, and removes a redundant to_string() function from the SseOpcode enum.
This commit is contained in:
@@ -291,16 +291,6 @@ impl SseOpcode {
|
|||||||
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn to_string(&self) -> String {
|
|
||||||
match self {
|
|
||||||
SseOpcode::Addss => "addss".to_string(),
|
|
||||||
SseOpcode::Subss => "subss".to_string(),
|
|
||||||
SseOpcode::Movss => "movss".to_string(),
|
|
||||||
SseOpcode::Movsd => "movsd".to_string(),
|
|
||||||
_ => "unimplemented sse_op".to_string(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for SseOpcode {
|
impl fmt::Debug for SseOpcode {
|
||||||
@@ -332,7 +322,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Roundss => "roundss",
|
SseOpcode::Roundss => "roundss",
|
||||||
SseOpcode::Roundsd => "roundsd",
|
SseOpcode::Roundsd => "roundsd",
|
||||||
SseOpcode::Rsqrtss => "rsqrtss",
|
SseOpcode::Rsqrtss => "rsqrtss",
|
||||||
SseOpcode::Sqrtss => "srtqss",
|
SseOpcode::Sqrtss => "sqrtss",
|
||||||
SseOpcode::Sqrtsd => "sqrtsd",
|
SseOpcode::Sqrtsd => "sqrtsd",
|
||||||
SseOpcode::Subss => "subss",
|
SseOpcode::Subss => "subss",
|
||||||
SseOpcode::Subsd => "subsd",
|
SseOpcode::Subsd => "subsd",
|
||||||
|
|||||||
@@ -1059,6 +1059,9 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
|
|||||||
let opcode = match op {
|
let opcode = match op {
|
||||||
SseOpcode::Addss => 0x0F58,
|
SseOpcode::Addss => 0x0F58,
|
||||||
SseOpcode::Subss => 0x0F5C,
|
SseOpcode::Subss => 0x0F5C,
|
||||||
|
SseOpcode::Mulss => 0x0F59,
|
||||||
|
SseOpcode::Divss => 0x0F5E,
|
||||||
|
SseOpcode::Sqrtss => 0x0F51,
|
||||||
_ => unimplemented!("XMM_RM_R opcode"),
|
_ => unimplemented!("XMM_RM_R opcode"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -72,11 +72,11 @@ fn test_x64_emit() {
|
|||||||
let w_xmm1 = Writable::<Reg>::from_reg(xmm1);
|
let w_xmm1 = Writable::<Reg>::from_reg(xmm1);
|
||||||
let w_xmm2 = Writable::<Reg>::from_reg(xmm2);
|
let w_xmm2 = Writable::<Reg>::from_reg(xmm2);
|
||||||
let w_xmm3 = Writable::<Reg>::from_reg(xmm3);
|
let w_xmm3 = Writable::<Reg>::from_reg(xmm3);
|
||||||
let _w_xmm4 = Writable::<Reg>::from_reg(xmm4);
|
let w_xmm4 = Writable::<Reg>::from_reg(xmm4);
|
||||||
let _w_xmm5 = Writable::<Reg>::from_reg(xmm5);
|
let _w_xmm5 = Writable::<Reg>::from_reg(xmm5);
|
||||||
let _w_xmm6 = Writable::<Reg>::from_reg(xmm6);
|
let _w_xmm6 = Writable::<Reg>::from_reg(xmm6);
|
||||||
let _w_xmm7 = Writable::<Reg>::from_reg(xmm7);
|
let w_xmm7 = Writable::<Reg>::from_reg(xmm7);
|
||||||
let _w_xmm8 = Writable::<Reg>::from_reg(xmm8);
|
let w_xmm8 = Writable::<Reg>::from_reg(xmm8);
|
||||||
let _w_xmm9 = Writable::<Reg>::from_reg(xmm9);
|
let _w_xmm9 = Writable::<Reg>::from_reg(xmm9);
|
||||||
let w_xmm10 = Writable::<Reg>::from_reg(xmm10);
|
let w_xmm10 = Writable::<Reg>::from_reg(xmm10);
|
||||||
let _w_xmm11 = Writable::<Reg>::from_reg(xmm11);
|
let _w_xmm11 = Writable::<Reg>::from_reg(xmm11);
|
||||||
@@ -2324,19 +2324,16 @@ fn test_x64_emit() {
|
|||||||
"F30F5CC8",
|
"F30F5CC8",
|
||||||
"subss %xmm0, %xmm1",
|
"subss %xmm0, %xmm1",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
|
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
|
||||||
"F3450F58EB",
|
"F3450F58EB",
|
||||||
"addss %xmm11, %xmm13",
|
"addss %xmm11, %xmm13",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
|
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
|
||||||
"F3410F5CCC",
|
"F3410F5CCC",
|
||||||
"subss %xmm12, %xmm1",
|
"subss %xmm12, %xmm1",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(
|
Inst::xmm_rm_r(
|
||||||
SseOpcode::Addss,
|
SseOpcode::Addss,
|
||||||
@@ -2346,7 +2343,6 @@ fn test_x64_emit() {
|
|||||||
"F3410F5844927B",
|
"F3410F5844927B",
|
||||||
"addss 123(%r10,%rdx,4), %xmm0",
|
"addss 123(%r10,%rdx,4), %xmm0",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(
|
Inst::xmm_rm_r(
|
||||||
SseOpcode::Subss,
|
SseOpcode::Subss,
|
||||||
@@ -2356,6 +2352,22 @@ fn test_x64_emit() {
|
|||||||
"F3450F5C94C241010000",
|
"F3450F5C94C241010000",
|
||||||
"subss 321(%r10,%rax,8), %xmm10",
|
"subss 321(%r10,%rax,8), %xmm10",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4),
|
||||||
|
"F30F59E5",
|
||||||
|
"mulss %xmm5, %xmm4",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7),
|
||||||
|
"F3410F5EF8",
|
||||||
|
"divss %xmm8, %xmm7",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Sqrtss, RegMem::reg(xmm7), w_xmm8),
|
||||||
|
"F3440F51C7",
|
||||||
|
"sqrtss %xmm7, %xmm8",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// XMM_R_R
|
// XMM_R_R
|
||||||
|
|||||||
@@ -194,16 +194,19 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
|||||||
// N.B.: the Ret itself is generated by the ABI.
|
// N.B.: the Ret itself is generated by the ABI.
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Fadd | Opcode::Fsub => {
|
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
|
||||||
let dst = output_to_reg(ctx, inst, 0);
|
let dst = output_to_reg(ctx, inst, 0);
|
||||||
let lhs = input_to_reg(ctx, inst, 0);
|
let lhs = input_to_reg(ctx, inst, 0);
|
||||||
let rhs = input_to_reg(ctx, inst, 1);
|
let rhs = input_to_reg(ctx, inst, 1);
|
||||||
let is_64 = flt_ty_is_64(ty.unwrap());
|
let is_64 = flt_ty_is_64(ty.unwrap());
|
||||||
if !is_64 {
|
if !is_64 {
|
||||||
let sse_op = if op == Opcode::Fadd {
|
let sse_op = match op {
|
||||||
SseOpcode::Addss
|
Opcode::Fadd => SseOpcode::Addss,
|
||||||
} else {
|
Opcode::Fsub => SseOpcode::Subss,
|
||||||
SseOpcode::Subss
|
Opcode::Fmul => SseOpcode::Mulss,
|
||||||
|
Opcode::Fdiv => SseOpcode::Divss,
|
||||||
|
// TODO Fmax, Fmin.
|
||||||
|
_ => unimplemented!(),
|
||||||
};
|
};
|
||||||
ctx.emit(Inst::xmm_r_r(SseOpcode::Movss, lhs, dst));
|
ctx.emit(Inst::xmm_r_r(SseOpcode::Movss, lhs, dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
|
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
|
||||||
@@ -241,7 +244,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, inst: IRInst) {
|
|||||||
| Opcode::SshrImm => {
|
| Opcode::SshrImm => {
|
||||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user