diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 713b783f30..0b64f3bb4c 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -377,6 +377,8 @@ pub enum SseOpcode { Subsd, Ucomiss, Ucomisd, + Xorps, + Xorpd, } impl SseOpcode { @@ -403,7 +405,8 @@ impl SseOpcode { | SseOpcode::Ucomiss | SseOpcode::Sqrtss | SseOpcode::Comiss - | SseOpcode::Cmpss => SSE, + | SseOpcode::Cmpss + | SseOpcode::Xorps => SSE, SseOpcode::Addsd | SseOpcode::Andpd @@ -424,7 +427,8 @@ impl SseOpcode { | SseOpcode::Subsd | SseOpcode::Ucomisd | SseOpcode::Comisd - | SseOpcode::Cmpsd => SSE2, + | SseOpcode::Cmpsd + | SseOpcode::Xorpd => SSE2, SseOpcode::Insertps | SseOpcode::Roundss | SseOpcode::Roundsd => SSE41, } @@ -485,6 +489,8 @@ impl fmt::Debug for SseOpcode { SseOpcode::Cmpss => "cmpss", SseOpcode::Cmpsd => "cmpsd", SseOpcode::Insertps => "insertps", + SseOpcode::Xorps => "xorps", + SseOpcode::Xorpd => "xorpd", }; write!(fmt, "{}", name) } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index c91e9f81c1..9e022bdf27 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1545,6 +1545,8 @@ pub(crate) fn emit( SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E), SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F), SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F), + SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57), + SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57), _ => unimplemented!("Opcode {:?} not implemented", op), }; diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1cd8ed224b..37a4ad687d 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1120,6 +1120,39 @@ fn lower_insn_to_regs>( } } + Opcode::Fabs | Opcode::Fneg => { + let src = input_to_reg_mem(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + + // In both cases, generate a constant and apply a single binary instruction: + // - to compute the absolute value, set all bits to 1 but the MSB to 0, and bit-AND the + // src with it. + // - to compute the negated value, set all bits to 0 but the MSB to 1, and bit-XOR the + // src with it. + let output_ty = ty.unwrap(); + let (val, opcode) = match output_ty { + F32 => match op { + Opcode::Fabs => (0x7fffffff, SseOpcode::Andps), + Opcode::Fneg => (0x80000000, SseOpcode::Xorps), + _ => unreachable!(), + }, + F64 => match op { + Opcode::Fabs => (0x7fffffffffffffff, SseOpcode::Andpd), + Opcode::Fneg => (0x8000000000000000, SseOpcode::Xorpd), + _ => unreachable!(), + }, + _ => panic!("unexpected type {:?} for Fabs", output_ty), + }; + + for inst in Inst::gen_constant(dst, val, output_ty, |reg_class, ty| { + ctx.alloc_tmp(reg_class, ty) + }) { + ctx.emit(inst); + } + + ctx.emit(Inst::xmm_rm_r(opcode, src, dst)); + } + Opcode::Fcopysign => { let dst = output_to_reg(ctx, outputs[0]); let lhs = input_to_reg(ctx, inputs[0]);