diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 2d47d71a5d..3a2dd0de09 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -413,6 +413,8 @@ pub enum SseOpcode { Paddsw, Paddusb, Paddusw, + Pand, + Pandn, Pavgb, Pavgw, Pcmpeqb, @@ -556,6 +558,8 @@ impl SseOpcode { | SseOpcode::Paddsw | SseOpcode::Paddusb | SseOpcode::Paddusw + | SseOpcode::Pand + | SseOpcode::Pandn | SseOpcode::Pavgb | SseOpcode::Pavgw | SseOpcode::Pcmpeqb @@ -698,6 +702,8 @@ impl fmt::Debug for SseOpcode { SseOpcode::Paddsw => "paddsw", SseOpcode::Paddusb => "paddusb", SseOpcode::Paddusw => "paddusw", + SseOpcode::Pand => "pand", + SseOpcode::Pandn => "pandn", SseOpcode::Pavgb => "pavgb", SseOpcode::Pavgw => "pavgw", SseOpcode::Pcmpeqb => "pcmpeqb", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 63d6884b28..688e620d83 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1735,8 +1735,8 @@ pub(crate) fn emit( SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2), SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2), SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2), - SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2), + SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), @@ -1767,6 +1767,8 @@ pub(crate) fn emit( SseOpcode::Paddsw => (LegacyPrefixes::_66, 0x0FED, 2), SseOpcode::Paddusb => (LegacyPrefixes::_66, 0x0FDC, 2), SseOpcode::Paddusw => (LegacyPrefixes::_66, 0x0FDD, 2), + SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2), + SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2), SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2), SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2), SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 9ae0149565..30ce3915fd 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -747,6 +747,21 @@ fn lower_insn_to_regs>( types::I16X8 => SseOpcode::Pavgw, _ => panic!("Unsupported type for packed avg_round instruction: {}", ty), }, + Opcode::Band => match ty { + types::F32X4 => SseOpcode::Andps, + types::F64X2 => SseOpcode::Andpd, + _ => SseOpcode::Pand, + }, + Opcode::Bor => match ty { + types::F32X4 => SseOpcode::Orps, + types::F64X2 => SseOpcode::Orpd, + _ => SseOpcode::Por, + }, + Opcode::Bxor => match ty { + types::F32X4 => SseOpcode::Xorps, + types::F64X2 => SseOpcode::Xorpd, + _ => SseOpcode::Pxor, + }, _ => panic!("Unsupported packed instruction: {}", op), }; let lhs = put_input_in_reg(ctx, inputs[0]); @@ -799,6 +814,23 @@ fn lower_insn_to_regs>( } } + Opcode::BandNot => { + let ty = ty.unwrap(); + debug_assert!(ty.is_vector() && ty.bytes() == 16); + let lhs = input_to_reg_mem(ctx, inputs[0]); + let rhs = put_input_in_reg(ctx, inputs[1]); + let dst = get_output_reg(ctx, outputs[0]); + let sse_op = match ty { + types::F32X4 => SseOpcode::Andnps, + types::F64X2 => SseOpcode::Andnpd, + _ => SseOpcode::Pandn, + }; + // Note the flipping of operands: the `rhs` operand is used as the destination instead + // of the `lhs` as in the other bit operations above (e.g. `band`). + ctx.emit(Inst::gen_move(dst, rhs, ty)); + ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst)); + } + Opcode::Iabs => { let src = input_to_reg_mem(ctx, inputs[0]); let dst = get_output_reg(ctx, outputs[0]);