From ac2bf9d2463534ae89ec6e51ee71160a6f212656 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Mon, 21 Sep 2020 12:32:32 -0700 Subject: [PATCH] [machinst x64]: add packed min/max implementations --- cranelift/codegen/src/isa/x64/inst/args.rs | 43 ++++++++++- cranelift/codegen/src/isa/x64/inst/emit.rs | 12 ++++ .../codegen/src/isa/x64/inst/emit_tests.rs | 72 +++++++++++++++++++ cranelift/codegen/src/isa/x64/lower.rs | 42 +++++++++++ 4 files changed, 166 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index caa00eed3d..7cb64898e6 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -403,6 +403,18 @@ pub enum SseOpcode { Paddw, Pavgb, Pavgw, + Pmaxsb, + Pmaxsw, + Pmaxsd, + Pmaxub, + Pmaxuw, + Pmaxud, + Pminsb, + Pminsw, + Pminsd, + Pminub, + Pminuw, + Pminud, Pmulld, Pmullw, Pmuludq, @@ -507,6 +519,10 @@ impl SseOpcode { | SseOpcode::Paddw | SseOpcode::Pavgb | SseOpcode::Pavgw + | SseOpcode::Pmaxsw + | SseOpcode::Pmaxub + | SseOpcode::Pminsw + | SseOpcode::Pminub | SseOpcode::Pmullw | SseOpcode::Pmuludq | SseOpcode::Psllw @@ -531,9 +547,18 @@ impl SseOpcode { SseOpcode::Pabsb | SseOpcode::Pabsw | SseOpcode::Pabsd => SSSE3, - SseOpcode::Insertps | SseOpcode::Pmulld | SseOpcode::Roundss | SseOpcode::Roundsd => { - SSE41 - } + SseOpcode::Insertps + | SseOpcode::Pmaxsb + | SseOpcode::Pmaxsd + | SseOpcode::Pmaxuw + | SseOpcode::Pmaxud + | SseOpcode::Pminsb + | SseOpcode::Pminsd + | SseOpcode::Pminuw + | SseOpcode::Pminud + | SseOpcode::Pmulld + | SseOpcode::Roundss + | SseOpcode::Roundsd => SSE41, } } @@ -609,6 +634,18 @@ impl fmt::Debug for SseOpcode { SseOpcode::Paddw => "paddw", SseOpcode::Pavgb => "pavgb", SseOpcode::Pavgw => "pavgw", + SseOpcode::Pmaxsb => "pmaxsb", + SseOpcode::Pmaxsw => "pmaxsw", + SseOpcode::Pmaxsd => "pmaxsd", + SseOpcode::Pmaxub => "pmaxub", + SseOpcode::Pmaxuw => "pmaxuw", + SseOpcode::Pmaxud => "pmaxud", + 
SseOpcode::Pminsb => "pminsb", + SseOpcode::Pminsw => "pminsw", + SseOpcode::Pminsd => "pminsd", + SseOpcode::Pminub => "pminub", + SseOpcode::Pminuw => "pminuw", + SseOpcode::Pminud => "pminud", SseOpcode::Pmulld => "pmulld", SseOpcode::Pmullw => "pmullw", SseOpcode::Pmuludq => "pmuludq", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 399d091198..341133fc85 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1780,6 +1780,18 @@ pub(crate) fn emit( SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2), SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2), SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2), + SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3), + SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2), + SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3), + SseOpcode::Pmaxub => (LegacyPrefixes::_66, 0x0FDE, 2), + SseOpcode::Pmaxuw => (LegacyPrefixes::_66, 0x0F383E, 3), + SseOpcode::Pmaxud => (LegacyPrefixes::_66, 0x0F383F, 3), + SseOpcode::Pminsb => (LegacyPrefixes::_66, 0x0F3838, 3), + SseOpcode::Pminsw => (LegacyPrefixes::_66, 0x0FEA, 2), + SseOpcode::Pminsd => (LegacyPrefixes::_66, 0x0F3839, 3), + SseOpcode::Pminub => (LegacyPrefixes::_66, 0x0FDA, 2), + SseOpcode::Pminuw => (LegacyPrefixes::_66, 0x0F383A, 3), + SseOpcode::Pminud => (LegacyPrefixes::_66, 0x0F383B, 3), SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3), SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2), SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 269922b764..05e645cf3c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3165,6 +3165,78 @@ fn test_x64_emit() { "pmuludq %xmm8, %xmm9", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaxsb, 
RegMem::reg(xmm15), w_xmm6), + "66410F383CF7", + "pmaxsb %xmm15, %xmm6", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6), + "66410FEEF7", + "pmaxsw %xmm15, %xmm6", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6), + "66410F383DF7", + "pmaxsd %xmm15, %xmm6", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1), + "66410FDECE", + "pmaxub %xmm14, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1), + "66410F383ECE", + "pmaxuw %xmm14, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1), + "66410F383FCE", + "pmaxud %xmm14, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9), + "66450F3838C8", + "pminsb %xmm8, %xmm9", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9), + "66450FEAC8", + "pminsw %xmm8, %xmm9", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9), + "66450F3839C8", + "pminsd %xmm8, %xmm9", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2), + "660FDAD3", + "pminub %xmm3, %xmm2", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2), + "660F383AD3", + "pminuw %xmm3, %xmm2", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2), + "660F383BD3", + "pminud %xmm3, %xmm2", + )); + insns.push(( Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2), "66410FEFD3", diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 22c1a7720d..2bde1c31f7 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -709,6 +709,48 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( } } + Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => { + let lhs = put_input_in_reg(ctx, inputs[0]); + let rhs = 
input_to_reg_mem(ctx, inputs[1]); + let dst = get_output_reg(ctx, outputs[0]); + let ty = ty.unwrap(); + if ty.is_vector() { + let sse_op = match op { + Opcode::Imax => match ty { + types::I8X16 => SseOpcode::Pmaxsb, + types::I16X8 => SseOpcode::Pmaxsw, + types::I32X4 => SseOpcode::Pmaxsd, + _ => panic!("Unsupported type for packed {} instruction: {}", op, ty), + }, + Opcode::Umax => match ty { + types::I8X16 => SseOpcode::Pmaxub, + types::I16X8 => SseOpcode::Pmaxuw, + types::I32X4 => SseOpcode::Pmaxud, + _ => panic!("Unsupported type for packed {} instruction: {}", op, ty), + }, + Opcode::Imin => match ty { + types::I8X16 => SseOpcode::Pminsb, + types::I16X8 => SseOpcode::Pminsw, + types::I32X4 => SseOpcode::Pminsd, + _ => panic!("Unsupported type for packed {} instruction: {}", op, ty), + }, + Opcode::Umin => match ty { + types::I8X16 => SseOpcode::Pminub, + types::I16X8 => SseOpcode::Pminuw, + types::I32X4 => SseOpcode::Pminud, + _ => panic!("Unsupported type for packed {} instruction: {}", op, ty), + }, + _ => unreachable!("This is a bug: the external and internal `match op` should be over the same opcodes."), + }; + + // Move the `lhs` to the same register as `dst`. + ctx.emit(Inst::gen_move(dst, lhs, ty)); + ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); + } else { + panic!("Unsupported type for {} instruction: {}", op, ty); + } + } + Opcode::Bnot => { let ty = ty.unwrap(); if ty.is_vector() {