From c8ddf8a34ced624b2c1fbb63bc786059a6387b29 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 16:13:50 -0700 Subject: [PATCH] Encode `[u|s]widen_low` for x86 --- .../codegen/meta/src/isa/x86/encodings.rs | 12 +++ cranelift/codegen/meta/src/isa/x86/opcodes.rs | 4 +- .../codegen/meta/src/shared/instructions.rs | 81 +++++++++++++++++-- .../codegen/src/isa/aarch64/lower_inst.rs | 7 +- .../isa/x86/simd-conversion-binemit.clif | 9 ++- 5 files changed, 103 insertions(+), 10 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index a58348d49b..da04019a1b 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1669,6 +1669,7 @@ fn define_simd( let ssub_sat = shared.by_name("ssub_sat"); let store = shared.by_name("store"); let store_complex = shared.by_name("store_complex"); + let swiden_low = shared.by_name("swiden_low"); let uadd_sat = shared.by_name("uadd_sat"); let uload8x8 = shared.by_name("uload8x8"); let uload8x8_complex = shared.by_name("uload8x8_complex"); @@ -1678,6 +1679,7 @@ fn define_simd( let uload32x2_complex = shared.by_name("uload32x2_complex"); let snarrow = shared.by_name("snarrow"); let unarrow = shared.by_name("unarrow"); + let uwiden_low = shared.by_name("uwiden_low"); let ushr_imm = shared.by_name("ushr_imm"); let usub_sat = shared.by_name("usub_sat"); let vconst = shared.by_name("vconst"); @@ -1915,6 +1917,16 @@ fn define_simd( let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); } + for (ty, swiden_opcode, uwiden_opcode) in &[ + (I8, &PMOVSXBW[..], &PMOVZXBW[..]), + (I16, &PMOVSXWD[..], &PMOVZXWD[..]), + ] { + let isap = Some(use_sse41_simd); + let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); + let uwiden_low = 
uwiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); + } for ty in &[I8, I16, I32, I64] { e.enc_both_inferred_maybe_isap( x86_palignr.bind(vector(*ty, sse_vector_size)), diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 25685593a6..09c07c458f 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -477,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; /// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1. +/// integers in xmm1 (SSE4.1). pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; /// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit @@ -489,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; /// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1. +/// integers in xmm1 (SSE4.1). 
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index c78787ce82..1c06c4a325 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3883,9 +3883,9 @@ pub(crate) fn define( .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]), ); - let I16xN = &TypeVar::new( - "I16xN", - "A SIMD vector type containing integers 16-bits wide and up", + let I16or32xN = &TypeVar::new( + "I16or32xN", + "A SIMD vector type containing integer lanes 16 or 32 bits wide", TypeSetBuilder::new() .ints(16..32) .simd_lanes(4..8) @@ -3893,9 +3893,9 @@ pub(crate) fn define( .build(), ); - let x = &Operand::new("x", I16xN); - let y = &Operand::new("y", I16xN); - let a = &Operand::new("a", &I16xN.split_lanes()); + let x = &Operand::new("x", I16or32xN); + let y = &Operand::new("y", I16or32xN); + let a = &Operand::new("a", &I16or32xN.split_lanes()); ig.push( Inst::new( @@ -3934,6 +3934,75 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let I8or16xN = &TypeVar::new( + "I8or16xN", + "A SIMD vector type containing integer lanes 8 or 16 bits wide.", + TypeSetBuilder::new() + .ints(8..16) + .simd_lanes(8..16) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I8or16xN); + let a = &Operand::new("a", &I8or16xN.merge_lanes()); + + ig.push( + Inst::new( + "swiden_low", + r#" + Widen the low lanes of `x` using signed extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "swiden_high", + r#" + Widen the high lanes of `x` using signed extension. + + This will double the lane width and halve the number of lanes. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_low", + r#" + Widen the low lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_high", + r#" + Widen the high lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + let IntTo = &TypeVar::new( "IntTo", "A larger integer type with the same number of lanes", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 7fb878c87a..88751a1478 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2154,7 +2154,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( Opcode::AvgRound => unimplemented!(), Opcode::Iabs => unimplemented!(), - Opcode::Snarrow | Opcode::Unarrow => unimplemented!(), + Opcode::Snarrow + | Opcode::Unarrow + | Opcode::SwidenLow + | Opcode::SwidenHigh + | Opcode::UwidenLow + | Opcode::UwidenHigh => unimplemented!(), Opcode::TlsValue => unimplemented!(), } diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif index b1a95c52d7..72e3412279 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 has_ssse3=true +target x86_64 nehalem ; Ensure raw_bitcast emits no instructions. 
function %raw_bitcast_i16x8_to_b32x4() { @@ -17,3 +17,10 @@ block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]): [-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03 return } + +function %conversions_i16x8(i16x8) { +block0(v0: i16x8 [%xmm6]): +[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6 +[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de + return +}