[machinst x64]: enable packed saturated arithmetic
This commit is contained in:
2
build.rs
2
build.rs
@@ -184,9 +184,11 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
|
|||||||
("simd", "simd_i8x16_arith") => return false,
|
("simd", "simd_i8x16_arith") => return false,
|
||||||
("simd", "simd_i8x16_arith2") => return false,
|
("simd", "simd_i8x16_arith2") => return false,
|
||||||
("simd", "simd_i8x16_cmp") => return false,
|
("simd", "simd_i8x16_cmp") => return false,
|
||||||
|
("simd", "simd_i8x16_sat_arith") => return false,
|
||||||
("simd", "simd_i16x8_arith") => return false,
|
("simd", "simd_i16x8_arith") => return false,
|
||||||
("simd", "simd_i16x8_arith2") => return false,
|
("simd", "simd_i16x8_arith2") => return false,
|
||||||
("simd", "simd_i16x8_cmp") => return false,
|
("simd", "simd_i16x8_cmp") => return false,
|
||||||
|
("simd", "simd_i16x8_sat_arith") => return false,
|
||||||
("simd", "simd_i32x4_arith") => return false,
|
("simd", "simd_i32x4_arith") => return false,
|
||||||
("simd", "simd_i32x4_arith2") => return false,
|
("simd", "simd_i32x4_arith2") => return false,
|
||||||
("simd", "simd_i32x4_cmp") => return false,
|
("simd", "simd_i32x4_cmp") => return false,
|
||||||
|
|||||||
@@ -459,6 +459,10 @@ pub enum SseOpcode {
|
|||||||
Psubd,
|
Psubd,
|
||||||
Psubq,
|
Psubq,
|
||||||
Psubw,
|
Psubw,
|
||||||
|
Psubsb,
|
||||||
|
Psubsw,
|
||||||
|
Psubusb,
|
||||||
|
Psubusw,
|
||||||
Ptest,
|
Ptest,
|
||||||
Pxor,
|
Pxor,
|
||||||
Rcpss,
|
Rcpss,
|
||||||
@@ -582,6 +586,10 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Psubd
|
| SseOpcode::Psubd
|
||||||
| SseOpcode::Psubq
|
| SseOpcode::Psubq
|
||||||
| SseOpcode::Psubw
|
| SseOpcode::Psubw
|
||||||
|
| SseOpcode::Psubsb
|
||||||
|
| SseOpcode::Psubsw
|
||||||
|
| SseOpcode::Psubusb
|
||||||
|
| SseOpcode::Psubusw
|
||||||
| SseOpcode::Pxor
|
| SseOpcode::Pxor
|
||||||
| SseOpcode::Sqrtpd
|
| SseOpcode::Sqrtpd
|
||||||
| SseOpcode::Sqrtsd
|
| SseOpcode::Sqrtsd
|
||||||
@@ -736,6 +744,10 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Psubd => "psubd",
|
SseOpcode::Psubd => "psubd",
|
||||||
SseOpcode::Psubq => "psubq",
|
SseOpcode::Psubq => "psubq",
|
||||||
SseOpcode::Psubw => "psubw",
|
SseOpcode::Psubw => "psubw",
|
||||||
|
SseOpcode::Psubsb => "psubsb",
|
||||||
|
SseOpcode::Psubsw => "psubsw",
|
||||||
|
SseOpcode::Psubusb => "psubusb",
|
||||||
|
SseOpcode::Psubusw => "psubusw",
|
||||||
SseOpcode::Ptest => "ptest",
|
SseOpcode::Ptest => "ptest",
|
||||||
SseOpcode::Pxor => "pxor",
|
SseOpcode::Pxor => "pxor",
|
||||||
SseOpcode::Rcpss => "rcpss",
|
SseOpcode::Rcpss => "rcpss",
|
||||||
|
|||||||
@@ -1798,6 +1798,10 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
|
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
|
||||||
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
|
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
|
||||||
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
|
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
|
||||||
|
SseOpcode::Psubsb => (LegacyPrefixes::_66, 0x0FE8, 2),
|
||||||
|
SseOpcode::Psubsw => (LegacyPrefixes::_66, 0x0FE9, 2),
|
||||||
|
SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
|
||||||
|
SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
|
||||||
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
|
||||||
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
|
||||||
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
|
||||||
|
|||||||
@@ -3128,6 +3128,30 @@ fn test_x64_emit() {
|
|||||||
"paddusw %xmm1, %xmm8",
|
"paddusw %xmm1, %xmm8",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5),
|
||||||
|
"66410FE8E9",
|
||||||
|
"psubsb %xmm9, %xmm5",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6),
|
||||||
|
"660FE9F7",
|
||||||
|
"psubsw %xmm7, %xmm6",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13),
|
||||||
|
"66450FD8EC",
|
||||||
|
"psubusb %xmm12, %xmm13",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8),
|
||||||
|
"66440FD9C1",
|
||||||
|
"psubusw %xmm1, %xmm8",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
|
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
|
||||||
"66450FE0EC",
|
"66450FE0EC",
|
||||||
|
|||||||
@@ -546,6 +546,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::SaddSat
|
| Opcode::SaddSat
|
||||||
| Opcode::UaddSat
|
| Opcode::UaddSat
|
||||||
| Opcode::Isub
|
| Opcode::Isub
|
||||||
|
| Opcode::SsubSat
|
||||||
|
| Opcode::UsubSat
|
||||||
| Opcode::Imul
|
| Opcode::Imul
|
||||||
| Opcode::AvgRound
|
| Opcode::AvgRound
|
||||||
| Opcode::Band
|
| Opcode::Band
|
||||||
@@ -578,6 +580,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
types::I64X2 => SseOpcode::Psubq,
|
types::I64X2 => SseOpcode::Psubq,
|
||||||
_ => panic!("Unsupported type for packed isub instruction: {}", ty),
|
_ => panic!("Unsupported type for packed isub instruction: {}", ty),
|
||||||
},
|
},
|
||||||
|
Opcode::SsubSat => match ty {
|
||||||
|
types::I8X16 => SseOpcode::Psubsb,
|
||||||
|
types::I16X8 => SseOpcode::Psubsw,
|
||||||
|
_ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
|
||||||
|
},
|
||||||
|
Opcode::UsubSat => match ty {
|
||||||
|
types::I8X16 => SseOpcode::Psubusb,
|
||||||
|
types::I16X8 => SseOpcode::Psubusw,
|
||||||
|
_ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
|
||||||
|
},
|
||||||
Opcode::Imul => match ty {
|
Opcode::Imul => match ty {
|
||||||
types::I16X8 => SseOpcode::Pmullw,
|
types::I16X8 => SseOpcode::Pmullw,
|
||||||
types::I32X4 => SseOpcode::Pmulld,
|
types::I32X4 => SseOpcode::Pmulld,
|
||||||
|
|||||||
@@ -127,24 +127,24 @@ block0:
|
|||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
;function %sub_sat_i8x16() -> b1 {
|
function %sub_sat_i8x16() -> b1 {
|
||||||
;block0:
|
block0:
|
||||||
; v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
|
v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
|
||||||
; v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||||
;
|
|
||||||
; v2 = ssub_sat v0, v1
|
v2 = ssub_sat v0, v1
|
||||||
; v3 = extractlane v2, 0
|
v3 = extractlane v2, 0
|
||||||
; v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128
|
v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128
|
||||||
;
|
|
||||||
; ; now re-use 0x80 as an unsigned 128
|
; now re-use 0x80 as an unsigned 128
|
||||||
; v5 = usub_sat v0, v2
|
v5 = usub_sat v0, v2
|
||||||
; v6 = extractlane v5, 0
|
v6 = extractlane v5, 0
|
||||||
; v7 = icmp_imm eq v6, 0
|
v7 = icmp_imm eq v6, 0
|
||||||
;
|
|
||||||
; v8 = band v4, v7
|
v8 = band v4, v7
|
||||||
; return v8
|
return v8
|
||||||
;}
|
}
|
||||||
; _run
|
; run
|
||||||
|
|
||||||
;function %add_sub_f32x4() -> b1 {
|
;function %add_sub_f32x4() -> b1 {
|
||||||
;block0:
|
;block0:
|
||||||
|
|||||||
Reference in New Issue
Block a user