x64: Lower tlsvalue, sqmul_round_sat, and uunarrow in ISLE (#4793)
Lower tlsvalue, sqmul_round_sat, and uunarrow in ISLE.
This commit is contained in:
@@ -486,16 +486,19 @@
|
|||||||
(XmmUninitializedValue (dst WritableXmm))
|
(XmmUninitializedValue (dst WritableXmm))
|
||||||
|
|
||||||
;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol
|
;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol
|
||||||
;; in `rax`.
|
;; in `dst`, which is constrained to `rax`.
|
||||||
(ElfTlsGetAddr (symbol ExternalName))
|
(ElfTlsGetAddr (symbol ExternalName)
|
||||||
|
(dst WritableGpr))
|
||||||
|
|
||||||
;; A Mach-O TLS symbol access. Returns address of the TLS symbol in
|
;; A Mach-O TLS symbol access. Returns address of the TLS symbol in
|
||||||
;; `rax`.
|
;; `dst`, which is constrained to `rax`.
|
||||||
(MachOTlsGetAddr (symbol ExternalName))
|
(MachOTlsGetAddr (symbol ExternalName)
|
||||||
|
(dst WritableGpr))
|
||||||
|
|
||||||
;; A Coff TLS symbol access. Returns address of the TLS symbol in
|
;; A Coff TLS symbol access. Returns address of the TLS symbol in
|
||||||
;; `rax`.
|
;; `dst`, which is constrained to `rax`.
|
||||||
(CoffTlsGetAddr (symbol ExternalName))
|
(CoffTlsGetAddr (symbol ExternalName)
|
||||||
|
(dst WritableGpr))
|
||||||
|
|
||||||
;; An unwind pseudoinstruction describing the state of the machine at
|
;; An unwind pseudoinstruction describing the state of the machine at
|
||||||
;; this program point.
|
;; this program point.
|
||||||
@@ -2275,6 +2278,11 @@
|
|||||||
(rule (x64_pmulhw src1 src2)
|
(rule (x64_pmulhw src1 src2)
|
||||||
(xmm_rm_r $I16X8 (SseOpcode.Pmulhw) src1 src2))
|
(xmm_rm_r $I16X8 (SseOpcode.Pmulhw) src1 src2))
|
||||||
|
|
||||||
|
;; Helper for creating `pmulhrsw` instructions.
|
||||||
|
(decl x64_pmulhrsw (Xmm XmmMem) Xmm)
|
||||||
|
(rule (x64_pmulhrsw src1 src2)
|
||||||
|
(xmm_rm_r $I16X8 (SseOpcode.Pmulhrsw) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `pmulhuw` instructions.
|
;; Helper for creating `pmulhuw` instructions.
|
||||||
(decl x64_pmulhuw (Xmm XmmMem) Xmm)
|
(decl x64_pmulhuw (Xmm XmmMem) Xmm)
|
||||||
(rule (x64_pmulhuw src1 src2)
|
(rule (x64_pmulhuw src1 src2)
|
||||||
@@ -2683,6 +2691,15 @@
|
|||||||
dst))))
|
dst))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
;; Helper for creating `shufps` instructions.
|
||||||
|
(decl x64_shufps (Xmm XmmMem u8) Xmm)
|
||||||
|
(rule (x64_shufps src1 src2 byte)
|
||||||
|
(xmm_rm_r_imm (SseOpcode.Shufps)
|
||||||
|
src1
|
||||||
|
src2
|
||||||
|
byte
|
||||||
|
(OperandSize.Size32)))
|
||||||
|
|
||||||
;; Helper for creating `MInst.XmmUnaryRmR` instructions.
|
;; Helper for creating `MInst.XmmUnaryRmR` instructions.
|
||||||
(decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm)
|
(decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm)
|
||||||
(rule (xmm_unary_rm_r op src)
|
(rule (xmm_unary_rm_r op src)
|
||||||
@@ -3733,6 +3750,42 @@
|
|||||||
(decl swizzle_zero_mask () VCodeConstant)
|
(decl swizzle_zero_mask () VCodeConstant)
|
||||||
(extern constructor swizzle_zero_mask swizzle_zero_mask)
|
(extern constructor swizzle_zero_mask swizzle_zero_mask)
|
||||||
|
|
||||||
|
;;;; TLS Values ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Helper for emitting ElfTlsGetAddr.
|
||||||
|
(decl elf_tls_get_addr (ExternalName) Gpr)
|
||||||
|
(rule (elf_tls_get_addr name)
|
||||||
|
(let ((dst WritableGpr (temp_writable_gpr))
|
||||||
|
(_ Unit (emit (MInst.ElfTlsGetAddr name dst))))
|
||||||
|
dst))
|
||||||
|
|
||||||
|
;; Helper for emitting MachOTlsGetAddr.
|
||||||
|
(decl macho_tls_get_addr (ExternalName) Gpr)
|
||||||
|
(rule (macho_tls_get_addr name)
|
||||||
|
(let ((dst WritableGpr (temp_writable_gpr))
|
||||||
|
(_ Unit (emit (MInst.MachOTlsGetAddr name dst))))
|
||||||
|
dst))
|
||||||
|
|
||||||
|
;; Helper for emitting CoffTlsGetAddr.
|
||||||
|
(decl coff_tls_get_addr (ExternalName) Gpr)
|
||||||
|
(rule (coff_tls_get_addr name)
|
||||||
|
(let ((dst WritableGpr (temp_writable_gpr))
|
||||||
|
(_ Unit (emit (MInst.CoffTlsGetAddr name dst))))
|
||||||
|
dst))
|
||||||
|
|
||||||
|
;;;; sqmul_round_sat ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(decl sqmul_round_sat_mask () VCodeConstant)
|
||||||
|
(extern constructor sqmul_round_sat_mask sqmul_round_sat_mask)
|
||||||
|
|
||||||
|
;;;; uunarrow ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(decl uunarrow_umax_mask () VCodeConstant)
|
||||||
|
(extern constructor uunarrow_umax_mask uunarrow_umax_mask)
|
||||||
|
|
||||||
|
(decl uunarrow_uint_mask () VCodeConstant)
|
||||||
|
(extern constructor uunarrow_uint_mask uunarrow_uint_mask)
|
||||||
|
|
||||||
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(convert Gpr InstOutput output_gpr)
|
(convert Gpr InstOutput output_gpr)
|
||||||
|
|||||||
@@ -2915,7 +2915,10 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::ElfTlsGetAddr { ref symbol } => {
|
Inst::ElfTlsGetAddr { ref symbol, dst } => {
|
||||||
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
|
debug_assert_eq!(dst, regs::rax());
|
||||||
|
|
||||||
// N.B.: Must be exactly this byte sequence; the linker requires it,
|
// N.B.: Must be exactly this byte sequence; the linker requires it,
|
||||||
// because it must know how to rewrite the bytes.
|
// because it must know how to rewrite the bytes.
|
||||||
|
|
||||||
@@ -2941,7 +2944,10 @@ pub(crate) fn emit(
|
|||||||
sink.put4(0); // offset
|
sink.put4(0); // offset
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::MachOTlsGetAddr { ref symbol } => {
|
Inst::MachOTlsGetAddr { ref symbol, dst } => {
|
||||||
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
|
debug_assert_eq!(dst, regs::rax());
|
||||||
|
|
||||||
// movq gv@tlv(%rip), %rdi
|
// movq gv@tlv(%rip), %rdi
|
||||||
sink.put1(0x48); // REX.w
|
sink.put1(0x48); // REX.w
|
||||||
sink.put1(0x8b); // MOV
|
sink.put1(0x8b); // MOV
|
||||||
@@ -2954,7 +2960,10 @@ pub(crate) fn emit(
|
|||||||
sink.put1(0x17);
|
sink.put1(0x17);
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::CoffTlsGetAddr { ref symbol } => {
|
Inst::CoffTlsGetAddr { ref symbol, dst } => {
|
||||||
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
|
debug_assert_eq!(dst, regs::rax());
|
||||||
|
|
||||||
// See: https://gcc.godbolt.org/z/M8or9x6ss
|
// See: https://gcc.godbolt.org/z/M8or9x6ss
|
||||||
// And: https://github.com/bjorn3/rustc_codegen_cranelift/issues/388#issuecomment-532930282
|
// And: https://github.com/bjorn3/rustc_codegen_cranelift/issues/388#issuecomment-532930282
|
||||||
|
|
||||||
|
|||||||
@@ -95,6 +95,24 @@ impl Inst {
|
|||||||
let dst = WritableGpr::from_writable_reg(dst).unwrap();
|
let dst = WritableGpr::from_writable_reg(dst).unwrap();
|
||||||
Inst::Setcc { cc, dst }
|
Inst::Setcc { cc, dst }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn xmm_rm_r_imm(
|
||||||
|
op: SseOpcode,
|
||||||
|
src: RegMem,
|
||||||
|
dst: Writable<Reg>,
|
||||||
|
imm: u8,
|
||||||
|
size: OperandSize,
|
||||||
|
) -> Inst {
|
||||||
|
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
|
||||||
|
Inst::XmmRmRImm {
|
||||||
|
op,
|
||||||
|
src1: dst.to_reg(),
|
||||||
|
src2: src,
|
||||||
|
dst,
|
||||||
|
imm,
|
||||||
|
size,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -4738,6 +4756,7 @@ fn test_x64_emit() {
|
|||||||
insns.push((
|
insns.push((
|
||||||
Inst::ElfTlsGetAddr {
|
Inst::ElfTlsGetAddr {
|
||||||
symbol: ExternalName::User(UserExternalNameRef::new(0)),
|
symbol: ExternalName::User(UserExternalNameRef::new(0)),
|
||||||
|
dst: WritableGpr::from_writable_reg(w_rax).unwrap(),
|
||||||
},
|
},
|
||||||
"66488D3D00000000666648E800000000",
|
"66488D3D00000000666648E800000000",
|
||||||
"%rax = elf_tls_get_addr User(userextname0)",
|
"%rax = elf_tls_get_addr User(userextname0)",
|
||||||
@@ -4746,6 +4765,7 @@ fn test_x64_emit() {
|
|||||||
insns.push((
|
insns.push((
|
||||||
Inst::MachOTlsGetAddr {
|
Inst::MachOTlsGetAddr {
|
||||||
symbol: ExternalName::User(UserExternalNameRef::new(0)),
|
symbol: ExternalName::User(UserExternalNameRef::new(0)),
|
||||||
|
dst: WritableGpr::from_writable_reg(w_rax).unwrap(),
|
||||||
},
|
},
|
||||||
"488B3D00000000FF17",
|
"488B3D00000000FF17",
|
||||||
"%rax = macho_tls_get_addr User(userextname0)",
|
"%rax = macho_tls_get_addr User(userextname0)",
|
||||||
@@ -4754,6 +4774,7 @@ fn test_x64_emit() {
|
|||||||
insns.push((
|
insns.push((
|
||||||
Inst::CoffTlsGetAddr {
|
Inst::CoffTlsGetAddr {
|
||||||
symbol: ExternalName::User(UserExternalNameRef::new(0)),
|
symbol: ExternalName::User(UserExternalNameRef::new(0)),
|
||||||
|
dst: WritableGpr::from_writable_reg(w_rax).unwrap(),
|
||||||
},
|
},
|
||||||
"8B050000000065488B0C2558000000488B04C1488D8000000000",
|
"8B050000000065488B0C2558000000488B04C1488D8000000000",
|
||||||
"%rax = coff_tls_get_addr User(userextname0)",
|
"%rax = coff_tls_get_addr User(userextname0)",
|
||||||
|
|||||||
@@ -263,12 +263,6 @@ impl Inst {
|
|||||||
Inst::MovRR { size, src, dst }
|
Inst::MovRR { size, src, dst }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable<Reg>, ty: Type) -> Inst {
|
|
||||||
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
|
||||||
debug_assert!(ty.is_vector() && ty.bits() == 128);
|
|
||||||
Inst::XmmLoadConst { src, dst, ty }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convenient helper for unary float operations.
|
/// Convenient helper for unary float operations.
|
||||||
pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||||
src.assert_regclass_is(RegClass::Float);
|
src.assert_regclass_is(RegClass::Float);
|
||||||
@@ -377,24 +371,6 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn xmm_rm_r_imm(
|
|
||||||
op: SseOpcode,
|
|
||||||
src: RegMem,
|
|
||||||
dst: Writable<Reg>,
|
|
||||||
imm: u8,
|
|
||||||
size: OperandSize,
|
|
||||||
) -> Inst {
|
|
||||||
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
|
|
||||||
Inst::XmmRmRImm {
|
|
||||||
op,
|
|
||||||
src1: dst.to_reg(),
|
|
||||||
src2: src,
|
|
||||||
dst,
|
|
||||||
imm,
|
|
||||||
size,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||||
src.assert_regclass_is(RegClass::Int);
|
src.assert_regclass_is(RegClass::Int);
|
||||||
debug_assert!(dst.to_reg().class() == RegClass::Int);
|
debug_assert!(dst.to_reg().class() == RegClass::Int);
|
||||||
@@ -1544,16 +1520,19 @@ impl PrettyPrint for Inst {
|
|||||||
|
|
||||||
Inst::Ud2 { trap_code } => format!("ud2 {}", trap_code),
|
Inst::Ud2 { trap_code } => format!("ud2 {}", trap_code),
|
||||||
|
|
||||||
Inst::ElfTlsGetAddr { ref symbol } => {
|
Inst::ElfTlsGetAddr { ref symbol, dst } => {
|
||||||
format!("%rax = elf_tls_get_addr {:?}", symbol)
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
|
format!("{} = elf_tls_get_addr {:?}", dst, symbol)
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::MachOTlsGetAddr { ref symbol } => {
|
Inst::MachOTlsGetAddr { ref symbol, dst } => {
|
||||||
format!("%rax = macho_tls_get_addr {:?}", symbol)
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
|
format!("{} = macho_tls_get_addr {:?}", dst, symbol)
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::CoffTlsGetAddr { ref symbol } => {
|
Inst::CoffTlsGetAddr { ref symbol, dst } => {
|
||||||
format!("%rax = coff_tls_get_addr {:?}", symbol)
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
|
format!("{} = coff_tls_get_addr {:?}", dst, symbol)
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::Unwind { inst } => {
|
Inst::Unwind { inst } => {
|
||||||
@@ -1994,8 +1973,8 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
// No registers are used.
|
// No registers are used.
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::ElfTlsGetAddr { .. } | Inst::MachOTlsGetAddr { .. } => {
|
Inst::ElfTlsGetAddr { dst, .. } | Inst::MachOTlsGetAddr { dst, .. } => {
|
||||||
collector.reg_def(Writable::from_reg(regs::rax()));
|
collector.reg_fixed_def(dst.to_writable_reg(), regs::rax());
|
||||||
// All caller-saves are clobbered.
|
// All caller-saves are clobbered.
|
||||||
//
|
//
|
||||||
// We use the SysV calling convention here because the
|
// We use the SysV calling convention here because the
|
||||||
@@ -2007,12 +1986,12 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
collector.reg_clobbers(clobbers);
|
collector.reg_clobbers(clobbers);
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::CoffTlsGetAddr { .. } => {
|
Inst::CoffTlsGetAddr { dst, .. } => {
|
||||||
// We also use the gs register. But that register is not allocatable by the
|
// We also use the gs register. But that register is not allocatable by the
|
||||||
// register allocator, so we don't need to mark it as used here.
|
// register allocator, so we don't need to mark it as used here.
|
||||||
|
|
||||||
// We use %rax to set the address
|
// We use %rax to set the address
|
||||||
collector.reg_def(Writable::from_reg(regs::rax()));
|
collector.reg_fixed_def(dst.to_writable_reg(), regs::rax());
|
||||||
|
|
||||||
// We use %rcx as a temporary variable to load the _tls_index
|
// We use %rcx as a temporary variable to load the _tls_index
|
||||||
collector.reg_def(Writable::from_reg(regs::rcx()));
|
collector.reg_def(Writable::from_reg(regs::rcx()));
|
||||||
|
|||||||
@@ -3694,3 +3694,66 @@
|
|||||||
(lo Reg (value_regs_get regs 0))
|
(lo Reg (value_regs_get regs 0))
|
||||||
(hi Reg (value_regs_get regs 1)))
|
(hi Reg (value_regs_get regs 1)))
|
||||||
(output_pair lo hi)))
|
(output_pair lo hi)))
|
||||||
|
|
||||||
|
;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (tls_value (symbol_value_data name _ _)))
|
||||||
|
(if (tls_model_is_elf_gd))
|
||||||
|
(elf_tls_get_addr name))
|
||||||
|
|
||||||
|
(rule (lower (tls_value (symbol_value_data name _ _)))
|
||||||
|
(if (tls_model_is_macho))
|
||||||
|
(macho_tls_get_addr name))
|
||||||
|
|
||||||
|
(rule (lower (tls_value (symbol_value_data name _ _)))
|
||||||
|
(if (tls_model_is_coff))
|
||||||
|
(coff_tls_get_addr name))
|
||||||
|
|
||||||
|
;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (sqmul_round_sat qx @ (value_type $I16X8) qy))
|
||||||
|
(let ((src1 Xmm qx)
|
||||||
|
(src2 Xmm qy)
|
||||||
|
|
||||||
|
(mask Xmm (x64_xmm_load_const $I16X8 (sqmul_round_sat_mask)))
|
||||||
|
(dst Xmm (x64_pmulhrsw src1 src2))
|
||||||
|
(cmp Xmm (x64_pcmpeqw mask dst)))
|
||||||
|
(x64_pxor dst cmp)))
|
||||||
|
|
||||||
|
;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; TODO: currently we only lower a special case of `uunarrow` needed to support
|
||||||
|
;; the translation of wasm's i32x4.trunc_sat_f64x2_u_zero operation.
|
||||||
|
;; https://github.com/bytecodealliance/wasmtime/issues/4791
|
||||||
|
;;
|
||||||
|
;; y = i32x4.trunc_sat_f64x2_u_zero(x) is lowered to:
|
||||||
|
;; MOVAPD xmm_y, xmm_x
|
||||||
|
;; XORPD xmm_tmp, xmm_tmp
|
||||||
|
;; MAXPD xmm_y, xmm_tmp
|
||||||
|
;; MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)]
|
||||||
|
;; ROUNDPD xmm_y, xmm_y, 0x0B
|
||||||
|
;; ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)]
|
||||||
|
;; SHUFPS xmm_y, xmm_tmp, 0x88
|
||||||
|
(rule (lower (uunarrow (fcvt_to_uint_sat src @ (value_type $F64X2))
|
||||||
|
(vconst (u128_from_constant 0))))
|
||||||
|
(let ((src Xmm src)
|
||||||
|
|
||||||
|
;; MOVAPD xmm_y, xmm_x
|
||||||
|
;; XORPD xmm_tmp, xmm_tmp
|
||||||
|
(zeros Xmm (x64_xorpd src src))
|
||||||
|
(dst Xmm (x64_maxpd src zeros))
|
||||||
|
|
||||||
|
(umax_mask Xmm (x64_xmm_load_const $F64X2 (uunarrow_umax_mask)))
|
||||||
|
|
||||||
|
;; MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)]
|
||||||
|
(dst Xmm (x64_minpd dst umax_mask))
|
||||||
|
|
||||||
|
;; ROUNDPD xmm_y, xmm_y, 0x0B
|
||||||
|
(dst Xmm (x64_roundpd dst (RoundImm.RoundZero)))
|
||||||
|
|
||||||
|
;; ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)]
|
||||||
|
(uint_mask Xmm (x64_xmm_load_const $F64X2 (uunarrow_uint_mask)))
|
||||||
|
(dst Xmm (x64_addpd dst uint_mask)))
|
||||||
|
|
||||||
|
;; SHUFPS xmm_y, xmm_tmp, 0x88
|
||||||
|
(x64_shufps dst zeros 0x88)))
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ use crate::isa::{x64::settings as x64_settings, x64::X64Backend, CallConv};
|
|||||||
use crate::machinst::lower::*;
|
use crate::machinst::lower::*;
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::result::CodegenResult;
|
use crate::result::CodegenResult;
|
||||||
use crate::settings::{Flags, TlsModel};
|
use crate::settings::Flags;
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
use target_lexicon::Triple;
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
@@ -304,33 +304,15 @@ fn lower_insn_to_regs(
|
|||||||
isa_flags: &x64_settings::Flags,
|
isa_flags: &x64_settings::Flags,
|
||||||
triple: &Triple,
|
triple: &Triple,
|
||||||
) -> CodegenResult<()> {
|
) -> CodegenResult<()> {
|
||||||
let op = ctx.data(insn).opcode();
|
|
||||||
|
|
||||||
let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
|
|
||||||
.map(|i| InsnInput { insn, input: i })
|
|
||||||
.collect();
|
|
||||||
let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
|
let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
|
||||||
.map(|i| InsnOutput { insn, output: i })
|
.map(|i| InsnOutput { insn, output: i })
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let ty = if outputs.len() > 0 {
|
|
||||||
Some(ctx.output_ty(insn, 0))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Ok(()) = isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) {
|
if let Ok(()) = isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let implemented_in_isle = |ctx: &mut Lower<Inst>| {
|
let op = ctx.data(insn).opcode();
|
||||||
unreachable!(
|
|
||||||
"implemented in ISLE: inst = `{}`, type = `{:?}`",
|
|
||||||
ctx.dfg().display_inst(insn),
|
|
||||||
ty
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
match op {
|
match op {
|
||||||
Opcode::Iconst
|
Opcode::Iconst
|
||||||
| Opcode::Bconst
|
| Opcode::Bconst
|
||||||
@@ -474,152 +456,25 @@ fn lower_insn_to_regs(
|
|||||||
| Opcode::VallTrue
|
| Opcode::VallTrue
|
||||||
| Opcode::VhighBits
|
| Opcode::VhighBits
|
||||||
| Opcode::Iconcat
|
| Opcode::Iconcat
|
||||||
| Opcode::Isplit => {
|
| Opcode::Isplit
|
||||||
implemented_in_isle(ctx);
|
| Opcode::TlsValue
|
||||||
|
| Opcode::SqmulRoundSat
|
||||||
|
| Opcode::Uunarrow => {
|
||||||
|
let ty = if outputs.len() > 0 {
|
||||||
|
Some(ctx.output_ty(insn, 0))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
unreachable!(
|
||||||
|
"implemented in ISLE: inst = `{}`, type = `{:?}`",
|
||||||
|
ctx.dfg().display_inst(insn),
|
||||||
|
ty
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"),
|
Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"),
|
||||||
|
|
||||||
Opcode::TlsValue => {
|
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let (name, _, _) = ctx.symbol_value(insn).unwrap();
|
|
||||||
let symbol = name.clone();
|
|
||||||
|
|
||||||
match flags.tls_model() {
|
|
||||||
TlsModel::ElfGd => {
|
|
||||||
ctx.emit(Inst::ElfTlsGetAddr { symbol });
|
|
||||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
|
||||||
}
|
|
||||||
TlsModel::Macho => {
|
|
||||||
ctx.emit(Inst::MachOTlsGetAddr { symbol });
|
|
||||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
|
||||||
}
|
|
||||||
TlsModel::Coff => {
|
|
||||||
ctx.emit(Inst::CoffTlsGetAddr { symbol });
|
|
||||||
ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
|
|
||||||
}
|
|
||||||
_ => todo!(
|
|
||||||
"Unimplemented TLS model in x64 backend: {:?}",
|
|
||||||
flags.tls_model()
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::SqmulRoundSat => {
|
|
||||||
// Lane-wise saturating rounding multiplication in Q15 format
|
|
||||||
// Optimal lowering taken from instruction proposal https://github.com/WebAssembly/simd/pull/365
|
|
||||||
// y = i16x8.q15mulr_sat_s(a, b) is lowered to:
|
|
||||||
//MOVDQA xmm_y, xmm_a
|
|
||||||
//MOVDQA xmm_tmp, wasm_i16x8_splat(0x8000)
|
|
||||||
//PMULHRSW xmm_y, xmm_b
|
|
||||||
//PCMPEQW xmm_tmp, xmm_y
|
|
||||||
//PXOR xmm_y, xmm_tmp
|
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
|
||||||
let src1 = put_input_in_reg(ctx, inputs[0]);
|
|
||||||
let src2 = put_input_in_reg(ctx, inputs[1]);
|
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
|
|
||||||
ctx.emit(Inst::gen_move(dst, src1, input_ty));
|
|
||||||
static SAT_MASK: [u8; 16] = [
|
|
||||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
|
||||||
0x00, 0x80,
|
|
||||||
];
|
|
||||||
let mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&SAT_MASK));
|
|
||||||
let mask = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::xmm_load_const(mask_const, mask, types::I16X8));
|
|
||||||
|
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmulhrsw, RegMem::reg(src2), dst));
|
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
|
||||||
SseOpcode::Pcmpeqw,
|
|
||||||
RegMem::reg(dst.to_reg()),
|
|
||||||
mask,
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
|
||||||
SseOpcode::Pxor,
|
|
||||||
RegMem::reg(mask.to_reg()),
|
|
||||||
dst,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Uunarrow => {
|
|
||||||
if let Some(fcvt_inst) = matches_input(ctx, inputs[0], Opcode::FcvtToUintSat) {
|
|
||||||
//y = i32x4.trunc_sat_f64x2_u_zero(x) is lowered to:
|
|
||||||
//MOVAPD xmm_y, xmm_x
|
|
||||||
//XORPD xmm_tmp, xmm_tmp
|
|
||||||
//MAXPD xmm_y, xmm_tmp
|
|
||||||
//MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)]
|
|
||||||
//ROUNDPD xmm_y, xmm_y, 0x0B
|
|
||||||
//ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)]
|
|
||||||
//SHUFPS xmm_y, xmm_xmp, 0x88
|
|
||||||
|
|
||||||
let fcvt_input = InsnInput {
|
|
||||||
insn: fcvt_inst,
|
|
||||||
input: 0,
|
|
||||||
};
|
|
||||||
let input_ty = ctx.input_ty(fcvt_inst, 0);
|
|
||||||
let output_ty = ctx.output_ty(insn, 0);
|
|
||||||
let src = put_input_in_reg(ctx, fcvt_input);
|
|
||||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
|
|
||||||
ctx.emit(Inst::gen_move(dst, src, input_ty));
|
|
||||||
let tmp1 = ctx.alloc_tmp(output_ty).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::from(tmp1), tmp1));
|
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Maxpd, RegMem::from(tmp1), dst));
|
|
||||||
|
|
||||||
// 4294967295.0 is equivalent to 0x41EFFFFFFFE00000
|
|
||||||
static UMAX_MASK: [u8; 16] = [
|
|
||||||
0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF,
|
|
||||||
0xFF, 0xEF, 0x41,
|
|
||||||
];
|
|
||||||
let umax_const = ctx.use_constant(VCodeConstantData::WellKnown(&UMAX_MASK));
|
|
||||||
let umax_mask = ctx.alloc_tmp(types::F64X2).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::xmm_load_const(umax_const, umax_mask, types::F64X2));
|
|
||||||
|
|
||||||
//MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)]
|
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
|
||||||
SseOpcode::Minpd,
|
|
||||||
RegMem::from(umax_mask),
|
|
||||||
dst,
|
|
||||||
));
|
|
||||||
//ROUNDPD xmm_y, xmm_y, 0x0B
|
|
||||||
ctx.emit(Inst::xmm_rm_r_imm(
|
|
||||||
SseOpcode::Roundpd,
|
|
||||||
RegMem::reg(dst.to_reg()),
|
|
||||||
dst,
|
|
||||||
RoundImm::RoundZero.encode(),
|
|
||||||
OperandSize::Size32,
|
|
||||||
));
|
|
||||||
//ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)]
|
|
||||||
static UINT_MASK: [u8; 16] = [
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x30, 0x43,
|
|
||||||
];
|
|
||||||
let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK));
|
|
||||||
let uint_mask = ctx.alloc_tmp(types::F64X2).only_reg().unwrap();
|
|
||||||
ctx.emit(Inst::xmm_load_const(
|
|
||||||
uint_mask_const,
|
|
||||||
uint_mask,
|
|
||||||
types::F64X2,
|
|
||||||
));
|
|
||||||
ctx.emit(Inst::xmm_rm_r(
|
|
||||||
SseOpcode::Addpd,
|
|
||||||
RegMem::from(uint_mask),
|
|
||||||
dst,
|
|
||||||
));
|
|
||||||
|
|
||||||
//SHUFPS xmm_y, xmm_xmp, 0x88
|
|
||||||
ctx.emit(Inst::xmm_rm_r_imm(
|
|
||||||
SseOpcode::Shufps,
|
|
||||||
RegMem::reg(tmp1.to_reg()),
|
|
||||||
dst,
|
|
||||||
0x88,
|
|
||||||
OperandSize::Size32,
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
println!("Did not match fcvt input!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unimplemented opcodes below. These are not currently used by Wasm
|
// Unimplemented opcodes below. These are not currently used by Wasm
|
||||||
// lowering or other known embeddings, but should be either supported or
|
// lowering or other known embeddings, but should be either supported or
|
||||||
// removed eventually
|
// removed eventually
|
||||||
|
|||||||
@@ -911,6 +911,39 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE))
|
.use_constant(VCodeConstantData::WellKnown(&ZERO_MASK_VALUE))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn sqmul_round_sat_mask(&mut self) -> VCodeConstant {
|
||||||
|
static SAT_MASK: [u8; 16] = [
|
||||||
|
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||||
|
0x00, 0x80,
|
||||||
|
];
|
||||||
|
self.lower_ctx
|
||||||
|
.use_constant(VCodeConstantData::WellKnown(&SAT_MASK))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn uunarrow_umax_mask(&mut self) -> VCodeConstant {
|
||||||
|
// 4294967295.0 is equivalent to 0x41EFFFFFFFE00000
|
||||||
|
static UMAX_MASK: [u8; 16] = [
|
||||||
|
0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF, 0xEF, 0x41, 0x00, 0x00, 0xE0, 0xFF, 0xFF, 0xFF,
|
||||||
|
0xEF, 0x41,
|
||||||
|
];
|
||||||
|
|
||||||
|
self.lower_ctx
|
||||||
|
.use_constant(VCodeConstantData::WellKnown(&UMAX_MASK))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn uunarrow_uint_mask(&mut self) -> VCodeConstant {
|
||||||
|
static UINT_MASK: [u8; 16] = [
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x30, 0x43,
|
||||||
|
];
|
||||||
|
|
||||||
|
self.lower_ctx
|
||||||
|
.use_constant(VCodeConstantData::WellKnown(&UINT_MASK))
|
||||||
|
}
|
||||||
|
|
||||||
fn emit_div_or_rem(
|
fn emit_div_or_rem(
|
||||||
&mut self,
|
&mut self,
|
||||||
kind: &DivOrRemKind,
|
kind: &DivOrRemKind,
|
||||||
|
|||||||
@@ -664,6 +664,24 @@ macro_rules! isle_prelude_methods {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn tls_model_is_macho(&mut self) -> Option<()> {
|
||||||
|
if self.flags.tls_model() == TlsModel::Macho {
|
||||||
|
Some(())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn tls_model_is_coff(&mut self) -> Option<()> {
|
||||||
|
if self.flags.tls_model() == TlsModel::Coff {
|
||||||
|
Some(())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn func_ref_data(&mut self, func_ref: FuncRef) -> (SigRef, ExternalName, RelocDistance) {
|
fn func_ref_data(&mut self, func_ref: FuncRef) -> (SigRef, ExternalName, RelocDistance) {
|
||||||
let funcdata = &self.lower_ctx.dfg().ext_funcs[func_ref];
|
let funcdata = &self.lower_ctx.dfg().ext_funcs[func_ref];
|
||||||
|
|||||||
@@ -790,6 +790,12 @@
|
|||||||
(decl pure tls_model_is_elf_gd () Unit)
|
(decl pure tls_model_is_elf_gd () Unit)
|
||||||
(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd)
|
(extern constructor tls_model_is_elf_gd tls_model_is_elf_gd)
|
||||||
|
|
||||||
|
(decl pure tls_model_is_macho () Unit)
|
||||||
|
(extern constructor tls_model_is_macho tls_model_is_macho)
|
||||||
|
|
||||||
|
(decl pure tls_model_is_coff () Unit)
|
||||||
|
(extern constructor tls_model_is_coff tls_model_is_coff)
|
||||||
|
|
||||||
;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Helpers for accessing instruction data ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; Accessor for `FuncRef`.
|
;; Accessor for `FuncRef`.
|
||||||
|
|||||||
19
cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif
Normal file
19
cranelift/filetests/filetests/isa/x64/sqmul_round_sat.clif
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
test compile precise-output
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function %f1(i16x8, i16x8) -> i16x8 {
|
||||||
|
block0(v0: i16x8, v1: i16x8):
|
||||||
|
v2 = sqmul_round_sat v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; load_const VCodeConstant(0), %xmm7
|
||||||
|
; pmulhrsw %xmm0, %xmm1, %xmm0
|
||||||
|
; pcmpeqw %xmm7, %xmm0, %xmm7
|
||||||
|
; pxor %xmm0, %xmm7, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
26
cranelift/filetests/filetests/isa/x64/uunarrow.clif
Normal file
26
cranelift/filetests/filetests/isa/x64/uunarrow.clif
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
test compile precise-output
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function %f1(f64x2) -> i32x4 {
|
||||||
|
block0(v0: f64x2):
|
||||||
|
v1 = fcvt_to_uint_sat.i64x2 v0
|
||||||
|
v2 = vconst.i64x2 [0 0]
|
||||||
|
v3 = uunarrow v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; xorpd %xmm3, %xmm3, %xmm3
|
||||||
|
; maxpd %xmm0, %xmm3, %xmm0
|
||||||
|
; load_const VCodeConstant(0), %xmm7
|
||||||
|
; minpd %xmm0, %xmm7, %xmm0
|
||||||
|
; roundpd $3, %xmm0, %xmm0
|
||||||
|
; load_const VCodeConstant(1), %xmm13
|
||||||
|
; addpd %xmm0, %xmm13, %xmm0
|
||||||
|
; shufps $136, %xmm0, %xmm3, %xmm0
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
|
||||||
Reference in New Issue
Block a user