From 2d676d838fc718e62a8b5c46ba00745b1f8590c9 Mon Sep 17 00:00:00 2001 From: Johnnie Birch Date: Sat, 5 Jun 2021 23:15:50 -0700 Subject: [PATCH] Implements f64x2.convert_low_i32x4_u for x64 --- build.rs | 1 - .../codegen/meta/src/shared/instructions.rs | 21 ++++++++ .../codegen/src/isa/aarch64/lower_inst.rs | 1 + cranelift/codegen/src/isa/s390x/lower.rs | 1 + cranelift/codegen/src/isa/x64/inst/args.rs | 3 ++ cranelift/codegen/src/isa/x64/inst/emit.rs | 1 + .../codegen/src/isa/x64/inst/emit_tests.rs | 6 +++ cranelift/codegen/src/isa/x64/lower.rs | 52 +++++++++++++++++++ cranelift/codegen/src/isa/x64/mod.rs | 6 +-- cranelift/interpreter/src/step.rs | 1 + cranelift/wasm/src/code_translator.rs | 7 ++- 11 files changed, 94 insertions(+), 6 deletions(-) diff --git a/build.rs b/build.rs index 59d0914562..f049208a60 100644 --- a/build.rs +++ b/build.rs @@ -189,7 +189,6 @@ fn x64_should_panic(testsuite: &str, testname: &str, strategy: &str) -> bool { } match (testsuite, testname) { - ("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6 ("simd", "simd_i16x8_extadd_pairwise_i8x16") => return true, ("simd", "simd_i16x8_extmul_i8x16") => return true, ("simd", "simd_i16x8_q15mulr_sat_s") => return true, diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 06e20c6198..1964ea8f75 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -4457,6 +4457,27 @@ pub(crate) fn define( .operands_out(vec![a]), ); + ig.push( + Inst::new( + "fcvt_low_from_uint", + r#" + + Converts packed unsigned 32-bit integers to packed double precision floating point. + + Considering only the low half of the register, each lane in `x` is interpreted as an + unsigned 32-bit integer that is then converted to a double precision float. 
The + converted lanes occupy twice the number of bits of the source lanes. No rounding should be needed + for the resulting float. + + The result type will have half as many vector lanes as the input. + + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + let WideInt = &TypeVar::new( "WideInt", "An integer type with lanes from `i16` upwards", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 8c46602cbd..67e99917e6 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3557,6 +3557,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::ConstAddr | Opcode::FcvtLowFromSint + | Opcode::FcvtLowFromUint | Opcode::Fvdemote | Opcode::FvpromoteLow | Opcode::Vconcat diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 8ab66add04..9cc33eff50 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -2867,6 +2867,7 @@ fn lower_insn_to_regs>( | Opcode::UwidenHigh | Opcode::WideningPairwiseDotProductS | Opcode::SqmulRoundSat + | Opcode::FcvtLowFromUint | Opcode::FvpromoteLow | Opcode::Fvdemote => { // TODO diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index c362075061..13ad1ca836 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -635,6 +635,7 @@ pub enum SseOpcode { Subsd, Ucomiss, Ucomisd, + Unpcklps, Xorps, Xorpd, } @@ -675,6 +676,7 @@ impl SseOpcode { | SseOpcode::Subps | SseOpcode::Subss | SseOpcode::Ucomiss + | SseOpcode::Unpcklps | SseOpcode::Xorps => SSE, SseOpcode::Addpd @@ -993,6 +995,7 @@ impl fmt::Debug for SseOpcode { SseOpcode::Subsd => "subsd", SseOpcode::Ucomiss => "ucomiss", SseOpcode::Ucomisd => "ucomisd", + SseOpcode::Unpcklps => "unpcklps", SseOpcode::Xorps => "xorps", SseOpcode::Xorpd => "xorpd", }; diff 
--git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 441d89fa91..534b6be168 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1529,6 +1529,7 @@ pub(crate) fn emit( SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2), SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2), + SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2), SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2), SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2), _ => unimplemented!("Opcode {:?} not implemented", op), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index a77882c3f6..3ecf4b7e62 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3717,6 +3717,12 @@ fn test_x64_emit() { "punpcklbw %xmm1, %xmm8", )); + insns.push(( + Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2), + "410F14D3", + "unpcklps %xmm11, %xmm2", + )); + // ======================================================== // XMM_RM_R: Integer Conversion insns.push(( diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index ad74062e7e..4e0d67e2d1 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -4154,6 +4154,58 @@ fn lower_insn_to_regs>( dst, )); } + Opcode::FcvtLowFromUint => { + // Algorithm uses unpcklps to help create a float that is equivalent to + // 0x1.0p52 + double(src). 0x1.0p52 is unique because at this exponent + // every value of the mantissa represents a corresponding uint32 number. + // When we subtract 0x1.0p52 we are left with double(src). 
+ let src = put_input_in_reg(ctx, inputs[0]); + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let uint_mask = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + + ctx.emit(Inst::gen_move(dst, src, types::I32X4)); + + static UINT_MASK: [u8; 16] = [ + 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + + let uint_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK)); + + ctx.emit(Inst::xmm_load_const( + uint_mask_const, + uint_mask, + types::I32X4, + )); + + // Creates 0x1.0p52 + double(src) + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Unpcklps, + RegMem::from(uint_mask), + dst, + )); + + static UINT_MASK_HIGH: [u8; 16] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x43, + ]; + + let uint_mask_high_const = + ctx.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH)); + let uint_mask_high = ctx.alloc_tmp(types::I32X4).only_reg().unwrap(); + ctx.emit(Inst::xmm_load_const( + uint_mask_high_const, + uint_mask_high, + types::I32X4, + )); + + // 0x1.0p52 + double(src) - 0x1.0p52 + ctx.emit(Inst::xmm_rm_r( + SseOpcode::Subpd, + RegMem::from(uint_mask_high), + dst, + )); + } Opcode::FcvtFromUint => { let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index e4933c0586..381898e485 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -4,6 +4,8 @@ use self::inst::EmitInfo; use super::TargetIsa; use crate::ir::{condcodes::IntCC, Function}; +#[cfg(feature = "unwind")] +use crate::isa::unwind::systemv; use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; use crate::isa::Builder as IsaBuilder; use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; @@ -11,12 +13,10 @@ use crate::result::CodegenResult; use 
crate::settings::{self as shared_settings, Flags}; use alloc::{boxed::Box, vec::Vec}; use core::hash::{Hash, Hasher}; + use regalloc::{PrettyPrint, RealRegUniverse, Reg}; use target_lexicon::Triple; -#[cfg(feature = "unwind")] -use crate::isa::unwind::systemv; - mod abi; pub mod encoding; mod inst; diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 609603f624..9440632f44 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -565,6 +565,7 @@ where Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"), Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"), Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), + Opcode::FcvtLowFromUint => unimplemented!("FcvtLowFromUint"), Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"), Opcode::Fvdemote => unimplemented!("Fvdemote"), Opcode::Isplit => unimplemented!("Isplit"), diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 6741de2c64..00fd529248 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1778,6 +1778,10 @@ pub fn translate_operator( let a = pop1_with_bitcast(state, I32X4, builder); state.push1(builder.ins().fcvt_low_from_sint(F64X2, a)); } + Operator::F64x2ConvertLowI32x4U => { + let a = pop1_with_bitcast(state, I32X4, builder); + state.push1(builder.ins().fcvt_low_from_uint(F64X2, a)); + } Operator::F64x2PromoteLowF32x4 => { let a = pop1_with_bitcast(state, F32X4, builder); state.push1(builder.ins().fvpromote_low(a)); @@ -1921,8 +1925,7 @@ pub fn translate_operator( | Operator::I16x8ExtAddPairwiseI8x16S | Operator::I16x8ExtAddPairwiseI8x16U | Operator::I32x4ExtAddPairwiseI16x8S - | Operator::I32x4ExtAddPairwiseI16x8U - | Operator::F64x2ConvertLowI32x4U => { + | Operator::I32x4ExtAddPairwiseI16x8U => { return Err(wasm_unsupported!("proposed simd operator {:?}", op)); } Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {