diff --git a/build.rs b/build.rs index a0e374082b..e23e8c2180 100644 --- a/build.rs +++ b/build.rs @@ -196,7 +196,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str /// Ignore tests that aren't supported yet. fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { - let target = env::var("TARGET").unwrap(); match strategy { #[cfg(feature = "lightbeam")] "Lightbeam" => match (testsuite, testname) { @@ -207,38 +206,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { _ => (), }, "Cranelift" => match (testsuite, testname) { - ("simd", "simd_address") => return false, - ("simd", "simd_align") => return false, - ("simd", "simd_bitwise") => return false, - ("simd", "simd_bit_shift") => return false, - ("simd", "simd_boolean") => return false, - ("simd", "simd_const") => return false, - ("simd", "simd_f32x4") => return false, - ("simd", "simd_f32x4_arith") => return false, - ("simd", "simd_f32x4_cmp") => return false, - ("simd", "simd_f64x2") => return false, - ("simd", "simd_f64x2_arith") => return false, - ("simd", "simd_f64x2_cmp") => return false, - ("simd", "simd_i8x16_arith") => return false, - ("simd", "simd_i8x16_arith2") => return false, - ("simd", "simd_i8x16_cmp") => return false, - ("simd", "simd_i8x16_sat_arith") => return false, - ("simd", "simd_i16x8_arith") => return false, - ("simd", "simd_i16x8_arith2") => return false, - ("simd", "simd_i16x8_cmp") => return false, - ("simd", "simd_i16x8_sat_arith") => return false, - ("simd", "simd_i32x4_arith") => return false, - ("simd", "simd_i32x4_arith2") => return false, - ("simd", "simd_i32x4_cmp") => return false, - ("simd", "simd_i64x2_arith") => return false, - ("simd", "simd_lane") => return false, - ("simd", "simd_load_extend") => return false, - ("simd", "simd_load_splat") => return false, - ("simd", "simd_store") => return false, - // Most simd tests are known to fail on aarch64 for now, it's going - // to be a big chunk of work to implement them all there! - ("simd", _) if target.contains("aarch64") => return true, - // TODO(#1886): Ignore reference types tests if this isn't x64, // because Cranelift only supports reference types on x64. 
("reference_types", _) => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 0045e5b088..729d21d121 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -671,6 +671,15 @@ impl VectorSize { VectorSize::Size64x2 => unreachable!(), } } + + pub fn halve(&self) -> VectorSize { + match self { + VectorSize::Size8x16 => VectorSize::Size8x8, + VectorSize::Size16x8 => VectorSize::Size16x4, + VectorSize::Size32x4 => VectorSize::Size32x2, + _ => *self, + } + } } //============================================================================= diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index fb69790981..32fe3aa6cf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1400,6 +1400,22 @@ impl MachInstEmit for Inst { debug_assert!(!size.is_128bits()); (0b1, 0b10011, enc_size) } + VecMisc2::Fcvtzs => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11011, enc_size) + } + VecMisc2::Fcvtzu => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11011, enc_size) + } + VecMisc2::Scvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b0, 0b11101, enc_size & 0b1) + } + VecMisc2::Ucvtf => { + debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); + (0b1, 0b11101, enc_size & 0b1) + } }; sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); } @@ -1644,7 +1660,12 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecExtend { t, rd, rn } => { + &Inst::VecExtend { + t, + rd, + rn, + high_half, + } => { let (u, immh) = match t { VecExtendOp::Sxtl8 => (0b0, 0b001), VecExtendOp::Sxtl16 => (0b0, 0b010), @@ -1655,22 +1676,38 @@ impl MachInstEmit for Inst { }; sink.put4( 0b000_011110_0000_000_101001_00000_00000 + | ((high_half as u32) << 30) | (u << 29) | (immh << 19) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecMiscNarrow { op, rd, rn, size } => { - debug_assert!(!size.is_128bits()); - let size = match size.widen() { - VectorSize::Size64x2 => 0b10, - _ => unimplemented!(), + &Inst::VecMiscNarrow { + op, + rd, + rn, + size, + high_half, + } => { + let size = match size.lane_size() { + ScalarSize::Size8 => 0b00, + ScalarSize::Size16 => 0b01, + ScalarSize::Size32 => 0b10, + _ => panic!("Unexpected vector operand lane size!"), }; let (u, bits_12_16) = match op { VecMiscNarrowOp::Xtn => (0b0, 0b10010), + VecMiscNarrowOp::Sqxtn => (0b0, 0b10100), + VecMiscNarrowOp::Sqxtun => (0b1, 0b10010), }; - sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn)); + sink.put4(enc_vec_rr_misc( + ((high_half as u32) << 1) | u, + size, + bits_12_16, + rd, + rn, + )); } &Inst::VecMovElement { rd, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 7fba35f2bc..e2f08abb21 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2008,6 +2008,7 @@ fn test_aarch64_binemit() { t: VecExtendOp::Sxtl8, rd: writable_vreg(4), rn: vreg(27), + high_half: false, }, "64A7080F", "sxtl v4.8h, v27.8b", @@ -2017,15 +2018,17 @@ fn test_aarch64_binemit() { t: VecExtendOp::Sxtl16, rd: writable_vreg(17), rn: vreg(19), + high_half: true, }, - "71A6100F", - "sxtl 
v17.4s, v19.4h", + "71A6104F", + "sxtl2 v17.4s, v19.8h", )); insns.push(( Inst::VecExtend { t: VecExtendOp::Sxtl32, rd: writable_vreg(30), rn: vreg(6), + high_half: false, }, "DEA4200F", "sxtl v30.2d, v6.2s", @@ -2035,15 +2038,17 @@ fn test_aarch64_binemit() { t: VecExtendOp::Uxtl8, rd: writable_vreg(3), rn: vreg(29), + high_half: true, }, - "A3A7082F", - "uxtl v3.8h, v29.8b", + "A3A7086F", + "uxtl2 v3.8h, v29.16b", )); insns.push(( Inst::VecExtend { t: VecExtendOp::Uxtl16, rd: writable_vreg(15), rn: vreg(12), + high_half: false, }, "8FA5102F", "uxtl v15.4s, v12.4h", @@ -2053,9 +2058,10 @@ fn test_aarch64_binemit() { t: VecExtendOp::Uxtl32, rd: writable_vreg(28), rn: vreg(2), + high_half: true, }, - "5CA4202F", - "uxtl v28.2d, v2.2s", + "5CA4206F", + "uxtl2 v28.2d, v2.4s", )); insns.push(( @@ -2088,11 +2094,36 @@ fn test_aarch64_binemit() { rd: writable_vreg(22), rn: vreg(8), size: VectorSize::Size32x2, + high_half: false, }, "1629A10E", "xtn v22.2s, v8.2d", )); + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Sqxtn, + rd: writable_vreg(31), + rn: vreg(0), + size: VectorSize::Size16x8, + high_half: true, + }, + "1F48614E", + "sqxtn2 v31.8h, v0.4s", + )); + + insns.push(( + Inst::VecMiscNarrow { + op: VecMiscNarrowOp::Sqxtun, + rd: writable_vreg(16), + rn: vreg(23), + size: VectorSize::Size8x16, + high_half: false, + }, + "F02A212E", + "sqxtun v16.8b, v23.8h", + )); + insns.push(( Inst::VecRRR { alu_op: VecALUOp::Sqadd, @@ -3322,6 +3353,50 @@ fn test_aarch64_binemit() { "shll v1.2d, v10.2s, #32", )); + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fcvtzs, + rd: writable_vreg(4), + rn: vreg(22), + size: VectorSize::Size32x4, + }, + "C4BAA14E", + "fcvtzs v4.4s, v22.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Fcvtzu, + rd: writable_vreg(29), + rn: vreg(15), + size: VectorSize::Size64x2, + }, + "FDB9E16E", + "fcvtzu v29.2d, v15.2d", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Scvtf, + rd: writable_vreg(20), + rn: vreg(8), + size: VectorSize::Size32x4, + }, + "14D9214E", + "scvtf v20.4s, v8.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Ucvtf, + rd: writable_vreg(10), + rn: vreg(19), + size: VectorSize::Size64x2, + }, + "6ADA616E", + "ucvtf v10.2d, v19.2d", + )); + insns.push(( Inst::VecLanes { op: VecLanesOp::Uminv, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 46f6edc2e8..b90dccd41a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -308,6 +308,14 @@ pub enum VecMisc2 { Rev64, /// Shift left long (by element size) Shll, + /// Floating-point convert to signed integer, rounding toward zero + Fcvtzs, + /// Floating-point convert to unsigned integer, rounding toward zero + Fcvtzu, + /// Signed integer convert to floating-point + Scvtf, + /// Unsigned integer convert to floating-point + Ucvtf, } /// A Vector narrowing operation with two registers. @@ -315,6 +323,10 @@ pub enum VecMisc2 { pub enum VecMiscNarrowOp { /// Extract Narrow Xtn, + /// Signed saturating extract narrow + Sqxtn, + /// Signed saturating extract unsigned narrow + Sqxtun, } /// An operation across the lanes of vectors. @@ -884,6 +896,7 @@ pub enum Inst { t: VecExtendOp, rd: Writable, rn: Reg, + high_half: bool, }, /// Move vector element to another vector element. @@ -901,6 +914,7 @@ pub enum Inst { rd: Writable, rn: Reg, size: VectorSize, + high_half: bool, }, /// A vector ALU op. 
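Note: the new emit tests above pin down the encodings for the `2` (high-half) variants. As a standalone cross-check of the bit layout that `enc_vec_rr_misc` fills in — Q selects the high-half form, U plus bits 12–16 select the opcode, and for the narrowing ops the size field gives the destination lane width — the sketch below recomputes the `sqxtn2 v31.8h, v0.4s` word from the test vector. Field packing follows the AArch64 "advanced SIMD two-register miscellaneous" class; the helper name and the separate q/u parameters are illustrative, not the Cranelift signature (the emit code passes `(high_half << 1) | u` as a single argument).

// Standalone model of the two-register-miscellaneous encoding:
// 0 Q U 01110 size(2) 10000 opcode(5) 10 Rn(5) Rd(5).
fn vec_rr_misc(q: u32, u: u32, size: u32, opcode: u32, rn: u32, rd: u32) -> u32 {
    debug_assert!(q < 2 && u < 2 && size < 4 && opcode < 32 && rn < 32 && rd < 32);
    0b0_0_0_01110_00_10000_00000_10_00000_00000
        | (q << 30)
        | (u << 29)
        | (size << 22)
        | (opcode << 12)
        | (rn << 5)
        | rd
}

fn main() {
    // sqxtn2 v31.8h, v0.4s: Q=1 (high half), U=0, size=01 (16-bit dest lanes),
    // opcode=0b10100 (SQXTN), Rn=v0, Rd=v31.
    let word = vec_rr_misc(1, 0, 0b01, 0b10100, 0, 31);
    assert_eq!(word, 0x4E61481F); // emitted little-endian as "1F48614E" in the test above
}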
@@ -1628,9 +1642,16 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_mod(rd); collector.add_use(rn); } - &Inst::VecMiscNarrow { rd, rn, .. } => { - collector.add_def(rd); + &Inst::VecMiscNarrow { + rd, rn, high_half, .. + } => { collector.add_use(rn); + + if high_half { + collector.add_mod(rd); + } else { + collector.add_def(rd); + } } &Inst::VecRRR { alu_op, rd, rn, rm, .. @@ -2300,10 +2321,16 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut Inst::VecMiscNarrow { ref mut rd, ref mut rn, + high_half, .. } => { - map_def(mapper, rd); map_use(mapper, rn); + + if high_half { + map_mod(mapper, rd); + } else { + map_def(mapper, rd); + } } &mut Inst::VecRRR { alu_op, @@ -3155,14 +3182,20 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, 0, size); format!("dup {}, {}", rd, rn) } - &Inst::VecExtend { t, rd, rn } => { - let (op, dest, src) = match t { - VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), - VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), - VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), - VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), - VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), - VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), + &Inst::VecExtend { t, rd, rn, high_half } => { + let (op, dest, src) = match (t, high_half) { + (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), + (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16), + (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), + (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8), + (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), + (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4), + (VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), + (VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16), + (VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), + (VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8), + (VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), + (VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4), }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); let rn = show_vreg_vector(rn, mb_rru, src); @@ -3179,11 +3212,22 @@ impl Inst { let rn = show_vreg_element(rn, mb_rru, idx2, size); format!("mov {}, {}", rd, rn) } - &Inst::VecMiscNarrow { op, rd, rn, size } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => { + let dest_size = if high_half { + assert!(size.is_128bits()); + size + } else { + size.halve() + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size); let rn = show_vreg_vector(rn, mb_rru, size.widen()); - let op = match op { - VecMiscNarrowOp::Xtn => "xtn", + let op = match (op, high_half) { + (VecMiscNarrowOp::Xtn, false) => "xtn", + (VecMiscNarrowOp::Xtn, true) => "xtn2", + (VecMiscNarrowOp::Sqxtn, false) => "sqxtn", + (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2", + (VecMiscNarrowOp::Sqxtun, false) => "sqxtun", + (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2", }; format!("{} {}, {}", op, rd, rn) } @@ -3267,6 +3311,10 @@ impl 
Inst { VecMisc2::Fsqrt => ("fsqrt", size), VecMisc2::Rev64 => ("rev64", size), VecMisc2::Shll => ("shll", size), + VecMisc2::Fcvtzs => ("fcvtzs", size), + VecMisc2::Fcvtzu => ("fcvtzu", size), + VecMisc2::Scvtf => ("scvtf", size), + VecMisc2::Ucvtf => ("ucvtf", size), }; let rd_size = if is_shll { size.widen() } else { size }; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index ecc4c1ca67..b2915d024e 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; use crate::machinst::lower::*; use crate::machinst::*; -use crate::CodegenResult; +use crate::{CodegenError, CodegenResult}; use crate::isa::aarch64::abi::*; use crate::isa::aarch64::inst::*; @@ -66,7 +66,7 @@ pub(crate) fn lower_insn_to_regs>( let rd = get_output_reg(ctx, outputs[0]); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let (rm, negated) = put_input_in_rse_imm12_maybe_negated( ctx, inputs[1], @@ -94,7 +94,7 @@ pub(crate) fn lower_insn_to_regs>( let rd = get_output_reg(ctx, outputs[0]); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let (rm, negated) = put_input_in_rse_imm12_maybe_negated( ctx, inputs[1], @@ -124,7 +124,7 @@ pub(crate) fn lower_insn_to_regs>( let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat; let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let narrow_mode = if is_signed { NarrowValueMode::SignExtend64 } else { @@ -180,7 +180,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Ineg => { let rd = get_output_reg(ctx, outputs[0]); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let rn = zero_reg(); let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None); let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); @@ -201,7 +201,7 @@ pub(crate) fn lower_insn_to_regs>( let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64); ctx.emit(Inst::AluRRRR { alu_op, @@ -274,6 +274,7 @@ pub(crate) fn lower_insn_to_regs>( rd: tmp1, rn, size: VectorSize::Size32x2, + high_half: false, }); // Sum the respective high half components. @@ -293,6 +294,7 @@ pub(crate) fn lower_insn_to_regs>( rd: tmp2, rn: rm, size: VectorSize::Size32x2, + high_half: false, }); // Shift the high half components, into the high half. 
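Note: in the VecMiscNarrow pretty-printer above, the new `halve` helper is the counterpart of the existing `widen`: `size` names the full 128-bit destination arrangement used by the `2` form, `size.halve()` the 64-bit destination used by the plain form, and `size.widen()` the source arrangement in both cases. A minimal standalone model of that pairing, using plain strings rather than the Cranelift `VectorSize` type:

// Illustrates how the printer picks register arrangements for sqxtn/sqxtn2.
#[derive(Clone, Copy)]
enum Size { S16x4, S16x8, S32x4 }

impl Size {
    fn name(self) -> &'static str {
        match self { Size::S16x4 => "4h", Size::S16x8 => "8h", Size::S32x4 => "4s" }
    }
    fn widen(self) -> Size {
        match self { Size::S16x8 => Size::S32x4, _ => unimplemented!() }
    }
    fn halve(self) -> Size {
        match self { Size::S16x8 => Size::S16x4, s => s }
    }
}

fn narrow_text(op: &str, size: Size, high_half: bool) -> String {
    // dest = size (high half) or size.halve() (low half); source = size.widen().
    let (op, dest) = if high_half {
        (format!("{}2", op), size)
    } else {
        (op.to_string(), size.halve())
    };
    format!("{} v0.{}, v1.{}", op, dest.name(), size.widen().name())
}

fn main() {
    assert_eq!(narrow_text("sqxtn", Size::S16x8, false), "sqxtn v0.4h, v1.4s");
    assert_eq!(narrow_text("sqxtn", Size::S16x8, true), "sqxtn2 v0.8h, v1.4s");
}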
@@ -570,7 +572,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Bnot => { let rd = get_output_reg(ctx, outputs[0]); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None); let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64); // NOT rd, rm ==> ORR_NOT rd, zero, rm @@ -594,7 +596,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::BxorNot => { let rd = get_output_reg(ctx, outputs[0]); let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None); let alu_op = match op { @@ -633,7 +635,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let size = OperandSize::from_bits(ty_bits(ty)); let narrow_mode = match (op, size) { (Opcode::Ishl, _) => NarrowValueMode::None, @@ -1159,6 +1161,7 @@ pub(crate) fn lower_insn_to_regs>( t, rd, rn: rd.to_reg(), + high_half: false, }); } } @@ -1433,7 +1436,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Bitselect | Opcode::Vselect => { let ty = ty.unwrap(); - if ty_bits(ty) < 128 { + if !ty.is_vector() { debug_assert_ne!(Opcode::Vselect, op); let tmp = ctx.alloc_tmp(RegClass::I64, I64); let rd = get_output_reg(ctx, outputs[0]); @@ -1696,7 +1699,7 @@ pub(crate) fn lower_insn_to_regs>( }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); - if ty_bits(ty) < 128 { + if !ty.is_vector() { let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); @@ -1716,7 +1719,7 @@ pub(crate) fn lower_insn_to_regs>( let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - if ty_bits(ty) < 128 { + if !ty.is_vector() { match ty_bits(ty) { 32 => { ctx.emit(Inst::FpuCmp32 { rn, rm }); @@ -2106,7 +2109,7 @@ pub(crate) fn lower_insn_to_regs>( let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - if bits < 128 { + if !ty.is_vector() { let fpu_op = match (op, bits) { (Opcode::Fadd, 32) => FPUOp2::Add32, (Opcode::Fadd, 64) => FPUOp2::Add64, @@ -2149,7 +2152,7 @@ pub(crate) fn lower_insn_to_regs>( let bits = ty_bits(ty); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - if bits < 128 { + if !ty.is_vector() { let fpu_op = match (op, bits) { (Opcode::Sqrt, 32) => FPUOp1::Sqrt32, (Opcode::Sqrt, 64) => FPUOp1::Sqrt64, @@ -2414,153 +2417,186 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::FcvtFromUint | Opcode::FcvtFromSint => { - let in_bits = ty_bits(ctx.input_ty(insn, 0)); - let out_bits = ty_bits(ctx.output_ty(insn, 0)); + let ty = ty.unwrap(); let signed = op == Opcode::FcvtFromSint; - let op = match (signed, in_bits, out_bits) { - (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32, - (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32, - (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64, - (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64, - (false, 64, 32) => IntToFpuOp::U64ToF32, - (true, 64, 32) => IntToFpuOp::I64ToF32, - (false, 64, 64) => 
IntToFpuOp::U64ToF64, - (true, 64, 64) => IntToFpuOp::I64ToF64, - _ => panic!("Unknown input/output-bits combination"), - }; - let narrow_mode = match (signed, in_bits) { - (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32, - (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32, - (false, 64) => NarrowValueMode::ZeroExtend64, - (true, 64) => NarrowValueMode::SignExtend64, - _ => panic!("Unknown input size"), - }; - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rd = get_output_reg(ctx, outputs[0]); - ctx.emit(Inst::IntToFpu { op, rd, rn }); + + if ty.is_vector() { + let op = if signed { + VecMisc2::Scvtf + } else { + VecMisc2::Ucvtf + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + + ctx.emit(Inst::VecMisc { + op, + rd, + rn, + size: VectorSize::from_ty(ty), + }); + } else { + let in_bits = ty_bits(ctx.input_ty(insn, 0)); + let out_bits = ty_bits(ty); + let op = match (signed, in_bits, out_bits) { + (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32, + (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32, + (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64, + (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64, + (false, 64, 32) => IntToFpuOp::U64ToF32, + (true, 64, 32) => IntToFpuOp::I64ToF32, + (false, 64, 64) => IntToFpuOp::U64ToF64, + (true, 64, 64) => IntToFpuOp::I64ToF64, + _ => panic!("Unknown input/output-bits combination"), + }; + let narrow_mode = match (signed, in_bits) { + (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32, + (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32, + (false, 64) => NarrowValueMode::ZeroExtend64, + (true, 64) => NarrowValueMode::SignExtend64, + _ => panic!("Unknown input size"), + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + ctx.emit(Inst::IntToFpu { op, rd, rn }); + } } Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => { - let in_ty = ctx.input_ty(insn, 0); - let in_bits = ty_bits(in_ty); - let out_ty = ctx.output_ty(insn, 0); - let out_bits = ty_bits(out_ty); + let ty = ty.unwrap(); let out_signed = op == Opcode::FcvtToSintSat; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX - // FMIN Vtmp2, Vin, Vtmp1 - // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN - // FMAX Vtmp2, Vtmp2, Vtmp1 - // (if signed) FIMM Vtmp1, 0 - // FCMP Vin, Vin - // FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0 - // convert Rout, Vtmp2 + if ty.is_vector() { + let op = if out_signed { + VecMisc2::Fcvtzs + } else { + VecMisc2::Fcvtzu + }; - assert!(in_bits == 32 || in_bits == 64); - assert!(out_bits == 32 || out_bits == 64); - - let min: f64 = match (out_bits, out_signed) { - (32, true) => std::i32::MIN as f64, - (32, false) => 0.0, - (64, true) => std::i64::MIN as f64, - (64, false) => 0.0, - _ => unreachable!(), - }; - - let max = match (out_bits, out_signed) { - (32, true) => std::i32::MAX as f64, - (32, false) => std::u32::MAX as f64, - (64, true) => std::i64::MAX as f64, - (64, false) => std::u64::MAX as f64, - _ => unreachable!(), - }; - - let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty); - let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty); - - if in_bits == 32 { - ctx.emit(Inst::LoadFpuConst32 { - rd: rtmp1, - const_data: max as f32, + ctx.emit(Inst::VecMisc { + op, + rd, + rn, + size: VectorSize::from_ty(ty), }); } else { - 
ctx.emit(Inst::LoadFpuConst64 { - rd: rtmp1, - const_data: max, - }); - } - ctx.emit(Inst::FpuRRR { - fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64), - rd: rtmp2, - rn: rn, - rm: rtmp1.to_reg(), - }); - if in_bits == 32 { - ctx.emit(Inst::LoadFpuConst32 { - rd: rtmp1, - const_data: min as f32, - }); - } else { - ctx.emit(Inst::LoadFpuConst64 { - rd: rtmp1, - const_data: min, - }); - } - ctx.emit(Inst::FpuRRR { - fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64), - rd: rtmp2, - rn: rtmp2.to_reg(), - rm: rtmp1.to_reg(), - }); - if out_signed { + let in_ty = ctx.input_ty(insn, 0); + let in_bits = ty_bits(in_ty); + let out_bits = ty_bits(ty); + // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX + // FMIN Vtmp2, Vin, Vtmp1 + // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN + // FMAX Vtmp2, Vtmp2, Vtmp1 + // (if signed) FIMM Vtmp1, 0 + // FCMP Vin, Vin + // FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0 + // convert Rout, Vtmp2 + + assert!(in_bits == 32 || in_bits == 64); + assert!(out_bits == 32 || out_bits == 64); + + let min: f64 = match (out_bits, out_signed) { + (32, true) => std::i32::MIN as f64, + (32, false) => 0.0, + (64, true) => std::i64::MIN as f64, + (64, false) => 0.0, + _ => unreachable!(), + }; + + let max = match (out_bits, out_signed) { + (32, true) => std::i32::MAX as f64, + (32, false) => std::u32::MAX as f64, + (64, true) => std::i64::MAX as f64, + (64, false) => std::u64::MAX as f64, + _ => unreachable!(), + }; + + let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty); + let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty); + if in_bits == 32 { ctx.emit(Inst::LoadFpuConst32 { rd: rtmp1, - const_data: 0.0, + const_data: max as f32, }); } else { ctx.emit(Inst::LoadFpuConst64 { rd: rtmp1, - const_data: 0.0, + const_data: max, }); } - } - if in_bits == 32 { - ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn }); - ctx.emit(Inst::FpuCSel32 { + ctx.emit(Inst::FpuRRR { + fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64), rd: rtmp2, - rn: rtmp1.to_reg(), - rm: rtmp2.to_reg(), - cond: Cond::Ne, + rn: rn, + rm: rtmp1.to_reg(), }); - } else { - ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn }); - ctx.emit(Inst::FpuCSel64 { + if in_bits == 32 { + ctx.emit(Inst::LoadFpuConst32 { + rd: rtmp1, + const_data: min as f32, + }); + } else { + ctx.emit(Inst::LoadFpuConst64 { + rd: rtmp1, + const_data: min, + }); + } + ctx.emit(Inst::FpuRRR { + fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64), rd: rtmp2, - rn: rtmp1.to_reg(), - rm: rtmp2.to_reg(), - cond: Cond::Ne, + rn: rtmp2.to_reg(), + rm: rtmp1.to_reg(), }); - } + if out_signed { + if in_bits == 32 { + ctx.emit(Inst::LoadFpuConst32 { + rd: rtmp1, + const_data: 0.0, + }); + } else { + ctx.emit(Inst::LoadFpuConst64 { + rd: rtmp1, + const_data: 0.0, + }); + } + } + if in_bits == 32 { + ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn }); + ctx.emit(Inst::FpuCSel32 { + rd: rtmp2, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + cond: Cond::Ne, + }); + } else { + ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn }); + ctx.emit(Inst::FpuCSel64 { + rd: rtmp2, + rn: rtmp1.to_reg(), + rm: rtmp2.to_reg(), + cond: Cond::Ne, + }); + } - let cvt = match (in_bits, out_bits, out_signed) { - (32, 32, false) => FpuToIntOp::F32ToU32, - (32, 32, true) => FpuToIntOp::F32ToI32, - (32, 64, false) => FpuToIntOp::F32ToU64, - (32, 64, true) => FpuToIntOp::F32ToI64, - (64, 32, false) => FpuToIntOp::F64ToU32, - (64, 32, true) => FpuToIntOp::F64ToI32, - (64, 64, false) => FpuToIntOp::F64ToU64, - (64, 64, true) => FpuToIntOp::F64ToI64, - _ => unreachable!(), - }; - 
ctx.emit(Inst::FpuToInt { - op: cvt, - rd, - rn: rtmp2.to_reg(), - }); + let cvt = match (in_bits, out_bits, out_signed) { + (32, 32, false) => FpuToIntOp::F32ToU32, + (32, 32, true) => FpuToIntOp::F32ToI32, + (32, 64, false) => FpuToIntOp::F32ToU64, + (32, 64, true) => FpuToIntOp::F32ToI64, + (64, 32, false) => FpuToIntOp::F64ToU32, + (64, 32, true) => FpuToIntOp::F64ToI32, + (64, 64, false) => FpuToIntOp::F64ToU64, + (64, 64, true) => FpuToIntOp::F64ToI64, + _ => unreachable!(), + }; + ctx.emit(Inst::FpuToInt { + op: cvt, + rd, + rn: rtmp2.to_reg(), + }); + } } Opcode::IaddIfcout => { @@ -2689,12 +2725,62 @@ pub(crate) fn lower_insn_to_regs>( }); } - Opcode::Snarrow - | Opcode::Unarrow - | Opcode::SwidenLow - | Opcode::SwidenHigh - | Opcode::UwidenLow - | Opcode::UwidenHigh => unimplemented!(), + Opcode::Snarrow | Opcode::Unarrow => { + let op = if op == Opcode::Snarrow { + VecMiscNarrowOp::Sqxtn + } else { + VecMiscNarrowOp::Sqxtun + }; + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ty = ty.unwrap(); + + ctx.emit(Inst::VecMiscNarrow { + op, + rd, + rn, + size: VectorSize::from_ty(ty), + high_half: false, + }); + ctx.emit(Inst::VecMiscNarrow { + op, + rd, + rn: rn2, + size: VectorSize::from_ty(ty), + high_half: true, + }); + } + + Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => { + let lane_type = ty.unwrap().lane_type(); + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let (t, high_half) = match (lane_type, op) { + (I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false), + (I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true), + (I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false), + (I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true), + (I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false), + (I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true), + (I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false), + (I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true), + _ => { + return Err(CodegenError::Unsupported(format!( + "Unsupported SIMD vector lane type: {:?}", + lane_type + ))); + } + }; + + ctx.emit(Inst::VecExtend { + t, + rd, + rn, + high_half, + }); + } + Opcode::TlsValue => unimplemented!(), }
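Note: the Snarrow/Unarrow lowering above emits `sqxtn`/`sqxtun` to fill the low half of the destination from the first operand, then the `2` form to fill the high half from the second operand. A scalar sketch of the resulting lane semantics for the signed case (each i32 lane saturated to i16; Unarrow would instead clamp to the unsigned 16-bit range via `sqxtun`):

// Scalar model of the two-instruction Snarrow sequence on i32x4 inputs.
fn snarrow_i32x4(lo: [i32; 4], hi: [i32; 4]) -> [i16; 8] {
    let sat = |x: i32| x.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
    let mut out = [0i16; 8];
    for i in 0..4 {
        out[i] = sat(lo[i]);     // low lanes, from the first operand (sqxtn)
        out[i + 4] = sat(hi[i]); // high lanes, from the second operand (sqxtn2)
    }
    out
}

fn main() {
    assert_eq!(
        snarrow_i32x4([1, -1, 40_000, -40_000], [0, 2, 3, i32::MAX]),
        [1, -1, i16::MAX, i16::MIN, 0, 2, 3, i16::MAX]
    );
}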