diff --git a/build.rs b/build.rs
index b551b80ee6..753e28b80f 100644
--- a/build.rs
+++ b/build.rs
@@ -185,7 +185,11 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("simd", "simd_bitwise") => return false,
             ("simd", "simd_bit_shift") => return false,
             ("simd", "simd_boolean") => return false,
+            ("simd", "simd_f32x4") => return false,
+            ("simd", "simd_f32x4_arith") => return false,
             ("simd", "simd_f32x4_cmp") => return false,
+            ("simd", "simd_f64x2") => return false,
+            ("simd", "simd_f64x2_arith") => return false,
             ("simd", "simd_f64x2_cmp") => return false,
             ("simd", "simd_i8x16_arith") => return false,
             ("simd", "simd_i8x16_arith2") => return false,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 9cee1345b4..9a280e0d01 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1123,6 +1123,18 @@ impl MachInstEmit for Inst {
                     VecMisc2::Not => (0b1, 0b00101, 0b00),
                     VecMisc2::Neg => (0b1, 0b01011, enc_size),
                     VecMisc2::Abs => (0b0, 0b01011, enc_size),
+                    VecMisc2::Fabs => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b0, 0b01111, enc_size)
+                    }
+                    VecMisc2::Fneg => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b1, 0b01111, enc_size)
+                    }
+                    VecMisc2::Fsqrt => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b1, 0b11111, enc_size)
+                    }
                 };
                 sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
             }
@@ -1363,9 +1375,22 @@ impl MachInstEmit for Inst {
                     VectorSize::Size64x2 => 0b11,
                     _ => 0,
                 };
-                let enc_size_for_fcmp = match size {
-                    VectorSize::Size32x4 => 0b0,
-                    VectorSize::Size64x2 => 0b1,
+                let is_float = match alu_op {
+                    VecALUOp::Fcmeq
+                    | VecALUOp::Fcmgt
+                    | VecALUOp::Fcmge
+                    | VecALUOp::Fadd
+                    | VecALUOp::Fsub
+                    | VecALUOp::Fdiv
+                    | VecALUOp::Fmax
+                    | VecALUOp::Fmin
+                    | VecALUOp::Fmul => true,
+                    _ => false,
+                };
+                let enc_float_size = match (is_float, size) {
+                    (true, VectorSize::Size32x4) => 0b0,
+                    (true, VectorSize::Size64x2) => 0b1,
+                    (true, _) => unimplemented!(),
                     _ => 0,
                 };

@@ -1379,9 +1404,9 @@ impl MachInstEmit for Inst {
                     VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
                     VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
                     VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
-                    VecALUOp::Fcmeq => (0b010_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
-                    VecALUOp::Fcmgt => (0b011_01110_10_1 | enc_size_for_fcmp << 1, 0b111001),
-                    VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
+                    VecALUOp::Fcmeq => (0b010_01110_00_1, 0b111001),
+                    VecALUOp::Fcmgt => (0b011_01110_10_1, 0b111001),
+                    VecALUOp::Fcmge => (0b011_01110_00_1, 0b111001),
                     // The following logical instructions operate on bytes, so are not encoded differently
                     // for the different vector types.
                     VecALUOp::And => (0b010_01110_00_1, 0b000111),
@@ -1403,6 +1428,17 @@ impl MachInstEmit for Inst {
                     VecALUOp::Umax => (0b011_01110_00_1 | enc_size << 1, 0b011001),
                     VecALUOp::Smax => (0b010_01110_00_1 | enc_size << 1, 0b011001),
                     VecALUOp::Urhadd => (0b011_01110_00_1 | enc_size << 1, 0b000101),
+                    VecALUOp::Fadd => (0b010_01110_00_1, 0b110101),
+                    VecALUOp::Fsub => (0b010_01110_10_1, 0b110101),
+                    VecALUOp::Fdiv => (0b011_01110_00_1, 0b111111),
+                    VecALUOp::Fmax => (0b010_01110_00_1, 0b111101),
+                    VecALUOp::Fmin => (0b010_01110_10_1, 0b111101),
+                    VecALUOp::Fmul => (0b011_01110_00_1, 0b110111),
+                };
+                let top11 = if is_float {
+                    top11 | enc_float_size << 1
+                } else {
+                    top11
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index f6ad003c01..c7d01d679e 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2953,6 +2953,78 @@ fn test_aarch64_binemit() {
         "urhadd v8.4s, v12.4s, v14.4s",
     ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Fadd,
+            rd: writable_vreg(31),
+            rn: vreg(0),
+            rm: vreg(16),
+            size: VectorSize::Size32x4,
+        },
+        "1FD4304E",
+        "fadd v31.4s, v0.4s, v16.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Fsub,
+            rd: writable_vreg(8),
+            rn: vreg(7),
+            rm: vreg(15),
+            size: VectorSize::Size64x2,
+        },
+        "E8D4EF4E",
+        "fsub v8.2d, v7.2d, v15.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Fdiv,
+            rd: writable_vreg(1),
+            rn: vreg(3),
+            rm: vreg(4),
+            size: VectorSize::Size32x4,
+        },
+        "61FC246E",
+        "fdiv v1.4s, v3.4s, v4.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Fmax,
+            rd: writable_vreg(31),
+            rn: vreg(16),
+            rm: vreg(0),
+            size: VectorSize::Size64x2,
+        },
+        "1FF6604E",
+        "fmax v31.2d, v16.2d, v0.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Fmin,
+            rd: writable_vreg(5),
+            rn: vreg(19),
+            rm: vreg(26),
+            size: VectorSize::Size32x4,
+        },
+        "65F6BA4E",
+        "fmin v5.4s, v19.4s, v26.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Fmul,
+            rd: writable_vreg(2),
+            rn: vreg(0),
+            rm: vreg(5),
+            size: VectorSize::Size64x2,
+        },
+        "02DC656E",
+        "fmul v2.2d, v0.2d, v5.2d",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
@@ -3052,6 +3124,39 @@ fn test_aarch64_binemit() {
         "abs v1.2d, v10.2d",
     ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fabs,
+            rd: writable_vreg(15),
+            rn: vreg(16),
+            size: VectorSize::Size32x4,
+        },
+        "0FFAA04E",
+        "fabs v15.4s, v16.4s",
+    ));
+
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fneg,
+            rd: writable_vreg(31),
+            rn: vreg(0),
+            size: VectorSize::Size32x4,
+        },
+        "1FF8A06E",
+        "fneg v31.4s, v0.4s",
+    ));
+
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fsqrt,
+            rd: writable_vreg(7),
+            rn: vreg(18),
+            size: VectorSize::Size64x2,
+        },
+        "47FAE16E",
+        "fsqrt v7.2d, v18.2d",
+    ));
+
     insns.push((
         Inst::VecLanes {
             op: VecLanesOp::Uminv,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 958a4e3bf2..bfa296dba3 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -271,6 +271,18 @@ pub enum VecALUOp {
     Smax,
     /// Unsigned rounding halving add
     Urhadd,
+    /// Floating-point add
+    Fadd,
+    /// Floating-point subtract
+    Fsub,
+    /// Floating-point divide
+    Fdiv,
+    /// Floating-point maximum
+    Fmax,
+    /// Floating-point minimum
+    Fmin,
+    /// Floating-point multiply
+    Fmul,
 }

 /// A Vector miscellaneous operation with two registers.
@@ -282,6 +294,12 @@ pub enum VecMisc2 {
     Neg,
     /// Absolute value
     Abs,
+    /// Floating-point absolute value
+    Fabs,
+    /// Floating-point negate
+    Fneg,
+    /// Floating-point square root
+    Fsqrt,
 }

 /// An operation across the lanes of vectors.
@@ -2810,6 +2828,12 @@ impl Inst {
                     VecALUOp::Umax => ("umax", size),
                     VecALUOp::Smax => ("smax", size),
                     VecALUOp::Urhadd => ("urhadd", size),
+                    VecALUOp::Fadd => ("fadd", size),
+                    VecALUOp::Fsub => ("fsub", size),
+                    VecALUOp::Fdiv => ("fdiv", size),
+                    VecALUOp::Fmax => ("fmax", size),
+                    VecALUOp::Fmin => ("fmin", size),
+                    VecALUOp::Fmul => ("fmul", size),
                 };
                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
                 let rn = show_vreg_vector(rn, mb_rru, size);
@@ -2821,6 +2845,9 @@ impl Inst {
                     VecMisc2::Not => ("mvn", VectorSize::Size8x16),
                     VecMisc2::Neg => ("neg", size),
                     VecMisc2::Abs => ("abs", size),
+                    VecMisc2::Fabs => ("fabs", size),
+                    VecMisc2::Fneg => ("fneg", size),
+                    VecMisc2::Fsqrt => ("fsqrt", size),
                 };

                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 5fa0ebdc66..c90530c21f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1802,46 +1802,84 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }

         Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
-            let bits = ty_bits(ctx.output_ty(insn, 0));
-            let fpu_op = match (op, bits) {
-                (Opcode::Fadd, 32) => FPUOp2::Add32,
-                (Opcode::Fadd, 64) => FPUOp2::Add64,
-                (Opcode::Fsub, 32) => FPUOp2::Sub32,
-                (Opcode::Fsub, 64) => FPUOp2::Sub64,
-                (Opcode::Fmul, 32) => FPUOp2::Mul32,
-                (Opcode::Fmul, 64) => FPUOp2::Mul64,
-                (Opcode::Fdiv, 32) => FPUOp2::Div32,
-                (Opcode::Fdiv, 64) => FPUOp2::Div64,
-                (Opcode::Fmin, 32) => FPUOp2::Min32,
-                (Opcode::Fmin, 64) => FPUOp2::Min64,
-                (Opcode::Fmax, 32) => FPUOp2::Max32,
-                (Opcode::Fmax, 64) => FPUOp2::Max64,
-                _ => panic!("Unknown op/bits combination"),
-            };
+            let ty = ty.unwrap();
+            let bits = ty_bits(ty);
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
+            if bits < 128 {
+                let fpu_op = match (op, bits) {
+                    (Opcode::Fadd, 32) => FPUOp2::Add32,
+                    (Opcode::Fadd, 64) => FPUOp2::Add64,
+                    (Opcode::Fsub, 32) => FPUOp2::Sub32,
+                    (Opcode::Fsub, 64) => FPUOp2::Sub64,
+                    (Opcode::Fmul, 32) => FPUOp2::Mul32,
+                    (Opcode::Fmul, 64) => FPUOp2::Mul64,
+                    (Opcode::Fdiv, 32) => FPUOp2::Div32,
+                    (Opcode::Fdiv, 64) => FPUOp2::Div64,
+                    (Opcode::Fmin, 32) => FPUOp2::Min32,
+                    (Opcode::Fmin, 64) => FPUOp2::Min64,
+                    (Opcode::Fmax, 32) => FPUOp2::Max32,
+                    (Opcode::Fmax, 64) => FPUOp2::Max64,
+                    _ => panic!("Unknown op/bits combination"),
+                };
+                ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
+            } else {
+                let alu_op = match op {
+                    Opcode::Fadd => VecALUOp::Fadd,
+                    Opcode::Fsub => VecALUOp::Fsub,
+                    Opcode::Fdiv => VecALUOp::Fdiv,
+                    Opcode::Fmax => VecALUOp::Fmax,
+                    Opcode::Fmin => VecALUOp::Fmin,
+                    Opcode::Fmul => VecALUOp::Fmul,
+                    _ => unreachable!(),
+                };
+
+                ctx.emit(Inst::VecRRR {
+                    rd,
+                    rn,
+                    rm,
+                    alu_op,
+                    size: VectorSize::from_ty(ty),
+                });
+            }
         }

         Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
-            let bits = ty_bits(ctx.output_ty(insn, 0));
-            let fpu_op = match (op, bits) {
-                (Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
-                (Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
-                (Opcode::Fneg, 32) => FPUOp1::Neg32,
-                (Opcode::Fneg, 64) => FPUOp1::Neg64,
-                (Opcode::Fabs, 32) => FPUOp1::Abs32,
-                (Opcode::Fabs, 64) => FPUOp1::Abs64,
-                (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
-                (Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
-                (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
-                (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
-                _ => panic!("Unknown op/bits combination"),
-            };
+            let ty = ty.unwrap();
+            let bits = ty_bits(ty);
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
+            if bits < 128 {
+                let fpu_op = match (op, bits) {
+                    (Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
+                    (Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
+                    (Opcode::Fneg, 32) => FPUOp1::Neg32,
+                    (Opcode::Fneg, 64) => FPUOp1::Neg64,
+                    (Opcode::Fabs, 32) => FPUOp1::Abs32,
+                    (Opcode::Fabs, 64) => FPUOp1::Abs64,
+                    (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
+                    (Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
+                    (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
+                    (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
+                    _ => panic!("Unknown op/bits combination"),
+                };
+                ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
+            } else {
+                let op = match op {
+                    Opcode::Fabs => VecMisc2::Fabs,
+                    Opcode::Fneg => VecMisc2::Fneg,
+                    Opcode::Sqrt => VecMisc2::Fsqrt,
+                    _ => unimplemented!(),
+                };
+
+                ctx.emit(Inst::VecMisc {
+                    op,
+                    rd,
+                    rn,
+                    size: VectorSize::from_ty(ty),
+                });
+            }
         }

         Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {