From 95b0b05af283ded8a6130856c4d385ea49833ef1 Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Fri, 19 Jun 2020 01:00:47 +0100 Subject: [PATCH 01/11] AArch64: Introduce an enum to specify vector instruction operand sizes Copyright (c) 2020, Arm Limited. --- .../codegen/src/isa/aarch64/inst/args.rs | 53 ++++ .../codegen/src/isa/aarch64/inst/emit.rs | 133 ++++------ .../src/isa/aarch64/inst/emit_tests.rs | 250 +++++++++--------- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 188 ++++++------- .../codegen/src/isa/aarch64/inst/regs.rs | 50 ++-- cranelift/codegen/src/isa/aarch64/lower.rs | 17 +- .../codegen/src/isa/aarch64/lower_inst.rs | 71 +++-- 7 files changed, 374 insertions(+), 388 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 6bbd618685..43e8471ac7 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -3,6 +3,7 @@ // Some variants are never constructed, but we still want them as options in the future. #![allow(dead_code)] +use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8}; use crate::ir::Type; use crate::isa::aarch64::inst::*; use crate::isa::aarch64::lower::ty_bits; @@ -587,3 +588,55 @@ impl ScalarSize { } } } + +/// Type used to communicate the size of a vector operand. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum VectorSize { + Size8x8, + Size8x16, + Size16x4, + Size16x8, + Size32x2, + Size32x4, + Size64x2, +} + +impl VectorSize { + /// Convert from a type into a vector operand size. 
+ pub fn from_ty(ty: Type) -> VectorSize { + match ty { + F32X2 => VectorSize::Size32x2, + F32X4 => VectorSize::Size32x4, + F64X2 => VectorSize::Size64x2, + I8X8 => VectorSize::Size8x8, + I8X16 => VectorSize::Size8x16, + I16X4 => VectorSize::Size16x4, + I16X8 => VectorSize::Size16x8, + I32X2 => VectorSize::Size32x2, + I32X4 => VectorSize::Size32x4, + I64X2 => VectorSize::Size64x2, + _ => unimplemented!(), + } + } + + /// Get the integer operand size that corresponds to a lane of a vector with a certain size. + pub fn operand_size(&self) -> OperandSize { + match self { + VectorSize::Size64x2 => OperandSize::Size64, + _ => OperandSize::Size32, + } + } + + /// Get the scalar operand size that corresponds to a lane of a vector with a certain size. + pub fn lane_size(&self) -> ScalarSize { + match self { + VectorSize::Size8x8 => ScalarSize::Size8, + VectorSize::Size8x16 => ScalarSize::Size8, + VectorSize::Size16x4 => ScalarSize::Size16, + VectorSize::Size16x8 => ScalarSize::Size16, + VectorSize::Size32x2 => ScalarSize::Size32, + VectorSize::Size32x4 => ScalarSize::Size32, + VectorSize::Size64x2 => ScalarSize::Size64, + } + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 9fc952f644..f12205dbd4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1007,7 +1007,7 @@ impl MachInstEmit for Inst { sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { - let (imm5, shift, mask) = match size { + let (imm5, shift, mask) = match size.lane_size() { ScalarSize::Size32 => (0b00100, 3, 0b011), ScalarSize::Size64 => (0b01000, 4, 0b001), _ => unimplemented!(), @@ -1048,6 +1048,10 @@ impl MachInstEmit for Inst { FPUOp2::Max64 => 0b000_11110_01_1_00000_010010, FPUOp2::Min32 => 0b000_11110_00_1_00000_010110, FPUOp2::Min64 => 0b000_11110_01_1_00000_010110, + FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011, + 
FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011, + FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011, + FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011, }; sink.put4(enc_fpurrr(top22, rd, rn, rm)); } @@ -1102,31 +1106,25 @@ impl MachInstEmit for Inst { }; sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); } - &Inst::VecMisc { op, rd, rn, ty } => { - let enc_size = match ty { - I8X16 => 0b00, - I16X8 => 0b01, - I32X4 => 0b10, - I64X2 => 0b11, - _ => 0, + &Inst::VecMisc { op, rd, rn, size } => { + let enc_size = match size { + VectorSize::Size8x16 => 0b00, + VectorSize::Size16x8 => 0b01, + VectorSize::Size32x4 => 0b10, + VectorSize::Size64x2 => 0b11, + _ => unimplemented!(), }; let (bits_12_16, size) = match op { - VecMisc2::Not => { - debug_assert_eq!(128, ty_bits(ty)); - (0b00101, 0b00) - } - VecMisc2::Neg => { - debug_assert_eq!(128, ty_bits(ty)); - (0b01011, enc_size) - } + VecMisc2::Not => (0b00101, 0b00), + VecMisc2::Neg => (0b01011, enc_size), }; sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn)); } - &Inst::VecLanes { op, rd, rn, ty } => { - let (q, size) = match ty { - I8X16 => (0b1, 0b00), - I16X8 => (0b1, 0b01), - I32X4 => (0b1, 0b10), + &Inst::VecLanes { op, rd, rn, size } => { + let (q, size) = match size { + VectorSize::Size8x16 => (0b1, 0b00), + VectorSize::Size16x8 => (0b1, 0b01), + VectorSize::Size32x4 => (0b1, 0b10), _ => unreachable!(), }; let (u, opcode) = match op { @@ -1250,12 +1248,12 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::MovFromVec { rd, rn, idx, ty } => { - let (q, imm5, shift, mask) = match ty { - I8 => (0b0, 0b00001, 1, 0b1111), - I16 => (0b0, 0b00010, 2, 0b0111), - I32 => (0b0, 0b00100, 3, 0b0011), - I64 => (0b1, 0b01000, 4, 0b0001), + &Inst::MovFromVec { rd, rn, idx, size } => { + let (q, imm5, shift, mask) = match size { + VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111), + VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111), + VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011), + VectorSize::Size64x2 
=> (0b1, 0b01000, 4, 0b0001), _ => unreachable!(), }; debug_assert_eq!(idx & mask, idx); @@ -1268,12 +1266,12 @@ impl MachInstEmit for Inst { | machreg_to_gpr(rd.to_reg()), ); } - &Inst::VecDup { rd, rn, ty } => { - let imm5 = match ty { - I8 => 0b00001, - I16 => 0b00010, - I32 => 0b00100, - I64 => 0b01000, + &Inst::VecDup { rd, rn, size } => { + let imm5 = match size { + VectorSize::Size8x16 => 0b00001, + VectorSize::Size16x8 => 0b00010, + VectorSize::Size32x4 => 0b00100, + VectorSize::Size64x2 => 0b01000, _ => unimplemented!(), }; sink.put4( @@ -1283,10 +1281,10 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecDupFromFpu { rd, rn, ty } => { - let imm5 = match ty { - F32 => 0b00100, - F64 => 0b01000, + &Inst::VecDupFromFpu { rd, rn, size } => { + let imm5 = match size { + VectorSize::Size32x4 => 0b00100, + VectorSize::Size64x2 => 0b01000, _ => unimplemented!(), }; sink.put4( @@ -1318,41 +1316,25 @@ impl MachInstEmit for Inst { rn, rm, alu_op, - ty, + size, } => { - let enc_size = match ty { - I8X16 => 0b00, - I16X8 => 0b01, - I32X4 => 0b10, - I64X2 => 0b11, + let enc_size = match size { + VectorSize::Size8x16 => 0b00, + VectorSize::Size16x8 => 0b01, + VectorSize::Size32x4 => 0b10, + VectorSize::Size64x2 => 0b11, _ => 0, }; - let enc_size_for_fcmp = match ty { - F32X4 => 0b0, - F64X2 => 0b1, + let enc_size_for_fcmp = match size { + VectorSize::Size32x4 => 0b0, + VectorSize::Size64x2 => 0b1, _ => 0, }; let (top11, bit15_10) = match alu_op { - VecALUOp::SQAddScalar => { - debug_assert_eq!(I64, ty); - (0b010_11110_11_1, 0b000011) - } VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011), - VecALUOp::SQSubScalar => { - debug_assert_eq!(I64, ty); - (0b010_11110_11_1, 0b001011) - } VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011), - VecALUOp::UQAddScalar => { - debug_assert_eq!(I64, ty); - (0b011_11110_11_1, 0b000011) - } VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011), - VecALUOp::UQSubScalar => { - 
debug_assert_eq!(I64, ty); - (0b011_11110_11_1, 0b001011) - } VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011), VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011), VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111), @@ -1364,31 +1346,16 @@ impl MachInstEmit for Inst { VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001), // The following logical instructions operate on bytes, so are not encoded differently // for the different vector types. - VecALUOp::And => { - debug_assert_eq!(128, ty_bits(ty)); - (0b010_01110_00_1, 0b000111) - } - VecALUOp::Bic => { - debug_assert_eq!(128, ty_bits(ty)); - (0b010_01110_01_1, 0b000111) - } - VecALUOp::Orr => { - debug_assert_eq!(128, ty_bits(ty)); - (0b010_01110_10_1, 0b000111) - } - VecALUOp::Eor => { - debug_assert_eq!(128, ty_bits(ty)); - (0b011_01110_00_1, 0b000111) - } - VecALUOp::Bsl => { - debug_assert_eq!(128, ty_bits(ty)); - (0b011_01110_01_1, 0b000111) - } + VecALUOp::And => (0b010_01110_00_1, 0b000111), + VecALUOp::Bic => (0b010_01110_01_1, 0b000111), + VecALUOp::Orr => (0b010_01110_10_1, 0b000111), + VecALUOp::Eor => (0b011_01110_00_1, 0b000111), + VecALUOp::Bsl => (0b011_01110_01_1, 0b000111), VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001), VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Mul => { - debug_assert_ne!(I64X2, ty); + debug_assert_ne!(size, VectorSize::Size64x2); (0b010_01110_00_1 | enc_size << 1, 0b100111) } VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001), diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e8148dbe41..29e3036e16 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1841,7 +1841,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(3), rn: vreg(27), idx: 14, - ty: I8, 
+ size: VectorSize::Size8x16, }, "633F1D0E", "umov w3, v27.b[14]", @@ -1851,7 +1851,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(24), rn: vreg(5), idx: 3, - ty: I16, + size: VectorSize::Size16x8, }, "B83C0E0E", "umov w24, v5.h[3]", @@ -1861,7 +1861,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(12), rn: vreg(17), idx: 1, - ty: I32, + size: VectorSize::Size32x4, }, "2C3E0C0E", "mov w12, v17.s[1]", @@ -1871,7 +1871,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(21), rn: vreg(20), idx: 0, - ty: I64, + size: VectorSize::Size64x2, }, "953E084E", "mov x21, v20.d[0]", @@ -1900,7 +1900,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(25), rn: xreg(7), - ty: I8, + size: VectorSize::Size8x16, }, "F90C014E", "dup v25.16b, w7", @@ -1909,7 +1909,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(2), rn: xreg(23), - ty: I16, + size: VectorSize::Size16x8, }, "E20E024E", "dup v2.8h, w23", @@ -1918,7 +1918,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(0), rn: xreg(28), - ty: I32, + size: VectorSize::Size32x4, }, "800F044E", "dup v0.4s, w28", @@ -1927,7 +1927,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(31), rn: xreg(5), - ty: I64, + size: VectorSize::Size64x2, }, "BF0C084E", "dup v31.2d, x5", @@ -1936,7 +1936,7 @@ fn test_aarch64_binemit() { Inst::VecDupFromFpu { rd: writable_vreg(14), rn: vreg(19), - ty: F32, + size: VectorSize::Size32x4, }, "6E06044E", "dup v14.4s, v19.s[0]", @@ -1945,7 +1945,7 @@ fn test_aarch64_binemit() { Inst::VecDupFromFpu { rd: writable_vreg(18), rn: vreg(10), - ty: F64, + size: VectorSize::Size64x2, }, "5205084E", "dup v18.2d, v10.d[0]", @@ -2004,50 +2004,6 @@ fn test_aarch64_binemit() { "5CA4202F", "uxtl v28.2d, v2.2s", )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::UQAddScalar, - ty: I64, - }, - "D50EF77E", - "uqadd d21, d22, d23", - )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: 
vreg(22), - rm: vreg(23), - alu_op: VecALUOp::SQAddScalar, - ty: I64, - }, - "D50EF75E", - "sqadd d21, d22, d23", - )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::UQSubScalar, - ty: I64, - }, - "D52EF77E", - "uqsub d21, d22, d23", - )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::SQSubScalar, - ty: I64, - }, - "D52EF75E", - "sqsub d21, d22, d23", - )); insns.push(( Inst::VecRRR { @@ -2055,7 +2011,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "410C284E", "sqadd v1.16b, v2.16b, v8.16b", @@ -2067,7 +2023,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "810D7C4E", "sqadd v1.8h, v12.8h, v28.8h", @@ -2079,7 +2035,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C0CA64E", "sqadd v12.4s, v2.4s, v6.4s", @@ -2091,7 +2047,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F40CED4E", "sqadd v20.2d, v7.2d, v13.2d", @@ -2103,7 +2059,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "412C284E", "sqsub v1.16b, v2.16b, v8.16b", @@ -2115,7 +2071,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "812D7C4E", "sqsub v1.8h, v12.8h, v28.8h", @@ -2127,7 +2083,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C2CA64E", "sqsub v12.4s, v2.4s, v6.4s", @@ -2139,7 +2095,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F42CED4E", "sqsub 
v20.2d, v7.2d, v13.2d", @@ -2151,7 +2107,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "410C286E", "uqadd v1.16b, v2.16b, v8.16b", @@ -2163,7 +2119,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "810D7C6E", "uqadd v1.8h, v12.8h, v28.8h", @@ -2175,7 +2131,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C0CA66E", "uqadd v12.4s, v2.4s, v6.4s", @@ -2187,7 +2143,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F40CED6E", "uqadd v20.2d, v7.2d, v13.2d", @@ -2199,7 +2155,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "412C286E", "uqsub v1.16b, v2.16b, v8.16b", @@ -2211,7 +2167,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "812D7C6E", "uqsub v1.8h, v12.8h, v28.8h", @@ -2223,7 +2179,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C2CA66E", "uqsub v12.4s, v2.4s, v6.4s", @@ -2235,7 +2191,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F42CED6E", "uqsub v20.2d, v7.2d, v13.2d", @@ -2247,7 +2203,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I8X16, + size: VectorSize::Size8x16, }, "E38E386E", "cmeq v3.16b, v23.16b, v24.16b", @@ -2259,7 +2215,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I8X16, + size: VectorSize::Size8x16, }, "E336384E", "cmgt v3.16b, v23.16b, v24.16b", @@ -2271,7 +2227,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(9), 
rm: vreg(12), - ty: I8X16, + size: VectorSize::Size8x16, }, "373D2C4E", "cmge v23.16b, v9.16b, v12.16b", @@ -2283,7 +2239,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "2534216E", "cmhi v5.16b, v1.16b, v1.16b", @@ -2295,7 +2251,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(2), rm: vreg(15), - ty: I8X16, + size: VectorSize::Size8x16, }, "483C2F6E", "cmhs v8.16b, v2.16b, v15.16b", @@ -2307,7 +2263,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I16X8, + size: VectorSize::Size16x8, }, "E38E786E", "cmeq v3.8h, v23.8h, v24.8h", @@ -2319,7 +2275,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I16X8, + size: VectorSize::Size16x8, }, "E336784E", "cmgt v3.8h, v23.8h, v24.8h", @@ -2331,7 +2287,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(9), rm: vreg(12), - ty: I16X8, + size: VectorSize::Size16x8, }, "373D6C4E", "cmge v23.8h, v9.8h, v12.8h", @@ -2343,7 +2299,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I16X8, + size: VectorSize::Size16x8, }, "2534616E", "cmhi v5.8h, v1.8h, v1.8h", @@ -2355,7 +2311,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(2), rm: vreg(15), - ty: I16X8, + size: VectorSize::Size16x8, }, "483C6F6E", "cmhs v8.8h, v2.8h, v15.8h", @@ -2367,7 +2323,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I32X4, + size: VectorSize::Size32x4, }, "E38EB86E", "cmeq v3.4s, v23.4s, v24.4s", @@ -2379,7 +2335,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I32X4, + size: VectorSize::Size32x4, }, "E336B84E", "cmgt v3.4s, v23.4s, v24.4s", @@ -2391,7 +2347,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(9), rm: vreg(12), - ty: I32X4, + size: VectorSize::Size32x4, }, "373DAC4E", "cmge v23.4s, v9.4s, v12.4s", @@ -2403,7 +2359,7 
@@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I32X4, + size: VectorSize::Size32x4, }, "2534A16E", "cmhi v5.4s, v1.4s, v1.4s", @@ -2415,7 +2371,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(2), rm: vreg(15), - ty: I32X4, + size: VectorSize::Size32x4, }, "483CAF6E", "cmhs v8.4s, v2.4s, v15.4s", @@ -2427,7 +2383,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(28), rn: vreg(12), rm: vreg(4), - ty: F32X4, + size: VectorSize::Size32x4, }, "9CE5244E", "fcmeq v28.4s, v12.4s, v4.4s", @@ -2439,7 +2395,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(16), rm: vreg(31), - ty: F64X2, + size: VectorSize::Size64x2, }, "03E6FF6E", "fcmgt v3.2d, v16.2d, v31.2d", @@ -2451,7 +2407,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(23), rm: vreg(0), - ty: F64X2, + size: VectorSize::Size64x2, }, "F2E6606E", "fcmge v18.2d, v23.2d, v0.2d", @@ -2463,7 +2419,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(19), rm: vreg(18), - ty: I32X4, + size: VectorSize::Size32x4, }, "741E324E", "and v20.16b, v19.16b, v18.16b", @@ -2475,7 +2431,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(11), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "681D614E", "bic v8.16b, v11.16b, v1.16b", @@ -2487,7 +2443,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(15), rn: vreg(2), rm: vreg(12), - ty: I16X8, + size: VectorSize::Size16x8, }, "4F1CAC4E", "orr v15.16b, v2.16b, v12.16b", @@ -2499,7 +2455,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(3), rm: vreg(22), - ty: I8X16, + size: VectorSize::Size8x16, }, "721C366E", "eor v18.16b, v3.16b, v22.16b", @@ -2511,7 +2467,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(9), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "281D616E", "bsl v8.16b, v9.16b, v1.16b", @@ -2523,7 +2479,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(12), rm: vreg(1), - ty: I8X16, + size: 
VectorSize::Size8x16, }, "88A5216E", "umaxp v8.16b, v12.16b, v1.16b", @@ -2535,7 +2491,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(6), rm: vreg(1), - ty: I16X8, + size: VectorSize::Size16x8, }, "C1A4616E", "umaxp v1.8h, v6.8h, v1.8h", @@ -2547,7 +2503,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(20), rm: vreg(16), - ty: I32X4, + size: VectorSize::Size32x4, }, "81A6B06E", "umaxp v1.4s, v20.4s, v16.4s", @@ -2559,7 +2515,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "2584214E", "add v5.16b, v1.16b, v1.16b", @@ -2571,7 +2527,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(7), rn: vreg(13), rm: vreg(2), - ty: I16X8, + size: VectorSize::Size16x8, }, "A785624E", "add v7.8h, v13.8h, v2.8h", @@ -2583,7 +2539,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(9), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "3285A64E", "add v18.4s, v9.4s, v6.4s", @@ -2595,7 +2551,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(3), rm: vreg(2), - ty: I64X2, + size: VectorSize::Size64x2, }, "6184E24E", "add v1.2d, v3.2d, v2.2d", @@ -2607,7 +2563,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "2584216E", "sub v5.16b, v1.16b, v1.16b", @@ -2619,7 +2575,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(7), rn: vreg(13), rm: vreg(2), - ty: I16X8, + size: VectorSize::Size16x8, }, "A785626E", "sub v7.8h, v13.8h, v2.8h", @@ -2631,7 +2587,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(9), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "3285A66E", "sub v18.4s, v9.4s, v6.4s", @@ -2643,7 +2599,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(0), rm: vreg(8), - ty: I64X2, + size: VectorSize::Size64x2, }, "1284E86E", "sub v18.2d, v0.2d, v8.2d", @@ -2655,7 +2611,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(25), 
rn: vreg(9), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "399D284E", "mul v25.16b, v9.16b, v8.16b", @@ -2667,7 +2623,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(30), rn: vreg(30), rm: vreg(12), - ty: I16X8, + size: VectorSize::Size16x8, }, "DE9F6C4E", "mul v30.8h, v30.8h, v12.8h", @@ -2679,7 +2635,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I32X4, + size: VectorSize::Size32x4, }, "529EB24E", "mul v18.4s, v18.4s, v18.4s", @@ -2691,7 +2647,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I8X16, + size: VectorSize::Size8x16, }, "5246326E", "ushl v18.16b, v18.16b, v18.16b", @@ -2703,7 +2659,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I16X8, + size: VectorSize::Size16x8, }, "5246726E", "ushl v18.8h, v18.8h, v18.8h", @@ -2715,7 +2671,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(1), rm: vreg(21), - ty: I32X4, + size: VectorSize::Size32x4, }, "3244B56E", "ushl v18.4s, v1.4s, v21.4s", @@ -2727,7 +2683,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(7), rm: vreg(19), - ty: I64X2, + size: VectorSize::Size64x2, }, "E544F36E", "ushl v5.2d, v7.2d, v19.2d", @@ -2739,7 +2695,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I8X16, + size: VectorSize::Size8x16, }, "5246324E", "sshl v18.16b, v18.16b, v18.16b", @@ -2751,7 +2707,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(30), rn: vreg(1), rm: vreg(29), - ty: I16X8, + size: VectorSize::Size16x8, }, "3E447D4E", "sshl v30.8h, v1.8h, v29.8h", @@ -2763,7 +2719,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(22), rm: vreg(21), - ty: I32X4, + size: VectorSize::Size32x4, }, "C846B54E", "sshl v8.4s, v22.4s, v21.4s", @@ -2775,7 +2731,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(22), rm: vreg(2), - ty: I64X2, + size: VectorSize::Size64x2, }, "C846E24E", "sshl v8.2d, v22.2d, 
v2.2d", @@ -2786,7 +2742,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Not, rd: writable_vreg(2), rn: vreg(1), - ty: I32X4, + size: VectorSize::Size32x4, }, "2258206E", "mvn v2.16b, v1.16b", @@ -2797,7 +2753,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(8), rn: vreg(12), - ty: I8X16, + size: VectorSize::Size8x16, }, "88B9206E", "neg v8.16b, v12.16b", @@ -2808,7 +2764,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(0), rn: vreg(31), - ty: I16X8, + size: VectorSize::Size16x8, }, "E0BB606E", "neg v0.8h, v31.8h", @@ -2819,7 +2775,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(2), rn: vreg(3), - ty: I32X4, + size: VectorSize::Size32x4, }, "62B8A06E", "neg v2.4s, v3.4s", @@ -2830,7 +2786,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(10), rn: vreg(8), - ty: I64X2, + size: VectorSize::Size64x2, }, "0AB9E06E", "neg v10.2d, v8.2d", @@ -2841,7 +2797,7 @@ fn test_aarch64_binemit() { op: VecLanesOp::Uminv, rd: writable_vreg(2), rn: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "22A8316E", "uminv b2, v1.16b", @@ -2852,7 +2808,7 @@ fn test_aarch64_binemit() { op: VecLanesOp::Uminv, rd: writable_vreg(3), rn: vreg(11), - ty: I16X8, + size: VectorSize::Size16x8, }, "63A9716E", "uminv h3, v11.8h", @@ -2863,7 +2819,7 @@ fn test_aarch64_binemit() { op: VecLanesOp::Uminv, rd: writable_vreg(18), rn: vreg(4), - ty: I32X4, + size: VectorSize::Size32x4, }, "92A8B16E", "uminv s18, v4.4s", @@ -3214,7 +3170,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(30), idx: 2, - size: ScalarSize::Size32, + size: VectorSize::Size32x4, }, "C107145E", "mov s1, v30.s[2]", @@ -3225,7 +3181,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(11), idx: 0, - size: ScalarSize::Size64, + size: VectorSize::Size64x2, }, "7705085E", "mov d23, v11.d[0]", @@ -3443,6 +3399,50 @@ fn test_aarch64_binemit() { "fmin d15, d30, d31", )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Uqadd64, 
+ rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D50EF77E", + "uqadd d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sqadd64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D50EF75E", + "sqadd d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Uqsub64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D52EF77E", + "uqsub d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sqsub64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D52EF75E", + "sqsub d21, d22, d23", + )); + insns.push(( Inst::FpuRRRR { fpu_op: FPUOp3::MAdd32, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 79a72c245c..1c5c6f9a1c 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -5,8 +5,8 @@ use crate::binemit::CodeOffset; use crate::ir::types::{ - B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I16, - I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32, R64, + B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8, + I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64, }; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; @@ -125,6 +125,14 @@ pub enum FPUOp2 { Max64, Min32, Min64, + /// Signed saturating add + Sqadd64, + /// Unsigned saturating add + Uqadd64, + /// Signed saturating subtract + Sqsub64, + /// Unsigned saturating subtract + Uqsub64, } /// A floating-point unit (FPU) operation with two args, a register and an immediate. 
@@ -208,16 +216,12 @@ pub enum VecExtendOp { #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum VecALUOp { /// Signed saturating add - SQAddScalar, Sqadd, /// Unsigned saturating add - UQAddScalar, Uqadd, /// Signed saturating subtract - SQSubScalar, Sqsub, /// Unsigned saturating subtract - UQSubScalar, Uqsub, /// Compare bitwise equal Cmeq, @@ -590,7 +594,7 @@ pub enum Inst { rd: Writable, rn: Reg, idx: u8, - size: ScalarSize, + size: VectorSize, }, /// 1-op FPU instruction. @@ -734,21 +738,21 @@ pub enum Inst { rd: Writable, rn: Reg, idx: u8, - ty: Type, + size: VectorSize, }, /// Duplicate general-purpose register to vector. VecDup { rd: Writable, rn: Reg, - ty: Type, + size: VectorSize, }, /// Duplicate scalar to vector. VecDupFromFpu { rd: Writable, rn: Reg, - ty: Type, + size: VectorSize, }, /// Vector extend. @@ -764,7 +768,7 @@ pub enum Inst { rd: Writable, rn: Reg, rm: Reg, - ty: Type, + size: VectorSize, }, /// Vector two register miscellaneous instruction. @@ -772,7 +776,7 @@ pub enum Inst { op: VecMisc2, rd: Writable, rn: Reg, - ty: Type, + size: VectorSize, }, /// Vector instruction across lanes. @@ -780,7 +784,7 @@ pub enum Inst { op: VecLanesOp, rd: Writable, rn: Reg, - ty: Type, + size: VectorSize, }, /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). 
@@ -2504,13 +2508,8 @@ impl Inst { format!("mov {}.16b, {}.16b", rd, rn) } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { - let vector_type = match size { - ScalarSize::Size32 => F32, - ScalarSize::Size64 => F64, - _ => unimplemented!(), - }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_element(rn, mb_rru, idx, vector_type); + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); + let rn = show_vreg_element(rn, mb_rru, idx, size); format!("mov {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { @@ -2542,6 +2541,10 @@ impl Inst { FPUOp2::Max64 => ("fmax", ScalarSize::Size64), FPUOp2::Min32 => ("fmin", ScalarSize::Size32), FPUOp2::Min64 => ("fmin", ScalarSize::Size64), + FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64), + FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64), + FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64), + FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64), }; let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); let rn = show_vreg_scalar(rn, mb_rru, size); @@ -2557,7 +2560,7 @@ impl Inst { }; let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector { - |reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2) + |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2) } else { |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) }; @@ -2706,45 +2709,36 @@ impl Inst { let rn = rn.show_rru(mb_rru); format!("mov {}.d[0], {}", rd, rn) } - &Inst::MovFromVec { rd, rn, idx, ty } => { - let op = match ty { - I32 | I64 => "mov", - _ => "umov", + &Inst::MovFromVec { rd, rn, idx, size } => { + let op = match size { + VectorSize::Size8x16 => "umov", + VectorSize::Size16x8 => "umov", + VectorSize::Size32x4 => "mov", + VectorSize::Size64x2 => "mov", + _ => unimplemented!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::from_ty(ty)); - let rn = show_vreg_element(rn, mb_rru, idx, ty); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size()); + let rn = 
show_vreg_element(rn, mb_rru, idx, size); format!("{} {}, {}", op, rd, rn) } - &Inst::VecDup { rd, rn, ty } => { - let vector_type = match ty { - I8 => I8X16, - I16 => I16X8, - I32 => I32X4, - I64 => I64X2, - _ => unimplemented!(), - }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_ty(ty)); + &Inst::VecDup { rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); format!("dup {}, {}", rd, rn) } - &Inst::VecDupFromFpu { rd, rn, ty } => { - let vector_type = match ty { - F32 => F32X4, - F64 => F64X2, - _ => unimplemented!(), - }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type); - let rn = show_vreg_element(rn, mb_rru, 0, ty); + &Inst::VecDupFromFpu { rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_element(rn, mb_rru, 0, size); format!("dup {}, {}", rd, rn) } &Inst::VecExtend { t, rd, rn } => { let (op, dest, src) = match t { - VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8), - VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4), - VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2), - VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8), - VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4), - VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2), + VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), + VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), + VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), + VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), + VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), + VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); let rn = show_vreg_vector(rn, mb_rru, src); @@ -2755,72 +2749,54 @@ impl Inst { rn, rm, alu_op, - ty, + size, } => { - let (op, 
vector, ty) = match alu_op { - VecALUOp::SQAddScalar => ("sqadd", false, ty), - VecALUOp::Sqadd => ("sqadd", true, ty), - VecALUOp::UQAddScalar => ("uqadd", false, ty), - VecALUOp::Uqadd => ("uqadd", true, ty), - VecALUOp::SQSubScalar => ("sqsub", false, ty), - VecALUOp::Sqsub => ("sqsub", true, ty), - VecALUOp::UQSubScalar => ("uqsub", false, ty), - VecALUOp::Uqsub => ("uqsub", true, ty), - VecALUOp::Cmeq => ("cmeq", true, ty), - VecALUOp::Cmge => ("cmge", true, ty), - VecALUOp::Cmgt => ("cmgt", true, ty), - VecALUOp::Cmhs => ("cmhs", true, ty), - VecALUOp::Cmhi => ("cmhi", true, ty), - VecALUOp::Fcmeq => ("fcmeq", true, ty), - VecALUOp::Fcmgt => ("fcmgt", true, ty), - VecALUOp::Fcmge => ("fcmge", true, ty), - VecALUOp::And => ("and", true, I8X16), - VecALUOp::Bic => ("bic", true, I8X16), - VecALUOp::Orr => ("orr", true, I8X16), - VecALUOp::Eor => ("eor", true, I8X16), - VecALUOp::Bsl => ("bsl", true, I8X16), - VecALUOp::Umaxp => ("umaxp", true, ty), - VecALUOp::Add => ("add", true, ty), - VecALUOp::Sub => ("sub", true, ty), - VecALUOp::Mul => ("mul", true, ty), - VecALUOp::Sshl => ("sshl", true, ty), - VecALUOp::Ushl => ("ushl", true, ty), + let (op, size) = match alu_op { + VecALUOp::Sqadd => ("sqadd", size), + VecALUOp::Uqadd => ("uqadd", size), + VecALUOp::Sqsub => ("sqsub", size), + VecALUOp::Uqsub => ("uqsub", size), + VecALUOp::Cmeq => ("cmeq", size), + VecALUOp::Cmge => ("cmge", size), + VecALUOp::Cmgt => ("cmgt", size), + VecALUOp::Cmhs => ("cmhs", size), + VecALUOp::Cmhi => ("cmhi", size), + VecALUOp::Fcmeq => ("fcmeq", size), + VecALUOp::Fcmgt => ("fcmgt", size), + VecALUOp::Fcmge => ("fcmge", size), + VecALUOp::And => ("and", VectorSize::Size8x16), + VecALUOp::Bic => ("bic", VectorSize::Size8x16), + VecALUOp::Orr => ("orr", VectorSize::Size8x16), + VecALUOp::Eor => ("eor", VectorSize::Size8x16), + VecALUOp::Bsl => ("bsl", VectorSize::Size8x16), + VecALUOp::Umaxp => ("umaxp", size), + VecALUOp::Add => ("add", size), + VecALUOp::Sub => ("sub", size), + 
VecALUOp::Mul => ("mul", size), + VecALUOp::Sshl => ("sshl", size), + VecALUOp::Ushl => ("ushl", size), }; - - let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector { - |reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty) - } else { - |reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) - }; - - let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty); - let rn = show_vreg_fn(rn, mb_rru, ty); - let rm = show_vreg_fn(rm, mb_rru, ty); + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_vector(rn, mb_rru, size); + let rm = show_vreg_vector(rm, mb_rru, size); format!("{} {}, {}, {}", op, rd, rn, rm) } - &Inst::VecMisc { op, rd, rn, ty } => { - let (op, ty) = match op { - VecMisc2::Not => ("mvn", I8X16), - VecMisc2::Neg => ("neg", ty), + &Inst::VecMisc { op, rd, rn, size } => { + let (op, size) = match op { + VecMisc2::Not => ("mvn", VectorSize::Size8x16), + VecMisc2::Neg => ("neg", size), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty); - let rn = show_vreg_vector(rn, mb_rru, ty); + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_vector(rn, mb_rru, size); format!("{} {}, {}", op, rd, rn) } - &Inst::VecLanes { op, rd, rn, ty } => { + &Inst::VecLanes { op, rd, rn, size } => { let op = match op { VecLanesOp::Uminv => "uminv", }; - let size = match ty { - I8X16 => ScalarSize::Size8, - I16X8 => ScalarSize::Size16, - I32X4 => ScalarSize::Size32, - _ => unimplemented!(), - }; - - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, ty); + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); + let rn = show_vreg_vector(rn, mb_rru, size); format!("{} {}, {}", op, rd, rn) } &Inst::MovToNZCV { rn } => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 88d67fb257..cbf1440927 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ 
b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -1,8 +1,8 @@ //! AArch64 ISA definitions: registers. -use crate::ir::types::*; use crate::isa::aarch64::inst::OperandSize; use crate::isa::aarch64::inst::ScalarSize; +use crate::isa::aarch64::inst::VectorSize; use crate::machinst::*; use crate::settings; @@ -307,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar } /// Show a vector register. -pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String { +pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String { assert_eq!(RegClass::V128, reg.get_class()); let mut s = reg.show_rru(mb_rru); - match ty { - F32X2 => s.push_str(".2s"), - F32X4 => s.push_str(".4s"), - F64X2 => s.push_str(".2d"), - I8X8 => s.push_str(".8b"), - I8X16 => s.push_str(".16b"), - I16X4 => s.push_str(".4h"), - I16X8 => s.push_str(".8h"), - I32X2 => s.push_str(".2s"), - I32X4 => s.push_str(".4s"), - I64X2 => s.push_str(".2d"), - _ => unimplemented!(), - } + let suffix = match size { + VectorSize::Size8x8 => ".8b", + VectorSize::Size8x16 => ".16b", + VectorSize::Size16x4 => ".4h", + VectorSize::Size16x8 => ".8h", + VectorSize::Size32x2 => ".2s", + VectorSize::Size32x4 => ".4s", + VectorSize::Size64x2 => ".2d", + }; + s.push_str(suffix); s } /// Show an indexed vector element. 
-pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String { +pub fn show_vreg_element( + reg: Reg, + mb_rru: Option<&RealRegUniverse>, + idx: u8, + size: VectorSize, +) -> String { assert_eq!(RegClass::V128, reg.get_class()); let mut s = reg.show_rru(mb_rru); - let suffix = match ty { - I8 => "b", - I16 => "h", - I32 => "s", - I64 => "d", - F32 => "s", - F64 => "d", - _ => unimplemented!(), + let suffix = match size { + VectorSize::Size8x8 => "b", + VectorSize::Size8x16 => "b", + VectorSize::Size16x4 => "h", + VectorSize::Size16x8 => "h", + VectorSize::Size32x2 => "s", + VectorSize::Size32x4 => "s", + VectorSize::Size64x2 => "d", }; s.push_str(&format!(".{}[{}]", suffix, idx)); diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 03a464be9a..d60fdfe144 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; -use crate::{CodegenError, CodegenResult}; +use crate::CodegenResult; use crate::isa::aarch64::inst::*; use crate::isa::aarch64::AArch64Backend; @@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare>( ty: Type, cond: Cond, ) -> CodegenResult<()> { - match ty { - F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {} - _ => { - return Err(CodegenError::Unsupported(format!( - "unsupported SIMD type: {:?}", - ty - ))); - } - }; - let is_float = match ty { F32X4 | F64X2 => true, _ => false, }; + let size = VectorSize::from_ty(ty); // 'Less than' operations are implemented by swapping // the order of operands and using the 'greater than' // instructions. 
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare>( rd, rn, rm, - ty, + size, }); if cond == Cond::Ne { @@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare>( op: VecMisc2::Not, rd, rn: rd.to_reg(), - ty: I8X16, + size, }); } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 004e59441d..80b4518f9f 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -70,7 +70,7 @@ pub(crate) fn lower_insn_to_regs>( rn, rm, alu_op: VecALUOp::Add, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -89,13 +89,13 @@ pub(crate) fn lower_insn_to_regs>( rn, rm, alu_op: VecALUOp::Sub, - ty, + size: VectorSize::from_ty(ty), }); } } Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => { - // We use the vector instruction set's saturating adds (UQADD / - // SQADD), which require vector registers. + // We use the scalar SIMD & FP saturating additions and subtractions + // (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers. 
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat; let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]); @@ -105,11 +105,11 @@ pub(crate) fn lower_insn_to_regs>( } else { NarrowValueMode::ZeroExtend64 }; - let alu_op = match op { - Opcode::UaddSat => VecALUOp::UQAddScalar, - Opcode::SaddSat => VecALUOp::SQAddScalar, - Opcode::UsubSat => VecALUOp::UQSubScalar, - Opcode::SsubSat => VecALUOp::SQSubScalar, + let fpu_op = match op { + Opcode::UaddSat => FPUOp2::Uqadd64, + Opcode::SaddSat => FPUOp2::Sqadd64, + Opcode::UsubSat => FPUOp2::Uqsub64, + Opcode::SsubSat => FPUOp2::Sqsub64, _ => unreachable!(), }; let va = ctx.alloc_tmp(RegClass::V128, I128); @@ -118,18 +118,17 @@ pub(crate) fn lower_insn_to_regs>( let rb = put_input_in_reg(ctx, inputs[1], narrow_mode); ctx.emit(Inst::MovToVec64 { rd: va, rn: ra }); ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb }); - ctx.emit(Inst::VecRRR { + ctx.emit(Inst::FpuRRR { + fpu_op, rd: va, rn: va.to_reg(), rm: vb.to_reg(), - alu_op, - ty: I64, }); ctx.emit(Inst::MovFromVec { rd, rn: va.to_reg(), idx: 0, - ty: I64, + size: VectorSize::Size64x2, }); } else { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); @@ -148,7 +147,7 @@ pub(crate) fn lower_insn_to_regs>( rn, rm, alu_op, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -167,7 +166,7 @@ pub(crate) fn lower_insn_to_regs>( op: VecMisc2::Neg, rd, rn, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -192,7 +191,7 @@ pub(crate) fn lower_insn_to_regs>( rd, rn, rm, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -422,7 +421,7 @@ pub(crate) fn lower_insn_to_regs>( op: VecMisc2::Not, rd, rn: rm, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -466,7 +465,7 @@ pub(crate) fn lower_insn_to_regs>( rd, rn, rm, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -495,7 +494,7 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm)); } else { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - + let size = 
VectorSize::from_ty(ty); let (alu_op, is_right_shift) = match op { Opcode::Ishl => (VecALUOp::Sshl, false), Opcode::Ushr => (VecALUOp::Ushl, true), @@ -514,18 +513,14 @@ pub(crate) fn lower_insn_to_regs>( put_input_in_reg(ctx, inputs[1], NarrowValueMode::None) }; - ctx.emit(Inst::VecDup { - rd, - rn: rm, - ty: ty.lane_type(), - }); + ctx.emit(Inst::VecDup { rd, rn: rm, size }); ctx.emit(Inst::VecRRR { alu_op, rd, rn, rm: rd.to_reg(), - ty, + size, }); } } @@ -1167,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs>( rd, rn, rm, - ty, + size: VectorSize::from_ty(ty), }); } } @@ -1297,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs>( rd, rn, idx: 0, - ty: I64, + size: VectorSize::Size64x2, }); } } @@ -1557,15 +1552,15 @@ pub(crate) fn lower_insn_to_regs>( let idx = *imm; let rd = get_output_reg(ctx, outputs[0]); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); let ty = ty.unwrap(); if ty_is_int(ty) { - ctx.emit(Inst::MovFromVec { rd, rn, idx, ty }); + ctx.emit(Inst::MovFromVec { rd, rn, idx, size }); // Plain moves are faster on some processors. 
} else if idx == 0 { ctx.emit(Inst::gen_move(rd, rn, ty)); } else { - let size = ScalarSize::from_ty(ty); ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size }); } } else { @@ -1576,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Splat => { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - let ty = ctx.input_ty(insn, 0); - let inst = if ty_is_int(ty) { - Inst::VecDup { rd, rn, ty } + let input_ty = ctx.input_ty(insn, 0); + let size = VectorSize::from_ty(ty.unwrap()); + let inst = if ty_is_int(input_ty) { + Inst::VecDup { rd, rn, size } } else { - Inst::VecDupFromFpu { rd, rn, ty } + Inst::VecDupFromFpu { rd, rn, size } }; ctx.emit(inst); } @@ -1598,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs>( // cmp xm, #0 // cset xm, ne - let input_ty = ctx.input_ty(insn, 0); + let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); + if op == Opcode::VanyTrue { ctx.emit(Inst::VecRRR { alu_op: VecALUOp::Umaxp, rd: tmp, rn: rm, rm: rm, - ty: input_ty, + size, }); } else { ctx.emit(Inst::VecLanes { op: VecLanesOp::Uminv, rd: tmp, rn: rm, - ty: input_ty, + size, }); }; @@ -1620,7 +1617,7 @@ pub(crate) fn lower_insn_to_regs>( rd, rn: tmp.to_reg(), idx: 0, - ty: I64, + size: VectorSize::Size64x2, }); ctx.emit(Inst::AluRRImm12 { From abf157bd6999caa3c96d902cb2396c746f91a877 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 9 Jul 2020 17:18:22 +0200 Subject: [PATCH 02/11] machinst x64: Only use the feature flag to enable the x64 new backend; Before this patch, running the x64 new backend would require both compiling with --features experimental_x64 and running with `use_new_backend`. 
This patch changes this behavior so that the runtime flag is not needed anymore: using the feature flag will enforce usage of the new backend everywhere, making using and testing it much simpler: cargo run --features experimental_x64 ;; other CLI options/flags This also gives a hint at what the meta language generation would look like after switching to the new backend. Compiling only with the x64 codegen flag gives a nice compile time speedup. --- cranelift/Cargo.toml | 1 + cranelift/codegen/Cargo.toml | 1 - cranelift/codegen/build.rs | 27 +++++++++-- cranelift/codegen/meta/src/gen_legalizer.rs | 7 +++ cranelift/codegen/meta/src/isa/mod.rs | 4 +- cranelift/codegen/meta/src/isa/x86/mod.rs | 2 +- .../codegen/meta/src/isa/x86/settings.rs | 6 --- cranelift/codegen/meta/src/lib.rs | 48 +++++++++++++++++-- cranelift/codegen/src/isa/mod.rs | 6 ++- cranelift/codegen/src/isa/x64/mod.rs | 28 +++++++---- cranelift/codegen/src/isa/x64/settings.rs | 9 ++++ cranelift/codegen/src/isa/x86/mod.rs | 20 +++----- cranelift/codegen/src/legalizer/mod.rs | 16 ++++++- crates/jit/src/link.rs | 3 +- 14 files changed, 134 insertions(+), 44 deletions(-) create mode 100644 cranelift/codegen/src/isa/x64/settings.rs diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index 7c63bea275..d72bcc4d91 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -48,3 +48,4 @@ default = ["disas", "wasm", "cranelift-codegen/all-arch"] disas = ["capstone"] enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"] wasm = ["wat", "cranelift-wasm"] +experimental_x64 = ["cranelift-codegen/x64"] diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index cdafe049e2..a78869265a 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -66,7 +66,6 @@ x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel # Option to enable all architectures.
all-arch = [ "x86", - "x64", "arm32", "arm64", "riscv" diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index b7352f37c3..2caf32609d 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -26,7 +26,15 @@ fn main() { let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set"); let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set"); - // Configure isa targets cfg. + let new_backend_isas = if env::var("CARGO_FEATURE_X64").is_ok() { + // The x64 (new backend for x86_64) is a bit particular: it only requires generating + // the shared meta code; the only ISA-specific code is for settings. + vec![meta::isa::Isa::X86] + } else { + Vec::new() + }; + + // Configure isa targets using the old backend. let isa_targets = meta::isa::Isa::all() .iter() .cloned() @@ -36,7 +44,7 @@ fn main() { }) .collect::>(); - let isas = if isa_targets.is_empty() { + let old_backend_isas = if new_backend_isas.is_empty() && isa_targets.is_empty() { // Try to match native target. 
let target_name = target_triple.split('-').next().unwrap(); let isa = meta::isa_from_arch(&target_name).expect("error when identifying target"); @@ -56,14 +64,23 @@ fn main() { crate_dir.join("build.rs").to_str().unwrap() ); - if let Err(err) = meta::generate(&isas, &out_dir) { + if let Err(err) = meta::generate(&old_backend_isas, &new_backend_isas, &out_dir) { eprintln!("Error: {}", err); process::exit(1); } if env::var("CRANELIFT_VERBOSE").is_ok() { - for isa in &isas { - println!("cargo:warning=Includes support for {} ISA", isa.to_string()); + for isa in &old_backend_isas { + println!( + "cargo:warning=Includes old-backend support for {} ISA", + isa.to_string() + ); + } + for isa in &new_backend_isas { + println!( + "cargo:warning=Includes new-backend support for {} ISA", + isa.to_string() + ); } println!( "cargo:warning=Build step took {:?}.", diff --git a/cranelift/codegen/meta/src/gen_legalizer.rs b/cranelift/codegen/meta/src/gen_legalizer.rs index 759121894f..7b56b8db48 100644 --- a/cranelift/codegen/meta/src/gen_legalizer.rs +++ b/cranelift/codegen/meta/src/gen_legalizer.rs @@ -700,6 +700,7 @@ fn gen_isa( pub(crate) fn generate( isas: &[TargetIsa], transform_groups: &TransformGroups, + extra_legalization_groups: &[&'static str], filename_prefix: &str, out_dir: &str, ) -> Result<(), error::Error> { @@ -711,8 +712,14 @@ pub(crate) fn generate( fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?; } + // Add extra legalization groups that were explicitly requested. + for group in extra_legalization_groups { + shared_group_names.insert(group); + } + // Generate shared legalize groups. let mut fmt = Formatter::new(); + // Generate shared legalize groups. 
let mut type_sets = UniqueTable::new(); let mut sorted_shared_group_names = Vec::from_iter(shared_group_names); sorted_shared_group_names.sort(); diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index 39cd913300..ed8db85f0d 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -6,10 +6,10 @@ use std::fmt; mod arm32; mod arm64; mod riscv; -mod x86; +pub(crate) mod x86; /// Represents known ISA target. -#[derive(Copy, Clone)] +#[derive(PartialEq, Copy, Clone)] pub enum Isa { Riscv, X86, diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 56f35770a8..a272e83900 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -14,7 +14,7 @@ mod legalize; mod opcodes; mod recipes; mod registers; -mod settings; +pub(crate) mod settings; pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = settings::define(&shared_defs.settings); diff --git a/cranelift/codegen/meta/src/isa/x86/settings.rs b/cranelift/codegen/meta/src/isa/x86/settings.rs index 0059bf0864..dddd69abb3 100644 --- a/cranelift/codegen/meta/src/isa/x86/settings.rs +++ b/cranelift/codegen/meta/src/isa/x86/settings.rs @@ -3,12 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { let mut settings = SettingGroupBuilder::new("x86"); - settings.add_bool( - "use_new_backend", - "Whether to use the new codegen backend using the new isel", - false, - ); - // CPUID.01H:ECX let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false); diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs index 796e2a110d..ead2c4442f 100644 --- a/cranelift/codegen/meta/src/lib.rs +++ 
b/cranelift/codegen/meta/src/lib.rs @@ -25,7 +25,11 @@ pub fn isa_from_arch(arch: &str) -> Result { } /// Generates all the Rust source files used in Cranelift from the meta-language. -pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> { +pub fn generate( + old_backend_isas: &[isa::Isa], + new_backend_isas: &[isa::Isa], + out_dir: &str, +) -> Result<(), error::Error> { // Create all the definitions: // - common definitions. let mut shared_defs = shared::define(); @@ -39,7 +43,7 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> { gen_types::generate("types.rs", &out_dir)?; // - per ISA definitions. - let isas = isa::define(isas, &mut shared_defs); + let target_isas = isa::define(old_backend_isas, &mut shared_defs); // At this point, all definitions are done. let all_formats = shared_defs.verify_instruction_formats(); @@ -53,9 +57,22 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> { &out_dir, )?; - gen_legalizer::generate(&isas, &shared_defs.transform_groups, "legalize", &out_dir)?; + let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() { + // The new backend only requires the "expand" legalization group. + &["expand"] + } else { + &[] + }; - for isa in isas { + gen_legalizer::generate( + &target_isas, + &shared_defs.transform_groups, + extra_legalization_groups, + "legalize", + &out_dir, + )?; + + for isa in target_isas { gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?; gen_settings::generate( @@ -80,5 +97,28 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> { )?; } + for isa in new_backend_isas { + match isa { + isa::Isa::X86 => { + // If the old backend ISAs contained x86, this file has already been generated. 
+ if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) { + continue; + } + + let settings = crate::isa::x86::settings::define(&shared_defs.settings); + gen_settings::generate( + &settings, + gen_settings::ParentGroup::Shared, + "settings-x86.rs", + &out_dir, + )?; + } + isa::Isa::Arm64 => { + // aarch64 doesn't have platform-specific settings. + } + isa::Isa::Arm32 | isa::Isa::Riscv => todo!(), + } + } + Ok(()) } diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 3bd84fbc6e..4ac40c06a4 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -121,7 +121,11 @@ pub fn lookup(triple: Triple) -> Result { match triple.architecture { Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv", triple), Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => { - isa_builder!(x86, "x86", triple) + if cfg!(feature = "x64") { + isa_builder!(x64, "x64", triple) + } else { + isa_builder!(x86, "x86", triple) + } } Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple), Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple), diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 7666875a0e..271542378a 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -11,28 +11,33 @@ use crate::isa::Builder as IsaBuilder; use crate::machinst::pretty_print::ShowWithRRU; use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode}; use crate::result::CodegenResult; -use crate::settings::{self, Flags}; +use crate::settings::{self as shared_settings, Flags}; -use crate::isa::x64::inst::regs::create_reg_universe_systemv; +use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; + +use super::TargetIsa; mod abi; mod inst; mod lower; +mod settings; /// An X64 backend. 
pub(crate) struct X64Backend { triple: Triple, flags: Flags, + _x64_flags: x64_settings::Flags, reg_universe: RealRegUniverse, } impl X64Backend { /// Create a new X64 backend with the given (shared) flags. - fn new_with_flags(triple: Triple, flags: Flags) -> Self { + fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self { let reg_universe = create_reg_universe_systemv(&flags); Self { triple, flags, + _x64_flags: x64_flags, reg_universe, } } @@ -103,10 +108,17 @@ impl MachBackend for X64Backend { pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder { IsaBuilder { triple, - setup: settings::builder(), - constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| { - let backend = X64Backend::new_with_flags(triple, flags); - Box::new(TargetIsaAdapter::new(backend)) - }, + setup: x64_settings::builder(), + constructor: isa_constructor, } } + +fn isa_constructor( + triple: Triple, + shared_flags: Flags, + builder: shared_settings::Builder, +) -> Box { + let isa_flags = x64_settings::Flags::new(&shared_flags, builder); + let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags); + Box::new(TargetIsaAdapter::new(backend)) +} diff --git a/cranelift/codegen/src/isa/x64/settings.rs b/cranelift/codegen/src/isa/x64/settings.rs new file mode 100644 index 0000000000..c5371bb132 --- /dev/null +++ b/cranelift/codegen/src/isa/x64/settings.rs @@ -0,0 +1,9 @@ +//! x86 Settings. + +use crate::settings::{self, detail, Builder}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/x86/settings.rs`. 
+include!(concat!(env!("OUT_DIR"), "/settings-x86.rs")); diff --git a/cranelift/codegen/src/isa/x86/mod.rs b/cranelift/codegen/src/isa/x86/mod.rs index 4da21a879f..cbdeb3069d 100644 --- a/cranelift/codegen/src/isa/x86/mod.rs +++ b/cranelift/codegen/src/isa/x86/mod.rs @@ -57,20 +57,12 @@ fn isa_constructor( let isa_flags = settings::Flags::new(&shared_flags, builder); - if isa_flags.use_new_backend() { - #[cfg(not(feature = "x64"))] - panic!("new backend x86 support not included by cargo features!"); - - #[cfg(feature = "x64")] - super::x64::isa_builder(triple).finish(shared_flags) - } else { - Box::new(Isa { - triple, - isa_flags, - shared_flags, - cpumode: level1, - }) - } + Box::new(Isa { + triple, + isa_flags, + shared_flags, + cpumode: level1, + }) } impl TargetIsa for Isa { diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 5bd5ac8f5a..3b33e55b1e 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -19,10 +19,24 @@ use crate::flowgraph::ControlFlowGraph; use crate::ir::types::{I32, I64}; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; + +#[cfg(any( + feature = "x86", + feature = "arm32", + feature = "arm64", + feature = "riscv" +))] use crate::predicates; +#[cfg(any( + feature = "x86", + feature = "arm32", + feature = "arm64", + feature = "riscv" +))] +use alloc::vec::Vec; + use crate::timing; use alloc::collections::BTreeSet; -use alloc::vec::Vec; mod boundary; mod call; diff --git a/crates/jit/src/link.rs b/crates/jit/src/link.rs index 68996f45ab..71284e8755 100644 --- a/crates/jit/src/link.rs +++ b/crates/jit/src/link.rs @@ -98,12 +98,13 @@ fn apply_reloc( write_unaligned(reloc_address as *mut u32, reloc_delta_u32); }, #[cfg(target_pointer_width = "64")] - (RelocationKind::Relative, RelocationEncoding::X86Branch, 32) => unsafe { + (RelocationKind::Relative, RelocationEncoding::Generic, 32) => unsafe { let reloc_address = body.add(offset as 
usize) as usize; let reloc_addend = r.addend() as isize; let reloc_delta_u64 = (target_func_address as u64) .wrapping_sub(reloc_address as u64) .wrapping_add(reloc_addend as u64); + // TODO implement far calls mode in x64 new backend. assert!( reloc_delta_u64 as isize <= i32::max_value() as isize, "relocation too large to fit in i32" From 806d197472ebcbfe32075c61fac562449ad7f48f Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 15 Jul 2020 10:22:08 -0500 Subject: [PATCH 03/11] Update platform support docs (#2023) Be sure to mention Linux AArch64 as a supported platform of Wasmtime now. --- docs/stability-platform-support.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/stability-platform-support.md b/docs/stability-platform-support.md index d2ed7fb8bf..4d9f007848 100644 --- a/docs/stability-platform-support.md +++ b/docs/stability-platform-support.md @@ -10,6 +10,7 @@ snapshot of what the current state of the world looks like. All features of `wasmtime` should work on the following platforms: * Linux x86\_64 +* Linux aarch64 * macOS x86\_64 * Windows x86\_64 @@ -18,9 +19,8 @@ sections below! ## JIT compiler support -The JIT compiler, backed by either `lightbeam` or `cranelift` supports only the -x86\_64 architecture at this time. Support for at least ARM, AArch64, and x86 is -planned at this time. +The JIT compiler, backed by Cranelift, supports the x86\_64 and aarch64 +architectures at this time. Support for at least ARM and x86 is planned as well. Usage of the JIT compiler will require a host operating system which supports creating executable memory pages on-the-fly. In Rust terms this generally means @@ -39,5 +39,6 @@ much else will be needed. The `wasmtime` project does not currently use `#[no_std]` for its crates, but this is not because it won't support it! At this time we're still gathering use cases for for what `#[no_std]` might entail, so if you're interested in this -we'd love to hear about your use case! 
Feel free to open an issue on the +we'd love to hear about your use case! Feel free to [open an +issue](https://github.com/bytecodealliance/wasmtime/issues/new) on the `wasmtime` repository to discuss this. From 0e5e8a62c85e717ad53bd1b8756c1a6f2aec4ac8 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 13:37:37 -0700 Subject: [PATCH 04/11] Add `DerivedFunction` for doubling lane widths and halving the number of lanes (i.e. merging) Certain operations (e.g. widening) will have operands with types like `NxM` but will return results with types like `(N*2)x(M/2)` (double the lane width, halve the number of lanes; maintain the same number of vector bits). This is equivalent to applying two `DerivedFunction`s to the type: `DerivedFunction::DoubleWidth` then `DerivedFunction::HalfVector`. Since there is no easy way to apply multiple `DerivedFunction`s (e.g. most of the logic is one-level deep, https://github.com/bytecodealliance/wasmtime/blob/1d5a678124e0f035f7614cafe43066c834a5113b/cranelift/codegen/meta/src/gen_inst.rs#L618-L621), I added `DerivedFunction::MergeLanes` to do the necessary type conversion. 
--- cranelift/codegen/meta/src/cdsl/typevar.rs | 29 +++++++++++++++++++++- cranelift/codegen/src/ir/instructions.rs | 8 ++++++ cranelift/codegen/src/ir/types.rs | 13 +++++++++- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/meta/src/cdsl/typevar.rs b/cranelift/codegen/meta/src/cdsl/typevar.rs index 0c0b2e9647..752b458b2a 100644 --- a/cranelift/codegen/meta/src/cdsl/typevar.rs +++ b/cranelift/codegen/meta/src/cdsl/typevar.rs @@ -211,6 +211,24 @@ impl TypeVar { "can't double 256 lanes" ); } + DerivedFunc::MergeLanes => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS, + "can't double all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, + "can't double all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS, + "can't double all boolean types" + ); + assert!( + *ts.lanes.iter().min().unwrap() > 1, + "can't halve a scalar type" + ); + } DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ } } @@ -248,6 +266,9 @@ impl TypeVar { pub fn split_lanes(&self) -> TypeVar { self.derived(DerivedFunc::SplitLanes) } + pub fn merge_lanes(&self) -> TypeVar { + self.derived(DerivedFunc::MergeLanes) + } /// Constrain the range of types this variable can assume to a subset of those in the typeset /// ts. 
@@ -355,6 +376,7 @@ pub(crate) enum DerivedFunc { HalfVector, DoubleVector, SplitLanes, + MergeLanes, } impl DerivedFunc { @@ -367,6 +389,7 @@ impl DerivedFunc { DerivedFunc::HalfVector => "half_vector", DerivedFunc::DoubleVector => "double_vector", DerivedFunc::SplitLanes => "split_lanes", + DerivedFunc::MergeLanes => "merge_lanes", } } @@ -377,6 +400,8 @@ impl DerivedFunc { DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth), DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector), DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector), + DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes), + DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes), _ => None, } } @@ -462,6 +487,7 @@ impl TypeSet { DerivedFunc::HalfVector => self.half_vector(), DerivedFunc::DoubleVector => self.double_vector(), DerivedFunc::SplitLanes => self.half_width().double_vector(), + DerivedFunc::MergeLanes => self.double_width().half_vector(), } } @@ -601,7 +627,8 @@ impl TypeSet { DerivedFunc::DoubleWidth => self.half_width(), DerivedFunc::HalfVector => self.double_vector(), DerivedFunc::DoubleVector => self.half_vector(), - DerivedFunc::SplitLanes => self.half_vector().double_width(), + DerivedFunc::SplitLanes => self.double_width().half_vector(), + DerivedFunc::MergeLanes => self.half_width().double_vector(), } } diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs index f835bd5f4a..2ba730b687 100644 --- a/cranelift/codegen/src/ir/instructions.rs +++ b/cranelift/codegen/src/ir/instructions.rs @@ -584,6 +584,9 @@ enum OperandConstraint { /// This operand is `ctrlType.split_lanes()`. SplitLanes, + + /// This operand is `ctrlType.merge_lanes()`. 
+ MergeLanes, } impl OperandConstraint { @@ -615,6 +618,11 @@ impl OperandConstraint { .split_lanes() .expect("invalid type for split_lanes"), ), + MergeLanes => Bound( + ctrl_type + .merge_lanes() + .expect("invalid type for merge_lanes"), + ), } } } diff --git a/cranelift/codegen/src/ir/types.rs b/cranelift/codegen/src/ir/types.rs index 319f3ae66f..c669839da5 100644 --- a/cranelift/codegen/src/ir/types.rs +++ b/cranelift/codegen/src/ir/types.rs @@ -284,7 +284,7 @@ impl Type { /// Split the lane width in half and double the number of lanes to maintain the same bit-width. /// - /// If this is a scalar type of n bits, it produces a SIMD vector type of (n/2)x2. + /// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`. pub fn split_lanes(self) -> Option { match self.half_width() { Some(half_width) => half_width.by(2), @@ -292,6 +292,17 @@ impl Type { } } + /// Merge lanes to halve the number of lanes and double the lane width to maintain the same + /// bit-width. + /// + /// If this is a scalar type, it will return `None`. + pub fn merge_lanes(self) -> Option { + match self.double_width() { + Some(double_width) => double_width.half_vector(), + None => None, + } + } + /// Index of this type, for use with hash tables etc. pub fn index(self) -> usize { usize::from(self.0) From fafef7db77e811ab329c08f5edacc78b3a3e3e53 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 15:56:02 -0700 Subject: [PATCH 05/11] Add `x86_palignr` instructions This instruction is necessary for implementing `[s|u]widen_high`.
--- cranelift/codegen/meta/src/isa/x86/encodings.rs | 10 ++++++++++ .../codegen/meta/src/isa/x86/instructions.rs | 15 +++++++++++++++ cranelift/codegen/meta/src/isa/x86/opcodes.rs | 4 ++++ cranelift/codegen/src/isa/aarch64/lower_inst.rs | 1 + .../isa/x86/simd-conversion-binemit.clif | 10 ++++++---- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 8f4a77d814..a58348d49b 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1697,6 +1697,7 @@ fn define_simd( let x86_pminu = x86.by_name("x86_pminu"); let x86_pmullq = x86.by_name("x86_pmullq"); let x86_pmuludq = x86.by_name("x86_pmuludq"); + let x86_palignr = x86.by_name("x86_palignr"); let x86_pshufb = x86.by_name("x86_pshufb"); let x86_pshufd = x86.by_name("x86_pshufd"); let x86_psll = x86.by_name("x86_psll"); @@ -1901,6 +1902,8 @@ fn define_simd( rec_fa.opcodes(low), ); } + + // SIMD narrow/widen for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); @@ -1912,6 +1915,13 @@ fn define_simd( let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); } + for ty in &[I8, I16, I32, I64] { + e.enc_both_inferred_maybe_isap( + x86_palignr.bind(vector(*ty, sse_vector_size)), + rec_fa_ib.opcodes(&PALIGNR[..]), + Some(use_ssse3_simd), + ); + } // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). 
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 0e48784f23..7acd2e2c50 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -664,6 +664,21 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let c = &Operand::new("c", uimm8) + .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details"); + ig.push( + Inst::new( + "x86_palignr", + r#" + Concatenate destination and source operands, extracting a byte-aligned result shifted to + the right by `c`. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, c]) + .operands_out(vec![a]), + ); + let i64_t = &TypeVar::new( "i64_t", "A scalar 64bit integer", diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index c357488ddd..25685593a6 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -354,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; /// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; +/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is +/// shifted to the right by the constant number of bytes in imm8 (SSSE3). +pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f]; + /// Bitwise AND of xmm2/m128 and xmm1 (SSE2). 
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 80b4518f9f..7fb878c87a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2133,6 +2133,7 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::X86Insertps | Opcode::X86Movsd | Opcode::X86Movlhps + | Opcode::X86Palignr | Opcode::X86Psll | Opcode::X86Psrl | Opcode::X86Psra diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif index ae1cdda753..b1a95c52d7 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif @@ -1,5 +1,6 @@ test binemit -target x86_64 +set enable_simd +target x86_64 has_ssse3=true ; Ensure raw_bitcast emits no instructions. function %raw_bitcast_i16x8_to_b32x4() { @@ -10,8 +11,9 @@ block0: return } -function %fcvt_32(i32x4) { -block0(v0: i32x4 [%xmm6]): -[-, %xmm2] v1 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6 +function %conversions_i32x4(i32x4, i32x4) { +block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]): +[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6 +[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03 return } From c8ddf8a34ced624b2c1fbb63bc786059a6387b29 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 16:13:50 -0700 Subject: [PATCH 06/11] Encode `[u|s]widen_low` for x86 --- .../codegen/meta/src/isa/x86/encodings.rs | 12 +++ cranelift/codegen/meta/src/isa/x86/opcodes.rs | 4 +- .../codegen/meta/src/shared/instructions.rs | 81 +++++++++++++++++-- .../codegen/src/isa/aarch64/lower_inst.rs | 7 +- .../isa/x86/simd-conversion-binemit.clif | 9 ++- 5 files changed, 103 insertions(+), 10 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs 
b/cranelift/codegen/meta/src/isa/x86/encodings.rs index a58348d49b..da04019a1b 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1669,6 +1669,7 @@ fn define_simd( let ssub_sat = shared.by_name("ssub_sat"); let store = shared.by_name("store"); let store_complex = shared.by_name("store_complex"); + let swiden_low = shared.by_name("swiden_low"); let uadd_sat = shared.by_name("uadd_sat"); let uload8x8 = shared.by_name("uload8x8"); let uload8x8_complex = shared.by_name("uload8x8_complex"); @@ -1678,6 +1679,7 @@ fn define_simd( let uload32x2_complex = shared.by_name("uload32x2_complex"); let snarrow = shared.by_name("snarrow"); let unarrow = shared.by_name("unarrow"); + let uwiden_low = shared.by_name("uwiden_low"); let ushr_imm = shared.by_name("ushr_imm"); let usub_sat = shared.by_name("usub_sat"); let vconst = shared.by_name("vconst"); @@ -1915,6 +1917,16 @@ fn define_simd( let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); } + for (ty, swiden_opcode, uwiden_opcode) in &[ + (I8, &PMOVSXBW[..], &PMOVZXBW[..]), + (I16, &PMOVSXWD[..], &PMOVZXWD[..]), + ] { + let isap = Some(use_sse41_simd); + let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); + let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); + } for ty in &[I8, I16, I32, I64] { e.enc_both_inferred_maybe_isap( x86_palignr.bind(vector(*ty, sse_vector_size)), diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs index 25685593a6..09c07c458f 100644 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs @@ -477,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; 
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; /// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1. +/// integers in xmm1 (SSE4.1). pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; /// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit @@ -489,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; /// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1. +/// integers in xmm1 (SSE4.1). pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; /// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index c78787ce82..1c06c4a325 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3883,9 +3883,9 @@ pub(crate) fn define( .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]), ); - let I16xN = &TypeVar::new( - "I16xN", - "A SIMD vector type containing integers 16-bits wide and up", + let I16or32xN = &TypeVar::new( + "I16or32xN", + "A SIMD vector type containing integer lanes 16 or 32 bits wide", TypeSetBuilder::new() .ints(16..32) .simd_lanes(4..8) @@ -3893,9 +3893,9 @@ pub(crate) fn define( .build(), ); - let x = &Operand::new("x", I16xN); - let y = &Operand::new("y", I16xN); - let a = &Operand::new("a", &I16xN.split_lanes()); + let x = &Operand::new("x", I16or32xN); + let y = &Operand::new("y", I16or32xN); + let a = &Operand::new("a", &I16or32xN.split_lanes()); ig.push( Inst::new( @@ -3934,6 +3934,75 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let I8or16xN = &TypeVar::new( + "I8or16xN", + "A SIMD vector type containing integer lanes 8 or 16 bits wide.", + TypeSetBuilder::new() + .ints(8..16) + 
.simd_lanes(8..16) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I8or16xN); + let a = &Operand::new("a", &I8or16xN.merge_lanes()); + + ig.push( + Inst::new( + "swiden_low", + r#" + Widen the low lanes of `x` using signed extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "swiden_high", + r#" + Widen the high lanes of `x` using signed extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_low", + r#" + Widen the low lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_high", + r#" + Widen the high lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + let IntTo = &TypeVar::new( "IntTo", "A larger integer type with the same number of lanes", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 7fb878c87a..88751a1478 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2154,7 +2154,12 @@ pub(crate) fn lower_insn_to_regs>( Opcode::AvgRound => unimplemented!(), Opcode::Iabs => unimplemented!(), - Opcode::Snarrow | Opcode::Unarrow => unimplemented!(), + Opcode::Snarrow + | Opcode::Unarrow + | Opcode::SwidenLow + | Opcode::SwidenHigh + | Opcode::UwidenLow + | Opcode::UwidenHigh => unimplemented!(), Opcode::TlsValue => unimplemented!(), } diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif index b1a95c52d7..72e3412279 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif @@ -1,6 +1,6 @@ test binemit set enable_simd -target x86_64 has_ssse3=true +target x86_64 nehalem ; Ensure raw_bitcast emits no instructions. function %raw_bitcast_i16x8_to_b32x4() { @@ -17,3 +17,10 @@ block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]): [-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03 return } + +function %conversions_i16x8(i16x8) { +block0(v0: i16x8 [%xmm6]): +[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6 +[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de + return +} From f0b083c6ad2658d375abb690f711c2c0d41d0745 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 16:22:26 -0700 Subject: [PATCH 07/11] Legalize `[u|s]widen_high` for x86 Use `x86_palignr` and `[u|s]widen_low` for legalizing this instruction. 
--- .../codegen/meta/src/isa/x86/legalize.rs | 25 +++++++++++++++++++ .../isa/x86/simd-conversion-legalize.clif | 16 ++++++++++++ 2 files changed, 41 insertions(+) diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index 20f87ac265..de78c3b3b7 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -407,13 +407,18 @@ fn define_simd( let umax = insts.by_name("umax"); let umin = insts.by_name("umin"); let snarrow = insts.by_name("snarrow"); + let swiden_high = insts.by_name("swiden_high"); + let swiden_low = insts.by_name("swiden_low"); let ushr_imm = insts.by_name("ushr_imm"); let ushr = insts.by_name("ushr"); + let uwiden_high = insts.by_name("uwiden_high"); + let uwiden_low = insts.by_name("uwiden_low"); let vconst = insts.by_name("vconst"); let vall_true = insts.by_name("vall_true"); let vany_true = insts.by_name("vany_true"); let vselect = insts.by_name("vselect"); + let x86_palignr = x86_instructions.by_name("x86_palignr"); let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); let x86_pmins = x86_instructions.by_name("x86_pmins"); @@ -786,6 +791,26 @@ fn define_simd( ); } + // SIMD widen + for ty in &[I8, I16] { + let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = swiden_high(a)), + vec![ + def!(c = x86_palignr(a, a, uimm8_eight)), + def!(b = swiden_low(c)), + ], + ); + let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = uwiden_high(a)), + vec![ + def!(c = x86_palignr(a, a, uimm8_eight)), + def!(b = uwiden_low(c)), + ], + ); + } + narrow.custom_legalize(shuffle, "convert_shuffle"); narrow.custom_legalize(extractlane, "convert_extractlane"); narrow.custom_legalize(insertlane, "convert_insertlane"); diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif 
b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif index ccea16de2c..0115107810 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif @@ -52,3 +52,19 @@ block0(v0:f32x4): ; nextln: v1 = iadd v12, v11 return v1 } + +function %uwiden_high(i8x16) -> i16x8 { +block0(v0: i8x16): + v1 = uwiden_high v0 + ; check: v2 = x86_palignr v0, v0, 8 + ; nextln: v1 = uwiden_low v2 + return v1 +} + +function %swiden_high(i16x8) -> i32x4 { +block0(v0: i16x8): + v1 = swiden_high v0 + ; check: v2 = x86_palignr v0, v0, 8 + ; nextln: v1 = swiden_low v2 + return v1 +} From 3576d8c5bb135c8a44ca6595ad041f84be11246f Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 16:54:18 -0700 Subject: [PATCH 08/11] Translate Wasm's `widen` instructions to Cranelift's `[u|s]widen_[low|high]` --- cranelift/wasm/src/code_translator.rs | 44 ++++++++++++++++++++------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 64556bdddb..79eae5c2a6 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1582,17 +1582,39 @@ pub fn translate_operator( let (a, b) = pop2_with_bitcast(state, I32X4, builder); state.push1(builder.ins().unarrow(a, b)) } - Operator::I16x8WidenLowI8x16S { .. } - | Operator::I16x8WidenHighI8x16S { .. } - | Operator::I16x8WidenLowI8x16U { .. } - | Operator::I16x8WidenHighI8x16U { .. } - | Operator::I32x4WidenLowI16x8S { .. } - | Operator::I32x4WidenHighI16x8S { .. } - | Operator::I32x4WidenLowI16x8U { .. } - | Operator::I32x4WidenHighI16x8U { .. 
} - | Operator::I8x16Bitmask - | Operator::I16x8Bitmask - | Operator::I32x4Bitmask => { + Operator::I16x8WidenLowI8x16S => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().swiden_low(a)) + } + Operator::I16x8WidenHighI8x16S => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().swiden_high(a)) + } + Operator::I16x8WidenLowI8x16U => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().uwiden_low(a)) + } + Operator::I16x8WidenHighI8x16U => { + let a = pop1_with_bitcast(state, I8X16, builder); + state.push1(builder.ins().uwiden_high(a)) + } + Operator::I32x4WidenLowI16x8S => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().swiden_low(a)) + } + Operator::I32x4WidenHighI16x8S => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().swiden_high(a)) + } + Operator::I32x4WidenLowI16x8U => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().uwiden_low(a)) + } + Operator::I32x4WidenHighI16x8U => { + let a = pop1_with_bitcast(state, I16X8, builder); + state.push1(builder.ins().uwiden_high(a)) + } + Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => { return Err(wasm_unsupported!("proposed SIMD operator {:?}", op)); } From 6a01b32474d765d83cc2cd172629e7061930caa3 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Tue, 7 Jul 2020 16:56:48 -0700 Subject: [PATCH 09/11] Enable final SIMD spec tests for x86 --- build.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/build.rs b/build.rs index f658c3a6da..95c4b03b63 100644 --- a/build.rs +++ b/build.rs @@ -202,8 +202,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { // to be a big chunk of work to implement them all there! 
("simd", _) if target.contains("aarch64") => return true, - ("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S - // TODO(#1886): Ignore reference types tests if this isn't x64, // because Cranelift only supports reference types on x64. ("reference_types", _) => { From a817470fab2bd2b13ea730830f06d618dd2d2425 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Wed, 15 Jul 2020 14:07:36 -0500 Subject: [PATCH 10/11] Fix signature of wasmtime_module_new --- crates/c-api/include/wasmtime.h | 2 +- crates/c-api/src/module.rs | 14 ++++++++------ examples/externref.c | 2 +- examples/fib-debug/main.c | 2 +- examples/gcd.c | 2 +- examples/hello.c | 2 +- examples/hello.cc | 2 +- examples/interrupt.c | 2 +- examples/linking.c | 4 ++-- examples/memory.c | 2 +- examples/multi.c | 2 +- examples/wasi/main.c | 2 +- 12 files changed, 20 insertions(+), 18 deletions(-) diff --git a/crates/c-api/include/wasmtime.h b/crates/c-api/include/wasmtime.h index eb6253b728..351b15433e 100644 --- a/crates/c-api/include/wasmtime.h +++ b/crates/c-api/include/wasmtime.h @@ -740,7 +740,7 @@ WASM_API_EXTERN own wasmtime_error_t *wasmtime_instance_new( * returned error and module are owned by the caller. 
*/ WASM_API_EXTERN own wasmtime_error_t *wasmtime_module_new( - wasm_store_t *store, + wasm_engine_t *engine, const wasm_byte_vec_t *binary, own wasm_module_t **ret ); diff --git a/crates/c-api/src/module.rs b/crates/c-api/src/module.rs index ede8401e2f..7d2f0ff7c4 100644 --- a/crates/c-api/src/module.rs +++ b/crates/c-api/src/module.rs @@ -1,6 +1,6 @@ use crate::{ - handle_result, wasm_byte_vec_t, wasm_exporttype_t, wasm_exporttype_vec_t, wasm_importtype_t, - wasm_importtype_vec_t, wasm_store_t, wasmtime_error_t, + handle_result, wasm_byte_vec_t, wasm_engine_t, wasm_exporttype_t, wasm_exporttype_vec_t, + wasm_importtype_t, wasm_importtype_vec_t, wasm_store_t, wasmtime_error_t, }; use std::ptr; use wasmtime::{Engine, Module}; @@ -29,7 +29,10 @@ pub extern "C" fn wasm_module_new( binary: &wasm_byte_vec_t, ) -> Option> { let mut ret = ptr::null_mut(); - match wasmtime_module_new(store, binary, &mut ret) { + let engine = wasm_engine_t { + engine: store.store.engine().clone(), + }; + match wasmtime_module_new(&engine, binary, &mut ret) { Some(_err) => None, None => { assert!(!ret.is_null()); @@ -40,13 +43,12 @@ pub extern "C" fn wasm_module_new( #[no_mangle] pub extern "C" fn wasmtime_module_new( - store: &wasm_store_t, + engine: &wasm_engine_t, binary: &wasm_byte_vec_t, ret: &mut *mut wasm_module_t, ) -> Option> { let binary = binary.as_slice(); - let store = &store.store; - handle_result(Module::from_binary(store.engine(), binary), |module| { + handle_result(Module::from_binary(&engine.engine, binary), |module| { let imports = module .imports() .map(|i| wasm_importtype_t::new(i.module().to_owned(), i.name().to_owned(), i.ty())) diff --git a/examples/externref.c b/examples/externref.c index 92785a2022..ded28bd072 100644 --- a/examples/externref.c +++ b/examples/externref.c @@ -66,7 +66,7 @@ int main() { // Now that we've got our binary webassembly we can compile our module. 
printf("Compiling module...\n"); wasm_module_t *module = NULL; - error = wasmtime_module_new(store, &wasm, &module); + error = wasmtime_module_new(engine, &wasm, &module); wasm_byte_vec_delete(&wasm); if (error != NULL) exit_with_error("failed to compile module", error, NULL); diff --git a/examples/fib-debug/main.c b/examples/fib-debug/main.c index e133f8d6ac..a4e22dee3c 100644 --- a/examples/fib-debug/main.c +++ b/examples/fib-debug/main.c @@ -43,7 +43,7 @@ int main(int argc, const char* argv[]) { // Compile. printf("Compiling module...\n"); wasm_module_t *module = NULL; - wasmtime_error_t* error = wasmtime_module_new(store, &binary, &module); + wasmtime_error_t* error = wasmtime_module_new(engine, &binary, &module); if (!module) exit_with_error("failed to compile module", error, NULL); wasm_byte_vec_delete(&binary); diff --git a/examples/gcd.c b/examples/gcd.c index bcbeed0940..285bc1593f 100644 --- a/examples/gcd.c +++ b/examples/gcd.c @@ -59,7 +59,7 @@ int main() { // Compile and instantiate our module wasm_module_t *module = NULL; - error = wasmtime_module_new(store, &wasm, &module); + error = wasmtime_module_new(engine, &wasm, &module); if (module == NULL) exit_with_error("failed to compile module", error, NULL); wasm_byte_vec_delete(&wasm); diff --git a/examples/hello.c b/examples/hello.c index f9d4b5982a..fd268a84de 100644 --- a/examples/hello.c +++ b/examples/hello.c @@ -67,7 +67,7 @@ int main() { // Now that we've got our binary webassembly we can compile our module. 
printf("Compiling module...\n"); wasm_module_t *module = NULL; - error = wasmtime_module_new(store, &wasm, &module); + error = wasmtime_module_new(engine, &wasm, &module); wasm_byte_vec_delete(&wasm); if (error != NULL) exit_with_error("failed to compile module", error, NULL); diff --git a/examples/hello.cc b/examples/hello.cc index 57cfc5f360..45ac5302ec 100644 --- a/examples/hello.cc +++ b/examples/hello.cc @@ -67,7 +67,7 @@ int main() { // Now that we've got our binary webassembly we can compile our module. printf("Compiling module...\n"); wasm_module_t *module = NULL; - error = wasmtime_module_new(store, &wasm, &module); + error = wasmtime_module_new(engine, &wasm, &module); wasm_byte_vec_delete(&wasm); if (error != NULL) exit_with_error("failed to compile module", error, NULL); diff --git a/examples/interrupt.c b/examples/interrupt.c index 81971b66bc..d2b5a3ac75 100644 --- a/examples/interrupt.c +++ b/examples/interrupt.c @@ -89,7 +89,7 @@ int main() { wasm_module_t *module = NULL; wasm_trap_t *trap = NULL; wasm_instance_t *instance = NULL; - error = wasmtime_module_new(store, &wasm, &module); + error = wasmtime_module_new(engine, &wasm, &module); wasm_byte_vec_delete(&wasm); if (error != NULL) exit_with_error("failed to compile module", error, NULL); diff --git a/examples/linking.c b/examples/linking.c index 7e6a29aed0..bc13f3cb97 100644 --- a/examples/linking.c +++ b/examples/linking.c @@ -45,10 +45,10 @@ int main() { wasmtime_error_t *error; wasm_module_t *linking1_module = NULL; wasm_module_t *linking2_module = NULL; - error = wasmtime_module_new(store, &linking1_wasm, &linking1_module); + error = wasmtime_module_new(engine, &linking1_wasm, &linking1_module); if (error != NULL) exit_with_error("failed to compile linking1", error, NULL); - error = wasmtime_module_new(store, &linking2_wasm, &linking2_module); + error = wasmtime_module_new(engine, &linking2_wasm, &linking2_module); if (error != NULL) exit_with_error("failed to compile linking2", error, NULL); 
wasm_byte_vec_delete(&linking1_wasm); diff --git a/examples/memory.c b/examples/memory.c index e2be709270..f430fe415b 100644 --- a/examples/memory.c +++ b/examples/memory.c @@ -158,7 +158,7 @@ int main(int argc, const char* argv[]) { // Compile. printf("Compiling module...\n"); wasm_module_t* module = NULL; - error = wasmtime_module_new(store, &binary, &module); + error = wasmtime_module_new(engine, &binary, &module); if (error) exit_with_error("failed to compile module", error, NULL); wasm_byte_vec_delete(&binary); diff --git a/examples/multi.c b/examples/multi.c index 3248ec0215..a56883884f 100644 --- a/examples/multi.c +++ b/examples/multi.c @@ -91,7 +91,7 @@ int main(int argc, const char* argv[]) { // Compile. printf("Compiling module...\n"); wasm_module_t* module = NULL; - error = wasmtime_module_new(store, &binary, &module); + error = wasmtime_module_new(engine, &binary, &module); if (error) exit_with_error("failed to compile module", error, NULL); diff --git a/examples/wasi/main.c b/examples/wasi/main.c index 68a978ccd2..2ad9592f4e 100644 --- a/examples/wasi/main.c +++ b/examples/wasi/main.c @@ -54,7 +54,7 @@ int main() { // Compile our modules wasm_module_t *module = NULL; - wasmtime_error_t *error = wasmtime_module_new(store, &wasm, &module); + wasmtime_error_t *error = wasmtime_module_new(engine, &wasm, &module); if (!module) exit_with_error("failed to compile module", error, NULL); wasm_byte_vec_delete(&wasm); From a9455a8e5188ba70a2831279b5a3968e2c192539 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 15 Jul 2020 17:55:31 -0700 Subject: [PATCH 11/11] C API tweaks for wasmtime-py (#2029) * wasmtime-c-api: Only drop non-null `*mut wasm_ref_t`s * wasmtime-c-api: Handle null refs in `wasm_val_t` to `Val` conversion * wasmtime-c-api: Don't unwrap and rewrap `Option`s The `unwrap` can panic, and there isn't any point to this unwrap+rewrap. 
* wasmtime-c-api: Add conversions between `funcref` and `wasm_func_t` * wasmtime-c-api: More ownership documentation for `wasmtime.h` --- crates/c-api/include/wasmtime.h | 36 ++++++++++++++++++++++++++++----- crates/c-api/src/func.rs | 20 +++++++++++++++++- crates/c-api/src/table.rs | 2 +- crates/c-api/src/val.rs | 19 +++++++++++++++-- 4 files changed, 68 insertions(+), 9 deletions(-) diff --git a/crates/c-api/include/wasmtime.h b/crates/c-api/include/wasmtime.h index 351b15433e..972dfb2f8e 100644 --- a/crates/c-api/include/wasmtime.h +++ b/crates/c-api/include/wasmtime.h @@ -515,8 +515,7 @@ typedef own wasm_trap_t* (*wasmtime_func_callback_t)(const wasmtime_caller_t* ca * * This function is the same as #wasm_func_callback_with_env_t except that its * first argument is a #wasmtime_caller_t which allows learning information - * about the - * caller. + * about the caller. */ typedef own wasm_trap_t* (*wasmtime_func_callback_with_env_t)(const wasmtime_caller_t* caller, void* env, const wasm_val_t args[], wasm_val_t results[]); @@ -544,6 +543,28 @@ WASM_API_EXTERN own wasm_func_t* wasmtime_func_new_with_env( void (*finalizer)(void*) ); +/** + * \brief Creates a new `funcref` value referencing `func`. + * + * Creates a `funcref` value that references `func` and writes it to `funcrefp`. + * + * Gives ownership of the `funcref` value written to `funcrefp`. + * + * Both `func` and `funcrefp` must not be NULL. + */ +WASM_API_EXTERN void wasmtime_func_as_funcref(const wasm_func_t* func, wasm_val_t* funcrefp); + +/** + * \brief Get the `wasm_func_t*` referenced by the given `funcref` value. + * + * Gets an owning handle to the `wasm_func_t*` that the given `funcref` value is + * referencing. Returns NULL if the value is not a `funcref`, or if the value is + * a null function reference. + * + * The `val` pointer must not be NULL.
+ */ +WASM_API_EXTERN own wasm_func_t* wasmtime_funcref_as_func(const wasm_val_t* val); + /** * \brief Loads a #wasm_extern_t from the caller's context * @@ -845,8 +866,10 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_funcref_table_grow( * This function does not take an associated finalizer to clean up the data when * the reference is reclaimed. If you need a finalizer to clean up the data, * then use #wasmtime_externref_new_with_finalizer. + * + * Gives ownership of the newly created `externref` value. */ -WASM_API_EXTERN void wasmtime_externref_new(void *data, wasm_val_t *valp); +WASM_API_EXTERN void wasmtime_externref_new(own void *data, wasm_val_t *valp); /** * \brief A finalizer for an `externref`'s wrapped data. @@ -866,9 +889,11 @@ typedef void (*wasmtime_externref_finalizer_t)(void*); * When the reference is reclaimed, the wrapped data is cleaned up with the * provided finalizer. If you do not need to clean up the wrapped data, then use * #wasmtime_externref_new. + * + * Gives ownership of the newly created `externref` value. */ WASM_API_EXTERN void wasmtime_externref_new_with_finalizer( - void *data, + own void *data, wasmtime_externref_finalizer_t finalizer, wasm_val_t *valp ); @@ -887,7 +912,8 @@ WASM_API_EXTERN void wasmtime_externref_new_with_finalizer( * If the given value is not an `externref`, returns `false` and leaves `datap` * unmodified. * - * Does not take ownership of `val`. + * Does not take ownership of `val`. Does not give up ownership of the `void*` + * data written to `datap`. * * Both `val` and `datap` must not be `NULL`. 
*/ diff --git a/crates/c-api/src/func.rs b/crates/c-api/src/func.rs index fe494dbbcb..5c63e9782f 100644 --- a/crates/c-api/src/func.rs +++ b/crates/c-api/src/func.rs @@ -6,7 +6,7 @@ use std::mem::MaybeUninit; use std::panic::{self, AssertUnwindSafe}; use std::ptr; use std::str; -use wasmtime::{Caller, Extern, Func, Trap}; +use wasmtime::{Caller, Extern, Func, Trap, Val}; #[derive(Clone)] #[repr(transparent)] @@ -275,3 +275,21 @@ pub extern "C" fn wasmtime_caller_export_get( let which = caller.caller.get_export(name)?; Some(Box::new(wasm_extern_t { which })) } + +#[no_mangle] +pub extern "C" fn wasmtime_func_as_funcref( + func: &wasm_func_t, + funcrefp: &mut MaybeUninit<wasm_val_t>, +) { + let funcref = wasm_val_t::from_val(Val::FuncRef(Some(func.func().clone()))); + crate::initialize(funcrefp, funcref); +} + +#[no_mangle] +pub extern "C" fn wasmtime_funcref_as_func(val: &wasm_val_t) -> Option<Box<wasm_func_t>> { + if let Val::FuncRef(Some(f)) = val.val() { + Some(Box::new(f.into())) + } else { + None + } +} diff --git a/crates/c-api/src/table.rs b/crates/c-api/src/table.rs index c88620da85..6438f4976f 100644 --- a/crates/c-api/src/table.rs +++ b/crates/c-api/src/table.rs @@ -91,7 +91,7 @@ pub extern "C" fn wasm_table_get( index: wasm_table_size_t, ) -> Option<Box<wasm_ref_t>> { let val = t.table().get(index)?; - Some(val_into_ref(val).unwrap()) + val_into_ref(val) } #[no_mangle] diff --git a/crates/c-api/src/val.rs b/crates/c-api/src/val.rs index 25754d4ed0..243df313f3 100644 --- a/crates/c-api/src/val.rs +++ b/crates/c-api/src/val.rs @@ -26,7 +26,9 @@ impl Drop for wasm_val_t { fn drop(&mut self) { match into_valtype(self.kind) { ValType::ExternRef => unsafe { - drop(Box::from_raw(self.of.ref_)); + if !self.of.ref_.is_null() { + drop(Box::from_raw(self.of.ref_)); + } }, _ => {} } @@ -116,7 +118,20 @@ impl wasm_val_t { ValType::I64 => Val::from(unsafe { self.of.i64 }), ValType::F32 => Val::from(unsafe { self.of.f32 }), ValType::F64 => Val::from(unsafe { self.of.f64 }), - ValType::ExternRef | ValType::FuncRef
=> ref_to_val(unsafe { &*self.of.ref_ }), + ValType::ExternRef => unsafe { + if self.of.ref_.is_null() { + Val::ExternRef(None) + } else { + ref_to_val(&*self.of.ref_) + } + }, + ValType::FuncRef => unsafe { + if self.of.ref_.is_null() { + Val::FuncRef(None) + } else { + ref_to_val(&*self.of.ref_) + } + }, _ => unimplemented!("wasm_val_t::val {:?}", self.kind), } }