riscv64: Implement a few SIMD arithmetic ops (#6268)

* riscv64: Swap order of `VecAluRRR` source registers

These were accidentally reversed from what we declare in the isle emit helper

* riscv64: Add SIMD `isub`

* riscv64: Add SIMD `imul`

* riscv64: Add `{u,s}mulhi`

* riscv64: Add `b{and,or,xor}`

* cranelift: Move `imul.i8x16` runtest to separate file

Looks like x86 does not implement it

* riscv64: Better formatting for `VecAluOpRRR`

* cranelift: Enable x86 SIMD tests with `has_sse41=false`
This commit is contained in:
Afonso Bordado
2023-04-25 17:39:33 +01:00
committed by GitHub
parent 4337ccd4b7
commit 62cbb5045e
25 changed files with 872 additions and 78 deletions

View File

@@ -326,8 +326,8 @@
(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
(vs1 Reg)
(vs2 Reg)
(vs1 Reg)
(vstate VState))
(VecSetState

View File

@@ -50,7 +50,7 @@ pub fn encode_valu(
) -> u32 {
let funct6 = funct6 & 0b111111;
let vm = vm & 0b1;
let funct7 = (funct6 << 6) | vm;
let funct7 = (funct6 << 1) | vm;
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
}

View File

@@ -214,27 +214,42 @@ impl fmt::Display for VState {
impl VecAluOpRRR {
pub fn opcode(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0x57,
}
// Vector Opcode
0x57
}
pub fn funct3(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0b000,
// OPIVV
VecAluOpRRR::Vadd
| VecAluOpRRR::Vsub
| VecAluOpRRR::Vand
| VecAluOpRRR::Vor
| VecAluOpRRR::Vxor => 0b000,
// OPMVV
VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010,
}
}
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
VecAluOpRRR::Vadd => 0b000000,
VecAluOpRRR::Vsub => 0b000010,
VecAluOpRRR::Vmul => 0b100101,
VecAluOpRRR::Vmulh => 0b100111,
VecAluOpRRR::Vmulhu => 0b100100,
VecAluOpRRR::Vand => 0b001001,
VecAluOpRRR::Vor => 0b001010,
VecAluOpRRR::Vxor => 0b001011,
}
}
}
impl fmt::Display for VecAluOpRRR {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
}
let mut s = format!("{self:?}");
s.make_ascii_lowercase();
s.push_str(".vv");
f.write_str(&s)
}
}

View File

@@ -59,6 +59,13 @@
;; Register to Register ALU Ops
(type VecAluOpRRR (enum
(Vadd)
(Vsub)
(Vmul)
(Vmulh)
(Vmulhu)
(Vand)
(Vor)
(Vxor)
))
@@ -138,3 +145,38 @@
(decl rv_vadd_vv (Reg Reg VState) Reg)
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))
;; Helper for emitting the `vsub.vv` instruction.
(decl rv_vsub_vv (Reg Reg VState) Reg)
(rule (rv_vsub_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate))
;; Helper for emitting the `vmul.vv` instruction.
(decl rv_vmul_vv (Reg Reg VState) Reg)
(rule (rv_vmul_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate))
;; Helper for emitting the `vmulh.vv` instruction.
(decl rv_vmulh_vv (Reg Reg VState) Reg)
(rule (rv_vmulh_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate))
;; Helper for emitting the `vmulhu.vv` instruction.
(decl rv_vmulhu_vv (Reg Reg VState) Reg)
(rule (rv_vmulhu_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate))
;; Helper for emitting the `vand.vv` instruction.
(decl rv_vand_vv (Reg Reg VState) Reg)
(rule (rv_vand_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate))
;; Helper for emitting the `vor.vv` instruction.
(decl rv_vor_vv (Reg Reg VState) Reg)
(rule (rv_vor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate))
;; Helper for emitting the `vxor.vv` instruction.
(decl rv_vxor_vv (Reg Reg VState) Reg)
(rule (rv_vxor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate))

View File

@@ -112,15 +112,19 @@
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply subtracting things in registers.
(rule -2 (lower (has_type (fits_in_64 ty) (isub x y)))
(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y)))
(rv_sub x y))
(rule -1 (lower (has_type (fits_in_32 ty) (isub x y)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y)))
(rv_subw x y))
(rule (lower (has_type $I128 (isub x y)))
(rule 2 (lower (has_type $I128 (isub x y)))
(i128_sub x y))
;; SIMD Vectors
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y)))
(rv_vsub_vv x y ty))
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
@@ -129,21 +133,14 @@
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -2 (lower (has_type (fits_in_64 ty) (imul x y)))
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y)))
(rv_mul x y))
(rule -1 (lower (has_type (fits_in_32 ty) (imul x y)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y)))
(rv_mulw x y))
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
;; for I128
(rule (lower (has_type $I128 (imul x y)))
(rule 2 (lower (has_type $I128 (imul x y)))
(let
((x_regs ValueRegs x)
(x_lo Reg (value_regs_get x_regs 0))
@@ -169,6 +166,22 @@
(dst_lo Reg (madd x_lo y_lo (zero_reg))))
(value_regs dst_lo dst_hi)))
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y)))
(rv_vmul_vv x y ty))
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y)))
(rv_vmulh_vv x y ty))
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y)))
(rv_vmulhu_vv x y ty))
;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -277,6 +290,10 @@
(value_regs low high)))
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (band x y)))
(rv_vand_vv x y ty))
;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bor x y)))
(gen_or ty x y))
@@ -320,6 +337,8 @@
(high Reg (rv_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bor x y)))
(rv_vor_vv x y ty))
;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
@@ -341,6 +360,8 @@
(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y)))
(rv_vxor_vv x y ty))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bnot x)))

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %band_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band v0, v1
return v2
}
; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x26
; ret
function %band_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = band v0, v1
return v2
}
; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x26
; ret
function %band_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = band v0, v1
return v2
}
; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x26
; ret
function %band_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = band v0, v1
return v2
}
; VCode:
; block0:
; vand.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x26
; ret

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %bor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bor v0, v1
return v2
}
; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret
function %bor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bor v0, v1
return v2
}
; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret
function %bor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bor v0, v1
return v2
}
; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret
function %bor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bor v0, v1
return v2
}
; VCode:
; block0:
; vor.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2a
; ret

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %bxor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bxor v0, v1
return v2
}
; VCode:
; block0:
; vxor.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2e
; ret
function %bxor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = bxor v0, v1
return v2
}
; VCode:
; block0:
; vxor.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x2e
; ret
function %bxor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bxor v0, v1
return v2
}
; VCode:
; block0:
; vxor.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2e
; ret
function %bxor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = bxor v0, v1
return v2
}
; VCode:
; block0:
; vxor.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x2e
; ret

View File

@@ -12,13 +12,13 @@ block0(v0:i64x4, v1:i64x4):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=4, #vtype=(e64, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=4, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x82, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret
function %iadd_i64x8(i64x8, i64x8) -> i64x8 {
@@ -29,12 +29,12 @@ block0(v0:i64x8, v1:i64x8):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=8, #vtype=(e64, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=8, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret

View File

@@ -11,13 +11,13 @@ block0(v0:i8x8, v1:i8x8):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=8, #vtype=(e8, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=8, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x04, 0xcc
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret
function %iadd_i16x4(i16x4, i16x4) -> i16x4 {
@@ -28,13 +28,13 @@ block0(v0:i16x4, v1:i16x4):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=4, #vtype=(e16, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=4, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x82, 0xcc
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret
function %iadd_i32x2(i32x2, i32x2) -> i32x2 {
@@ -45,12 +45,12 @@ block0(v0:i32x2, v1:i32x2):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=2, #vtype=(e32, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=2, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x01, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret

View File

@@ -11,13 +11,13 @@ block0(v0: i8x16, v1: i8x16):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret
function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
@@ -28,13 +28,13 @@ block0(v0: i16x8, v1: i16x8):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=8, #vtype=(e16, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
@@ -45,13 +45,13 @@ block0(v0: i32x4, v1: i32x4):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=4, #vtype=(e32, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret
function %iadd_i64x2(i64x2, i64x2) -> i64x2 {
@@ -62,12 +62,12 @@ block0(v0: i64x2, v1: i64x2):
; VCode:
; block0:
; vadd.vv v10,v11,v10 #avl=2, #vtype=(e64, m1, ta, ma)
; vadd.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x05, 0xb5, 0x02
; .byte 0x57, 0x85, 0xa5, 0x02
; ret

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %imul_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = imul v0, v1
return v2
}
; VCode:
; block0:
; vmul.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0xa5, 0xa5, 0x96
; ret
function %imul_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = imul v0, v1
return v2
}
; VCode:
; block0:
; vmul.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0xa5, 0xa5, 0x96
; ret
function %imul_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = imul v0, v1
return v2
}
; VCode:
; block0:
; vmul.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0xa5, 0xa5, 0x96
; ret
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imul v0, v1
return v2
}
; VCode:
; block0:
; vmul.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0xa5, 0xa5, 0x96
; ret

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %isub_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = isub v0, v1
return v2
}
; VCode:
; block0:
; vsub.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x0a
; ret
function %isub_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = isub v0, v1
return v2
}
; VCode:
; block0:
; vsub.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0x85, 0xa5, 0x0a
; ret
function %isub_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = isub v0, v1
return v2
}
; VCode:
; block0:
; vsub.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x0a
; ret
function %isub_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = isub v0, v1
return v2
}
; VCode:
; block0:
; vsub.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0x85, 0xa5, 0x0a
; ret

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %smulhi_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = smulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulh.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0xa5, 0xa5, 0x9e
; ret
function %smulhi_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = smulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulh.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0xa5, 0xa5, 0x9e
; ret
function %smulhi_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = smulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulh.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0xa5, 0xa5, 0x9e
; ret
function %smulhi_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = smulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulh.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0xa5, 0xa5, 0x9e
; ret

View File

@@ -0,0 +1,73 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v
function %umulhi_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = umulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulhu.vv v10,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0xa5, 0xa5, 0x92
; ret
function %umulhi_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = umulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulhu.vv v10,v10,v11 #avl=8, #vtype=(e16, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x84, 0xcc
; .byte 0x57, 0xa5, 0xa5, 0x92
; ret
function %umulhi_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = umulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulhu.vv v10,v10,v11 #avl=4, #vtype=(e32, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0xa5, 0xa5, 0x92
; ret
function %umulhi_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = umulhi v0, v1
return v2
}
; VCode:
; block0:
; vmulhu.vv v10,v10,v11 #avl=2, #vtype=(e64, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x81, 0xcd
; .byte 0x57, 0xa5, 0xa5, 0x92
; ret

View File

@@ -45,24 +45,24 @@ block2(v6: i8x16, v7: i8x16):
; VCode:
; block0:
; vadd.vv v5,v11,v10 #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vv v5,v10,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; j label1
; block1:
; vadd.vv v6,v5,v11 #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vv v6,v11,v5 #avl=16, #vtype=(e8, m1, ta, ma)
; j label2
; block2:
; vadd.vv v10,v6,v5 #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vv v10,v5,v6 #avl=16, #vtype=(e8, m1, ta, ma)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0xd7, 0x02, 0xb5, 0x02
; .byte 0xd7, 0x82, 0xa5, 0x02
; block1: ; offset 0x8
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x83, 0x55, 0x02
; .byte 0x57, 0x83, 0xb2, 0x02
; block2: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x57, 0x85, 0x62, 0x02
; .byte 0x57, 0x05, 0x53, 0x02
; ret

View File

@@ -8,37 +8,6 @@ target x86_64
target x86_64 skylake
function %isub_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = isub v0, v1
return v2
}
; run: %isub_i32x4([1 1 1 1], [1 2 3 4]) == [0 -1 -2 -3]
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
v2 = imul v0, v1
return v2
}
; run: %imul_i64x2([0 2], [0 2]) == [0 4]
function %imul_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = imul v0, v1
return v2
}
; run: %imul_i32x4([-1 0 1 0x80_00_00_01], [2 2 2 2]) == [-2 0 2 2]
; Note above how bits are truncated: 0x80_00_00_01 * 2 == 0x1_00_00_00_02, but
; the leading 1 is dropped.
function %imul_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0: i16x8, v1: i16x8):
v2 = imul v0, v1
return v2
}
; run: %imul_i16x8([-1 0 1 0x7f_ff 0 0 0 0], [2 2 2 2 0 0 0 0]) == [-2 0 2 0xff_fe 0 0 0 0]
function %sadd_sat_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = sadd_sat v0, v1

View File

@@ -0,0 +1,47 @@
test interpret
test run
target aarch64
target s390x
target x86_64 has_sse41=false
set enable_simd
target x86_64
target x86_64 skylake
target riscv64 has_v
function %band_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0:i8x16, v1:i8x16):
v2 = band v0, v1
return v2
}
; run: %band_i8x16([0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10 0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF], [0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF 0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10]) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
; run: %band_i8x16([0xFE 0xEE 0xFF 0xFF 0xFE 0xEE 0xFF 0xFF 0xF1 0xFF 0xFE 0xFE 0xF1 0xFF 0xFE 0xFE], [0xDF 0xDB 0xFF 0xFF 0xDF 0xDB 0xFF 0xFF 0xCE 0xFF 0xEF 0xEF 0xCE 0xFF 0xEF 0xEF]) == [0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xC0 0xFF 0xEE 0xEE 0xC0 0xFF 0xEE 0xEE]
function %band_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0:i16x8, v1:i16x8):
v2 = band v0, v1
return v2
}
; run: %band_i16x8([0xFEDC 0xBA98 0x7654 0x3210 0x0123 0x4567 0x89AB 0xCDEF], [0x0123 0x4567 0x89AB 0xCDEF 0xFEDC 0xBA98 0x7654 0x3210]) == [0 0 0 0 0 0 0 0]
; run: %band_i16x8([0xFEEE 0xFFFF 0xFEEE 0xFFFF 0xF1FF 0xFEFE 0xF1FF 0xFEFE], [0xDFDB 0xFFFF 0xDFDB 0xFFFF 0xCEFF 0xEFEF 0xCEFF 0xEFEF]) == [0xDECA 0xFFFF 0xDECA 0xFFFF 0xC0FF 0xEEEE 0xC0FF 0xEEEE]
function %band_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0:i32x4, v1:i32x4):
v2 = band v0, v1
return v2
}
; run: %band_i32x4([0xFEDCBA98 0x76543210 0x01234567 0x89ABCDEF], [0x01234567 0x89ABCDEF 0xFEDCBA98 0x76543210]) == [0 0 0 0]
; run: %band_i32x4([0xFEEEFFFF 0xFEEEFFFF 0xF1FFFEFE 0xF1FFFEFE], [0xDFDBFFFF 0xDFDBFFFF 0xCEFFEFEF 0xCEFFEFEF]) == [0xDECAFFFF 0xDECAFFFF 0xC0FFEEEE 0xC0FFEEEE]
function %band_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0:i64x2, v1:i64x2):
v2 = band v0, v1
return v2
}
; run: %band_i64x2([0xFEDCBA9876543210 0x0123456789ABCDEF], [0x0123456789ABCDEF 0xFEDCBA9876543210]) == [0 0]
; run: %band_i64x2([0xFEEEFFFFFEEEFFFF 0xF1FFFEFEF1FFFEFE], [0xDFDBFFFFDFDBFFFF 0xCEFFEFEFCEFFEFEF]) == [0xDECAFFFFDECAFFFF 0xC0FFEEEEC0FFEEEE]

View File

@@ -0,0 +1,45 @@
test interpret
test run
target aarch64
target s390x
target x86_64 has_sse41=false
set enable_simd
target x86_64
target x86_64 skylake
target riscv64 has_v
function %bor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0:i8x16, v1:i8x16):
v2 = bor v0, v1
return v2
}
; run: %bor_i8x16([0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10 0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF], [0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF 0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10]) == [0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF]
; run: %bor_i8x16([0x8A 0x8A 0xAA 0xAA 0x8A 0x8A 0xAA 0xAA 0x8A 0x8A 0xAA 0xAA 0x8A 0x8A 0xAA 0xAA], [0x54 0x40 0x55 0x55 0x54 0x40 0x55 0x55 0x54 0x40 0x55 0x55 0x54 0x40 0x55 0x55]) == [0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF]
function %bor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0:i16x8, v1:i16x8):
v2 = bor v0, v1
return v2
}
; run: %bor_i16x8([0xFEDC 0xBA98 0x7654 0x3210 0x0123 0x4567 0x89AB 0xCDEF], [0x0123 0x4567 0x89AB 0xCDEF 0xFEDC 0xBA98 0x7654 0x3210]) == [0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF]
; run: %bor_i16x8([0x8A8A 0xAAAA 0x8A8A 0xAAAA 0x8A8A 0xAAAA 0x8A8A 0xAAAA], [0x5440 0x5555 0x5440 0x5555 0x5440 0x5555 0x5440 0x5555]) == [0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF]
function %bor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0:i32x4, v1:i32x4):
v2 = bor v0, v1
return v2
}
; run: %bor_i32x4([0xFEDCBA98 0x76543210 0x01234567 0x89ABCDEF], [0x01234567 0x89ABCDEF 0xFEDCBA98 0x76543210]) == [0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF]
; run: %bor_i32x4([0x8A8AAAAA 0x8A8AAAAA 0x8A8AAAAA 0x8A8AAAAA], [0x54405555 0x54405555 0x54405555 0x54405555]) == [0xDECAFFFF 0xDECAFFFF 0xDECAFFFF 0xDECAFFFF]
function %bor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0:i64x2, v1:i64x2):
v2 = bor v0, v1
return v2
}
; run: %bor_i64x2([0xFEDCBA9876543210 0x0123456789ABCDEF], [0x0123456789ABCDEF 0xFEDCBA9876543210]) == [0xFFFFFFFFFFFFFFFF 0xFFFFFFFFFFFFFFFF]
; run: %bor_i64x2([0x8A8AAAAA8A8AAAAA 0x8A8AAAAA8A8AAAAA], [0x5440555554405555 0x5440555554405555]) == [0xDECAFFFFDECAFFFF 0xDECAFFFFDECAFFFF]

View File

@@ -0,0 +1,45 @@
test interpret
test run
target aarch64
target s390x
target x86_64 has_sse41=false
set enable_simd
target x86_64
target x86_64 skylake
target riscv64 has_v
function %bxor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0:i8x16, v1:i8x16):
v2 = bxor v0, v1
return v2
}
; run: %bxor_i8x16([0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10 0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF], [0x01 0x23 0x45 0x67 0x89 0xAB 0xCD 0xEF 0xFE 0xDC 0xBA 0x98 0x76 0x54 0x32 0x10]) == [0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF]
; run: %bxor_i8x16([0x94 0x40 0xA0 0x7D 0x94 0x40 0xA0 0x7D 0x94 0x40 0xA0 0x7D 0x94 0x40 0xA0 0x7D], [0x4A 0x8A 0x5F 0x82 0x4A 0x8A 0x5F 0x82 0x4A 0x8A 0x5F 0x82 0x4A 0x8A 0x5F 0x82]) == [0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF 0xDE 0xCA 0xFF 0xFF]
function %bxor_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0:i16x8, v1:i16x8):
v2 = bxor v0, v1
return v2
}
; run: %bxor_i16x8([0xFEDC 0xBA98 0x7654 0x3210 0x0123 0x4567 0x89AB 0xCDEF], [0x0123 0x4567 0x89AB 0xCDEF 0xFEDC 0xBA98 0x7654 0x3210]) == [0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF 0xFFFF]
; run: %bxor_i16x8([0x9440 0xA07D 0x9440 0xA07D 0x9440 0xA07D 0x9440 0xA07D], [0x4A8A 0x5F82 0x4A8A 0x5F82 0x4A8A 0x5F82 0x4A8A 0x5F82]) == [0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF 0xDECA 0xFFFF]
function %bxor_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0:i32x4, v1:i32x4):
v2 = bxor v0, v1
return v2
}
; run: %bxor_i32x4([0xFEDCBA98 0x76543210 0x01234567 0x89ABCDEF], [0x01234567 0x89ABCDEF 0xFEDCBA98 0x76543210]) == [0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF]
; run: %bxor_i32x4([0x9440A07D 0x9440A07D 0x9440A07D 0x9440A07D], [0x4A8A5F82 0x4A8A5F82 0x4A8A5F82 0x4A8A5F82]) == [0xDECAFFFF 0xDECAFFFF 0xDECAFFFF 0xDECAFFFF]
function %bxor_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0:i64x2, v1:i64x2):
v2 = bxor v0, v1
return v2
}
; run: %bxor_i64x2([0xFEDCBA9876543210 0x0123456789ABCDEF], [0x0123456789ABCDEF 0xFEDCBA9876543210]) == [0xFFFFFFFFFFFFFFFF 0xFFFFFFFFFFFFFFFF]
; run: %bxor_i64x2([0x9440A07D9440A07D 0x9440A07D9440A07D], [0x4A8A5F824A8A5F82 0x4A8A5F824A8A5F82]) == [0xDECAFFFFDECAFFFF 0xDECAFFFFDECAFFFF]

View File

@@ -0,0 +1,13 @@
test interpret
test run
target aarch64
target s390x
target riscv64 has_v
function %imul_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0:i8x16, v1:i8x16):
v2 = imul v0, v1
return v2
}
; run: %imul_i8x16([1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [1 -2 3 -4 5 -6 7 -8 9 -10 11 -12 -13 14 -15 16]) == [1 -4 9 -16 25 -36 49 -64 81 -100 121 112 87 -60 31 0]

View File

@@ -0,0 +1,38 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64
target x86_64 skylake
target riscv64 has_v
function %imul_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0:i16x8, v1:i16x8):
v2 = imul v0, v1
return v2
}
; run: %imul_i16x8([1 2 3 4 5 6 7 8], [1 -2 3 -4 5 -6 7 -8]) == [1 -4 9 -16 25 -36 49 -64]
; run: %imul_i16x8([-1 0 1 0x7f_ff 0 0 0 0], [2 2 2 2 0 0 0 0]) == [-2 0 2 0xff_fe 0 0 0 0]
function %imul_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0:i32x4, v1:i32x4):
v2 = imul v0, v1
return v2
}
; run: %imul_i32x4([1 2 3 4], [1 -2 3 -4]) == [1 -4 9 -16]
; run: %imul_i32x4([-1 0 1 0x80_00_00_01], [2 2 2 2]) == [-2 0 2 2]
; Note above how bits are truncated: 0x80_00_00_01 * 2 == 0x1_00_00_00_02, but
; the leading 1 is dropped.
function %imul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0:i64x2, v1:i64x2):
v2 = imul v0, v1
return v2
}
; run: %imul_i64x2([1 1], [1 2]) == [1 2]
; run: %imul_i64x2([2 2], [-1 5]) == [-2 10]

View File

@@ -0,0 +1,44 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64
target x86_64 skylake
target riscv64 has_v
function %isub_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0:i8x16, v1:i8x16):
v2 = isub v0, v1
return v2
}
; run: %isub_i8x16([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15]
; run: %isub_i8x16([2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]) == [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3]
function %isub_i16x8(i16x8, i16x8) -> i16x8 {
block0(v0:i16x8, v1:i16x8):
v2 = isub v0, v1
return v2
}
; run: %isub_i16x8([1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8]) == [0 -1 -2 -3 -4 -5 -6 -7]
; run: %isub_i16x8([2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1]) == [3 3 3 3 3 3 3 3]
function %isub_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0:i32x4, v1:i32x4):
v2 = isub v0, v1
return v2
}
; run: %isub_i32x4([1 1 1 1], [1 2 3 4]) == [0 -1 -2 -3]
; run: %isub_i32x4([2 2 2 2], [-1 -1 -1 -1]) == [3 3 3 3]
function %isub_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0:i64x2, v1:i64x2):
v2 = isub v0, v1
return v2
}
; run: %isub_i64x2([1 1], [1 2]) == [0 -1]
; run: %isub_i64x2([2 2], [-1 -1]) == [3 3]

View File

@@ -1,4 +1,6 @@
test interpret
test run
target riscv64 has_v
; The AArch64 and x86_64 backends only support scalar values.
function %smulhi_i8x16(i8x16, i8x16) -> i8x16 {

View File

@@ -1,4 +1,6 @@
test interpret
test run
target riscv64 has_v
; x86_64 only supports `i16`, `i32`, and `i64`
function %umulhi_i8(i8, i8) -> i8 {