riscv64: Implement a few SIMD arithmetic ops (#6268)

* riscv64: Swap order of `VecAluRRR` source registers

These were accidentally reversed relative to the order declared in the ISLE emit helper.

* riscv64: Add SIMD `isub`

* riscv64: Add SIMD `imul`

* riscv64: Add `{u,s}mulhi`

* riscv64: Add `b{and,or,xor}`

* cranelift: Move `imul.i8x16` runtest to separate file

It looks like x86 does not implement it.

* riscv64: Better formatting for `VecAluOpRRR`

* cranelift: Enable x86 SIMD tests with `has_sse41=false`
This commit is contained in:
Afonso Bordado
2023-04-25 17:39:33 +01:00
committed by GitHub
parent 4337ccd4b7
commit 62cbb5045e
25 changed files with 872 additions and 78 deletions

View File

@@ -326,8 +326,8 @@
(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
(vs1 Reg)
(vs2 Reg)
(vs1 Reg)
(vstate VState))
(VecSetState

View File

@@ -50,7 +50,7 @@ pub fn encode_valu(
) -> u32 {
let funct6 = funct6 & 0b111111;
let vm = vm & 0b1;
let funct7 = (funct6 << 6) | vm;
let funct7 = (funct6 << 1) | vm;
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
}

View File

@@ -214,27 +214,42 @@ impl fmt::Display for VState {
impl VecAluOpRRR {
pub fn opcode(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0x57,
}
// Vector Opcode
0x57
}
pub fn funct3(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0b000,
// OPIVV
VecAluOpRRR::Vadd
| VecAluOpRRR::Vsub
| VecAluOpRRR::Vand
| VecAluOpRRR::Vor
| VecAluOpRRR::Vxor => 0b000,
// OPMVV
VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010,
}
}
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
VecAluOpRRR::Vadd => 0b000000,
VecAluOpRRR::Vsub => 0b000010,
VecAluOpRRR::Vmul => 0b100101,
VecAluOpRRR::Vmulh => 0b100111,
VecAluOpRRR::Vmulhu => 0b100100,
VecAluOpRRR::Vand => 0b001001,
VecAluOpRRR::Vor => 0b001010,
VecAluOpRRR::Vxor => 0b001011,
}
}
}
impl fmt::Display for VecAluOpRRR {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
}
let mut s = format!("{self:?}");
s.make_ascii_lowercase();
s.push_str(".vv");
f.write_str(&s)
}
}

View File

@@ -59,6 +59,13 @@
;; Register to Register ALU Ops
(type VecAluOpRRR (enum
(Vadd)
(Vsub)
(Vmul)
(Vmulh)
(Vmulhu)
(Vand)
(Vor)
(Vxor)
))
@@ -138,3 +145,38 @@
(decl rv_vadd_vv (Reg Reg VState) Reg)
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))
;; Helper for emitting the `vsub.vv` instruction.
(decl rv_vsub_vv (Reg Reg VState) Reg)
(rule (rv_vsub_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate))
;; Helper for emitting the `vmul.vv` instruction.
(decl rv_vmul_vv (Reg Reg VState) Reg)
(rule (rv_vmul_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate))
;; Helper for emitting the `vmulh.vv` instruction.
(decl rv_vmulh_vv (Reg Reg VState) Reg)
(rule (rv_vmulh_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate))
;; Helper for emitting the `vmulhu.vv` instruction.
(decl rv_vmulhu_vv (Reg Reg VState) Reg)
(rule (rv_vmulhu_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate))
;; Helper for emitting the `vand.vv` instruction.
(decl rv_vand_vv (Reg Reg VState) Reg)
(rule (rv_vand_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate))
;; Helper for emitting the `vor.vv` instruction.
(decl rv_vor_vv (Reg Reg VState) Reg)
(rule (rv_vor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate))
;; Helper for emitting the `vxor.vv` instruction.
(decl rv_vxor_vv (Reg Reg VState) Reg)
(rule (rv_vxor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate))

View File

@@ -112,15 +112,19 @@
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply subtracting things in registers.
(rule -2 (lower (has_type (fits_in_64 ty) (isub x y)))
(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y)))
(rv_sub x y))
(rule -1 (lower (has_type (fits_in_32 ty) (isub x y)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y)))
(rv_subw x y))
(rule (lower (has_type $I128 (isub x y)))
(rule 2 (lower (has_type $I128 (isub x y)))
(i128_sub x y))
;; SIMD Vectors
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y)))
(rv_vsub_vv x y ty))
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
@@ -129,21 +133,14 @@
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -2 (lower (has_type (fits_in_64 ty) (imul x y)))
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y)))
(rv_mul x y))
(rule -1 (lower (has_type (fits_in_32 ty) (imul x y)))
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y)))
(rv_mulw x y))
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
;; for I128
(rule (lower (has_type $I128 (imul x y)))
(rule 2 (lower (has_type $I128 (imul x y)))
(let
((x_regs ValueRegs x)
(x_lo Reg (value_regs_get x_regs 0))
@@ -169,6 +166,22 @@
(dst_lo Reg (madd x_lo y_lo (zero_reg))))
(value_regs dst_lo dst_hi)))
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y)))
(rv_vmul_vv x y ty))
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y)))
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y)))
(rv_vmulh_vv x y ty))
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y)))
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y)))
(rv_vmulhu_vv x y ty))
;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -277,6 +290,10 @@
(value_regs low high)))
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (band x y)))
(rv_vand_vv x y ty))
;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bor x y)))
(gen_or ty x y))
@@ -320,6 +337,8 @@
(high Reg (rv_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bor x y)))
(rv_vor_vv x y ty))
;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
@@ -341,6 +360,8 @@
(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y)))
(rv_vxor_vv x y ty))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (ty_int ty) (bnot x)))