riscv64: Implement a few SIMD arithmetic ops (#6268)
* riscv64: Swap order of `VecAluRRR` source registers
These were accidentally reversed relative to the order we declare in the ISLE emit helper
* riscv64: Add SIMD `isub`
* riscv64: Add SIMD `imul`
* riscv64: Add `{u,s}mulhi`
* riscv64: Add `b{and,or,xor}`
* cranelift: Move `imul.i8x16` runtest to separate file
It looks like x86 does not implement `imul.i8x16`
* riscv64: Better formatting for `VecAluOpRRR`
* cranelift: Enable x86 SIMD tests with `has_sse41=false`
This commit is contained in:
@@ -326,8 +326,8 @@
|
||||
(VecAluRRR
|
||||
(op VecAluOpRRR)
|
||||
(vd WritableReg)
|
||||
(vs1 Reg)
|
||||
(vs2 Reg)
|
||||
(vs1 Reg)
|
||||
(vstate VState))
|
||||
|
||||
(VecSetState
|
||||
|
||||
@@ -50,7 +50,7 @@ pub fn encode_valu(
|
||||
) -> u32 {
|
||||
let funct6 = funct6 & 0b111111;
|
||||
let vm = vm & 0b1;
|
||||
let funct7 = (funct6 << 6) | vm;
|
||||
let funct7 = (funct6 << 1) | vm;
|
||||
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
|
||||
}
|
||||
|
||||
|
||||
@@ -214,27 +214,42 @@ impl fmt::Display for VState {
|
||||
|
||||
impl VecAluOpRRR {
|
||||
pub fn opcode(&self) -> u32 {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => 0x57,
|
||||
}
|
||||
// Vector Opcode
|
||||
0x57
|
||||
}
|
||||
pub fn funct3(&self) -> u32 {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => 0b000,
|
||||
// OPIVV
|
||||
VecAluOpRRR::Vadd
|
||||
| VecAluOpRRR::Vsub
|
||||
| VecAluOpRRR::Vand
|
||||
| VecAluOpRRR::Vor
|
||||
| VecAluOpRRR::Vxor => 0b000,
|
||||
// OPIMV
|
||||
VecAluOpRRR::Vmul | VecAluOpRRR::Vmulh | VecAluOpRRR::Vmulhu => 0b010,
|
||||
}
|
||||
}
|
||||
pub fn funct6(&self) -> u32 {
|
||||
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => 0b000000,
|
||||
VecAluOpRRR::Vsub => 0b000010,
|
||||
VecAluOpRRR::Vmul => 0b100101,
|
||||
VecAluOpRRR::Vmulh => 0b100111,
|
||||
VecAluOpRRR::Vmulhu => 0b100100,
|
||||
VecAluOpRRR::Vand => 0b001001,
|
||||
VecAluOpRRR::Vor => 0b001010,
|
||||
VecAluOpRRR::Vxor => 0b001011,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecAluOpRRR {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
|
||||
}
|
||||
let mut s = format!("{self:?}");
|
||||
s.make_ascii_lowercase();
|
||||
s.push_str(".vv");
|
||||
f.write_str(&s)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,13 @@
|
||||
;; Register to Register ALU Ops
|
||||
(type VecAluOpRRR (enum
|
||||
(Vadd)
|
||||
(Vsub)
|
||||
(Vmul)
|
||||
(Vmulh)
|
||||
(Vmulhu)
|
||||
(Vand)
|
||||
(Vor)
|
||||
(Vxor)
|
||||
))
|
||||
|
||||
|
||||
@@ -138,3 +145,38 @@
|
||||
(decl rv_vadd_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vadd_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vsub.vv` instruction.
|
||||
(decl rv_vsub_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vsub_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vsub) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vmul.vv` instruction.
|
||||
(decl rv_vmul_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vmul_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vmul) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vmulh.vv` instruction.
|
||||
(decl rv_vmulh_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vmulh_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vmulh) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vmulhu.vv` instruction.
|
||||
(decl rv_vmulhu_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vmulhu_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vmulhu) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vand.vv` instruction.
|
||||
(decl rv_vand_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vand_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vand) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vor.vv` instruction.
|
||||
(decl rv_vor_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vor_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vor) vs2 vs1 vstate))
|
||||
|
||||
;; Helper for emitting the `vxor.vv` instruction.
|
||||
(decl rv_vxor_vv (Reg Reg VState) Reg)
|
||||
(rule (rv_vxor_vv vs2 vs1 vstate)
|
||||
(vec_alu_rrr (VecAluOpRRR.Vxor) vs2 vs1 vstate))
|
||||
|
||||
@@ -112,15 +112,19 @@
|
||||
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Base case, simply subtracting things in registers.
|
||||
|
||||
(rule -2 (lower (has_type (fits_in_64 ty) (isub x y)))
|
||||
(rule (lower (has_type (ty_int_ref_scalar_64 ty) (isub x y)))
|
||||
(rv_sub x y))
|
||||
|
||||
(rule -1 (lower (has_type (fits_in_32 ty) (isub x y)))
|
||||
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (isub x y)))
|
||||
(rv_subw x y))
|
||||
|
||||
(rule (lower (has_type $I128 (isub x y)))
|
||||
(rule 2 (lower (has_type $I128 (isub x y)))
|
||||
(i128_sub x y))
|
||||
|
||||
;; SIMD Vectors
|
||||
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (isub x y)))
|
||||
(rv_vsub_vv x y ty))
|
||||
|
||||
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; `i64` and smaller.
|
||||
@@ -129,21 +133,14 @@
|
||||
|
||||
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule -2 (lower (has_type (fits_in_64 ty) (imul x y)))
|
||||
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (imul x y)))
|
||||
(rv_mul x y))
|
||||
(rule -1 (lower (has_type (fits_in_32 ty) (imul x y)))
|
||||
|
||||
(rule 1 (lower (has_type (fits_in_32 (ty_int ty)) (imul x y)))
|
||||
(rv_mulw x y))
|
||||
|
||||
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (smulhi x y)))
|
||||
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
|
||||
|
||||
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (umulhi x y)))
|
||||
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
|
||||
|
||||
;; for I128
|
||||
(rule (lower (has_type $I128 (imul x y)))
|
||||
(rule 2 (lower (has_type $I128 (imul x y)))
|
||||
(let
|
||||
((x_regs ValueRegs x)
|
||||
(x_lo Reg (value_regs_get x_regs 0))
|
||||
@@ -169,6 +166,22 @@
|
||||
(dst_lo Reg (madd x_lo y_lo (zero_reg))))
|
||||
(value_regs dst_lo dst_hi)))
|
||||
|
||||
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (imul x y)))
|
||||
(rv_vmul_vv x y ty))
|
||||
|
||||
;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (smulhi x y)))
|
||||
(lower_smlhi ty (ext_int_if_need $true x ty) (ext_int_if_need $true y ty)))
|
||||
|
||||
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (smulhi x y)))
|
||||
(rv_vmulh_vv x y ty))
|
||||
|
||||
;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (umulhi x y)))
|
||||
(lower_umlhi ty (ext_int_if_need $false x ty) (ext_int_if_need $false y ty)))
|
||||
|
||||
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (umulhi x y)))
|
||||
(rv_vmulhu_vv x y ty))
|
||||
|
||||
;;;; Rules for `div` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -277,6 +290,10 @@
|
||||
(value_regs low high)))
|
||||
|
||||
|
||||
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (band x y)))
|
||||
(rv_vand_vv x y ty))
|
||||
|
||||
|
||||
;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1 (lower (has_type (ty_int ty) (bor x y)))
|
||||
(gen_or ty x y))
|
||||
@@ -320,6 +337,8 @@
|
||||
(high Reg (rv_orn (value_regs_get x 1) (value_regs_get y 1))))
|
||||
(value_regs low high)))
|
||||
|
||||
(rule 7 (lower (has_type (ty_vec_fits_in_register ty) (bor x y)))
|
||||
(rv_vor_vv x y ty))
|
||||
|
||||
;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
|
||||
@@ -341,6 +360,8 @@
|
||||
(rule (lower (has_type $F64 (bxor x y)))
|
||||
(lower_float_binary (AluOPRRR.Xor) x y $F64))
|
||||
|
||||
(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (bxor x y)))
|
||||
(rv_vxor_vv x y ty))
|
||||
|
||||
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1 (lower (has_type (ty_int ty) (bnot x)))
|
||||
|
||||
Reference in New Issue
Block a user