riscv64: Support non 128bit vector sizes (#6266)

* riscv64: Add `Zvl` extensions

* riscv64: Allow lowering SIMD operations that fit in a vector register

* riscv64: Support non 128bit vector sizes

* riscv64: Add Zvl Presets

* riscv64: Precompute `min_vec_reg_size`
This commit is contained in:
Afonso Bordado
2023-04-25 15:50:00 +01:00
committed by GitHub
parent c7b83e8ef9
commit 4337ccd4b7
12 changed files with 291 additions and 32 deletions

View File

@@ -761,7 +761,25 @@ impl MachInst for Inst {
F32 => Ok((&[RegClass::Float], &[F32])),
F64 => Ok((&[RegClass::Float], &[F64])),
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
_ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])),
_ if ty.is_vector() => {
debug_assert!(ty.bits() <= 512);
// Here we only need to return a SIMD type with the same size as `ty`.
// We use these types for spills and reloads, so prefer types with lanes <= 31
// since that fits in the immediate field of `vsetivli`.
const SIMD_TYPES: [[Type; 1]; 6] = [
[types::I8X2],
[types::I8X4],
[types::I8X8],
[types::I8X16],
[types::I16X16],
[types::I32X16],
];
let idx = (ty.bytes().ilog2() - 1) as usize;
let ty = &SIMD_TYPES[idx][..];
Ok((&[RegClass::Float], ty))
}
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty

View File

@@ -99,6 +99,14 @@
(if-let $I64 (lane_type ty))
(VecElementWidth.E64))
;; The minimum vector register size, as precomputed by the lowering context.
;; This is 0 when vectors are not supported.
(decl pure min_vec_reg_size () u64)
(extern constructor min_vec_reg_size min_vec_reg_size)
;; An extractor that matches any type that is known to fit in a single vector
;; register.
(decl ty_vec_fits_in_register (Type) Type)
(extern extractor ty_vec_fits_in_register ty_vec_fits_in_register)
;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; As noted in the RISC-V Vector Extension Specification, rs2 is the first

View File

@@ -28,14 +28,14 @@
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Base case, simply adding things in registers.
(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y)))
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x y)))
(rv_add x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 1 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_value y))))
(rule 1 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd x (imm12_from_value y))))
(alu_rr_imm12 (select_addi ty) x y))
(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
(rule 2 (lower (has_type (ty_int_ref_scalar_64 ty) (iadd (imm12_from_value x) y)))
(alu_rr_imm12 (select_addi ty) y x))
;; Special case when one of the operands is uextended
@@ -98,8 +98,7 @@
(value_regs low high)))
;; SIMD Vectors
(rule 8 (lower (has_type (ty_vec128_int ty) (iadd x y)))
(if-let $true (has_v))
(rule 8 (lower (has_type (ty_vec_fits_in_register ty) (iadd x y)))
(rv_vadd_vv x y ty))
;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
@@ -815,8 +814,7 @@
(gen_load_128 p offset flags))
(rule 2
(lower (has_type (ty_vec128_int ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
(if-let $true (has_v))
(lower (has_type (ty_vec_fits_in_register ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags ty)))
@@ -845,8 +843,7 @@
(gen_store_128 p offset flags x))
(rule 2
(lower (store flags x @ (value_type (ty_vec128_int ty)) p @ (value_type (ty_addr64 _)) offset))
(if-let $true (has_v))
(lower (store flags x @ (value_type (ty_vec_fits_in_register ty)) p @ (value_type (ty_addr64 _)) offset))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags ty)))

View File

@@ -33,23 +33,67 @@ type VecMachLabel = Vec<MachLabel>;
type VecArgPair = Vec<ArgPair>;
use crate::machinst::valueregs;
/// The main entry point for lowering with ISLE.
pub(crate) fn lower(
lower_ctx: &mut Lower<MInst>,
backend: &Riscv64Backend,
inst: Inst,
) -> Option<InstOutput> {
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = IsleContext { lower_ctx, backend };
generated_code::constructor_lower(&mut isle_ctx, inst)
/// Lowering context handed to the ISLE-generated code for the riscv64 backend.
///
/// Bundles the in-progress lowering state with the backend, and caches the
/// minimum vector register size so it is computed once at construction.
pub(crate) struct RV64IsleContext<'a, 'b, I, B>
where
I: VCodeInst,
B: LowerBackend,
{
/// The lowering state that instructions are emitted into.
pub lower_ctx: &'a mut Lower<'b, I>,
/// The backend whose ISA flags drive lowering decisions.
pub backend: &'a B,
/// Precalculated value for the minimum vector register size. Will be 0 if
/// vectors are not supported.
min_vec_reg_size: u64,
}
impl IsleContext<'_, '_, MInst, Riscv64Backend> {
impl<'a, 'b> RV64IsleContext<'a, 'b, MInst, Riscv64Backend> {
isle_prelude_method_helpers!(Riscv64ABICaller);
/// Builds a context for `backend`, computing the minimum vector register
/// size from the backend up front so later queries are a plain field read.
fn new(lower_ctx: &'a mut Lower<'b, MInst>, backend: &'a Riscv64Backend) -> Self {
    let min_vec_reg_size = Self::compute_min_vec_reg_size(backend);
    Self {
        lower_ctx,
        backend,
        min_vec_reg_size,
    }
}
/// Derives the minimum vector register size from the backend's ISA flags.
///
/// The candidate flags are listed from the largest size to the smallest, and
/// the size paired with the first enabled flag is returned. When none of the
/// listed flags is enabled the result is 0.
fn compute_min_vec_reg_size(backend: &Riscv64Backend) -> u64 {
    let isa = &backend.isa_flags;
    let candidates = [
        (isa.has_zvl65536b(), 65536),
        (isa.has_zvl32768b(), 32768),
        (isa.has_zvl16384b(), 16384),
        (isa.has_zvl8192b(), 8192),
        (isa.has_zvl4096b(), 4096),
        (isa.has_zvl2048b(), 2048),
        (isa.has_zvl1024b(), 1024),
        (isa.has_zvl512b(), 512),
        (isa.has_zvl256b(), 256),
        // Claiming the Application Profile V extension requires a minimum
        // register size of 128, i.e. V implies Zvl128b.
        (isa.has_v(), 128),
        (isa.has_zvl128b(), 128),
        (isa.has_zvl64b(), 64),
        (isa.has_zvl32b(), 32),
    ];

    // First enabled entry wins; fall back to 0 when nothing matched.
    candidates
        .into_iter()
        .find_map(|(enabled, size)| enabled.then_some(size))
        .unwrap_or(0)
}
/// Emits a clone of every instruction in `list`, in order, into the
/// lowering context.
#[inline]
fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
    for inst in list.iter().cloned() {
        self.lower_ctx.emit(inst);
    }
}
}
impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> {
isle_lower_prelude_methods!();
isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);
@@ -437,17 +481,33 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
fn vstate_from_type(&mut self, ty: Type) -> VState {
VState::from_type(ty)
}
}
impl IsleContext<'_, '_, MInst, Riscv64Backend> {
/// Returns the minimum vector register size cached on this context.
fn min_vec_reg_size(&mut self) -> u64 {
self.min_vec_reg_size
}
#[inline]
fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
for i in list {
self.lower_ctx.emit(i.clone());
/// Matches `ty` only when it is a vector type whose width in bits does not
/// exceed the minimum vector register size; yields `None` otherwise.
fn ty_vec_fits_in_register(&mut self, ty: Type) -> Option<Type> {
    // Non-vector types never match, and the size is not consulted for them.
    if !ty.is_vector() {
        return None;
    }
    let limit = self.min_vec_reg_size();
    ((ty.bits() as u64) <= limit).then_some(ty)
}
}
/// Entry point for lowering a single instruction through the ISLE rules.
///
/// Returns whatever the generated `lower` constructor produces for `inst`.
pub(crate) fn lower(
    lower_ctx: &mut Lower<MInst>,
    backend: &Riscv64Backend,
    inst: Inst,
) -> Option<InstOutput> {
    // TODO: keep one ISLE context alive across lowerings so that its internal
    // heap allocations can be reused instead of rebuilt on every call.
    let mut ctx = RV64IsleContext::new(lower_ctx, backend);
    generated_code::constructor_lower(&mut ctx, inst)
}
/// The main entry point for branch lowering with ISLE.
pub(crate) fn lower_branch(
lower_ctx: &mut Lower<MInst>,
@@ -457,7 +517,7 @@ pub(crate) fn lower_branch(
) -> Option<()> {
// TODO: reuse the ISLE context across lowerings so we can reuse its
// internal heap allocations.
let mut isle_ctx = IsleContext { lower_ctx, backend };
let mut isle_ctx = RV64IsleContext::new(lower_ctx, backend);
generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
}

View File

@@ -303,6 +303,11 @@ macro_rules! isle_common_prelude_methods {
}
}
/// Extractor form of `ty_int_ref_scalar_64`: forwards `ty` to that method
/// and returns its result unchanged.
#[inline]
fn ty_int_ref_scalar_64_extract(&mut self, ty: Type) -> Option<Type> {
self.ty_int_ref_scalar_64(ty)
}
#[inline]
fn ty_32(&mut self, ty: Type) -> Option<Type> {
if ty.bits() == 32 {

View File

@@ -340,10 +340,11 @@
(decl ty_64 (Type) Type)
(extern extractor ty_64 ty_64)
;; A pure constructor that only matches scalar integers, and references that can
;; fit in 64 bits.
;; A pure constructor/extractor that only matches scalar integers, and
;; references that can fit in 64 bits.
(decl pure partial ty_int_ref_scalar_64 (Type) Type)
(extern constructor ty_int_ref_scalar_64 ty_int_ref_scalar_64)
(extern extractor ty_int_ref_scalar_64 ty_int_ref_scalar_64_extract)
;; An extractor that matches 32- and 64-bit types only.
(decl ty_32_or_64 (Type) Type)