riscv64: Initial SIMD Vector Implementation (#6240)

* riscv64: Remove unused code

* riscv64: Add vector types

* riscv64: Initial Vector ABI Load/Stores

* riscv64: Vector Loads/Stores

* riscv64: Fix `vsetvli` encoding error

* riscv64: Add SIMD `iadd` runtests

* riscv64: Rename `VecSew`

The SEW name is correct, but only for VType. We also use this type
in loads/stores as the Effective Element Width, so the name isn't
quite correct in that case.

* ci: Add V extension to RISC-V QEMU

* riscv64: Misc Cleanups

* riscv64: Check V extension in `load`/`store` for SIMD

* riscv64: Fix `sumop` doc comment

* cranelift: Fix comment typo

* riscv64: Add convert for VType and VecElementWidth

* riscv64: Remove VecElementWidth converter
This commit is contained in:
Afonso Bordado
2023-04-20 22:54:43 +01:00
committed by GitHub
parent 7ad2fe32c9
commit 60e4a00413
22 changed files with 1945 additions and 137 deletions

View File

@@ -329,6 +329,31 @@
(guard_size u32)
(probe_count u32)
(tmp WritableReg))
(VecAluRRR
(op VecAluOpRRR)
(vd WritableReg)
(vs1 Reg)
(vs2 Reg)
(vstate VState))
(VecSetState
(rd WritableReg)
(vstate VState))
(VecLoad
(eew VecElementWidth)
(to WritableReg)
(from VecAMode)
(flags MemFlags)
(vstate VState))
(VecStore
(eew VecElementWidth)
(to VecAMode)
(from Reg)
(flags MemFlags)
(vstate VState))
))
@@ -711,6 +736,9 @@
;; ISA Extension helpers
(decl pure has_v () bool)
(extern constructor has_v has_v)
(decl pure has_zbkb () bool)
(extern constructor has_zbkb has_zbkb)

View File

@@ -1668,55 +1668,6 @@ impl CsrAddress {
}
}
pub(crate) struct VType {
vma: bool,
vta: bool,
vsew: Vsew,
valmul: Vlmul,
}
impl VType {
fn as_u32(self) -> u32 {
self.valmul.as_u32()
| self.vsew.as_u32() << 3
| if self.vta { 1 << 7 } else { 0 }
| if self.vma { 1 << 8 } else { 0 }
}
const fn vill_bit() -> u64 {
1 << 63
}
}
enum Vlmul {
vlmul_1_div_8 = 0b101,
vlmul_1_div_4 = 0b110,
vlmul_1_div_2 = 0b111,
vlmul_1 = 0b000,
vlmul_2 = 0b001,
vlmul_4 = 0b010,
vlmul_8 = 0b011,
}
impl Vlmul {
fn as_u32(self) -> u32 {
self as u32
}
}
enum Vsew {
sew_8 = 0b000,
sew_16 = 0b001,
sew_32 = 0b010,
sew_64 = 0b011,
}
impl Vsew {
fn as_u32(self) -> u32 {
self as u32
}
}
impl CsrOP {
pub(crate) fn op_name(self) -> &'static str {
match self {
@@ -1754,40 +1705,11 @@ impl CsrOP {
if self.need_rs() {
reg_to_gpr_num(rs.unwrap())
} else {
zimm.unwrap().as_u32()
zimm.unwrap().bits()
}
}
}
enum Vxrm {
// round-to-nearest-up (add +0.5 LSB)
rnu = 0b00,
// round-to-nearest-even
rne = 0b01,
//round-down (truncate)
rdn = 0b10,
// round-to-odd (OR bits into LSB, aka "jam")
rod = 0b11,
}
impl Vxrm {
pub(crate) fn as_u32(self) -> u32 {
self as u32
}
}
pub(crate) struct Vcsr {
xvrm: Vxrm,
// Fixed-point accrued saturation flag
vxsat: bool,
}
impl Vcsr {
pub(crate) fn as_u32(self) -> u32 {
return if self.vxsat { 1 } else { 0 } | self.xvrm.as_u32();
}
}
///Atomic Memory ordering.
#[derive(Copy, Clone, Debug)]
pub enum AMO {

View File

@@ -102,6 +102,13 @@ pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
}
#[derive(Clone, Debug, PartialEq, Default)]
pub enum EmitVState {
#[default]
Unknown,
Known(VState),
}
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
@@ -114,6 +121,9 @@ pub struct EmitState {
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compile time. See [cranelift_control].
ctrl_plane: ControlPlane,
/// Vector State
/// Controls the current state of the vector unit at the emission point.
vstate: EmitVState,
}
impl EmitState {
@@ -141,6 +151,7 @@ impl MachInstEmitState<Inst> for EmitState {
stack_map: None,
cur_srcloc: RelSourceLoc::default(),
ctrl_plane,
vstate: EmitVState::Unknown,
}
}
@@ -159,6 +170,11 @@ impl MachInstEmitState<Inst> for EmitState {
fn take_ctrl_plane(self) -> ControlPlane {
self.ctrl_plane
}
fn on_new_block(&mut self) {
// Reset the vector state.
self.vstate = EmitVState::Unknown;
}
}
impl Inst {
@@ -386,6 +402,80 @@ impl Inst {
}
insts
}
/// Returns Some(VState) if this instruction expects a specific vector state
/// before emission.
fn expected_vstate(&self) -> Option<&VState> {
match self {
Inst::Nop0
| Inst::Nop4
| Inst::BrTable { .. }
| Inst::Auipc { .. }
| Inst::Lui { .. }
| Inst::LoadConst32 { .. }
| Inst::LoadConst64 { .. }
| Inst::AluRRR { .. }
| Inst::FpuRRR { .. }
| Inst::AluRRImm12 { .. }
| Inst::Load { .. }
| Inst::Store { .. }
| Inst::Args { .. }
| Inst::Ret { .. }
| Inst::Extend { .. }
| Inst::AjustSp { .. }
| Inst::Call { .. }
| Inst::CallInd { .. }
| Inst::TrapIf { .. }
| Inst::Jal { .. }
| Inst::CondBr { .. }
| Inst::LoadExtName { .. }
| Inst::LoadAddr { .. }
| Inst::VirtualSPOffsetAdj { .. }
| Inst::Mov { .. }
| Inst::MovFromPReg { .. }
| Inst::Fence { .. }
| Inst::FenceI
| Inst::ECall
| Inst::EBreak
| Inst::Udf { .. }
| Inst::FpuRR { .. }
| Inst::FpuRRRR { .. }
| Inst::Jalr { .. }
| Inst::Atomic { .. }
| Inst::Select { .. }
| Inst::AtomicCas { .. }
| Inst::IntSelect { .. }
| Inst::Csr { .. }
| Inst::Icmp { .. }
| Inst::SelectReg { .. }
| Inst::FcvtToInt { .. }
| Inst::RawData { .. }
| Inst::AtomicStore { .. }
| Inst::AtomicLoad { .. }
| Inst::AtomicRmwLoop { .. }
| Inst::TrapIfC { .. }
| Inst::Unwind { .. }
| Inst::DummyUse { .. }
| Inst::FloatRound { .. }
| Inst::FloatSelect { .. }
| Inst::FloatSelectPseudo { .. }
| Inst::Popcnt { .. }
| Inst::Rev8 { .. }
| Inst::Cltz { .. }
| Inst::Brev8 { .. }
| Inst::StackProbeLoop { .. } => None,
// VecSetState does not expect any vstate; rather, it updates it.
Inst::VecSetState { .. } => None,
Inst::VecAluRRR { vstate, .. } |
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
// the full vtype. A future optimization could be to decouple these two when
// updating vstate. This would allow us to avoid emitting a VecSetState in
// some cases.
Inst::VecLoad { vstate, .. }
| Inst::VecStore { vstate, .. } => Some(vstate),
}
}
}
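The emitter uses expected_vstate together with the check at the top of emit below to insert a vsetivli only when the tracked vector state actually changes. A minimal standalone sketch of that lazy tracking (hypothetical types, not the crate's API): two back-to-back vector adds with the same VState trigger a single vsetivli.

#[derive(Clone, PartialEq, Debug)]
struct VState { avl: u8, vtype: u32 }

#[derive(Clone, PartialEq, Debug)]
enum EmitVState { Unknown, Known(VState) }

// Emit a (textual) vsetivli only if the tracked state differs from what the instruction expects.
fn maybe_set_state(current: &mut EmitVState, expected: &VState, out: &mut Vec<String>) {
    if *current != EmitVState::Known(expected.clone()) {
        out.push(format!("vsetivli zero, {}, vtype={:#x}", expected.avl, expected.vtype));
        *current = EmitVState::Known(expected.clone());
    }
}

fn main() {
    let mut state = EmitVState::Unknown;
    let mut asm = Vec::new();
    let v = VState { avl: 4, vtype: 0xd0 }; // e32, m1, ta, ma
    for _ in 0..2 {
        maybe_set_state(&mut state, &v, &mut asm);
        asm.push("vadd.vv v0,v1,v2".to_string());
    }
    // Only one vsetivli is emitted for the two adds.
    println!("{:#?}", asm);
}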
impl MachInstEmit for Inst {
@@ -400,6 +490,19 @@ impl MachInstEmit for Inst {
state: &mut EmitState,
) {
let mut allocs = AllocationConsumer::new(allocs);
// Check if we need to update the vector state before emitting this instruction
if let Some(expected) = self.expected_vstate() {
if state.vstate != EmitVState::Known(expected.clone()) {
// Update the vector state.
Inst::VecSetState {
rd: writable_zero_reg(),
vstate: expected.clone(),
}
.emit(&[], sink, emit_info, state);
}
}
// N.B.: we *must* not exceed the "worst-case size" used to compute
// where to insert islands, except when islands are explicitly triggered
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
@@ -530,13 +633,14 @@ impl MachInstEmit for Inst {
(rs1, rs2)
};
let x: u32 = alu_op.op_code()
| reg_to_gpr_num(rd.to_reg()) << 7
| (alu_op.funct3()) << 12
| reg_to_gpr_num(rs1) << 15
| reg_to_gpr_num(rs2) << 20
| alu_op.funct7() << 25;
sink.put4(x);
sink.put4(encode_r_type(
alu_op.op_code(),
rd.to_reg(),
alu_op.funct3(),
rs1,
rs2,
alu_op.funct7(),
));
}
&Inst::AluRRImm12 {
alu_op,
@@ -2695,6 +2799,120 @@ impl MachInstEmit for Inst {
.emit(&[], sink, emit_info, state);
sink.bind_label(label_done, &mut state.ctrl_plane);
}
&Inst::VecAluRRR {
op, vd, vs1, vs2, ..
} => {
let vs1 = allocs.next(vs1);
let vs2 = allocs.next(vs2);
let vd = allocs.next_writable(vd);
// This is the mask bit; we don't yet implement masking, so set it to 1, which means
// masking is disabled.
let vm = 1;
sink.put4(encode_valu(
op.opcode(),
vd.to_reg(),
op.funct3(),
vs1,
vs2,
vm,
op.funct6(),
));
}
&Inst::VecSetState { rd, ref vstate } => {
let rd = allocs.next_writable(rd);
sink.put4(encode_vcfg_imm(
0x57,
rd.to_reg(),
vstate.avl.unwrap_static(),
&vstate.vtype,
));
// Update the current vector emit state.
state.vstate = EmitVState::Known(vstate.clone());
}
&Inst::VecLoad {
eew,
to,
ref from,
flags,
..
} => {
let offset = from.get_offset_with_state(state);
let from_reg = allocs.next(from.get_base_register());
let to = allocs.next_writable(to);
// Vector Loads don't support immediate offsets, so we need to load the offset into a register.
let addr = writable_spilltmp_reg();
LoadConstant::U64(offset as u64)
.load_constant_and_add(addr, from_reg)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
// This is the mask bit; we don't yet implement masking, so set it to 1, which means
// masking is disabled.
let vm = 1;
sink.put4(encode_vmem_load(
0x07,
to.to_reg(),
eew,
addr.to_reg(),
from.lumop(),
vm,
from.mop(),
from.nf(),
));
}
&Inst::VecStore {
eew,
ref to,
from,
flags,
..
} => {
let offset = to.get_offset_with_state(state);
let to_reg = allocs.next(to.get_base_register());
let from = allocs.next(from);
// Vector Stores don't support immediate offsets, so we need to load the offset into a register.
let addr = writable_spilltmp_reg();
LoadConstant::U64(offset as u64)
.load_constant_and_add(addr, to_reg)
.into_iter()
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
let srcloc = state.cur_srcloc();
if !srcloc.is_default() && !flags.notrap() {
// Register the offset at which the actual store instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}
// This is the mask bit; we don't yet implement masking, so set it to 1, which means
// masking is disabled.
let vm = 1;
sink.put4(encode_vmem_store(
0x27,
from,
eew,
addr.to_reg(),
to.sumop(),
vm,
to.mop(),
to.nf(),
));
}
};
let end_off = sink.cur_offset();
assert!(

View File

@@ -0,0 +1,128 @@
//! Contains the RISC-V instruction encoding logic.
//!
//! These formats are specified in the RISC-V specification in section 2.2.
//! See: https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf
//!
//! Some instructions, especially in extensions, have slight variations from
//! the base RISC-V specification.
use super::{UImm5, VType};
use crate::isa::riscv64::inst::reg_to_gpr_num;
use crate::isa::riscv64::lower::isle::generated_code::VecElementWidth;
use crate::Reg;
/// Encode an R-type instruction.
///
/// Layout:
/// 0-------6-7-------11-12------14-15------19-20------24-25-------31
/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 |
pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, funct7: u32) -> u32 {
let mut bits = 0;
bits |= opcode & 0b1111111;
bits |= reg_to_gpr_num(rd) << 7;
bits |= (funct3 & 0b111) << 12;
bits |= reg_to_gpr_num(rs1) << 15;
bits |= reg_to_gpr_num(rs2) << 20;
bits |= (funct7 & 0b1111111) << 25;
bits
}
/// Encodes a Vector ALU instruction.
///
/// Fields:
/// - opcode (7 bits)
/// - vd (5 bits)
/// - funct3 (3 bits)
/// - vs1 (5 bits)
/// - vs2 (5 bits)
/// - vm (1 bit)
/// - funct6 (6 bits)
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
pub fn encode_valu(
opcode: u32,
vd: Reg,
funct3: u32,
vs1: Reg,
vs2: Reg,
vm: u32,
funct6: u32,
) -> u32 {
let funct6 = funct6 & 0b111111;
let vm = vm & 0b1;
// vm is instruction bit 25 and funct6 occupies bits 26..=31, i.e. they share funct7.
let funct7 = (funct6 << 1) | vm;
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
}
/// Encodes a Vector CFG Imm instruction.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc
// TODO: Check if this is any of the known instruction types in the spec.
pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 {
let mut bits = 0;
bits |= opcode & 0b1111111;
bits |= reg_to_gpr_num(rd) << 7;
bits |= 0b111 << 12;
bits |= (imm.bits() & 0b11111) << 15;
bits |= (vtype.encode() & 0b1111111111) << 20;
bits |= 0b11 << 30;
bits
}
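As a sanity check on the layout above, this minimal standalone computation (plain u32 values rather than the crate's Reg/UImm5/VType types) reproduces the word for `vsetivli zero, 4, e32, m1, ta, ma`, the configuration a four-lane 32-bit vector operation requests.

fn main() {
    let (opcode, rd, funct3) = (0x57u32, 0u32, 0b111u32);
    let avl = 4u32; // four lanes
    let vtype = 0b000 | 0b010 << 3 | 1 << 6 | 1 << 7; // m1 | e32 | ta | ma = 0xd0
    let word = opcode | rd << 7 | funct3 << 12 | avl << 15 | vtype << 20 | 0b11 << 30;
    assert_eq!(word, 0xcd02_7057);
    println!("{:#010x}", word);
}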
/// Encodes a Vector Mem Unit Stride Load instruction.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc
/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP
pub fn encode_vmem_load(
opcode: u32,
vd: Reg,
width: VecElementWidth,
rs1: Reg,
lumop: u32,
vm: u32,
mop: u32,
nf: u32,
) -> u32 {
// Width is encoded differently to avoid a clash with the FP load/store sizes.
let width = match width {
VecElementWidth::E8 => 0b000,
VecElementWidth::E16 => 0b101,
VecElementWidth::E32 => 0b110,
VecElementWidth::E64 => 0b111,
};
let mut bits = 0;
bits |= opcode & 0b1111111;
bits |= reg_to_gpr_num(vd) << 7;
bits |= width << 12;
bits |= reg_to_gpr_num(rs1) << 15;
bits |= (lumop & 0b11111) << 20;
bits |= (vm & 0b1) << 25;
bits |= (mop & 0b11) << 26;
// The mew bit (inst[28]) when set is expected to be used to encode expanded
// memory sizes of 128 bits and above, but these encodings are currently reserved.
bits |= 0b0 << 28;
bits |= (nf & 0b111) << 29;
bits
}
/// Encodes a Vector Mem Unit Stride Store instruction.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc
/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP
pub fn encode_vmem_store(
opcode: u32,
vs3: Reg,
width: VecElementWidth,
rs1: Reg,
sumop: u32,
vm: u32,
mop: u32,
nf: u32,
) -> u32 {
// This is pretty much the same as the load instruction, just
// with different names on the fields.
encode_vmem_load(opcode, vs3, width, rs1, sumop, vm, mop, nf)
}
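Putting encode_r_type and encode_valu together: the following hypothetical standalone sketch (plain u32 register numbers instead of the crate's Reg type) reproduces the word for `vadd.vv v4,v2,v1` with masking disabled. The point is that vm lands in bit 25 and funct6 in bits 26..=31, so together they fill the base R-type funct7 field.

fn encode_r_type(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 {
    (opcode & 0x7f)
        | (rd & 0x1f) << 7
        | (funct3 & 0x7) << 12
        | (rs1 & 0x1f) << 15
        | (rs2 & 0x1f) << 20
        | (funct7 & 0x7f) << 25
}

fn encode_valu(opcode: u32, vd: u32, funct3: u32, vs1: u32, vs2: u32, vm: u32, funct6: u32) -> u32 {
    // vm is instruction bit 25, funct6 occupies bits 26..=31.
    let funct7 = ((funct6 & 0x3f) << 1) | (vm & 1);
    encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
}

fn main() {
    // vadd.vv v4,v2,v1: opcode 0x57 (OP-V), funct3 OPIVV (0b000), funct6 0b000000, vm=1 (unmasked).
    let word = encode_valu(0x57, 4, 0b000, 1, 2, 1, 0b000000);
    assert_eq!(word, 0x0220_8257);
    println!("{:#010x}", word);
}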

View File

@@ -99,33 +99,31 @@ impl Display for Imm20 {
}
}
#[derive(Clone, Copy)]
pub struct Uimm5 {
bits: u8,
/// An unsigned 5-bit immediate.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct UImm5 {
value: u8,
}
impl Uimm5 {
pub fn from_bits(bits: u8) -> Self {
Self { bits }
impl UImm5 {
/// Create an unsigned 5-bit immediate from u8.
pub fn maybe_from_u8(value: u8) -> Option<UImm5> {
if value < 32 {
Some(UImm5 { value })
} else {
None
}
}
/// Create a zero immediate of this format.
pub fn zero() -> Self {
Self { bits: 0 }
}
pub fn as_u32(&self) -> u32 {
(self.bits as u32) & 0b1_1111
/// Bits for encoding.
pub fn bits(&self) -> u32 {
u32::from(self.value)
}
}
impl Debug for Uimm5 {
impl Display for UImm5 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{}", self.bits)
}
}
impl Display for Uimm5 {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "{}", self.bits)
write!(f, "{}", self.value)
}
}

View File

@@ -4,9 +4,10 @@
#![allow(dead_code)]
#![allow(non_camel_case_types)]
use super::lower::isle::generated_code::{VecAMode, VecElementWidth};
use crate::binemit::{Addend, CodeOffset, Reloc};
pub use crate::ir::condcodes::IntCC;
use crate::ir::types::{F32, F64, I128, I16, I32, I64, I8, R32, R64};
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, R32, R64};
pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
use crate::isa::CallConv;
@@ -29,6 +30,10 @@ pub mod args;
pub use self::args::*;
pub mod emit;
pub use self::emit::*;
pub mod vector;
pub use self::vector::*;
pub mod encode;
pub use self::encode::*;
pub mod unwind;
use crate::isa::riscv64::abi::Riscv64MachineDeps;
@@ -41,7 +46,7 @@ use std::fmt::{Display, Formatter};
pub(crate) type OptionReg = Option<Reg>;
pub(crate) type OptionImm12 = Option<Imm12>;
pub(crate) type VecBranchTarget = Vec<BranchTarget>;
pub(crate) type OptionUimm5 = Option<Uimm5>;
pub(crate) type OptionUimm5 = Option<UImm5>;
pub(crate) type OptionFloatRoundingMode = Option<FRM>;
pub(crate) type VecU8 = Vec<u8>;
pub(crate) type VecWritableReg = Vec<Writable<Reg>>;
@@ -313,21 +318,41 @@ impl Inst {
/// Generic constructor for a load (zero-extending where appropriate).
pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
Inst::Load {
rd: into_reg,
op: LoadOP::from_type(ty),
from: mem,
flags,
if ty.is_vector() {
Inst::VecLoad {
eew: VecElementWidth::from_type(ty),
to: into_reg,
from: VecAMode::UnitStride { base: mem },
flags,
vstate: VState::from_type(ty),
}
} else {
Inst::Load {
rd: into_reg,
op: LoadOP::from_type(ty),
from: mem,
flags,
}
}
}
/// Generic constructor for a store.
pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
Inst::Store {
src: from_reg,
op: StoreOP::from_type(ty),
to: mem,
flags,
if ty.is_vector() {
Inst::VecStore {
eew: VecElementWidth::from_type(ty),
to: VecAMode::UnitStride { base: mem },
from: from_reg,
flags,
vstate: VState::from_type(ty),
}
} else {
Inst::Store {
src: from_reg,
op: StoreOP::from_type(ty),
to: mem,
flags,
}
}
}
}
@@ -623,6 +648,22 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
// gen_prologue is called at emit stage.
// no need let reg alloc know.
}
&Inst::VecAluRRR { vd, vs1, vs2, .. } => {
collector.reg_use(vs1);
collector.reg_use(vs2);
collector.reg_def(vd);
}
&Inst::VecSetState { rd, .. } => {
collector.reg_def(rd);
}
&Inst::VecLoad { to, ref from, .. } => {
collector.reg_use(from.get_base_register());
collector.reg_def(to);
}
&Inst::VecStore { ref to, from, .. } => {
collector.reg_use(to.get_base_register());
collector.reg_use(from);
}
}
}
@@ -727,6 +768,7 @@ impl MachInst for Inst {
F32 => Ok((&[RegClass::Float], &[F32])),
F64 => Ok((&[RegClass::Float], &[F64])),
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
_ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty
@@ -784,6 +826,17 @@ pub fn reg_name(reg: Reg) -> String {
}
}
}
pub fn vec_reg_name(reg: Reg) -> String {
match reg.to_real_reg() {
Some(real) => {
assert_eq!(real.class(), RegClass::Float);
format!("v{}", real.hw_enc())
}
None => {
format!("{:?}", reg)
}
}
}
impl Inst {
fn print_with_state(
@@ -795,6 +848,16 @@ impl Inst {
let reg = allocs.next(reg);
reg_name(reg)
};
let format_vec_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String {
let reg = allocs.next(reg);
vec_reg_name(reg)
};
let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String {
match amode {
VecAMode::UnitStride { base } => base.to_string_with_alloc(allocs),
}
};
let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String {
let mut x = if regs.len() > 1 {
@@ -839,6 +902,7 @@ impl Inst {
"".into()
}
}
match self {
&Inst::Nop0 => {
format!("##zero length nop")
@@ -1501,6 +1565,48 @@ impl Inst {
&MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code),
&MInst::EBreak {} => String::from("ebreak"),
&MInst::ECall {} => String::from("ecall"),
&Inst::VecAluRRR {
op,
vd,
vs1,
vs2,
ref vstate,
} => {
let vs1_s = format_vec_reg(vs1, allocs);
let vs2_s = format_vec_reg(vs2, allocs);
let vd_s = format_vec_reg(vd.to_reg(), allocs);
// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
// This is noted in Section 10.1 of the RISC-V Vector spec.
format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate)
}
&Inst::VecSetState { rd, ref vstate } => {
let rd_s = format_reg(rd.to_reg(), allocs);
assert!(vstate.avl.is_static());
format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype)
}
Inst::VecLoad {
eew,
to,
from,
ref vstate,
..
} => {
let base = format_vec_amode(from, allocs);
let vd = format_vec_reg(to.to_reg(), allocs);
format!("vl{}.v {},{} {}", eew, vd, base, vstate)
}
Inst::VecStore {
eew,
to,
from,
ref vstate,
..
} => {
let dst = format_vec_amode(to, allocs);
let vs3 = format_vec_reg(*from, allocs);
format!("vs{}.v {},{} {}", eew, vs3, dst, vstate)
}
}
}
}

View File

@@ -218,3 +218,13 @@ pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec<Writable<Reg>> {
}
regs
}
#[inline]
pub fn v_reg(enc: usize) -> Reg {
let p_reg = PReg::new(enc, RegClass::Float);
let v_reg = VReg::new(p_reg.index(), p_reg.class());
Reg::from(v_reg)
}
pub fn vx_reg(enc: usize) -> PReg {
PReg::new(enc, RegClass::Float)
}

View File

@@ -0,0 +1,289 @@
use crate::isa::riscv64::inst::EmitState;
use crate::isa::riscv64::lower::isle::generated_code::{
VecAMode, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, VecTailMode,
};
use crate::Reg;
use core::fmt;
use super::{Type, UImm5};
impl VecAvl {
pub fn _static(size: u32) -> Self {
VecAvl::Static {
size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL"),
}
}
pub fn is_static(&self) -> bool {
match self {
VecAvl::Static { .. } => true,
}
}
pub fn unwrap_static(&self) -> UImm5 {
match self {
VecAvl::Static { size } => *size,
}
}
}
// TODO: Can we tell ISLE to derive this?
impl PartialEq for VecAvl {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs,
}
}
}
impl fmt::Display for VecAvl {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecAvl::Static { size } => write!(f, "{}", size),
}
}
}
impl VecElementWidth {
pub fn from_type(ty: Type) -> Self {
Self::from_bits(ty.lane_bits())
}
pub fn from_bits(bits: u32) -> Self {
match bits {
8 => VecElementWidth::E8,
16 => VecElementWidth::E16,
32 => VecElementWidth::E32,
64 => VecElementWidth::E64,
_ => panic!("Invalid number of bits for VecElementWidth: {}", bits),
}
}
pub fn bits(&self) -> u32 {
match self {
VecElementWidth::E8 => 8,
VecElementWidth::E16 => 16,
VecElementWidth::E32 => 32,
VecElementWidth::E64 => 64,
}
}
pub fn encode(&self) -> u32 {
match self {
VecElementWidth::E8 => 0b000,
VecElementWidth::E16 => 0b001,
VecElementWidth::E32 => 0b010,
VecElementWidth::E64 => 0b011,
}
}
}
impl fmt::Display for VecElementWidth {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "e{}", self.bits())
}
}
impl VecLmul {
pub fn encode(&self) -> u32 {
match self {
VecLmul::LmulF8 => 0b101,
VecLmul::LmulF4 => 0b110,
VecLmul::LmulF2 => 0b111,
VecLmul::Lmul1 => 0b000,
VecLmul::Lmul2 => 0b001,
VecLmul::Lmul4 => 0b010,
VecLmul::Lmul8 => 0b011,
}
}
}
impl fmt::Display for VecLmul {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecLmul::LmulF8 => write!(f, "mf8"),
VecLmul::LmulF4 => write!(f, "mf4"),
VecLmul::LmulF2 => write!(f, "mf2"),
VecLmul::Lmul1 => write!(f, "m1"),
VecLmul::Lmul2 => write!(f, "m2"),
VecLmul::Lmul4 => write!(f, "m4"),
VecLmul::Lmul8 => write!(f, "m8"),
}
}
}
impl VecTailMode {
pub fn encode(&self) -> u32 {
match self {
VecTailMode::Agnostic => 1,
VecTailMode::Undisturbed => 0,
}
}
}
impl fmt::Display for VecTailMode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecTailMode::Agnostic => write!(f, "ta"),
VecTailMode::Undisturbed => write!(f, "tu"),
}
}
}
impl VecMaskMode {
pub fn encode(&self) -> u32 {
match self {
VecMaskMode::Agnostic => 1,
VecMaskMode::Undisturbed => 0,
}
}
}
impl fmt::Display for VecMaskMode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecMaskMode::Agnostic => write!(f, "ma"),
VecMaskMode::Undisturbed => write!(f, "mu"),
}
}
}
/// Vector Type (VType)
///
/// vtype provides the default type used to interpret the contents of the vector register file.
#[derive(Clone, Debug, PartialEq)]
pub struct VType {
pub sew: VecElementWidth,
pub lmul: VecLmul,
pub tail_mode: VecTailMode,
pub mask_mode: VecMaskMode,
}
impl VType {
// https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc
pub fn encode(&self) -> u32 {
let mut bits = 0;
bits |= self.lmul.encode();
bits |= self.sew.encode() << 3;
bits |= self.tail_mode.encode() << 6;
bits |= self.mask_mode.encode() << 7;
bits
}
}
impl fmt::Display for VType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"{}, {}, {}, {}",
self.sew, self.lmul, self.tail_mode, self.mask_mode
)
}
}
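For concreteness, the vtype word this encoding produces for e32/m1/ta/ma (the configuration VState::from_type below selects for a four-lane 32-bit vector) works out as in this minimal standalone check (plain u32s, not the crate's types):

fn main() {
    let (lmul, sew, tail, mask) = (0b000u32, 0b010u32, 1u32, 1u32); // m1, e32, ta, ma
    let vtype = lmul | sew << 3 | tail << 6 | mask << 7;
    assert_eq!(vtype, 0xd0); // displayed as "e32, m1, ta, ma"
    println!("{:#x}", vtype);
}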
/// Vector State (VState)
///
/// VState represents the state of the vector unit that each instruction expects before execution.
/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is
/// used by our instruction emission code to ensure that the vector unit is in the correct state.
#[derive(Clone, Debug, PartialEq)]
pub struct VState {
pub avl: VecAvl,
pub vtype: VType,
}
impl VState {
pub fn from_type(ty: Type) -> Self {
VState {
avl: VecAvl::_static(ty.lane_count()),
vtype: VType {
sew: VecElementWidth::from_type(ty),
lmul: VecLmul::Lmul1,
tail_mode: VecTailMode::Agnostic,
mask_mode: VecMaskMode::Agnostic,
},
}
}
}
impl fmt::Display for VState {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype)
}
}
impl VecAluOpRRR {
pub fn opcode(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0x57,
}
}
pub fn funct3(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0b000,
}
}
pub fn funct6(&self) -> u32 {
match self {
VecAluOpRRR::Vadd => 0b000000,
}
}
}
impl fmt::Display for VecAluOpRRR {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
}
}
}
impl VecAMode {
pub fn get_base_register(&self) -> Reg {
match self {
VecAMode::UnitStride { base, .. } => base.get_base_register(),
}
}
pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 {
match self {
VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state),
}
}
/// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
pub fn mop(&self) -> u32 {
match self {
VecAMode::UnitStride { .. } => 0b00,
}
}
/// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
pub fn lumop(&self) -> u32 {
match self {
VecAMode::UnitStride { .. } => 0b00000,
}
}
/// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
pub fn sumop(&self) -> u32 {
match self {
VecAMode::UnitStride { .. } => 0b00000,
}
}
/// The `nf[2:0]` field encodes the number of fields in each segment. For regular vector loads and
/// stores, nf=0, indicating that a single value is moved between a vector register group and memory
/// at each element position. Larger values in the nf field are used to access multiple contiguous
/// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions.
///
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
pub fn nf(&self) -> u32 {
match self {
VecAMode::UnitStride { .. } => 0b000,
}
}
}
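Tying the mop/lumop/nf helpers above back to the earlier encode_vmem_load layout, this hypothetical standalone check (plain u32 field values, not the crate's API) reproduces the word for a unit-stride `vle32.v v1,(a0)` with masking disabled:

fn main() {
    // Unit-stride load: mop=0b00, lumop=0b00000, nf=0b000; e32 uses width bits 0b110 to avoid
    // clashing with the scalar FP loads that share the 0x07 opcode; vm=1 means unmasked.
    let (opcode, vd, width, rs1, lumop, vm, mop, mew, nf) =
        (0x07u32, 1u32, 0b110u32, 10u32, 0b00000u32, 1u32, 0b00u32, 0u32, 0b000u32);
    let word = opcode
        | vd << 7
        | width << 12
        | rs1 << 15
        | lumop << 20
        | vm << 25
        | mop << 26
        | mew << 28
        | nf << 29;
    assert_eq!(word, 0x0205_6087);
    println!("{:#010x}", word);
}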

View File

@@ -0,0 +1,132 @@
;; Represents the possible widths of an element when used in an operation.
(type VecElementWidth (enum
(E8)
(E16)
(E32)
(E64)
))
;; Vector Register Group Multiplier (LMUL)
;;
;; The LMUL setting specifies how we should group registers together. LMUL can
;; also be a fractional value, reducing the number of bits used in a single
;; vector register. Fractional LMUL is used to increase the number of effective
;; usable vector register groups when operating on mixed-width values.
(type VecLmul (enum
(LmulF8)
(LmulF4)
(LmulF2)
(Lmul1)
(Lmul2)
(Lmul4)
(Lmul8)
))
;; Tail Mode
;;
;; The tail mode specifies how the tail elements of a vector register are handled.
(type VecTailMode (enum
;; Tail Agnostic means that the tail elements are left in an undefined state.
(Agnostic)
;; Tail Undisturbed means that the tail elements are left in their original values.
(Undisturbed)
))
;; Mask Mode
;;
;; The mask mode specifies how the masked elements of a vector register are handled.
(type VecMaskMode (enum
;; Mask Agnostic means that the masked out elements are left in an undefined state.
(Agnostic)
;; Mask Undisturbed means that the masked out elements are left in their original values.
(Undisturbed)
))
;; Application Vector Length (AVL)
;;
;; This setting specifies the number of elements that are going to be processed
;; in a single instruction. Note: We may end up processing fewer elements than
;; the AVL setting, if they don't fit in a single register.
(type VecAvl (enum
;; Static AVL emits a `vsetivli` that uses a constant value
(Static (size UImm5))
;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it
))
(type VType (primitive VType))
(type VState (primitive VState))
;; Register to Register ALU Ops
(type VecAluOpRRR (enum
(Vadd)
))
;; Vector Addressing Mode
(type VecAMode (enum
;; Vector unit-stride operations access elements stored contiguously in memory
;; starting from the base effective address.
(UnitStride
(base AMode))
;; TODO: Constant Stride
;; TODO: Indexed Operations
))
;; Builds a static VState matching a SIMD type.
;; The VState is guaranteed to be static with AVL set to the number of lanes.
;; Element size is set to the size of the type.
;; LMUL is set to 1.
;; Tail mode is set to agnostic.
;; Mask mode is set to agnostic.
(decl pure vstate_from_type (Type) VState)
(extern constructor vstate_from_type vstate_from_type)
(convert Type VState vstate_from_type)
;; Extracts an element width from a SIMD type.
(decl pure element_width_from_type (Type) VecElementWidth)
(rule (element_width_from_type ty)
(if-let $I8 (lane_type ty))
(VecElementWidth.E8))
(rule (element_width_from_type ty)
(if-let $I16 (lane_type ty))
(VecElementWidth.E16))
(rule (element_width_from_type ty)
(if-let $I32 (lane_type ty))
(VecElementWidth.E32))
(rule (element_width_from_type ty)
(if-let $I64 (lane_type ty))
(VecElementWidth.E64))
;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; As noted in the RISC-V Vector Extension Specification, rs2 is the first
;; source register and rs1 is the second source register. This is the opposite
;; of the usual RISC-V register order.
;; See Section 10.1 of the RISC-V Vector Extension Specification.
;; Helper for emitting `MInst.VecAluRRR` instructions.
(decl vec_alu_rrr (VecAluOpRRR Reg Reg VState) Reg)
(rule (vec_alu_rrr op vs2 vs1 vstate)
(let ((vd WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 vstate))))
vd))
;; Helper for emitting `MInst.VecLoad` instructions.
(decl vec_load (VecElementWidth VecAMode MemFlags VState) Reg)
(rule (vec_load eew from flags vstate)
(let ((vd WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecLoad eew vd from flags vstate))))
vd))
;; Helper for emitting `MInst.VecStore` instructions.
(decl vec_store (VecElementWidth VecAMode Reg MemFlags VState) InstOutput)
(rule (vec_store eew to from flags vstate)
(side_effect
(SideEffectNoResult.Inst (MInst.VecStore eew to from flags vstate))))
;; Helper for emitting the `vadd.vv` instruction.
(decl rv_vadd_vv (Reg Reg VState) Reg)
(rule (rv_vadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))

View File

@@ -53,7 +53,7 @@
(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add))
(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add))
(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add))
(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n))))))
(if-let $true (has_zba))
(if-let shnadd (match_shnadd n))
@@ -75,7 +75,7 @@
(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw))
(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw))
(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw))
(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n))))))
(if-let $true (has_zba))
(if-let shnadd_uw (match_shnadd_uw n))
@@ -97,6 +97,11 @@
(high Reg (rv_add high_tmp carry)))
(value_regs low high)))
;; SIMD Vectors
(rule 8 (lower (has_type (ty_vec128_int ty) (iadd x y)))
(if-let $true (has_v))
(rv_vadd_vv x y ty))
;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
(rule
(lower (has_type (fits_in_64 ty) (uadd_overflow_trap x y tc)))
@@ -374,7 +379,7 @@
(rule 1 (lower (has_type $I128 (clz x)))
(lower_clz_i128 x))
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (cls x)))
(lower_cls ty x))
@@ -809,6 +814,12 @@
(lower (has_type $I128 (load flags p @ (value_type (ty_addr64 _)) offset)))
(gen_load_128 p offset flags))
(rule 2
(lower (has_type (ty_vec128_int ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
(if-let $true (has_v))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags ty)))
;;;;; Rules for `istore8`;;;;;;;;;
(rule
(lower (istore8 flags x p @ (value_type (ty_addr64 _)) offset))
@@ -833,6 +844,12 @@
(lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset))
(gen_store_128 p offset flags x))
(rule 2
(lower (store flags x @ (value_type (ty_vec128_int ty)) p @ (value_type (ty_addr64 _)) offset))
(if-let $true (has_v))
(let ((eew VecElementWidth (element_width_from_type ty)))
(vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags ty)))
(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
(rule
(gen_icmp cc x y ty)

View File

@@ -283,6 +283,10 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
ValueRegs::two(shamt, len_sub_shamt)
}
fn has_v(&mut self) -> bool {
self.backend.isa_flags.has_v()
}
fn has_zbkb(&mut self) -> bool {
self.backend.isa_flags.has_zbkb()
}
@@ -428,6 +432,11 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
rs2,
}
}
#[inline]
fn vstate_from_type(&mut self, ty: Type) -> VState {
VState::from_type(ty)
}
}
impl IsleContext<'_, '_, MInst, Riscv64Backend> {

View File

@@ -288,6 +288,9 @@ pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {
/// Used to continue using a control plane after the emission state is
/// not needed anymore.
fn take_ctrl_plane(self) -> ControlPlane;
/// A hook that triggers when first emitting a new block.
/// It is guaranteed to be called before any instructions are emitted.
fn on_new_block(&mut self) {}
}
/// The result of a `MachBackend::compile_function()` call. Contains machine

View File

@@ -843,6 +843,11 @@ impl<I: VCodeInst> VCode<I> {
for (block_order_idx, &block) in final_order.iter().enumerate() {
trace!("emitting block {:?}", block);
// Call the new block hook for state
state.on_new_block();
// Emit NOPs to align the block.
let new_offset = I::align_basic_block(buffer.cur_offset());
while new_offset > buffer.cur_offset() {
// Pad with NOPs up to the aligned block offset.