riscv64: Initial SIMD Vector Implementation (#6240)
* riscv64: Remove unused code * riscv64: Add vector types * riscv64: Initial Vector ABI Load/Stores * riscv64: Vector Loads/Stores * riscv64: Fix `vsetvli` encoding error * riscv64: Add SIMD `iadd` runtests * riscv64: Rename `VecSew` The SEW name is correct, but only for VType. We also use this type in loads/stores as the Effective Element Width, so the name isn't quite correct in that case. * ci: Add V extension to RISC-V QEMU * riscv64: Misc Cleanups * riscv64: Check V extension in `load`/`store` for SIMD * riscv64: Fix `sumop` doc comment * cranelift: Fix comment typo * riscv64: Add convert for VType and VecElementWidth * riscv64: Remove VecElementWidth converter
This commit is contained in:
@@ -82,7 +82,7 @@ const array = [
|
||||
"target": "riscv64gc-unknown-linux-gnu",
|
||||
"gcc_package": "gcc-riscv64-linux-gnu",
|
||||
"gcc": "riscv64-linux-gnu-gcc",
|
||||
"qemu": "qemu-riscv64 -cpu rv64,zba=true,zbb=true,zbc=true,zbs=true,zbkb=true -L /usr/riscv64-linux-gnu",
|
||||
"qemu": "qemu-riscv64 -cpu rv64,v=true,vlen=256,vext_spec=v1.0,zba=true,zbb=true,zbc=true,zbs=true,zbkb=true -L /usr/riscv64-linux-gnu",
|
||||
"qemu_target": "riscv64-linux-user",
|
||||
"name": "Test Linux riscv64",
|
||||
"filter": "linux-riscv64",
|
||||
|
||||
@@ -274,6 +274,7 @@ fn get_isle_compilations(
|
||||
prelude_isle.clone(),
|
||||
prelude_lower_isle.clone(),
|
||||
src_isa_risc_v.join("inst.isle"),
|
||||
src_isa_risc_v.join("inst_vector.isle"),
|
||||
src_isa_risc_v.join("lower.isle"),
|
||||
],
|
||||
untracked_inputs: vec![clif_lower_isle.clone()],
|
||||
|
||||
@@ -329,6 +329,31 @@
|
||||
(guard_size u32)
|
||||
(probe_count u32)
|
||||
(tmp WritableReg))
|
||||
|
||||
(VecAluRRR
|
||||
(op VecAluOpRRR)
|
||||
(vd WritableReg)
|
||||
(vs1 Reg)
|
||||
(vs2 Reg)
|
||||
(vstate VState))
|
||||
|
||||
(VecSetState
|
||||
(rd WritableReg)
|
||||
(vstate VState))
|
||||
|
||||
(VecLoad
|
||||
(eew VecElementWidth)
|
||||
(to WritableReg)
|
||||
(from VecAMode)
|
||||
(flags MemFlags)
|
||||
(vstate VState))
|
||||
|
||||
(VecStore
|
||||
(eew VecElementWidth)
|
||||
(to VecAMode)
|
||||
(from Reg)
|
||||
(flags MemFlags)
|
||||
(vstate VState))
|
||||
))
|
||||
|
||||
|
||||
@@ -711,6 +736,9 @@
|
||||
|
||||
;; ISA Extension helpers
|
||||
|
||||
(decl pure has_v () bool)
|
||||
(extern constructor has_v has_v)
|
||||
|
||||
(decl pure has_zbkb () bool)
|
||||
(extern constructor has_zbkb has_zbkb)
|
||||
|
||||
|
||||
@@ -1668,55 +1668,6 @@ impl CsrAddress {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct VType {
|
||||
vma: bool,
|
||||
vta: bool,
|
||||
vsew: Vsew,
|
||||
valmul: Vlmul,
|
||||
}
|
||||
|
||||
impl VType {
|
||||
fn as_u32(self) -> u32 {
|
||||
self.valmul.as_u32()
|
||||
| self.vsew.as_u32() << 3
|
||||
| if self.vta { 1 << 7 } else { 0 }
|
||||
| if self.vma { 1 << 8 } else { 0 }
|
||||
}
|
||||
|
||||
const fn vill_bit() -> u64 {
|
||||
1 << 63
|
||||
}
|
||||
}
|
||||
|
||||
enum Vlmul {
|
||||
vlmul_1_div_8 = 0b101,
|
||||
vlmul_1_div_4 = 0b110,
|
||||
vlmul_1_div_2 = 0b111,
|
||||
vlmul_1 = 0b000,
|
||||
vlmul_2 = 0b001,
|
||||
vlmul_4 = 0b010,
|
||||
vlmul_8 = 0b011,
|
||||
}
|
||||
|
||||
impl Vlmul {
|
||||
fn as_u32(self) -> u32 {
|
||||
self as u32
|
||||
}
|
||||
}
|
||||
|
||||
enum Vsew {
|
||||
sew_8 = 0b000,
|
||||
sew_16 = 0b001,
|
||||
sew_32 = 0b010,
|
||||
sew_64 = 0b011,
|
||||
}
|
||||
|
||||
impl Vsew {
|
||||
fn as_u32(self) -> u32 {
|
||||
self as u32
|
||||
}
|
||||
}
|
||||
|
||||
impl CsrOP {
|
||||
pub(crate) fn op_name(self) -> &'static str {
|
||||
match self {
|
||||
@@ -1754,40 +1705,11 @@ impl CsrOP {
|
||||
if self.need_rs() {
|
||||
reg_to_gpr_num(rs.unwrap())
|
||||
} else {
|
||||
zimm.unwrap().as_u32()
|
||||
zimm.unwrap().bits()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum Vxrm {
|
||||
// round-to-nearest-up (add +0.5 LSB)
|
||||
rnu = 0b00,
|
||||
// round-to-nearest-even
|
||||
rne = 0b01,
|
||||
//round-down (truncate)
|
||||
rdn = 0b10,
|
||||
// round-to-odd (OR bits into LSB, aka "jam")
|
||||
rod = 0b11,
|
||||
}
|
||||
|
||||
impl Vxrm {
|
||||
pub(crate) fn as_u32(self) -> u32 {
|
||||
self as u32
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct Vcsr {
|
||||
xvrm: Vxrm,
|
||||
// Fixed-point accrued saturation flag
|
||||
vxsat: bool,
|
||||
}
|
||||
|
||||
impl Vcsr {
|
||||
pub(crate) fn as_u32(self) -> u32 {
|
||||
return if self.vxsat { 1 } else { 0 } | self.xvrm.as_u32();
|
||||
}
|
||||
}
|
||||
|
||||
/// Atomic Memory ordering.
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum AMO {
|
||||
|
||||
@@ -102,6 +102,13 @@ pub(crate) fn reg_to_gpr_num(m: Reg) -> u32 {
|
||||
u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap()
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Default)]
|
||||
pub enum EmitVState {
|
||||
#[default]
|
||||
Unknown,
|
||||
Known(VState),
|
||||
}
|
||||
|
||||
/// State carried between emissions of a sequence of instructions.
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct EmitState {
|
||||
@@ -114,6 +121,9 @@ pub struct EmitState {
|
||||
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
|
||||
/// optimized away at compiletime. See [cranelift_control].
|
||||
ctrl_plane: ControlPlane,
|
||||
/// Vector State
|
||||
/// Controls the current state of the vector unit at the emission point.
|
||||
vstate: EmitVState,
|
||||
}
|
||||
|
||||
impl EmitState {
|
||||
@@ -141,6 +151,7 @@ impl MachInstEmitState<Inst> for EmitState {
|
||||
stack_map: None,
|
||||
cur_srcloc: RelSourceLoc::default(),
|
||||
ctrl_plane,
|
||||
vstate: EmitVState::Unknown,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,6 +170,11 @@ impl MachInstEmitState<Inst> for EmitState {
|
||||
fn take_ctrl_plane(self) -> ControlPlane {
|
||||
self.ctrl_plane
|
||||
}
|
||||
|
||||
fn on_new_block(&mut self) {
|
||||
// Reset the vector state.
|
||||
self.vstate = EmitVState::Unknown;
|
||||
}
|
||||
}
|
||||
|
||||
impl Inst {
|
||||
@@ -386,6 +402,80 @@ impl Inst {
|
||||
}
|
||||
insts
|
||||
}
|
||||
|
||||
/// Returns Some(VState) if this instruction is expecting a specific vector state
|
||||
/// before emission.
|
||||
fn expected_vstate(&self) -> Option<&VState> {
|
||||
match self {
|
||||
Inst::Nop0
|
||||
| Inst::Nop4
|
||||
| Inst::BrTable { .. }
|
||||
| Inst::Auipc { .. }
|
||||
| Inst::Lui { .. }
|
||||
| Inst::LoadConst32 { .. }
|
||||
| Inst::LoadConst64 { .. }
|
||||
| Inst::AluRRR { .. }
|
||||
| Inst::FpuRRR { .. }
|
||||
| Inst::AluRRImm12 { .. }
|
||||
| Inst::Load { .. }
|
||||
| Inst::Store { .. }
|
||||
| Inst::Args { .. }
|
||||
| Inst::Ret { .. }
|
||||
| Inst::Extend { .. }
|
||||
| Inst::AjustSp { .. }
|
||||
| Inst::Call { .. }
|
||||
| Inst::CallInd { .. }
|
||||
| Inst::TrapIf { .. }
|
||||
| Inst::Jal { .. }
|
||||
| Inst::CondBr { .. }
|
||||
| Inst::LoadExtName { .. }
|
||||
| Inst::LoadAddr { .. }
|
||||
| Inst::VirtualSPOffsetAdj { .. }
|
||||
| Inst::Mov { .. }
|
||||
| Inst::MovFromPReg { .. }
|
||||
| Inst::Fence { .. }
|
||||
| Inst::FenceI
|
||||
| Inst::ECall
|
||||
| Inst::EBreak
|
||||
| Inst::Udf { .. }
|
||||
| Inst::FpuRR { .. }
|
||||
| Inst::FpuRRRR { .. }
|
||||
| Inst::Jalr { .. }
|
||||
| Inst::Atomic { .. }
|
||||
| Inst::Select { .. }
|
||||
| Inst::AtomicCas { .. }
|
||||
| Inst::IntSelect { .. }
|
||||
| Inst::Csr { .. }
|
||||
| Inst::Icmp { .. }
|
||||
| Inst::SelectReg { .. }
|
||||
| Inst::FcvtToInt { .. }
|
||||
| Inst::RawData { .. }
|
||||
| Inst::AtomicStore { .. }
|
||||
| Inst::AtomicLoad { .. }
|
||||
| Inst::AtomicRmwLoop { .. }
|
||||
| Inst::TrapIfC { .. }
|
||||
| Inst::Unwind { .. }
|
||||
| Inst::DummyUse { .. }
|
||||
| Inst::FloatRound { .. }
|
||||
| Inst::FloatSelect { .. }
|
||||
| Inst::FloatSelectPseudo { .. }
|
||||
| Inst::Popcnt { .. }
|
||||
| Inst::Rev8 { .. }
|
||||
| Inst::Cltz { .. }
|
||||
| Inst::Brev8 { .. }
|
||||
| Inst::StackProbeLoop { .. } => None,
|
||||
// VecSetState does not expect any vstate, rather it updates it.
|
||||
Inst::VecSetState { .. } => None,
|
||||
|
||||
Inst::VecAluRRR { vstate, .. } |
|
||||
// TODO: Unit-stride loads and stores only need the AVL to be correct, not
|
||||
// the full vtype. A future optimization could be to decouple these two when
|
||||
// updating vstate. This would allow us to avoid emitting a VecSetState in
|
||||
// some cases.
|
||||
Inst::VecLoad { vstate, .. }
|
||||
| Inst::VecStore { vstate, .. } => Some(vstate),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MachInstEmit for Inst {
|
||||
@@ -400,6 +490,19 @@ impl MachInstEmit for Inst {
|
||||
state: &mut EmitState,
|
||||
) {
|
||||
let mut allocs = AllocationConsumer::new(allocs);
|
||||
|
||||
// Check if we need to update the vector state before emitting this instruction
|
||||
if let Some(expected) = self.expected_vstate() {
|
||||
if state.vstate != EmitVState::Known(expected.clone()) {
|
||||
// Update the vector state.
|
||||
Inst::VecSetState {
|
||||
rd: writable_zero_reg(),
|
||||
vstate: expected.clone(),
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
}
|
||||
}
|
||||
|
||||
// N.B.: we *must* not exceed the "worst-case size" used to compute
|
||||
// where to insert islands, except when islands are explicitly triggered
|
||||
// (with an `EmitIsland`). We check this in debug builds. This is `mut`
|
||||
@@ -530,13 +633,14 @@ impl MachInstEmit for Inst {
|
||||
(rs1, rs2)
|
||||
};
|
||||
|
||||
let x: u32 = alu_op.op_code()
|
||||
| reg_to_gpr_num(rd.to_reg()) << 7
|
||||
| (alu_op.funct3()) << 12
|
||||
| reg_to_gpr_num(rs1) << 15
|
||||
| reg_to_gpr_num(rs2) << 20
|
||||
| alu_op.funct7() << 25;
|
||||
sink.put4(x);
|
||||
sink.put4(encode_r_type(
|
||||
alu_op.op_code(),
|
||||
rd.to_reg(),
|
||||
alu_op.funct3(),
|
||||
rs1,
|
||||
rs2,
|
||||
alu_op.funct7(),
|
||||
));
|
||||
}
|
||||
&Inst::AluRRImm12 {
|
||||
alu_op,
|
||||
@@ -2695,6 +2799,120 @@ impl MachInstEmit for Inst {
|
||||
.emit(&[], sink, emit_info, state);
|
||||
sink.bind_label(label_done, &mut state.ctrl_plane);
|
||||
}
|
||||
&Inst::VecAluRRR {
|
||||
op, vd, vs1, vs2, ..
|
||||
} => {
|
||||
let vs1 = allocs.next(vs1);
|
||||
let vs2 = allocs.next(vs2);
|
||||
let vd = allocs.next_writable(vd);
|
||||
|
||||
// This is the mask bit, we don't yet implement masking, so set it to 1, which means
|
||||
// masking disabled.
|
||||
let vm = 1;
|
||||
|
||||
sink.put4(encode_valu(
|
||||
op.opcode(),
|
||||
vd.to_reg(),
|
||||
op.funct3(),
|
||||
vs1,
|
||||
vs2,
|
||||
vm,
|
||||
op.funct6(),
|
||||
));
|
||||
}
|
||||
&Inst::VecSetState { rd, ref vstate } => {
|
||||
let rd = allocs.next_writable(rd);
|
||||
|
||||
sink.put4(encode_vcfg_imm(
|
||||
0x57,
|
||||
rd.to_reg(),
|
||||
vstate.avl.unwrap_static(),
|
||||
&vstate.vtype,
|
||||
));
|
||||
|
||||
// Update the current vector emit state.
|
||||
state.vstate = EmitVState::Known(vstate.clone());
|
||||
}
|
||||
|
||||
&Inst::VecLoad {
|
||||
eew,
|
||||
to,
|
||||
ref from,
|
||||
flags,
|
||||
..
|
||||
} => {
|
||||
let offset = from.get_offset_with_state(state);
|
||||
let from_reg = allocs.next(from.get_base_register());
|
||||
let to = allocs.next_writable(to);
|
||||
|
||||
// Vector Loads don't support immediate offsets, so we need to load it into a register.
|
||||
let addr = writable_spilltmp_reg();
|
||||
LoadConstant::U64(offset as u64)
|
||||
.load_constant_and_add(addr, from_reg)
|
||||
.into_iter()
|
||||
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
if !srcloc.is_default() && !flags.notrap() {
|
||||
// Register the offset at which the actual load instruction starts.
|
||||
sink.add_trap(TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
// This is the mask bit, we don't yet implement masking, so set it to 1, which means
|
||||
// masking disabled.
|
||||
let vm = 1;
|
||||
|
||||
sink.put4(encode_vmem_load(
|
||||
0x07,
|
||||
to.to_reg(),
|
||||
eew,
|
||||
addr.to_reg(),
|
||||
from.lumop(),
|
||||
vm,
|
||||
from.mop(),
|
||||
from.nf(),
|
||||
));
|
||||
}
|
||||
|
||||
&Inst::VecStore {
|
||||
eew,
|
||||
ref to,
|
||||
from,
|
||||
flags,
|
||||
..
|
||||
} => {
|
||||
let offset = to.get_offset_with_state(state);
|
||||
let to_reg = allocs.next(to.get_base_register());
|
||||
let from = allocs.next(from);
|
||||
|
||||
// Vector Stores don't support immediate offsets, so we need to load it into a register.
|
||||
let addr = writable_spilltmp_reg();
|
||||
LoadConstant::U64(offset as u64)
|
||||
.load_constant_and_add(addr, to_reg)
|
||||
.into_iter()
|
||||
.for_each(|inst| inst.emit(&[], sink, emit_info, state));
|
||||
|
||||
let srcloc = state.cur_srcloc();
|
||||
if !srcloc.is_default() && !flags.notrap() {
|
||||
// Register the offset at which the actual store instruction starts.
|
||||
sink.add_trap(TrapCode::HeapOutOfBounds);
|
||||
}
|
||||
|
||||
// This is the mask bit, we don't yet implement masking, so set it to 1, which means
|
||||
// masking disabled.
|
||||
let vm = 1;
|
||||
|
||||
sink.put4(encode_vmem_store(
|
||||
0x27,
|
||||
from,
|
||||
eew,
|
||||
addr.to_reg(),
|
||||
to.sumop(),
|
||||
vm,
|
||||
to.mop(),
|
||||
to.nf(),
|
||||
));
|
||||
}
|
||||
};
|
||||
let end_off = sink.cur_offset();
|
||||
assert!(
|
||||
|
||||
128
cranelift/codegen/src/isa/riscv64/inst/encode.rs
Normal file
128
cranelift/codegen/src/isa/riscv64/inst/encode.rs
Normal file
@@ -0,0 +1,128 @@
|
||||
//! Contains the RISC-V instruction encoding logic.
|
||||
//!
|
||||
//! These formats are specified in the RISC-V specification in section 2.2.
|
||||
//! See: https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf
|
||||
//!
|
||||
//! Some instructions especially in extensions have slight variations from
|
||||
//! the base RISC-V specification.
|
||||
|
||||
use super::{UImm5, VType};
|
||||
use crate::isa::riscv64::inst::reg_to_gpr_num;
|
||||
use crate::isa::riscv64::lower::isle::generated_code::VecElementWidth;
|
||||
use crate::Reg;
|
||||
|
||||
/// Encode an R-type instruction.
|
||||
///
|
||||
/// Layout:
|
||||
/// 0-------6-7-------11-12------14-15------19-20------24-25-------31
|
||||
/// | Opcode | rd | funct3 | rs1 | rs2 | funct7 |
|
||||
pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, funct7: u32) -> u32 {
|
||||
let mut bits = 0;
|
||||
bits |= opcode & 0b1111111;
|
||||
bits |= reg_to_gpr_num(rd) << 7;
|
||||
bits |= (funct3 & 0b111) << 12;
|
||||
bits |= reg_to_gpr_num(rs1) << 15;
|
||||
bits |= reg_to_gpr_num(rs2) << 20;
|
||||
bits |= (funct7 & 0b1111111) << 25;
|
||||
bits
|
||||
}
|
||||
|
||||
/// Encodes a Vector ALU instruction.
|
||||
///
|
||||
/// Fields:
|
||||
/// - opcode (7 bits)
|
||||
/// - vd (5 bits)
|
||||
/// - funct3 (3 bits)
|
||||
/// - vs1 (5 bits)
|
||||
/// - vs2 (5 bits)
|
||||
/// - vm (1 bit)
|
||||
/// - funct6 (6 bits)
|
||||
///
|
||||
/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
|
||||
pub fn encode_valu(
|
||||
opcode: u32,
|
||||
vd: Reg,
|
||||
funct3: u32,
|
||||
vs1: Reg,
|
||||
vs2: Reg,
|
||||
vm: u32,
|
||||
funct6: u32,
|
||||
) -> u32 {
|
||||
let funct6 = funct6 & 0b111111;
|
||||
let vm = vm & 0b1;
|
||||
let funct7 = (funct6 << 6) | vm;
|
||||
encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
|
||||
}
|
||||
|
||||
/// Encodes a Vector CFG Imm instruction.
|
||||
///
|
||||
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc
|
||||
// TODO: Check if this is any of the known instruction types in the spec.
|
||||
pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 {
|
||||
let mut bits = 0;
|
||||
bits |= opcode & 0b1111111;
|
||||
bits |= reg_to_gpr_num(rd) << 7;
|
||||
bits |= 0b111 << 12;
|
||||
bits |= (imm.bits() & 0b11111) << 15;
|
||||
bits |= (vtype.encode() & 0b1111111111) << 20;
|
||||
bits |= 0b11 << 30;
|
||||
bits
|
||||
}
|
||||
|
||||
/// Encodes a Vector Mem Unit Stride Load instruction.
|
||||
///
|
||||
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc
|
||||
/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP
|
||||
pub fn encode_vmem_load(
|
||||
opcode: u32,
|
||||
vd: Reg,
|
||||
width: VecElementWidth,
|
||||
rs1: Reg,
|
||||
lumop: u32,
|
||||
vm: u32,
|
||||
mop: u32,
|
||||
nf: u32,
|
||||
) -> u32 {
|
||||
// Width is encoded differently to avoid a clash with the FP load/store sizes.
|
||||
let width = match width {
|
||||
VecElementWidth::E8 => 0b000,
|
||||
VecElementWidth::E16 => 0b101,
|
||||
VecElementWidth::E32 => 0b110,
|
||||
VecElementWidth::E64 => 0b111,
|
||||
};
|
||||
|
||||
let mut bits = 0;
|
||||
bits |= opcode & 0b1111111;
|
||||
bits |= reg_to_gpr_num(vd) << 7;
|
||||
bits |= width << 12;
|
||||
bits |= reg_to_gpr_num(rs1) << 15;
|
||||
bits |= (lumop & 0b11111) << 20;
|
||||
bits |= (vm & 0b1) << 25;
|
||||
bits |= (mop & 0b11) << 26;
|
||||
|
||||
// The mew bit (inst[28]) when set is expected to be used to encode expanded
|
||||
// memory sizes of 128 bits and above, but these encodings are currently reserved.
|
||||
bits |= 0b0 << 28;
|
||||
|
||||
bits |= (nf & 0b111) << 29;
|
||||
bits
|
||||
}
|
||||
|
||||
/// Encodes a Vector Mem Unit Stride Store instruction.
///
/// See: https://github.com/riscv/riscv-v-spec/blob/master/vmem-format.adoc
/// TODO: These instructions share opcode space with LOAD-FP and STORE-FP
pub fn encode_vmem_store(
    opcode: u32,
    vs3: Reg,
    width: VecElementWidth,
    rs1: Reg,
    sumop: u32,
    vm: u32,
    mop: u32,
    nf: u32,
) -> u32 {
    // This is pretty much the same as the load instruction, just
    // with different names on the fields.
    encode_vmem_load(opcode, vs3, width, rs1, sumop, vm, mop, nf)
}
|
||||
@@ -99,33 +99,31 @@ impl Display for Imm20 {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Uimm5 {
|
||||
bits: u8,
|
||||
/// An unsigned 5-bit immediate.
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub struct UImm5 {
|
||||
value: u8,
|
||||
}
|
||||
|
||||
impl Uimm5 {
|
||||
pub fn from_bits(bits: u8) -> Self {
|
||||
Self { bits }
|
||||
impl UImm5 {
|
||||
/// Create an unsigned 5-bit immediate from u8.
|
||||
pub fn maybe_from_u8(value: u8) -> Option<UImm5> {
|
||||
if value < 32 {
|
||||
Some(UImm5 { value })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
/// Create a zero immediate of this format.
|
||||
pub fn zero() -> Self {
|
||||
Self { bits: 0 }
|
||||
}
|
||||
pub fn as_u32(&self) -> u32 {
|
||||
(self.bits as u32) & 0b1_1111
|
||||
|
||||
/// Bits for encoding.
|
||||
pub fn bits(&self) -> u32 {
|
||||
u32::from(self.value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Uimm5 {
|
||||
impl Display for UImm5 {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||
write!(f, "{}", self.bits)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Uimm5 {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||
write!(f, "{}", self.bits)
|
||||
write!(f, "{}", self.value)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,9 +4,10 @@
|
||||
#![allow(dead_code)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
use super::lower::isle::generated_code::{VecAMode, VecElementWidth};
|
||||
use crate::binemit::{Addend, CodeOffset, Reloc};
|
||||
pub use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::types::{F32, F64, I128, I16, I32, I64, I8, R32, R64};
|
||||
use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, R32, R64};
|
||||
|
||||
pub use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel};
|
||||
use crate::isa::CallConv;
|
||||
@@ -29,6 +30,10 @@ pub mod args;
|
||||
pub use self::args::*;
|
||||
pub mod emit;
|
||||
pub use self::emit::*;
|
||||
pub mod vector;
|
||||
pub use self::vector::*;
|
||||
pub mod encode;
|
||||
pub use self::encode::*;
|
||||
pub mod unwind;
|
||||
|
||||
use crate::isa::riscv64::abi::Riscv64MachineDeps;
|
||||
@@ -41,7 +46,7 @@ use std::fmt::{Display, Formatter};
|
||||
pub(crate) type OptionReg = Option<Reg>;
|
||||
pub(crate) type OptionImm12 = Option<Imm12>;
|
||||
pub(crate) type VecBranchTarget = Vec<BranchTarget>;
|
||||
pub(crate) type OptionUimm5 = Option<Uimm5>;
|
||||
pub(crate) type OptionUimm5 = Option<UImm5>;
|
||||
pub(crate) type OptionFloatRoundingMode = Option<FRM>;
|
||||
pub(crate) type VecU8 = Vec<u8>;
|
||||
pub(crate) type VecWritableReg = Vec<Writable<Reg>>;
|
||||
@@ -313,21 +318,41 @@ impl Inst {
|
||||
|
||||
/// Generic constructor for a load (zero-extending where appropriate).
|
||||
pub fn gen_load(into_reg: Writable<Reg>, mem: AMode, ty: Type, flags: MemFlags) -> Inst {
|
||||
Inst::Load {
|
||||
rd: into_reg,
|
||||
op: LoadOP::from_type(ty),
|
||||
from: mem,
|
||||
flags,
|
||||
if ty.is_vector() {
|
||||
Inst::VecLoad {
|
||||
eew: VecElementWidth::from_type(ty),
|
||||
to: into_reg,
|
||||
from: VecAMode::UnitStride { base: mem },
|
||||
flags,
|
||||
vstate: VState::from_type(ty),
|
||||
}
|
||||
} else {
|
||||
Inst::Load {
|
||||
rd: into_reg,
|
||||
op: LoadOP::from_type(ty),
|
||||
from: mem,
|
||||
flags,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic constructor for a store.
|
||||
pub fn gen_store(mem: AMode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst {
|
||||
Inst::Store {
|
||||
src: from_reg,
|
||||
op: StoreOP::from_type(ty),
|
||||
to: mem,
|
||||
flags,
|
||||
if ty.is_vector() {
|
||||
Inst::VecStore {
|
||||
eew: VecElementWidth::from_type(ty),
|
||||
to: VecAMode::UnitStride { base: mem },
|
||||
from: from_reg,
|
||||
flags,
|
||||
vstate: VState::from_type(ty),
|
||||
}
|
||||
} else {
|
||||
Inst::Store {
|
||||
src: from_reg,
|
||||
op: StoreOP::from_type(ty),
|
||||
to: mem,
|
||||
flags,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -623,6 +648,22 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
||||
// gen_prologue is called at emit stage.
|
||||
// no need let reg alloc know.
|
||||
}
|
||||
&Inst::VecAluRRR { vd, vs1, vs2, .. } => {
|
||||
collector.reg_use(vs1);
|
||||
collector.reg_use(vs2);
|
||||
collector.reg_def(vd);
|
||||
}
|
||||
&Inst::VecSetState { rd, .. } => {
|
||||
collector.reg_def(rd);
|
||||
}
|
||||
&Inst::VecLoad { to, ref from, .. } => {
|
||||
collector.reg_use(from.get_base_register());
|
||||
collector.reg_def(to);
|
||||
}
|
||||
&Inst::VecStore { ref to, from, .. } => {
|
||||
collector.reg_use(to.get_base_register());
|
||||
collector.reg_use(from);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -727,6 +768,7 @@ impl MachInst for Inst {
|
||||
F32 => Ok((&[RegClass::Float], &[F32])),
|
||||
F64 => Ok((&[RegClass::Float], &[F64])),
|
||||
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
|
||||
_ if ty.is_vector() && ty.bits() == 128 => Ok((&[RegClass::Float], &[types::I8X16])),
|
||||
_ => Err(CodegenError::Unsupported(format!(
|
||||
"Unexpected SSA-value type: {}",
|
||||
ty
|
||||
@@ -784,6 +826,17 @@ pub fn reg_name(reg: Reg) -> String {
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn vec_reg_name(reg: Reg) -> String {
|
||||
match reg.to_real_reg() {
|
||||
Some(real) => {
|
||||
assert_eq!(real.class(), RegClass::Float);
|
||||
format!("v{}", real.hw_enc())
|
||||
}
|
||||
None => {
|
||||
format!("{:?}", reg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Inst {
|
||||
fn print_with_state(
|
||||
@@ -795,6 +848,16 @@ impl Inst {
|
||||
let reg = allocs.next(reg);
|
||||
reg_name(reg)
|
||||
};
|
||||
let format_vec_reg = |reg: Reg, allocs: &mut AllocationConsumer<'_>| -> String {
|
||||
let reg = allocs.next(reg);
|
||||
vec_reg_name(reg)
|
||||
};
|
||||
|
||||
let format_vec_amode = |amode: &VecAMode, allocs: &mut AllocationConsumer<'_>| -> String {
|
||||
match amode {
|
||||
VecAMode::UnitStride { base } => base.to_string_with_alloc(allocs),
|
||||
}
|
||||
};
|
||||
|
||||
let format_regs = |regs: &[Reg], allocs: &mut AllocationConsumer<'_>| -> String {
|
||||
let mut x = if regs.len() > 1 {
|
||||
@@ -839,6 +902,7 @@ impl Inst {
|
||||
"".into()
|
||||
}
|
||||
}
|
||||
|
||||
match self {
|
||||
&Inst::Nop0 => {
|
||||
format!("##zero length nop")
|
||||
@@ -1501,6 +1565,48 @@ impl Inst {
|
||||
&MInst::Udf { trap_code } => format!("udf##trap_code={}", trap_code),
|
||||
&MInst::EBreak {} => String::from("ebreak"),
|
||||
&MInst::ECall {} => String::from("ecall"),
|
||||
&Inst::VecAluRRR {
|
||||
op,
|
||||
vd,
|
||||
vs1,
|
||||
vs2,
|
||||
ref vstate,
|
||||
} => {
|
||||
let vs1_s = format_vec_reg(vs1, allocs);
|
||||
let vs2_s = format_vec_reg(vs2, allocs);
|
||||
let vd_s = format_vec_reg(vd.to_reg(), allocs);
|
||||
|
||||
// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
|
||||
// This is noted in Section 10.1 of the RISC-V Vector spec.
|
||||
format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate)
|
||||
}
|
||||
&Inst::VecSetState { rd, ref vstate } => {
|
||||
let rd_s = format_reg(rd.to_reg(), allocs);
|
||||
assert!(vstate.avl.is_static());
|
||||
format!("vsetivli {}, {}, {}", rd_s, vstate.avl, vstate.vtype)
|
||||
}
|
||||
Inst::VecLoad {
|
||||
eew,
|
||||
to,
|
||||
from,
|
||||
ref vstate,
|
||||
..
|
||||
} => {
|
||||
let base = format_vec_amode(from, allocs);
|
||||
let vd = format_vec_reg(to.to_reg(), allocs);
|
||||
format!("vl{}.v {},{} {}", eew, vd, base, vstate)
|
||||
}
|
||||
Inst::VecStore {
|
||||
eew,
|
||||
to,
|
||||
from,
|
||||
ref vstate,
|
||||
..
|
||||
} => {
|
||||
let dst = format_vec_amode(to, allocs);
|
||||
let vs3 = format_vec_reg(*from, allocs);
|
||||
format!("vs{}.v {},{} {}", eew, vs3, dst, vstate)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -218,3 +218,13 @@ pub(crate) fn x_reg_range(start: usize, end: usize) -> Vec<Writable<Reg>> {
|
||||
}
|
||||
regs
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn v_reg(enc: usize) -> Reg {
|
||||
let p_reg = PReg::new(enc, RegClass::Float);
|
||||
let v_reg = VReg::new(p_reg.index(), p_reg.class());
|
||||
Reg::from(v_reg)
|
||||
}
|
||||
pub fn vx_reg(enc: usize) -> PReg {
|
||||
PReg::new(enc, RegClass::Float)
|
||||
}
|
||||
|
||||
289
cranelift/codegen/src/isa/riscv64/inst/vector.rs
Normal file
289
cranelift/codegen/src/isa/riscv64/inst/vector.rs
Normal file
@@ -0,0 +1,289 @@
|
||||
use crate::isa::riscv64::inst::EmitState;
|
||||
use crate::isa::riscv64::lower::isle::generated_code::{
|
||||
VecAMode, VecAluOpRRR, VecAvl, VecElementWidth, VecLmul, VecMaskMode, VecTailMode,
|
||||
};
|
||||
use crate::Reg;
|
||||
use core::fmt;
|
||||
|
||||
use super::{Type, UImm5};
|
||||
|
||||
impl VecAvl {
|
||||
pub fn _static(size: u32) -> Self {
|
||||
VecAvl::Static {
|
||||
size: UImm5::maybe_from_u8(size as u8).expect("Invalid size for AVL"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_static(&self) -> bool {
|
||||
match self {
|
||||
VecAvl::Static { .. } => true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unwrap_static(&self) -> UImm5 {
|
||||
match self {
|
||||
VecAvl::Static { size } => *size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Can we tell ISLE to derive this?
|
||||
impl PartialEq for VecAvl {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
match (self, other) {
|
||||
(VecAvl::Static { size: lhs }, VecAvl::Static { size: rhs }) => lhs == rhs,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecAvl {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
VecAvl::Static { size } => write!(f, "{}", size),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VecElementWidth {
|
||||
pub fn from_type(ty: Type) -> Self {
|
||||
Self::from_bits(ty.lane_bits())
|
||||
}
|
||||
|
||||
pub fn from_bits(bits: u32) -> Self {
|
||||
match bits {
|
||||
8 => VecElementWidth::E8,
|
||||
16 => VecElementWidth::E16,
|
||||
32 => VecElementWidth::E32,
|
||||
64 => VecElementWidth::E64,
|
||||
_ => panic!("Invalid number of bits for VecElementWidth: {}", bits),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bits(&self) -> u32 {
|
||||
match self {
|
||||
VecElementWidth::E8 => 8,
|
||||
VecElementWidth::E16 => 16,
|
||||
VecElementWidth::E32 => 32,
|
||||
VecElementWidth::E64 => 64,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode(&self) -> u32 {
|
||||
match self {
|
||||
VecElementWidth::E8 => 0b000,
|
||||
VecElementWidth::E16 => 0b001,
|
||||
VecElementWidth::E32 => 0b010,
|
||||
VecElementWidth::E64 => 0b011,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecElementWidth {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "e{}", self.bits())
|
||||
}
|
||||
}
|
||||
|
||||
impl VecLmul {
|
||||
pub fn encode(&self) -> u32 {
|
||||
match self {
|
||||
VecLmul::LmulF8 => 0b101,
|
||||
VecLmul::LmulF4 => 0b110,
|
||||
VecLmul::LmulF2 => 0b111,
|
||||
VecLmul::Lmul1 => 0b000,
|
||||
VecLmul::Lmul2 => 0b001,
|
||||
VecLmul::Lmul4 => 0b010,
|
||||
VecLmul::Lmul8 => 0b011,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecLmul {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
VecLmul::LmulF8 => write!(f, "mf8"),
|
||||
VecLmul::LmulF4 => write!(f, "mf4"),
|
||||
VecLmul::LmulF2 => write!(f, "mf2"),
|
||||
VecLmul::Lmul1 => write!(f, "m1"),
|
||||
VecLmul::Lmul2 => write!(f, "m2"),
|
||||
VecLmul::Lmul4 => write!(f, "m4"),
|
||||
VecLmul::Lmul8 => write!(f, "m8"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VecTailMode {
|
||||
pub fn encode(&self) -> u32 {
|
||||
match self {
|
||||
VecTailMode::Agnostic => 1,
|
||||
VecTailMode::Undisturbed => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecTailMode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
VecTailMode::Agnostic => write!(f, "ta"),
|
||||
VecTailMode::Undisturbed => write!(f, "tu"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VecMaskMode {
|
||||
pub fn encode(&self) -> u32 {
|
||||
match self {
|
||||
VecMaskMode::Agnostic => 1,
|
||||
VecMaskMode::Undisturbed => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecMaskMode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
VecMaskMode::Agnostic => write!(f, "ma"),
|
||||
VecMaskMode::Undisturbed => write!(f, "mu"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Vector Type (VType)
|
||||
///
|
||||
/// vtype provides the default type used to interpret the contents of the vector register file.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct VType {
|
||||
pub sew: VecElementWidth,
|
||||
pub lmul: VecLmul,
|
||||
pub tail_mode: VecTailMode,
|
||||
pub mask_mode: VecMaskMode,
|
||||
}
|
||||
|
||||
impl VType {
|
||||
// https://github.com/riscv/riscv-v-spec/blob/master/vtype-format.adoc
|
||||
pub fn encode(&self) -> u32 {
|
||||
let mut bits = 0;
|
||||
bits |= self.lmul.encode();
|
||||
bits |= self.sew.encode() << 3;
|
||||
bits |= self.tail_mode.encode() << 6;
|
||||
bits |= self.mask_mode.encode() << 7;
|
||||
bits
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VType {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}, {}, {}, {}",
|
||||
self.sew, self.lmul, self.tail_mode, self.mask_mode
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Vector State (VState)
|
||||
///
|
||||
/// VState represents the state of the vector unit that each instruction expects before execution.
|
||||
/// Unlike VType or any of the other types here, VState is not a part of the RISC-V ISA. It is
|
||||
/// used by our instruction emission code to ensure that the vector unit is in the correct state.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct VState {
|
||||
pub avl: VecAvl,
|
||||
pub vtype: VType,
|
||||
}
|
||||
|
||||
impl VState {
    /// Derives the vector state needed to operate on values of type `ty`:
    /// AVL = the type's lane count and SEW = the lane width, with LMUL fixed
    /// at 1 and tail/mask handling left agnostic (presumably because nothing
    /// here reads tail or masked-off elements — confirm if masking is added).
    pub fn from_type(ty: Type) -> Self {
        VState {
            avl: VecAvl::_static(ty.lane_count()),
            vtype: VType {
                sew: VecElementWidth::from_type(ty),
                lmul: VecLmul::Lmul1,
                tail_mode: VecTailMode::Agnostic,
                mask_mode: VecMaskMode::Agnostic,
            },
        }
    }
}
|
||||
|
||||
impl fmt::Display for VState {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "#avl={}, #vtype=({})", self.avl, self.vtype)
|
||||
}
|
||||
}
|
||||
|
||||
impl VecAluOpRRR {
|
||||
pub fn opcode(&self) -> u32 {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => 0x57,
|
||||
}
|
||||
}
|
||||
pub fn funct3(&self) -> u32 {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => 0b000,
|
||||
}
|
||||
}
|
||||
pub fn funct6(&self) -> u32 {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => 0b000000,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for VecAluOpRRR {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
VecAluOpRRR::Vadd => write!(f, "vadd.vv"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VecAMode {
|
||||
pub fn get_base_register(&self) -> Reg {
|
||||
match self {
|
||||
VecAMode::UnitStride { base, .. } => base.get_base_register(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 {
|
||||
match self {
|
||||
VecAMode::UnitStride { base, .. } => base.get_offset_with_state(state),
|
||||
}
|
||||
}
|
||||
|
||||
/// `mop` field, described in Table 7 of Section 7.2. Vector Load/Store Addressing Modes
|
||||
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
|
||||
pub fn mop(&self) -> u32 {
|
||||
match self {
|
||||
VecAMode::UnitStride { .. } => 0b00,
|
||||
}
|
||||
}
|
||||
|
||||
/// `lumop` field, described in Table 9 of Section 7.2. Vector Load/Store Addressing Modes
|
||||
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
|
||||
pub fn lumop(&self) -> u32 {
|
||||
match self {
|
||||
VecAMode::UnitStride { .. } => 0b00000,
|
||||
}
|
||||
}
|
||||
|
||||
/// `sumop` field, described in Table 10 of Section 7.2. Vector Load/Store Addressing Modes
|
||||
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
|
||||
pub fn sumop(&self) -> u32 {
|
||||
match self {
|
||||
VecAMode::UnitStride { .. } => 0b00000,
|
||||
}
|
||||
}
|
||||
|
||||
/// The `nf[2:0]` field encodes the number of fields in each segment. For regular vector loads and
|
||||
/// stores, nf=0, indicating that a single value is moved between a vector register group and memory
|
||||
/// at each element position. Larger values in the nf field are used to access multiple contiguous
|
||||
/// fields within a segment as described in Section 7.8 Vector Load/Store Segment Instructions.
|
||||
///
|
||||
/// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#72-vector-loadstore-addressing-modes
|
||||
pub fn nf(&self) -> u32 {
|
||||
match self {
|
||||
VecAMode::UnitStride { .. } => 0b000,
|
||||
}
|
||||
}
|
||||
}
|
||||
132
cranelift/codegen/src/isa/riscv64/inst_vector.isle
Normal file
132
cranelift/codegen/src/isa/riscv64/inst_vector.isle
Normal file
@@ -0,0 +1,132 @@
|
||||
;; Represents the possible widths of an element when used in an operation.
(type VecElementWidth (enum
  (E8)
  (E16)
  (E32)
  (E64)
))

;; Vector Register Group Multiplier (LMUL)
;;
;; The LMUL setting specifies how we should group registers together. LMUL can
;; also be a fractional value, reducing the number of bits used in a single
;; vector register. Fractional LMUL is used to increase the number of effective
;; usable vector register groups when operating on mixed-width values.
(type VecLmul (enum
  (LmulF8)
  (LmulF4)
  (LmulF2)
  (Lmul1)
  (Lmul2)
  (Lmul4)
  (Lmul8)
))

;; Tail Mode
;;
;; The tail mode specifies how the tail elements of a vector register are handled.
(type VecTailMode (enum
  ;; Tail Agnostic means that the tail elements are left in an undefined state.
  (Agnostic)
  ;; Tail Undisturbed means that the tail elements are left in their original values.
  (Undisturbed)
))

;; Mask Mode
;;
;; The mask mode specifies how the masked elements of a vector register are handled.
(type VecMaskMode (enum
  ;; Mask Agnostic means that the masked out elements are left in an undefined state.
  (Agnostic)
  ;; Mask Undisturbed means that the masked out elements are left in their original values.
  (Undisturbed)
))

;; Application Vector Length (AVL)
;;
;; This setting specifies the number of elements that are going to be processed
;; in a single instruction. Note: We may end up processing fewer elements than
;; the AVL setting, if they don't fit in a single register.
(type VecAvl (enum
  ;; Static AVL emits a `vsetivli` that uses a constant value
  (Static (size UImm5))
  ;; TODO: Add a dynamic, register based AVL mode when we are able to properly test it
))

;; Opaque Rust-side types; see `VType` and `VState` in the backend's `inst` module.
(type VType (primitive VType))
(type VState (primitive VState))

;; Register to Register ALU Ops
(type VecAluOpRRR (enum
  (Vadd)
))
|
||||
|
||||
|
||||
|
||||
;; Vector Addressing Mode
(type VecAMode (enum
  ;; Vector unit-stride operations access elements stored contiguously in memory
  ;; starting from the base effective address.
  (UnitStride
    (base AMode))
  ;; TODO: Constant Stride
  ;; TODO: Indexed Operations
))

;; Builds a static VState matching a SIMD type.
;; The VState is guaranteed to be static with AVL set to the number of lanes.
;; Element size is set to the size of the type.
;; LMUL is set to 1.
;; Tail mode is set to agnostic.
;; Mask mode is set to agnostic.
(decl pure vstate_from_type (Type) VState)
(extern constructor vstate_from_type vstate_from_type)
(convert Type VState vstate_from_type)
|
||||
|
||||
;; Extracts an element width from a SIMD type, by matching on its lane type.
;; The four rules are mutually exclusive via the `if-let` lane-type guards.
(decl pure element_width_from_type (Type) VecElementWidth)
(rule (element_width_from_type ty)
  (if-let $I8 (lane_type ty))
  (VecElementWidth.E8))
(rule (element_width_from_type ty)
  (if-let $I16 (lane_type ty))
  (VecElementWidth.E16))
(rule (element_width_from_type ty)
  (if-let $I32 (lane_type ty))
  (VecElementWidth.E32))
(rule (element_width_from_type ty)
  (if-let $I64 (lane_type ty))
  (VecElementWidth.E64))
|
||||
|
||||
;;;; Instruction Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; As noted in the RISC-V Vector Extension Specification, rs2 is the first
;; source register and rs1 is the second source register. This is the opposite
;; of the usual RISC-V register order.
;; See Section 10.1 of the RISC-V Vector Extension Specification.

;; Helper for emitting `MInst.VecAluRRR` instructions.
;; NOTE(review): the helper takes (vs2, vs1) but passes them positionally to
;; `MInst.VecAluRRR`, whose fields are declared (vd, vs1, vs2) -- confirm this
;; swap is intentional and matches the rs2-first convention noted above.
(decl vec_alu_rrr (VecAluOpRRR Reg Reg VState) Reg)
(rule (vec_alu_rrr op vs2 vs1 vstate)
  (let ((vd WritableReg (temp_writable_reg $I8X16))
        (_ Unit (emit (MInst.VecAluRRR op vd vs2 vs1 vstate))))
    vd))

;; Helper for emitting `MInst.VecLoad` instructions.
(decl vec_load (VecElementWidth VecAMode MemFlags VState) Reg)
(rule (vec_load eew from flags vstate)
  (let ((vd WritableReg (temp_writable_reg $I8X16))
        (_ Unit (emit (MInst.VecLoad eew vd from flags vstate))))
    vd))

;; Helper for emitting `MInst.VecStore` instructions.
(decl vec_store (VecElementWidth VecAMode Reg MemFlags VState) InstOutput)
(rule (vec_store eew to from flags vstate)
  (side_effect
    (SideEffectNoResult.Inst (MInst.VecStore eew to from flags vstate))))

;; Helper for emitting the `vadd.vv` instruction.
(decl rv_vadd_vv (Reg Reg VState) Reg)
(rule (rv_vadd_vv vs2 vs1 vstate)
  (vec_alu_rrr (VecAluOpRRR.Vadd) vs2 vs1 vstate))
|
||||
@@ -53,7 +53,7 @@
|
||||
(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add))
|
||||
(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add))
|
||||
(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add))
|
||||
|
||||
|
||||
(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n))))))
|
||||
(if-let $true (has_zba))
|
||||
(if-let shnadd (match_shnadd n))
|
||||
@@ -75,7 +75,7 @@
|
||||
(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw))
|
||||
(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw))
|
||||
(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw))
|
||||
|
||||
|
||||
(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n))))))
|
||||
(if-let $true (has_zba))
|
||||
(if-let shnadd_uw (match_shnadd_uw n))
|
||||
@@ -97,6 +97,11 @@
|
||||
(high Reg (rv_add high_tmp carry)))
|
||||
(value_regs low high)))
|
||||
|
||||
;; SIMD Vectors
|
||||
(rule 8 (lower (has_type (ty_vec128_int ty) (iadd x y)))
|
||||
(if-let $true (has_v))
|
||||
(rv_vadd_vv x y ty))
|
||||
|
||||
;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
|
||||
(rule
|
||||
(lower (has_type (fits_in_64 ty) (uadd_overflow_trap x y tc)))
|
||||
@@ -374,7 +379,7 @@
|
||||
|
||||
(rule 1 (lower (has_type $I128 (clz x)))
|
||||
(lower_clz_i128 x))
|
||||
|
||||
|
||||
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (cls x)))
|
||||
(lower_cls ty x))
|
||||
@@ -809,6 +814,12 @@
|
||||
(lower (has_type $I128 (load flags p @ (value_type (ty_addr64 _)) offset)))
|
||||
(gen_load_128 p offset flags))
|
||||
|
||||
(rule 2
|
||||
(lower (has_type (ty_vec128_int ty) (load flags p @ (value_type (ty_addr64 _)) offset)))
|
||||
(if-let $true (has_v))
|
||||
(let ((eew VecElementWidth (element_width_from_type ty)))
|
||||
(vec_load eew (VecAMode.UnitStride (gen_amode p offset $I64)) flags ty)))
|
||||
|
||||
;;;;; Rules for `istore8`;;;;;;;;;
|
||||
(rule
|
||||
(lower (istore8 flags x p @ (value_type (ty_addr64 _)) offset))
|
||||
@@ -833,6 +844,12 @@
|
||||
(lower (store flags x @ (value_type $I128 ) p @ (value_type (ty_addr64 _)) offset))
|
||||
(gen_store_128 p offset flags x))
|
||||
|
||||
(rule 2
|
||||
(lower (store flags x @ (value_type (ty_vec128_int ty)) p @ (value_type (ty_addr64 _)) offset))
|
||||
(if-let $true (has_v))
|
||||
(let ((eew VecElementWidth (element_width_from_type ty)))
|
||||
(vec_store eew (VecAMode.UnitStride (gen_amode p offset $I64)) x flags ty)))
|
||||
|
||||
(decl gen_icmp (IntCC ValueRegs ValueRegs Type) Reg)
|
||||
(rule
|
||||
(gen_icmp cc x y ty)
|
||||
|
||||
@@ -283,6 +283,10 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||
ValueRegs::two(shamt, len_sub_shamt)
|
||||
}
|
||||
|
||||
fn has_v(&mut self) -> bool {
|
||||
self.backend.isa_flags.has_v()
|
||||
}
|
||||
|
||||
fn has_zbkb(&mut self) -> bool {
|
||||
self.backend.isa_flags.has_zbkb()
|
||||
}
|
||||
@@ -428,6 +432,11 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||
rs2,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn vstate_from_type(&mut self, ty: Type) -> VState {
|
||||
VState::from_type(ty)
|
||||
}
|
||||
}
|
||||
|
||||
impl IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||
|
||||
@@ -288,6 +288,9 @@ pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {
|
||||
/// Used to continue using a control plane after the emission state is
|
||||
/// not needed anymore.
|
||||
fn take_ctrl_plane(self) -> ControlPlane;
|
||||
/// A hook that triggers when first emitting a new block.
|
||||
/// It is guaranteed to be called before any instructions are emitted.
|
||||
fn on_new_block(&mut self) {}
|
||||
}
|
||||
|
||||
/// The result of a `MachBackend::compile_function()` call. Contains machine
|
||||
|
||||
@@ -843,6 +843,11 @@ impl<I: VCodeInst> VCode<I> {
|
||||
|
||||
for (block_order_idx, &block) in final_order.iter().enumerate() {
|
||||
trace!("emitting block {:?}", block);
|
||||
|
||||
// Call the new block hook for state
|
||||
state.on_new_block();
|
||||
|
||||
// Emit NOPs to align the block.
|
||||
let new_offset = I::align_basic_block(buffer.cur_offset());
|
||||
while new_offset > buffer.cur_offset() {
|
||||
// Pad with NOPs up to the aligned block offset.
|
||||
|
||||
578
cranelift/filetests/filetests/isa/riscv64/simd-abi.clif
Normal file
578
cranelift/filetests/filetests/isa/riscv64/simd-abi.clif
Normal file
@@ -0,0 +1,578 @@
|
||||
test compile precise-output
|
||||
target riscv64 has_v
|
||||
|
||||
;; Tests both ABI and Regalloc spill/reload.
|
||||
function %simd_spill(
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
;; These cannot fit in registers.
|
||||
i32x4, i32x4
|
||||
) ->
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4, i32x4,
|
||||
;; These cannot fit in registers.
|
||||
i32x4, i32x4 system_v
|
||||
{
|
||||
block0(
|
||||
v0:i32x4, v1:i32x4, v2:i32x4, v3:i32x4, v4:i32x4, v5:i32x4, v6:i32x4, v7:i32x4,
|
||||
v8:i32x4, v9:i32x4, v10:i32x4, v11:i32x4, v12:i32x4, v13:i32x4, v14:i32x4, v15:i32x4,
|
||||
v16:i32x4, v17:i32x4, v18:i32x4, v19:i32x4, v20:i32x4, v21:i32x4, v22:i32x4, v23:i32x4,
|
||||
v24:i32x4, v25:i32x4, v26:i32x4, v27:i32x4, v28:i32x4, v29:i32x4, v30:i32x4, v31:i32x4,
|
||||
v32:i32x4, v33:i32x4
|
||||
):
|
||||
;; This just reverses the args
|
||||
return v33, v32,
|
||||
v31, v30, v29, v28, v27, v26, v25, v24,
|
||||
v23, v22, v21, v20, v19, v18, v17, v16,
|
||||
v15, v14, v13, v12, v11, v10, v9, v8,
|
||||
v7, v6, v5, v4, v3, v2, v1, v0
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; add sp,-16
|
||||
; sd ra,8(sp)
|
||||
; sd fp,0(sp)
|
||||
; mv fp,sp
|
||||
; fsd fs0,-8(sp)
|
||||
; fsd fs2,-16(sp)
|
||||
; fsd fs3,-24(sp)
|
||||
; fsd fs4,-32(sp)
|
||||
; fsd fs5,-40(sp)
|
||||
; fsd fs6,-48(sp)
|
||||
; fsd fs7,-56(sp)
|
||||
; fsd fs8,-64(sp)
|
||||
; fsd fs9,-72(sp)
|
||||
; fsd fs10,-80(sp)
|
||||
; fsd fs11,-88(sp)
|
||||
; add sp,-112
|
||||
; block0:
|
||||
; fsd fa0,0(nominal_sp)
|
||||
; fsd fa1,8(nominal_sp)
|
||||
; vle8.v v28,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v29,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v30,48(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v31,64(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v0,80(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v1,96(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v2,112(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v3,128(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v5,144(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v7,160(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v4,176(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v6,192(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v25,208(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v27,224(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v9,240(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v19,256(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v21,272(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v23,288(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v26,304(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v8,320(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v18,336(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v20,352(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v22,368(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v24,384(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v11,400(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vle8.v v10,416(fp) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v24,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v22,16(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v20,32(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v18,48(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v8,64(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v26,80(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v23,96(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v21,112(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v19,128(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v9,144(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v27,160(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v25,176(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v6,192(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v4,208(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v7,224(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v5,240(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v3,256(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v2,272(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v1,288(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v0,304(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v31,320(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v30,336(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v29,352(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v28,368(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v17,384(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v16,400(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v15,416(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v14,432(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v13,448(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vse8.v v12,464(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; fld fa4,8(nominal_sp)
|
||||
; vse8.v v14,480(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; fld fa7,0(nominal_sp)
|
||||
; vse8.v v17,496(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; add sp,+112
|
||||
; fld fs0,-8(sp)
|
||||
; fld fs2,-16(sp)
|
||||
; fld fs3,-24(sp)
|
||||
; fld fs4,-32(sp)
|
||||
; fld fs5,-40(sp)
|
||||
; fld fs6,-48(sp)
|
||||
; fld fs7,-56(sp)
|
||||
; fld fs8,-64(sp)
|
||||
; fld fs9,-72(sp)
|
||||
; fld fs10,-80(sp)
|
||||
; fld fs11,-88(sp)
|
||||
; ld ra,8(sp)
|
||||
; ld fp,0(sp)
|
||||
; add sp,+16
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; addi sp, sp, -0x10
|
||||
; sd ra, 8(sp)
|
||||
; sd s0, 0(sp)
|
||||
; ori s0, sp, 0
|
||||
; fsd fs0, -8(sp)
|
||||
; fsd fs2, -0x10(sp)
|
||||
; fsd fs3, -0x18(sp)
|
||||
; fsd fs4, -0x20(sp)
|
||||
; fsd fs5, -0x28(sp)
|
||||
; fsd fs6, -0x30(sp)
|
||||
; fsd fs7, -0x38(sp)
|
||||
; fsd fs8, -0x40(sp)
|
||||
; fsd fs9, -0x48(sp)
|
||||
; fsd fs10, -0x50(sp)
|
||||
; fsd fs11, -0x58(sp)
|
||||
; addi sp, sp, -0x70
|
||||
; block1: ; offset 0x40
|
||||
; fsd fa0, 0(sp)
|
||||
; fsd fa1, 8(sp)
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x10, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x8e, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x20, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x8e, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x30, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x8f, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x40, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x8f, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x50, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x80, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x60, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x80, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x70, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x81, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x80, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x81, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x90, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x82, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xa0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x83, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xb0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x82, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xc0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x83, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xd0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x8c, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xe0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x8d, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xf0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x84, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x89, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x10, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x8a, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x20, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x8b, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x30, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x8d, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x40, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x84, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x50, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x89, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x60, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x8a, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x70, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x8b, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x80, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x8c, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x90, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x87, 0x85, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xa0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, s0
|
||||
; .byte 0x07, 0x85, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x8c, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x10, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x8b, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x20, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x8a, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x30, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x89, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x40, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x84, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x50, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x8d, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x60, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x8b, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x70, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x8a, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x80, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x89, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x90, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x84, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xa0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x8d, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xb0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x8c, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xc0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x83, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xd0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x82, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xe0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x83, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xf0, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x82, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x81, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x10, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x81, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x20, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x80, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x30, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x80, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x40, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x8f, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x50, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x8f, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x60, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x8e, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x70, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x8e, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x80, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x88, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x90, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x88, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xa0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x87, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xb0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x87, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xc0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x86, 0x0f, 0x02
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xd0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x86, 0x0f, 0x02
|
||||
; fld fa4, 8(sp)
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xe0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x87, 0x0f, 0x02
|
||||
; fld fa7, 0(sp)
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0xf0, 0x01, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0xa7, 0x88, 0x0f, 0x02
|
||||
; addi sp, sp, 0x70
|
||||
; fld fs0, -8(sp)
|
||||
; fld fs2, -0x10(sp)
|
||||
; fld fs3, -0x18(sp)
|
||||
; fld fs4, -0x20(sp)
|
||||
; fld fs5, -0x28(sp)
|
||||
; fld fs6, -0x30(sp)
|
||||
; fld fs7, -0x38(sp)
|
||||
; fld fs8, -0x40(sp)
|
||||
; fld fs9, -0x48(sp)
|
||||
; fld fs10, -0x50(sp)
|
||||
; fld fs11, -0x58(sp)
|
||||
; ld ra, 8(sp)
|
||||
; ld s0, 0(sp)
|
||||
; addi sp, sp, 0x10
|
||||
; ret
|
||||
|
||||
73
cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif
Normal file
73
cranelift/filetests/filetests/isa/riscv64/simd-iadd.clif
Normal file
@@ -0,0 +1,73 @@
|
||||
test compile precise-output
|
||||
set unwind_info=false
|
||||
target riscv64 has_v
|
||||
|
||||
|
||||
function %iadd_i8x16(i8x16, i8x16) -> i8x16 {
|
||||
block0(v0: i8x16, v1: i8x16):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vadd.vv v10,v11,v10 #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; .byte 0x57, 0x05, 0xb5, 0x02
|
||||
; ret
|
||||
|
||||
function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
|
||||
block0(v0: i16x8, v1: i16x8):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vadd.vv v10,v11,v10 #avl=8, #vtype=(e16, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x84, 0xcc
|
||||
; .byte 0x57, 0x05, 0xb5, 0x02
|
||||
; ret
|
||||
|
||||
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
|
||||
block0(v0: i32x4, v1: i32x4):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vadd.vv v10,v11,v10 #avl=4, #vtype=(e32, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x02, 0xcd
|
||||
; .byte 0x57, 0x05, 0xb5, 0x02
|
||||
; ret
|
||||
|
||||
function %iadd_i64x2(i64x2, i64x2) -> i64x2 {
|
||||
block0(v0: i64x2, v1: i64x2):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vadd.vv v10,v11,v10 #avl=2, #vtype=(e64, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x81, 0xcd
|
||||
; .byte 0x57, 0x05, 0xb5, 0x02
|
||||
; ret
|
||||
|
||||
97
cranelift/filetests/filetests/isa/riscv64/simd-loads.clif
Normal file
97
cranelift/filetests/filetests/isa/riscv64/simd-loads.clif
Normal file
@@ -0,0 +1,97 @@
|
||||
test compile precise-output
|
||||
set unwind_info=false
|
||||
target riscv64 has_v
|
||||
|
||||
|
||||
function %load_i8x16(i64) -> i8x16 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i8x16 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vle8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x07, 0x85, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
function %load_i16x8(i64) -> i16x8 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i16x8 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vle16.v v10,0(a0) #avl=8, #vtype=(e16, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x84, 0xcc
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x07, 0xd5, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
function %load_i32x4(i64) -> i32x4 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i32x4 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vle32.v v10,0(a0) #avl=4, #vtype=(e32, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x02, 0xcd
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x07, 0xe5, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
function %load_i64x2(i64) -> i64x2 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i64x2 v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vle64.v v10,0(a0) #avl=2, #vtype=(e64, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x81, 0xcd
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x07, 0xf5, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
97
cranelift/filetests/filetests/isa/riscv64/simd-stores.clif
Normal file
97
cranelift/filetests/filetests/isa/riscv64/simd-stores.clif
Normal file
@@ -0,0 +1,97 @@
|
||||
test compile precise-output
|
||||
set unwind_info=false
|
||||
target riscv64 has_v
|
||||
|
||||
|
||||
function %store_i8x16(i64, i8x16) {
|
||||
block0(v0: i64, v1: i8x16):
|
||||
store.i8x16 v1, v0
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vse8.v v10,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0x85, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
function %store_i16x8(i64, i16x8) {
|
||||
block0(v0: i64, v1: i16x8):
|
||||
store.i16x8 v1, v0
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vse16.v v10,0(a0) #avl=8, #vtype=(e16, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x84, 0xcc
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0xd5, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
function %store_i32x4(i64, i32x4) {
|
||||
block0(v0: i64, v1: i32x4):
|
||||
store.i32x4 v1, v0
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vse32.v v10,0(a0) #avl=4, #vtype=(e32, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x02, 0xcd
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0xe5, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
function %store_i64x2(i64, i64x2) {
|
||||
block0(v0: i64, v1: i64x2):
|
||||
store.i64x2 v1, v0
|
||||
return
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vse64.v v10,0(a0) #avl=2, #vtype=(e64, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x81, 0xcd
|
||||
; auipc t6, 0
|
||||
; ld t6, 0xc(t6)
|
||||
; j 0xc
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; .byte 0x00, 0x00, 0x00, 0x00
|
||||
; add t6, t6, a0
|
||||
; .byte 0x27, 0xf5, 0x0f, 0x02
|
||||
; ret
|
||||
|
||||
68
cranelift/filetests/filetests/isa/riscv64/simd-vstate.clif
Normal file
68
cranelift/filetests/filetests/isa/riscv64/simd-vstate.clif
Normal file
@@ -0,0 +1,68 @@
|
||||
test compile precise-output
|
||||
set unwind_info=false
|
||||
target riscv64 has_v
|
||||
|
||||
;; Interleaves vector operations to ensure that `vsetivli` is emitted
|
||||
function %iadd_multi(i8x16, i16x8) -> i8x16, i16x8 {
|
||||
block0(v0: i8x16, v1: i16x8):
|
||||
v4 = iadd v0, v0
|
||||
v5 = iadd v1, v1
|
||||
v6 = iadd v5, v5
|
||||
return v4, v6
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vadd.vv v10,v10,v10 #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; vadd.vv v5,v11,v11 #avl=8, #vtype=(e16, m1, ta, ma)
|
||||
; vadd.vv v11,v5,v5 #avl=8, #vtype=(e16, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; .byte 0x57, 0x05, 0xa5, 0x02
|
||||
; .byte 0x57, 0x70, 0x84, 0xcc
|
||||
; .byte 0xd7, 0x82, 0xb5, 0x02
|
||||
; .byte 0xd7, 0x85, 0x52, 0x02
|
||||
; ret
|
||||
|
||||
;; When the block changes, we need to reemit the vector state instruction
|
||||
;; Even if vtype is the same.
|
||||
function %(i8x16, i8x16) -> i8x16 {
|
||||
block0(v0: i8x16, v1: i8x16):
|
||||
v2 = iadd v0, v1
|
||||
jump block1(v1, v2)
|
||||
|
||||
block1(v3: i8x16, v4: i8x16):
|
||||
v5 = iadd v3, v4
|
||||
jump block2(v4, v5)
|
||||
|
||||
block2(v6: i8x16, v7: i8x16):
|
||||
v8 = iadd v6, v7
|
||||
return v8
|
||||
}
|
||||
|
||||
; VCode:
|
||||
; block0:
|
||||
; vadd.vv v5,v11,v10 #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; j label1
|
||||
; block1:
|
||||
; vadd.vv v6,v5,v11 #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; j label2
|
||||
; block2:
|
||||
; vadd.vv v10,v6,v5 #avl=16, #vtype=(e8, m1, ta, ma)
|
||||
; ret
|
||||
;
|
||||
; Disassembled:
|
||||
; block0: ; offset 0x0
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; .byte 0xd7, 0x02, 0xb5, 0x02
|
||||
; block1: ; offset 0x8
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; .byte 0x57, 0x83, 0x55, 0x02
|
||||
; block2: ; offset 0x10
|
||||
; .byte 0x57, 0x70, 0x08, 0xcc
|
||||
; .byte 0x57, 0x85, 0x62, 0x02
|
||||
; ret
|
||||
|
||||
@@ -6,21 +6,6 @@ set enable_simd
|
||||
target x86_64
|
||||
target x86_64 skylake
|
||||
|
||||
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
|
||||
block0(v0:i32x4, v1:i32x4):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %iadd_i32x4([1 1 1 1], [1 2 3 4]) == [2 3 4 5]
|
||||
|
||||
function %iadd_i8x16_with_overflow() -> i8x16 {
|
||||
block0:
|
||||
v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
|
||||
v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %iadd_i8x16_with_overflow() == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||
|
||||
function %isub_i32x4(i32x4, i32x4) -> i32x4 {
|
||||
block0(v0: i32x4, v1: i32x4):
|
||||
|
||||
44
cranelift/filetests/filetests/runtests/simd-iadd.clif
Normal file
44
cranelift/filetests/filetests/runtests/simd-iadd.clif
Normal file
@@ -0,0 +1,44 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
target s390x
|
||||
set enable_simd
|
||||
target x86_64
|
||||
target x86_64 skylake
|
||||
target riscv64 has_v
|
||||
|
||||
|
||||
function %iadd_i8x16(i8x16, i8x16) -> i8x16 {
|
||||
block0(v0:i8x16, v1:i8x16):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %iadd_i8x16([1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]) == [2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17]
|
||||
; run: %iadd_i8x16([2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||
|
||||
|
||||
function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
|
||||
block0(v0:i16x8, v1:i16x8):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %iadd_i16x8([1 1 1 1 1 1 1 1], [1 2 3 4 5 6 7 8]) == [2 3 4 5 6 7 8 9]
|
||||
; run: %iadd_i16x8([2 2 2 2 2 2 2 2], [-1 -1 -1 -1 -1 -1 -1 -1]) == [1 1 1 1 1 1 1 1]
|
||||
|
||||
|
||||
function %iadd_i32x4(i32x4, i32x4) -> i32x4 {
|
||||
block0(v0:i32x4, v1:i32x4):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %iadd_i32x4([1 1 1 1], [1 2 3 4]) == [2 3 4 5]
|
||||
; run: %iadd_i32x4([2 2 2 2], [-1 -1 -1 -1]) == [1 1 1 1]
|
||||
|
||||
|
||||
function %iadd_i64x2(i64x2, i64x2) -> i64x2 {
|
||||
block0(v0:i64x2, v1:i64x2):
|
||||
v2 = iadd v0, v1
|
||||
return v2
|
||||
}
|
||||
; run: %iadd_i64x2([1 1], [1 2]) == [2 3]
|
||||
; run: %iadd_i64x2([2 2], [-1 -1]) == [1 1]
|
||||
Reference in New Issue
Block a user