Merge branch 'main' into peepmatic-bnot
This commit is contained in:
2
build.rs
2
build.rs
@@ -202,8 +202,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
// to be a big chunk of work to implement them all there!
|
||||
("simd", _) if target.contains("aarch64") => return true,
|
||||
|
||||
("simd", "simd_conversions") => return true, // FIXME Unsupported feature: proposed SIMD operator I32x4TruncSatF32x4S
|
||||
|
||||
// TODO(#1886): Ignore reference types tests if this isn't x64,
|
||||
// because Cranelift only supports reference types on x64.
|
||||
("reference_types", _) => {
|
||||
|
||||
@@ -48,3 +48,4 @@ default = ["disas", "wasm", "cranelift-codegen/all-arch"]
|
||||
disas = ["capstone"]
|
||||
enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"]
|
||||
wasm = ["wat", "cranelift-wasm"]
|
||||
experimental_x64 = ["cranelift-codegen/x64"]
|
||||
|
||||
@@ -66,7 +66,6 @@ x64 = [] # New work-in-progress codegen backend for x86_64 based on the new isel
|
||||
# Option to enable all architectures.
|
||||
all-arch = [
|
||||
"x86",
|
||||
"x64",
|
||||
"arm32",
|
||||
"arm64",
|
||||
"riscv"
|
||||
|
||||
@@ -26,7 +26,15 @@ fn main() {
|
||||
let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set");
|
||||
let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set");
|
||||
|
||||
// Configure isa targets cfg.
|
||||
let new_backend_isas = if env::var("CARGO_FEATURE_X64").is_ok() {
|
||||
// The x64 (new backend for x86_64) is a bit particular: it only requires generating
|
||||
// the shared meta code; the only ISA-specific code is for settings.
|
||||
vec![meta::isa::Isa::X86]
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
// Configure isa targets using the old backend.
|
||||
let isa_targets = meta::isa::Isa::all()
|
||||
.iter()
|
||||
.cloned()
|
||||
@@ -36,7 +44,7 @@ fn main() {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let isas = if isa_targets.is_empty() {
|
||||
let old_backend_isas = if new_backend_isas.is_empty() && isa_targets.is_empty() {
|
||||
// Try to match native target.
|
||||
let target_name = target_triple.split('-').next().unwrap();
|
||||
let isa = meta::isa_from_arch(&target_name).expect("error when identifying target");
|
||||
@@ -56,14 +64,23 @@ fn main() {
|
||||
crate_dir.join("build.rs").to_str().unwrap()
|
||||
);
|
||||
|
||||
if let Err(err) = meta::generate(&isas, &out_dir) {
|
||||
if let Err(err) = meta::generate(&old_backend_isas, &new_backend_isas, &out_dir) {
|
||||
eprintln!("Error: {}", err);
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
if env::var("CRANELIFT_VERBOSE").is_ok() {
|
||||
for isa in &isas {
|
||||
println!("cargo:warning=Includes support for {} ISA", isa.to_string());
|
||||
for isa in &old_backend_isas {
|
||||
println!(
|
||||
"cargo:warning=Includes old-backend support for {} ISA",
|
||||
isa.to_string()
|
||||
);
|
||||
}
|
||||
for isa in &new_backend_isas {
|
||||
println!(
|
||||
"cargo:warning=Includes new-backend support for {} ISA",
|
||||
isa.to_string()
|
||||
);
|
||||
}
|
||||
println!(
|
||||
"cargo:warning=Build step took {:?}.",
|
||||
|
||||
@@ -211,6 +211,24 @@ impl TypeVar {
|
||||
"can't double 256 lanes"
|
||||
);
|
||||
}
|
||||
DerivedFunc::MergeLanes => {
|
||||
assert!(
|
||||
ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
|
||||
"can't double all integer types"
|
||||
);
|
||||
assert!(
|
||||
ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
|
||||
"can't double all float types"
|
||||
);
|
||||
assert!(
|
||||
ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
|
||||
"can't double all boolean types"
|
||||
);
|
||||
assert!(
|
||||
*ts.lanes.iter().min().unwrap() > 1,
|
||||
"can't halve a scalar type"
|
||||
);
|
||||
}
|
||||
DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ }
|
||||
}
|
||||
|
||||
@@ -248,6 +266,9 @@ impl TypeVar {
|
||||
/// Returns the type variable derived from `self` through `DerivedFunc::SplitLanes`.
pub fn split_lanes(&self) -> TypeVar {
|
||||
self.derived(DerivedFunc::SplitLanes)
|
||||
}
|
||||
/// Returns the type variable derived from `self` through `DerivedFunc::MergeLanes`.
pub fn merge_lanes(&self) -> TypeVar {
|
||||
self.derived(DerivedFunc::MergeLanes)
|
||||
}
|
||||
|
||||
/// Constrain the range of types this variable can assume to a subset of those in the typeset
|
||||
/// ts.
|
||||
@@ -355,6 +376,7 @@ pub(crate) enum DerivedFunc {
|
||||
HalfVector,
|
||||
DoubleVector,
|
||||
SplitLanes,
|
||||
MergeLanes,
|
||||
}
|
||||
|
||||
impl DerivedFunc {
|
||||
@@ -367,6 +389,7 @@ impl DerivedFunc {
|
||||
DerivedFunc::HalfVector => "half_vector",
|
||||
DerivedFunc::DoubleVector => "double_vector",
|
||||
DerivedFunc::SplitLanes => "split_lanes",
|
||||
DerivedFunc::MergeLanes => "merge_lanes",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -377,6 +400,8 @@ impl DerivedFunc {
|
||||
DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth),
|
||||
DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector),
|
||||
DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector),
|
||||
DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes),
|
||||
DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -462,6 +487,7 @@ impl TypeSet {
|
||||
DerivedFunc::HalfVector => self.half_vector(),
|
||||
DerivedFunc::DoubleVector => self.double_vector(),
|
||||
DerivedFunc::SplitLanes => self.half_width().double_vector(),
|
||||
DerivedFunc::MergeLanes => self.double_width().half_vector(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -601,7 +627,8 @@ impl TypeSet {
|
||||
DerivedFunc::DoubleWidth => self.half_width(),
|
||||
DerivedFunc::HalfVector => self.double_vector(),
|
||||
DerivedFunc::DoubleVector => self.half_vector(),
|
||||
DerivedFunc::SplitLanes => self.half_vector().double_width(),
|
||||
DerivedFunc::SplitLanes => self.double_width().half_vector(),
|
||||
DerivedFunc::MergeLanes => self.half_width().double_vector(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -700,6 +700,7 @@ fn gen_isa(
|
||||
pub(crate) fn generate(
|
||||
isas: &[TargetIsa],
|
||||
transform_groups: &TransformGroups,
|
||||
extra_legalization_groups: &[&'static str],
|
||||
filename_prefix: &str,
|
||||
out_dir: &str,
|
||||
) -> Result<(), error::Error> {
|
||||
@@ -711,8 +712,14 @@ pub(crate) fn generate(
|
||||
fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?;
|
||||
}
|
||||
|
||||
// Add extra legalization groups that were explicitly requested.
|
||||
for group in extra_legalization_groups {
|
||||
shared_group_names.insert(group);
|
||||
}
|
||||
|
||||
// Generate shared legalize groups.
|
||||
let mut fmt = Formatter::new();
|
||||
// Generate shared legalize groups.
|
||||
let mut type_sets = UniqueTable::new();
|
||||
let mut sorted_shared_group_names = Vec::from_iter(shared_group_names);
|
||||
sorted_shared_group_names.sort();
|
||||
|
||||
@@ -6,10 +6,10 @@ use std::fmt;
|
||||
mod arm32;
|
||||
mod arm64;
|
||||
mod riscv;
|
||||
mod x86;
|
||||
pub(crate) mod x86;
|
||||
|
||||
/// Represents known ISA target.
|
||||
#[derive(Copy, Clone)]
|
||||
#[derive(PartialEq, Copy, Clone)]
|
||||
pub enum Isa {
|
||||
Riscv,
|
||||
X86,
|
||||
|
||||
@@ -1669,6 +1669,7 @@ fn define_simd(
|
||||
let ssub_sat = shared.by_name("ssub_sat");
|
||||
let store = shared.by_name("store");
|
||||
let store_complex = shared.by_name("store_complex");
|
||||
let swiden_low = shared.by_name("swiden_low");
|
||||
let uadd_sat = shared.by_name("uadd_sat");
|
||||
let uload8x8 = shared.by_name("uload8x8");
|
||||
let uload8x8_complex = shared.by_name("uload8x8_complex");
|
||||
@@ -1678,6 +1679,7 @@ fn define_simd(
|
||||
let uload32x2_complex = shared.by_name("uload32x2_complex");
|
||||
let snarrow = shared.by_name("snarrow");
|
||||
let unarrow = shared.by_name("unarrow");
|
||||
let uwiden_low = shared.by_name("uwiden_low");
|
||||
let ushr_imm = shared.by_name("ushr_imm");
|
||||
let usub_sat = shared.by_name("usub_sat");
|
||||
let vconst = shared.by_name("vconst");
|
||||
@@ -1697,6 +1699,7 @@ fn define_simd(
|
||||
let x86_pminu = x86.by_name("x86_pminu");
|
||||
let x86_pmullq = x86.by_name("x86_pmullq");
|
||||
let x86_pmuludq = x86.by_name("x86_pmuludq");
|
||||
let x86_palignr = x86.by_name("x86_palignr");
|
||||
let x86_pshufb = x86.by_name("x86_pshufb");
|
||||
let x86_pshufd = x86.by_name("x86_pshufd");
|
||||
let x86_psll = x86.by_name("x86_psll");
|
||||
@@ -1901,6 +1904,8 @@ fn define_simd(
|
||||
rec_fa.opcodes(low),
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD narrow/widen
|
||||
for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
|
||||
let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
|
||||
@@ -1912,6 +1917,23 @@ fn define_simd(
|
||||
let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
|
||||
}
|
||||
for (ty, swiden_opcode, uwiden_opcode) in &[
|
||||
(I8, &PMOVSXBW[..], &PMOVZXBW[..]),
|
||||
(I16, &PMOVSXWD[..], &PMOVZXWD[..]),
|
||||
] {
|
||||
let isap = Some(use_sse41_simd);
|
||||
let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
|
||||
let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
|
||||
e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
|
||||
}
|
||||
for ty in &[I8, I16, I32, I64] {
|
||||
e.enc_both_inferred_maybe_isap(
|
||||
x86_palignr.bind(vector(*ty, sse_vector_size)),
|
||||
rec_fa_ib.opcodes(&PALIGNR[..]),
|
||||
Some(use_ssse3_simd),
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
|
||||
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
|
||||
@@ -664,6 +664,21 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let c = &Operand::new("c", uimm8)
|
||||
.with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_palignr",
|
||||
r#"
|
||||
Concatenate destination and source operands, extracting a byte-aligned result shifted to
|
||||
the right by `c`.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, c])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let i64_t = &TypeVar::new(
|
||||
"i64_t",
|
||||
"A scalar 64bit integer",
|
||||
|
||||
@@ -407,13 +407,18 @@ fn define_simd(
|
||||
let umax = insts.by_name("umax");
|
||||
let umin = insts.by_name("umin");
|
||||
let snarrow = insts.by_name("snarrow");
|
||||
let swiden_high = insts.by_name("swiden_high");
|
||||
let swiden_low = insts.by_name("swiden_low");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let uwiden_high = insts.by_name("uwiden_high");
|
||||
let uwiden_low = insts.by_name("uwiden_low");
|
||||
let vconst = insts.by_name("vconst");
|
||||
let vall_true = insts.by_name("vall_true");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
let vselect = insts.by_name("vselect");
|
||||
|
||||
let x86_palignr = x86_instructions.by_name("x86_palignr");
|
||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
|
||||
let x86_pmins = x86_instructions.by_name("x86_pmins");
|
||||
@@ -786,6 +791,26 @@ fn define_simd(
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD widen
|
||||
for ty in &[I8, I16] {
|
||||
let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = swiden_high(a)),
|
||||
vec![
|
||||
def!(c = x86_palignr(a, a, uimm8_eight)),
|
||||
def!(b = swiden_low(c)),
|
||||
],
|
||||
);
|
||||
let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = uwiden_high(a)),
|
||||
vec![
|
||||
def!(c = x86_palignr(a, a, uimm8_eight)),
|
||||
def!(b = uwiden_low(c)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||
|
||||
@@ -14,7 +14,7 @@ mod legalize;
|
||||
mod opcodes;
|
||||
mod recipes;
|
||||
mod registers;
|
||||
mod settings;
|
||||
pub(crate) mod settings;
|
||||
|
||||
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
||||
let settings = settings::define(&shared_defs.settings);
|
||||
|
||||
@@ -354,6 +354,10 @@ pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
|
||||
/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
|
||||
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
|
||||
|
||||
/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
|
||||
/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
|
||||
pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
|
||||
|
||||
/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
|
||||
|
||||
@@ -473,7 +477,7 @@ pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
|
||||
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
|
||||
|
||||
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||
/// integers in xmm1.
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
|
||||
|
||||
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
||||
@@ -485,7 +489,7 @@ pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
|
||||
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
|
||||
|
||||
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||
/// integers in xmm1.
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
|
||||
|
||||
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
||||
|
||||
@@ -3,12 +3,6 @@ use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
|
||||
pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
|
||||
let mut settings = SettingGroupBuilder::new("x86");
|
||||
|
||||
settings.add_bool(
|
||||
"use_new_backend",
|
||||
"Whether to use the new codegen backend using the new isel",
|
||||
false,
|
||||
);
|
||||
|
||||
// CPUID.01H:ECX
|
||||
let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
|
||||
let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
|
||||
|
||||
@@ -25,7 +25,11 @@ pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
|
||||
}
|
||||
|
||||
/// Generates all the Rust source files used in Cranelift from the meta-language.
|
||||
pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
|
||||
pub fn generate(
|
||||
old_backend_isas: &[isa::Isa],
|
||||
new_backend_isas: &[isa::Isa],
|
||||
out_dir: &str,
|
||||
) -> Result<(), error::Error> {
|
||||
// Create all the definitions:
|
||||
// - common definitions.
|
||||
let mut shared_defs = shared::define();
|
||||
@@ -39,7 +43,7 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
|
||||
gen_types::generate("types.rs", &out_dir)?;
|
||||
|
||||
// - per ISA definitions.
|
||||
let isas = isa::define(isas, &mut shared_defs);
|
||||
let target_isas = isa::define(old_backend_isas, &mut shared_defs);
|
||||
|
||||
// At this point, all definitions are done.
|
||||
let all_formats = shared_defs.verify_instruction_formats();
|
||||
@@ -53,9 +57,22 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
|
||||
&out_dir,
|
||||
)?;
|
||||
|
||||
gen_legalizer::generate(&isas, &shared_defs.transform_groups, "legalize", &out_dir)?;
|
||||
let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() {
|
||||
// The new backend only requires the "expand" legalization group.
|
||||
&["expand"]
|
||||
} else {
|
||||
&[]
|
||||
};
|
||||
|
||||
for isa in isas {
|
||||
gen_legalizer::generate(
|
||||
&target_isas,
|
||||
&shared_defs.transform_groups,
|
||||
extra_legalization_groups,
|
||||
"legalize",
|
||||
&out_dir,
|
||||
)?;
|
||||
|
||||
for isa in target_isas {
|
||||
gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
|
||||
|
||||
gen_settings::generate(
|
||||
@@ -80,5 +97,28 @@ pub fn generate(isas: &[isa::Isa], out_dir: &str) -> Result<(), error::Error> {
|
||||
)?;
|
||||
}
|
||||
|
||||
for isa in new_backend_isas {
|
||||
match isa {
|
||||
isa::Isa::X86 => {
|
||||
// If the old backend ISAs contained x86, this file has already been generated.
|
||||
if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let settings = crate::isa::x86::settings::define(&shared_defs.settings);
|
||||
gen_settings::generate(
|
||||
&settings,
|
||||
gen_settings::ParentGroup::Shared,
|
||||
"settings-x86.rs",
|
||||
&out_dir,
|
||||
)?;
|
||||
}
|
||||
isa::Isa::Arm64 => {
|
||||
// aarch64 doesn't have platform-specific settings.
|
||||
}
|
||||
isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -3883,9 +3883,9 @@ pub(crate) fn define(
|
||||
.constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
|
||||
);
|
||||
|
||||
let I16xN = &TypeVar::new(
|
||||
"I16xN",
|
||||
"A SIMD vector type containing integers 16-bits wide and up",
|
||||
let I16or32xN = &TypeVar::new(
|
||||
"I16or32xN",
|
||||
"A SIMD vector type containing integer lanes 16 or 32 bits wide",
|
||||
TypeSetBuilder::new()
|
||||
.ints(16..32)
|
||||
.simd_lanes(4..8)
|
||||
@@ -3893,9 +3893,9 @@ pub(crate) fn define(
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", I16xN);
|
||||
let y = &Operand::new("y", I16xN);
|
||||
let a = &Operand::new("a", &I16xN.split_lanes());
|
||||
let x = &Operand::new("x", I16or32xN);
|
||||
let y = &Operand::new("y", I16or32xN);
|
||||
let a = &Operand::new("a", &I16or32xN.split_lanes());
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
@@ -3934,6 +3934,75 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let I8or16xN = &TypeVar::new(
|
||||
"I8or16xN",
|
||||
"A SIMD vector type containing integer lanes 8 or 16 bits wide.",
|
||||
TypeSetBuilder::new()
|
||||
.ints(8..16)
|
||||
.simd_lanes(8..16)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", I8or16xN);
|
||||
let a = &Operand::new("a", &I8or16xN.merge_lanes());
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"swiden_low",
|
||||
r#"
|
||||
Widen the low lanes of `x` using signed extension.
|
||||
|
||||
This will double the lane width and halve the number of lanes.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"swiden_high",
|
||||
r#"
|
||||
Widen the high lanes of `x` using signed extension.
|
||||
|
||||
This will double the lane width and halve the number of lanes.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uwiden_low",
|
||||
r#"
|
||||
Widen the low lanes of `x` using unsigned extension.
|
||||
|
||||
This will double the lane width and halve the number of lanes.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"uwiden_high",
|
||||
r#"
|
||||
Widen the high lanes of `x` using unsigned extension.
|
||||
|
||||
This will double the lane width and halve the number of lanes.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let IntTo = &TypeVar::new(
|
||||
"IntTo",
|
||||
"A larger integer type with the same number of lanes",
|
||||
|
||||
@@ -584,6 +584,9 @@ enum OperandConstraint {
|
||||
|
||||
/// This operand is `ctrlType.split_lanes()`.
|
||||
SplitLanes,
|
||||
|
||||
/// This operand is `ctrlType.merge_lanes()`.
|
||||
MergeLanes,
|
||||
}
|
||||
|
||||
impl OperandConstraint {
|
||||
@@ -615,6 +618,11 @@ impl OperandConstraint {
|
||||
.split_lanes()
|
||||
.expect("invalid type for split_lanes"),
|
||||
),
|
||||
MergeLanes => Bound(
|
||||
ctrl_type
|
||||
.merge_lanes()
|
||||
.expect("invalid type for merge_lanes"),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -284,7 +284,7 @@ impl Type {
|
||||
|
||||
/// Split the lane width in half and double the number of lanes to maintain the same bit-width.
|
||||
///
|
||||
/// If this is a scalar type of n bits, it produces a SIMD vector type of (n/2)x2.
|
||||
/// If this is a scalar type of `n` bits, it produces a SIMD vector type of `(n/2)x2`.
|
||||
pub fn split_lanes(self) -> Option<Self> {
|
||||
match self.half_width() {
|
||||
Some(half_width) => half_width.by(2),
|
||||
@@ -292,6 +292,17 @@ impl Type {
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge lanes to half the number of lanes and double the lane width to maintain the same
|
||||
/// bit-width.
|
||||
///
|
||||
/// If this is a scalar type, it will return `None`.
|
||||
pub fn merge_lanes(self) -> Option<Self> {
|
||||
match self.double_width() {
|
||||
Some(double_width) => double_width.half_vector(),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Index of this type, for use with hash tables etc.
|
||||
pub fn index(self) -> usize {
|
||||
usize::from(self.0)
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
@@ -587,3 +588,55 @@ impl ScalarSize {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Type used to communicate the size of a vector operand.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum VectorSize {
|
||||
Size8x8,
|
||||
Size8x16,
|
||||
Size16x4,
|
||||
Size16x8,
|
||||
Size32x2,
|
||||
Size32x4,
|
||||
Size64x2,
|
||||
}
|
||||
|
||||
impl VectorSize {
|
||||
/// Convert from a type into a vector operand size.
|
||||
pub fn from_ty(ty: Type) -> VectorSize {
|
||||
match ty {
|
||||
F32X2 => VectorSize::Size32x2,
|
||||
F32X4 => VectorSize::Size32x4,
|
||||
F64X2 => VectorSize::Size64x2,
|
||||
I8X8 => VectorSize::Size8x8,
|
||||
I8X16 => VectorSize::Size8x16,
|
||||
I16X4 => VectorSize::Size16x4,
|
||||
I16X8 => VectorSize::Size16x8,
|
||||
I32X2 => VectorSize::Size32x2,
|
||||
I32X4 => VectorSize::Size32x4,
|
||||
I64X2 => VectorSize::Size64x2,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the integer operand size that corresponds to a lane of a vector with a certain size.
|
||||
pub fn operand_size(&self) -> OperandSize {
|
||||
match self {
|
||||
VectorSize::Size64x2 => OperandSize::Size64,
|
||||
_ => OperandSize::Size32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
|
||||
pub fn lane_size(&self) -> ScalarSize {
|
||||
match self {
|
||||
VectorSize::Size8x8 => ScalarSize::Size8,
|
||||
VectorSize::Size8x16 => ScalarSize::Size8,
|
||||
VectorSize::Size16x4 => ScalarSize::Size16,
|
||||
VectorSize::Size16x8 => ScalarSize::Size16,
|
||||
VectorSize::Size32x2 => ScalarSize::Size32,
|
||||
VectorSize::Size32x4 => ScalarSize::Size32,
|
||||
VectorSize::Size64x2 => ScalarSize::Size64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1007,7 +1007,7 @@ impl MachInstEmit for Inst {
|
||||
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
|
||||
}
|
||||
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
|
||||
let (imm5, shift, mask) = match size {
|
||||
let (imm5, shift, mask) = match size.lane_size() {
|
||||
ScalarSize::Size32 => (0b00100, 3, 0b011),
|
||||
ScalarSize::Size64 => (0b01000, 4, 0b001),
|
||||
_ => unimplemented!(),
|
||||
@@ -1048,6 +1048,10 @@ impl MachInstEmit for Inst {
|
||||
FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
|
||||
FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
|
||||
FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
|
||||
FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
|
||||
FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
|
||||
FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
|
||||
FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
|
||||
};
|
||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||
}
|
||||
@@ -1102,31 +1106,25 @@ impl MachInstEmit for Inst {
|
||||
};
|
||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let enc_size = match ty {
|
||||
I8X16 => 0b00,
|
||||
I16X8 => 0b01,
|
||||
I32X4 => 0b10,
|
||||
I64X2 => 0b11,
|
||||
_ => 0,
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let enc_size = match size {
|
||||
VectorSize::Size8x16 => 0b00,
|
||||
VectorSize::Size16x8 => 0b01,
|
||||
VectorSize::Size32x4 => 0b10,
|
||||
VectorSize::Size64x2 => 0b11,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let (bits_12_16, size) = match op {
|
||||
VecMisc2::Not => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b00101, 0b00)
|
||||
}
|
||||
VecMisc2::Neg => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b01011, enc_size)
|
||||
}
|
||||
VecMisc2::Not => (0b00101, 0b00),
|
||||
VecMisc2::Neg => (0b01011, enc_size),
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, ty } => {
|
||||
let (q, size) = match ty {
|
||||
I8X16 => (0b1, 0b00),
|
||||
I16X8 => (0b1, 0b01),
|
||||
I32X4 => (0b1, 0b10),
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let (q, size) = match size {
|
||||
VectorSize::Size8x16 => (0b1, 0b00),
|
||||
VectorSize::Size16x8 => (0b1, 0b01),
|
||||
VectorSize::Size32x4 => (0b1, 0b10),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (u, opcode) = match op {
|
||||
@@ -1250,12 +1248,12 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::MovFromVec { rd, rn, idx, ty } => {
|
||||
let (q, imm5, shift, mask) = match ty {
|
||||
I8 => (0b0, 0b00001, 1, 0b1111),
|
||||
I16 => (0b0, 0b00010, 2, 0b0111),
|
||||
I32 => (0b0, 0b00100, 3, 0b0011),
|
||||
I64 => (0b1, 0b01000, 4, 0b0001),
|
||||
&Inst::MovFromVec { rd, rn, idx, size } => {
|
||||
let (q, imm5, shift, mask) = match size {
|
||||
VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
|
||||
VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
|
||||
VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
|
||||
VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
debug_assert_eq!(idx & mask, idx);
|
||||
@@ -1268,12 +1266,12 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_gpr(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecDup { rd, rn, ty } => {
|
||||
let imm5 = match ty {
|
||||
I8 => 0b00001,
|
||||
I16 => 0b00010,
|
||||
I32 => 0b00100,
|
||||
I64 => 0b01000,
|
||||
&Inst::VecDup { rd, rn, size } => {
|
||||
let imm5 = match size {
|
||||
VectorSize::Size8x16 => 0b00001,
|
||||
VectorSize::Size16x8 => 0b00010,
|
||||
VectorSize::Size32x4 => 0b00100,
|
||||
VectorSize::Size64x2 => 0b01000,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
sink.put4(
|
||||
@@ -1283,10 +1281,10 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecDupFromFpu { rd, rn, ty } => {
|
||||
let imm5 = match ty {
|
||||
F32 => 0b00100,
|
||||
F64 => 0b01000,
|
||||
&Inst::VecDupFromFpu { rd, rn, size } => {
|
||||
let imm5 = match size {
|
||||
VectorSize::Size32x4 => 0b00100,
|
||||
VectorSize::Size64x2 => 0b01000,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
sink.put4(
|
||||
@@ -1318,41 +1316,25 @@ impl MachInstEmit for Inst {
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
size,
|
||||
} => {
|
||||
let enc_size = match ty {
|
||||
I8X16 => 0b00,
|
||||
I16X8 => 0b01,
|
||||
I32X4 => 0b10,
|
||||
I64X2 => 0b11,
|
||||
let enc_size = match size {
|
||||
VectorSize::Size8x16 => 0b00,
|
||||
VectorSize::Size16x8 => 0b01,
|
||||
VectorSize::Size32x4 => 0b10,
|
||||
VectorSize::Size64x2 => 0b11,
|
||||
_ => 0,
|
||||
};
|
||||
let enc_size_for_fcmp = match ty {
|
||||
F32X4 => 0b0,
|
||||
F64X2 => 0b1,
|
||||
let enc_size_for_fcmp = match size {
|
||||
VectorSize::Size32x4 => 0b0,
|
||||
VectorSize::Size64x2 => 0b1,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
let (top11, bit15_10) = match alu_op {
|
||||
VecALUOp::SQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
|
||||
VecALUOp::SQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
|
||||
VecALUOp::UQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
|
||||
VecALUOp::UQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
|
||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
|
||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
|
||||
@@ -1364,31 +1346,16 @@ impl MachInstEmit for Inst {
|
||||
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
|
||||
// The following logical instructions operate on bytes, so are not encoded differently
|
||||
// for the different vector types.
|
||||
VecALUOp::And => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_00_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Bic => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_01_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Orr => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_10_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Eor => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b011_01110_00_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Bsl => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b011_01110_01_1, 0b000111)
|
||||
}
|
||||
VecALUOp::And => (0b010_01110_00_1, 0b000111),
|
||||
VecALUOp::Bic => (0b010_01110_01_1, 0b000111),
|
||||
VecALUOp::Orr => (0b010_01110_10_1, 0b000111),
|
||||
VecALUOp::Eor => (0b011_01110_00_1, 0b000111),
|
||||
VecALUOp::Bsl => (0b011_01110_01_1, 0b000111),
|
||||
VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
|
||||
VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001),
|
||||
VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001),
|
||||
VecALUOp::Mul => {
|
||||
debug_assert_ne!(I64X2, ty);
|
||||
debug_assert_ne!(size, VectorSize::Size64x2);
|
||||
(0b010_01110_00_1 | enc_size << 1, 0b100111)
|
||||
}
|
||||
VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
|
||||
|
||||
@@ -1841,7 +1841,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(3),
|
||||
rn: vreg(27),
|
||||
idx: 14,
|
||||
ty: I8,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"633F1D0E",
|
||||
"umov w3, v27.b[14]",
|
||||
@@ -1851,7 +1851,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(24),
|
||||
rn: vreg(5),
|
||||
idx: 3,
|
||||
ty: I16,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"B83C0E0E",
|
||||
"umov w24, v5.h[3]",
|
||||
@@ -1861,7 +1861,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(12),
|
||||
rn: vreg(17),
|
||||
idx: 1,
|
||||
ty: I32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"2C3E0C0E",
|
||||
"mov w12, v17.s[1]",
|
||||
@@ -1871,7 +1871,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(21),
|
||||
rn: vreg(20),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"953E084E",
|
||||
"mov x21, v20.d[0]",
|
||||
@@ -1900,7 +1900,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(25),
|
||||
rn: xreg(7),
|
||||
ty: I8,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"F90C014E",
|
||||
"dup v25.16b, w7",
|
||||
@@ -1909,7 +1909,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(2),
|
||||
rn: xreg(23),
|
||||
ty: I16,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E20E024E",
|
||||
"dup v2.8h, w23",
|
||||
@@ -1918,7 +1918,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(0),
|
||||
rn: xreg(28),
|
||||
ty: I32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"800F044E",
|
||||
"dup v0.4s, w28",
|
||||
@@ -1927,7 +1927,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(31),
|
||||
rn: xreg(5),
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"BF0C084E",
|
||||
"dup v31.2d, x5",
|
||||
@@ -1936,7 +1936,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDupFromFpu {
|
||||
rd: writable_vreg(14),
|
||||
rn: vreg(19),
|
||||
ty: F32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"6E06044E",
|
||||
"dup v14.4s, v19.s[0]",
|
||||
@@ -1945,7 +1945,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDupFromFpu {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(10),
|
||||
ty: F64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"5205084E",
|
||||
"dup v18.2d, v10.d[0]",
|
||||
@@ -2004,50 +2004,6 @@ fn test_aarch64_binemit() {
|
||||
"5CA4202F",
|
||||
"uxtl v28.2d, v2.2s",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
@@ -2055,7 +2011,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"410C284E",
|
||||
"sqadd v1.16b, v2.16b, v8.16b",
|
||||
@@ -2067,7 +2023,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"810D7C4E",
|
||||
"sqadd v1.8h, v12.8h, v28.8h",
|
||||
@@ -2079,7 +2035,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C0CA64E",
|
||||
"sqadd v12.4s, v2.4s, v6.4s",
|
||||
@@ -2091,7 +2047,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F40CED4E",
|
||||
"sqadd v20.2d, v7.2d, v13.2d",
|
||||
@@ -2103,7 +2059,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"412C284E",
|
||||
"sqsub v1.16b, v2.16b, v8.16b",
|
||||
@@ -2115,7 +2071,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"812D7C4E",
|
||||
"sqsub v1.8h, v12.8h, v28.8h",
|
||||
@@ -2127,7 +2083,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C2CA64E",
|
||||
"sqsub v12.4s, v2.4s, v6.4s",
|
||||
@@ -2139,7 +2095,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F42CED4E",
|
||||
"sqsub v20.2d, v7.2d, v13.2d",
|
||||
@@ -2151,7 +2107,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"410C286E",
|
||||
"uqadd v1.16b, v2.16b, v8.16b",
|
||||
@@ -2163,7 +2119,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"810D7C6E",
|
||||
"uqadd v1.8h, v12.8h, v28.8h",
|
||||
@@ -2175,7 +2131,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C0CA66E",
|
||||
"uqadd v12.4s, v2.4s, v6.4s",
|
||||
@@ -2187,7 +2143,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F40CED6E",
|
||||
"uqadd v20.2d, v7.2d, v13.2d",
|
||||
@@ -2199,7 +2155,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"412C286E",
|
||||
"uqsub v1.16b, v2.16b, v8.16b",
|
||||
@@ -2211,7 +2167,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"812D7C6E",
|
||||
"uqsub v1.8h, v12.8h, v28.8h",
|
||||
@@ -2223,7 +2179,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C2CA66E",
|
||||
"uqsub v12.4s, v2.4s, v6.4s",
|
||||
@@ -2235,7 +2191,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F42CED6E",
|
||||
"uqsub v20.2d, v7.2d, v13.2d",
|
||||
@@ -2247,7 +2203,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"E38E386E",
|
||||
"cmeq v3.16b, v23.16b, v24.16b",
|
||||
@@ -2259,7 +2215,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"E336384E",
|
||||
"cmgt v3.16b, v23.16b, v24.16b",
|
||||
@@ -2271,7 +2227,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"373D2C4E",
|
||||
"cmge v23.16b, v9.16b, v12.16b",
|
||||
@@ -2283,7 +2239,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"2534216E",
|
||||
"cmhi v5.16b, v1.16b, v1.16b",
|
||||
@@ -2295,7 +2251,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"483C2F6E",
|
||||
"cmhs v8.16b, v2.16b, v15.16b",
|
||||
@@ -2307,7 +2263,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E38E786E",
|
||||
"cmeq v3.8h, v23.8h, v24.8h",
|
||||
@@ -2319,7 +2275,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E336784E",
|
||||
"cmgt v3.8h, v23.8h, v24.8h",
|
||||
@@ -2331,7 +2287,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"373D6C4E",
|
||||
"cmge v23.8h, v9.8h, v12.8h",
|
||||
@@ -2343,7 +2299,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"2534616E",
|
||||
"cmhi v5.8h, v1.8h, v1.8h",
|
||||
@@ -2355,7 +2311,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"483C6F6E",
|
||||
"cmhs v8.8h, v2.8h, v15.8h",
|
||||
@@ -2367,7 +2323,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"E38EB86E",
|
||||
"cmeq v3.4s, v23.4s, v24.4s",
|
||||
@@ -2379,7 +2335,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"E336B84E",
|
||||
"cmgt v3.4s, v23.4s, v24.4s",
|
||||
@@ -2391,7 +2347,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"373DAC4E",
|
||||
"cmge v23.4s, v9.4s, v12.4s",
|
||||
@@ -2403,7 +2359,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"2534A16E",
|
||||
"cmhi v5.4s, v1.4s, v1.4s",
|
||||
@@ -2415,7 +2371,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"483CAF6E",
|
||||
"cmhs v8.4s, v2.4s, v15.4s",
|
||||
@@ -2427,7 +2383,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(28),
|
||||
rn: vreg(12),
|
||||
rm: vreg(4),
|
||||
ty: F32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"9CE5244E",
|
||||
"fcmeq v28.4s, v12.4s, v4.4s",
|
||||
@@ -2439,7 +2395,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(16),
|
||||
rm: vreg(31),
|
||||
ty: F64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"03E6FF6E",
|
||||
"fcmgt v3.2d, v16.2d, v31.2d",
|
||||
@@ -2451,7 +2407,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(23),
|
||||
rm: vreg(0),
|
||||
ty: F64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F2E6606E",
|
||||
"fcmge v18.2d, v23.2d, v0.2d",
|
||||
@@ -2463,7 +2419,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(19),
|
||||
rm: vreg(18),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"741E324E",
|
||||
"and v20.16b, v19.16b, v18.16b",
|
||||
@@ -2475,7 +2431,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(11),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"681D614E",
|
||||
"bic v8.16b, v11.16b, v1.16b",
|
||||
@@ -2487,7 +2443,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(2),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"4F1CAC4E",
|
||||
"orr v15.16b, v2.16b, v12.16b",
|
||||
@@ -2499,7 +2455,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(3),
|
||||
rm: vreg(22),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"721C366E",
|
||||
"eor v18.16b, v3.16b, v22.16b",
|
||||
@@ -2511,7 +2467,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(9),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"281D616E",
|
||||
"bsl v8.16b, v9.16b, v1.16b",
|
||||
@@ -2523,7 +2479,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(12),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"88A5216E",
|
||||
"umaxp v8.16b, v12.16b, v1.16b",
|
||||
@@ -2535,7 +2491,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(6),
|
||||
rm: vreg(1),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"C1A4616E",
|
||||
"umaxp v1.8h, v6.8h, v1.8h",
|
||||
@@ -2547,7 +2503,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(20),
|
||||
rm: vreg(16),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"81A6B06E",
|
||||
"umaxp v1.4s, v20.4s, v16.4s",
|
||||
@@ -2559,7 +2515,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"2584214E",
|
||||
"add v5.16b, v1.16b, v1.16b",
|
||||
@@ -2571,7 +2527,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(7),
|
||||
rn: vreg(13),
|
||||
rm: vreg(2),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"A785624E",
|
||||
"add v7.8h, v13.8h, v2.8h",
|
||||
@@ -2583,7 +2539,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(9),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"3285A64E",
|
||||
"add v18.4s, v9.4s, v6.4s",
|
||||
@@ -2595,7 +2551,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(3),
|
||||
rm: vreg(2),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"6184E24E",
|
||||
"add v1.2d, v3.2d, v2.2d",
|
||||
@@ -2607,7 +2563,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"2584216E",
|
||||
"sub v5.16b, v1.16b, v1.16b",
|
||||
@@ -2619,7 +2575,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(7),
|
||||
rn: vreg(13),
|
||||
rm: vreg(2),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"A785626E",
|
||||
"sub v7.8h, v13.8h, v2.8h",
|
||||
@@ -2631,7 +2587,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(9),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"3285A66E",
|
||||
"sub v18.4s, v9.4s, v6.4s",
|
||||
@@ -2643,7 +2599,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(0),
|
||||
rm: vreg(8),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"1284E86E",
|
||||
"sub v18.2d, v0.2d, v8.2d",
|
||||
@@ -2655,7 +2611,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(25),
|
||||
rn: vreg(9),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"399D284E",
|
||||
"mul v25.16b, v9.16b, v8.16b",
|
||||
@@ -2667,7 +2623,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(30),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"DE9F6C4E",
|
||||
"mul v30.8h, v30.8h, v12.8h",
|
||||
@@ -2679,7 +2635,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"529EB24E",
|
||||
"mul v18.4s, v18.4s, v18.4s",
|
||||
@@ -2691,7 +2647,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"5246326E",
|
||||
"ushl v18.16b, v18.16b, v18.16b",
|
||||
@@ -2703,7 +2659,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"5246726E",
|
||||
"ushl v18.8h, v18.8h, v18.8h",
|
||||
@@ -2715,7 +2671,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(1),
|
||||
rm: vreg(21),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"3244B56E",
|
||||
"ushl v18.4s, v1.4s, v21.4s",
|
||||
@@ -2727,7 +2683,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(7),
|
||||
rm: vreg(19),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"E544F36E",
|
||||
"ushl v5.2d, v7.2d, v19.2d",
|
||||
@@ -2739,7 +2695,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"5246324E",
|
||||
"sshl v18.16b, v18.16b, v18.16b",
|
||||
@@ -2751,7 +2707,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(1),
|
||||
rm: vreg(29),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"3E447D4E",
|
||||
"sshl v30.8h, v1.8h, v29.8h",
|
||||
@@ -2763,7 +2719,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(22),
|
||||
rm: vreg(21),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"C846B54E",
|
||||
"sshl v8.4s, v22.4s, v21.4s",
|
||||
@@ -2775,7 +2731,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(22),
|
||||
rm: vreg(2),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"C846E24E",
|
||||
"sshl v8.2d, v22.2d, v2.2d",
|
||||
@@ -2786,7 +2742,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"2258206E",
|
||||
"mvn v2.16b, v1.16b",
|
||||
@@ -2797,7 +2753,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(12),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"88B9206E",
|
||||
"neg v8.16b, v12.16b",
|
||||
@@ -2808,7 +2764,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(0),
|
||||
rn: vreg(31),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E0BB606E",
|
||||
"neg v0.8h, v31.8h",
|
||||
@@ -2819,7 +2775,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(3),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"62B8A06E",
|
||||
"neg v2.4s, v3.4s",
|
||||
@@ -2830,7 +2786,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(10),
|
||||
rn: vreg(8),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"0AB9E06E",
|
||||
"neg v10.2d, v8.2d",
|
||||
@@ -2841,7 +2797,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"22A8316E",
|
||||
"uminv b2, v1.16b",
|
||||
@@ -2852,7 +2808,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(11),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"63A9716E",
|
||||
"uminv h3, v11.8h",
|
||||
@@ -2863,7 +2819,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(4),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"92A8B16E",
|
||||
"uminv s18, v4.4s",
|
||||
@@ -3214,7 +3170,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(30),
|
||||
idx: 2,
|
||||
size: ScalarSize::Size32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"C107145E",
|
||||
"mov s1, v30.s[2]",
|
||||
@@ -3225,7 +3181,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(11),
|
||||
idx: 0,
|
||||
size: ScalarSize::Size64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"7705085E",
|
||||
"mov d23, v11.d[0]",
|
||||
@@ -3443,6 +3399,50 @@ fn test_aarch64_binemit() {
|
||||
"fmin d15, d30, d31",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Uqadd64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Sqadd64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Uqsub64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Sqsub64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRRR {
|
||||
fpu_op: FPUOp3::MAdd32,
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::types::{
|
||||
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I16,
|
||||
I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32, R64,
|
||||
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
|
||||
I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
|
||||
};
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||
use crate::machinst::*;
|
||||
@@ -125,6 +125,14 @@ pub enum FPUOp2 {
|
||||
Max64,
|
||||
Min32,
|
||||
Min64,
|
||||
/// Signed saturating add
|
||||
Sqadd64,
|
||||
/// Unsigned saturating add
|
||||
Uqadd64,
|
||||
/// Signed saturating subtract
|
||||
Sqsub64,
|
||||
/// Unsigned saturating subtract
|
||||
Uqsub64,
|
||||
}
|
||||
|
||||
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
||||
@@ -208,16 +216,12 @@ pub enum VecExtendOp {
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecALUOp {
|
||||
/// Signed saturating add
|
||||
SQAddScalar,
|
||||
Sqadd,
|
||||
/// Unsigned saturating add
|
||||
UQAddScalar,
|
||||
Uqadd,
|
||||
/// Signed saturating subtract
|
||||
SQSubScalar,
|
||||
Sqsub,
|
||||
/// Unsigned saturating subtract
|
||||
UQSubScalar,
|
||||
Uqsub,
|
||||
/// Compare bitwise equal
|
||||
Cmeq,
|
||||
@@ -590,7 +594,7 @@ pub enum Inst {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
idx: u8,
|
||||
size: ScalarSize,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// 1-op FPU instruction.
|
||||
@@ -734,21 +738,21 @@ pub enum Inst {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
idx: u8,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Duplicate general-purpose register to vector.
|
||||
VecDup {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Duplicate scalar to vector.
|
||||
VecDupFromFpu {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector extend.
|
||||
@@ -764,7 +768,7 @@ pub enum Inst {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
rm: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector two register miscellaneous instruction.
|
||||
@@ -772,7 +776,7 @@ pub enum Inst {
|
||||
op: VecMisc2,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector instruction across lanes.
|
||||
@@ -780,7 +784,7 @@ pub enum Inst {
|
||||
op: VecLanesOp,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
||||
@@ -2504,13 +2508,8 @@ impl Inst {
|
||||
format!("mov {}.16b, {}.16b", rd, rn)
|
||||
}
|
||||
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
|
||||
let vector_type = match size {
|
||||
ScalarSize::Size32 => F32,
|
||||
ScalarSize::Size64 => F64,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, vector_type);
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
||||
format!("mov {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::FpuRR { fpu_op, rd, rn } => {
|
||||
@@ -2542,6 +2541,10 @@ impl Inst {
|
||||
FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
|
||||
FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
|
||||
FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
|
||||
FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
|
||||
FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
|
||||
FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
|
||||
FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
|
||||
};
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_scalar(rn, mb_rru, size);
|
||||
@@ -2557,7 +2560,7 @@ impl Inst {
|
||||
};
|
||||
|
||||
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
|
||||
|reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2)
|
||||
|reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
|
||||
} else {
|
||||
|reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
|
||||
};
|
||||
@@ -2706,45 +2709,36 @@ impl Inst {
|
||||
let rn = rn.show_rru(mb_rru);
|
||||
format!("mov {}.d[0], {}", rd, rn)
|
||||
}
|
||||
&Inst::MovFromVec { rd, rn, idx, ty } => {
|
||||
let op = match ty {
|
||||
I32 | I64 => "mov",
|
||||
_ => "umov",
|
||||
&Inst::MovFromVec { rd, rn, idx, size } => {
|
||||
let op = match size {
|
||||
VectorSize::Size8x16 => "umov",
|
||||
VectorSize::Size16x8 => "umov",
|
||||
VectorSize::Size32x4 => "mov",
|
||||
VectorSize::Size64x2 => "mov",
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::from_ty(ty));
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, ty);
|
||||
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecDup { rd, rn, ty } => {
|
||||
let vector_type = match ty {
|
||||
I8 => I8X16,
|
||||
I16 => I16X8,
|
||||
I32 => I32X4,
|
||||
I64 => I64X2,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type);
|
||||
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_ty(ty));
|
||||
&Inst::VecDup { rd, rn, size } => {
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecDupFromFpu { rd, rn, ty } => {
|
||||
let vector_type = match ty {
|
||||
F32 => F32X4,
|
||||
F64 => F64X2,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type);
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, ty);
|
||||
&Inst::VecDupFromFpu { rd, rn, size } => {
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, size);
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn } => {
|
||||
let (op, dest, src) = match t {
|
||||
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
|
||||
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
|
||||
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
|
||||
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
|
||||
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
|
||||
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
|
||||
VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
|
||||
let rn = show_vreg_vector(rn, mb_rru, src);
|
||||
@@ -2755,72 +2749,54 @@ impl Inst {
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
size,
|
||||
} => {
|
||||
let (op, vector, ty) = match alu_op {
|
||||
VecALUOp::SQAddScalar => ("sqadd", false, ty),
|
||||
VecALUOp::Sqadd => ("sqadd", true, ty),
|
||||
VecALUOp::UQAddScalar => ("uqadd", false, ty),
|
||||
VecALUOp::Uqadd => ("uqadd", true, ty),
|
||||
VecALUOp::SQSubScalar => ("sqsub", false, ty),
|
||||
VecALUOp::Sqsub => ("sqsub", true, ty),
|
||||
VecALUOp::UQSubScalar => ("uqsub", false, ty),
|
||||
VecALUOp::Uqsub => ("uqsub", true, ty),
|
||||
VecALUOp::Cmeq => ("cmeq", true, ty),
|
||||
VecALUOp::Cmge => ("cmge", true, ty),
|
||||
VecALUOp::Cmgt => ("cmgt", true, ty),
|
||||
VecALUOp::Cmhs => ("cmhs", true, ty),
|
||||
VecALUOp::Cmhi => ("cmhi", true, ty),
|
||||
VecALUOp::Fcmeq => ("fcmeq", true, ty),
|
||||
VecALUOp::Fcmgt => ("fcmgt", true, ty),
|
||||
VecALUOp::Fcmge => ("fcmge", true, ty),
|
||||
VecALUOp::And => ("and", true, I8X16),
|
||||
VecALUOp::Bic => ("bic", true, I8X16),
|
||||
VecALUOp::Orr => ("orr", true, I8X16),
|
||||
VecALUOp::Eor => ("eor", true, I8X16),
|
||||
VecALUOp::Bsl => ("bsl", true, I8X16),
|
||||
VecALUOp::Umaxp => ("umaxp", true, ty),
|
||||
VecALUOp::Add => ("add", true, ty),
|
||||
VecALUOp::Sub => ("sub", true, ty),
|
||||
VecALUOp::Mul => ("mul", true, ty),
|
||||
VecALUOp::Sshl => ("sshl", true, ty),
|
||||
VecALUOp::Ushl => ("ushl", true, ty),
|
||||
let (op, size) = match alu_op {
|
||||
VecALUOp::Sqadd => ("sqadd", size),
|
||||
VecALUOp::Uqadd => ("uqadd", size),
|
||||
VecALUOp::Sqsub => ("sqsub", size),
|
||||
VecALUOp::Uqsub => ("uqsub", size),
|
||||
VecALUOp::Cmeq => ("cmeq", size),
|
||||
VecALUOp::Cmge => ("cmge", size),
|
||||
VecALUOp::Cmgt => ("cmgt", size),
|
||||
VecALUOp::Cmhs => ("cmhs", size),
|
||||
VecALUOp::Cmhi => ("cmhi", size),
|
||||
VecALUOp::Fcmeq => ("fcmeq", size),
|
||||
VecALUOp::Fcmgt => ("fcmgt", size),
|
||||
VecALUOp::Fcmge => ("fcmge", size),
|
||||
VecALUOp::And => ("and", VectorSize::Size8x16),
|
||||
VecALUOp::Bic => ("bic", VectorSize::Size8x16),
|
||||
VecALUOp::Orr => ("orr", VectorSize::Size8x16),
|
||||
VecALUOp::Eor => ("eor", VectorSize::Size8x16),
|
||||
VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
|
||||
VecALUOp::Umaxp => ("umaxp", size),
|
||||
VecALUOp::Add => ("add", size),
|
||||
VecALUOp::Sub => ("sub", size),
|
||||
VecALUOp::Mul => ("mul", size),
|
||||
VecALUOp::Sshl => ("sshl", size),
|
||||
VecALUOp::Ushl => ("ushl", size),
|
||||
};
|
||||
|
||||
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|
||||
|reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty)
|
||||
} else {
|
||||
|reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
|
||||
};
|
||||
|
||||
let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty);
|
||||
let rn = show_vreg_fn(rn, mb_rru, ty);
|
||||
let rm = show_vreg_fn(rm, mb_rru, ty);
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
let rm = show_vreg_vector(rm, mb_rru, size);
|
||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let (op, ty) = match op {
|
||||
VecMisc2::Not => ("mvn", I8X16),
|
||||
VecMisc2::Neg => ("neg", ty),
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let (op, size) = match op {
|
||||
VecMisc2::Not => ("mvn", VectorSize::Size8x16),
|
||||
VecMisc2::Neg => ("neg", size),
|
||||
};
|
||||
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
|
||||
let rn = show_vreg_vector(rn, mb_rru, ty);
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, ty } => {
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let op = match op {
|
||||
VecLanesOp::Uminv => "uminv",
|
||||
};
|
||||
let size = match ty {
|
||||
I8X16 => ScalarSize::Size8,
|
||||
I16X8 => ScalarSize::Size16,
|
||||
I32X4 => ScalarSize::Size32,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, ty);
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::MovToNZCV { rn } => {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
//! AArch64 ISA definitions: registers.
|
||||
|
||||
use crate::ir::types::*;
|
||||
use crate::isa::aarch64::inst::OperandSize;
|
||||
use crate::isa::aarch64::inst::ScalarSize;
|
||||
use crate::isa::aarch64::inst::VectorSize;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
@@ -307,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
match ty {
|
||||
F32X2 => s.push_str(".2s"),
|
||||
F32X4 => s.push_str(".4s"),
|
||||
F64X2 => s.push_str(".2d"),
|
||||
I8X8 => s.push_str(".8b"),
|
||||
I8X16 => s.push_str(".16b"),
|
||||
I16X4 => s.push_str(".4h"),
|
||||
I16X8 => s.push_str(".8h"),
|
||||
I32X2 => s.push_str(".2s"),
|
||||
I32X4 => s.push_str(".4s"),
|
||||
I64X2 => s.push_str(".2d"),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
let suffix = match size {
|
||||
VectorSize::Size8x8 => ".8b",
|
||||
VectorSize::Size8x16 => ".16b",
|
||||
VectorSize::Size16x4 => ".4h",
|
||||
VectorSize::Size16x8 => ".8h",
|
||||
VectorSize::Size32x2 => ".2s",
|
||||
VectorSize::Size32x4 => ".4s",
|
||||
VectorSize::Size64x2 => ".2d",
|
||||
};
|
||||
|
||||
s.push_str(suffix);
|
||||
s
|
||||
}
|
||||
|
||||
/// Show an indexed vector element.
|
||||
pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String {
|
||||
pub fn show_vreg_element(
|
||||
reg: Reg,
|
||||
mb_rru: Option<&RealRegUniverse>,
|
||||
idx: u8,
|
||||
size: VectorSize,
|
||||
) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
let suffix = match ty {
|
||||
I8 => "b",
|
||||
I16 => "h",
|
||||
I32 => "s",
|
||||
I64 => "d",
|
||||
F32 => "s",
|
||||
F64 => "d",
|
||||
_ => unimplemented!(),
|
||||
let suffix = match size {
|
||||
VectorSize::Size8x8 => "b",
|
||||
VectorSize::Size8x16 => "b",
|
||||
VectorSize::Size16x4 => "h",
|
||||
VectorSize::Size16x8 => "h",
|
||||
VectorSize::Size32x2 => "s",
|
||||
VectorSize::Size32x4 => "s",
|
||||
VectorSize::Size64x2 => "d",
|
||||
};
|
||||
|
||||
s.push_str(&format!(".{}[{}]", suffix, idx));
|
||||
|
||||
@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
use crate::CodegenResult;
|
||||
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::AArch64Backend;
|
||||
@@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
ty: Type,
|
||||
cond: Cond,
|
||||
) -> CodegenResult<()> {
|
||||
match ty {
|
||||
F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"unsupported SIMD type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let is_float = match ty {
|
||||
F32X4 | F64X2 => true,
|
||||
_ => false,
|
||||
};
|
||||
let size = VectorSize::from_ty(ty);
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
@@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
ty: I8X16,
|
||||
size,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Add,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -89,13 +89,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Sub,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
||||
// We use the vector instruction set's saturating adds (UQADD /
|
||||
// SQADD), which require vector registers.
|
||||
// We use the scalar SIMD & FP saturating additions and subtractions
|
||||
// (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
|
||||
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
|
||||
let ty = ty.unwrap();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
@@ -105,11 +105,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
} else {
|
||||
NarrowValueMode::ZeroExtend64
|
||||
};
|
||||
let alu_op = match op {
|
||||
Opcode::UaddSat => VecALUOp::UQAddScalar,
|
||||
Opcode::SaddSat => VecALUOp::SQAddScalar,
|
||||
Opcode::UsubSat => VecALUOp::UQSubScalar,
|
||||
Opcode::SsubSat => VecALUOp::SQSubScalar,
|
||||
let fpu_op = match op {
|
||||
Opcode::UaddSat => FPUOp2::Uqadd64,
|
||||
Opcode::SaddSat => FPUOp2::Sqadd64,
|
||||
Opcode::UsubSat => FPUOp2::Uqsub64,
|
||||
Opcode::SsubSat => FPUOp2::Sqsub64,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
@@ -118,18 +118,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
||||
ctx.emit(Inst::VecRRR {
|
||||
ctx.emit(Inst::FpuRRR {
|
||||
fpu_op,
|
||||
rd: va,
|
||||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec {
|
||||
rd,
|
||||
rn: va.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
@@ -148,7 +147,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -167,7 +166,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
op: VecMisc2::Neg,
|
||||
rd,
|
||||
rn,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -192,7 +191,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -422,7 +421,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -466,7 +465,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -495,7 +494,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
|
||||
let size = VectorSize::from_ty(ty);
|
||||
let (alu_op, is_right_shift) = match op {
|
||||
Opcode::Ishl => (VecALUOp::Sshl, false),
|
||||
Opcode::Ushr => (VecALUOp::Ushl, true),
|
||||
@@ -514,18 +513,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecDup {
|
||||
rd,
|
||||
rn: rm,
|
||||
ty: ty.lane_type(),
|
||||
});
|
||||
ctx.emit(Inst::VecDup { rd, rn: rm, size });
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm: rd.to_reg(),
|
||||
ty,
|
||||
size,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1167,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1297,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1557,15 +1552,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let idx = *imm;
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
|
||||
let ty = ty.unwrap();
|
||||
|
||||
if ty_is_int(ty) {
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, ty });
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
|
||||
// Plain moves are faster on some processors.
|
||||
} else if idx == 0 {
|
||||
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
} else {
|
||||
let size = ScalarSize::from_ty(ty);
|
||||
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
|
||||
}
|
||||
} else {
|
||||
@@ -1576,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Splat => {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let inst = if ty_is_int(ty) {
|
||||
Inst::VecDup { rd, rn, ty }
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ty.unwrap());
|
||||
let inst = if ty_is_int(input_ty) {
|
||||
Inst::VecDup { rd, rn, size }
|
||||
} else {
|
||||
Inst::VecDupFromFpu { rd, rn, ty }
|
||||
Inst::VecDupFromFpu { rd, rn, size }
|
||||
};
|
||||
ctx.emit(inst);
|
||||
}
|
||||
@@ -1598,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
// cmp xm, #0
|
||||
// cset xm, ne
|
||||
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
|
||||
|
||||
if op == Opcode::VanyTrue {
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Umaxp,
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
rm: rm,
|
||||
ty: input_ty,
|
||||
size,
|
||||
});
|
||||
} else {
|
||||
ctx.emit(Inst::VecLanes {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
ty: input_ty,
|
||||
size,
|
||||
});
|
||||
};
|
||||
|
||||
@@ -1620,7 +1617,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
@@ -2136,6 +2133,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::X86Insertps
|
||||
| Opcode::X86Movsd
|
||||
| Opcode::X86Movlhps
|
||||
| Opcode::X86Palignr
|
||||
| Opcode::X86Psll
|
||||
| Opcode::X86Psrl
|
||||
| Opcode::X86Psra
|
||||
@@ -2156,7 +2154,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::AvgRound => unimplemented!(),
|
||||
Opcode::Iabs => unimplemented!(),
|
||||
Opcode::Snarrow | Opcode::Unarrow => unimplemented!(),
|
||||
Opcode::Snarrow
|
||||
| Opcode::Unarrow
|
||||
| Opcode::SwidenLow
|
||||
| Opcode::SwidenHigh
|
||||
| Opcode::UwidenLow
|
||||
| Opcode::UwidenHigh => unimplemented!(),
|
||||
Opcode::TlsValue => unimplemented!(),
|
||||
}
|
||||
|
||||
|
||||
@@ -121,8 +121,12 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
|
||||
match triple.architecture {
|
||||
Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv", triple),
|
||||
Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => {
|
||||
if cfg!(feature = "x64") {
|
||||
isa_builder!(x64, "x64", triple)
|
||||
} else {
|
||||
isa_builder!(x86, "x86", triple)
|
||||
}
|
||||
}
|
||||
Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple),
|
||||
Architecture::Aarch64 { .. } => isa_builder!(aarch64, "arm64", triple),
|
||||
_ => Err(LookupError::Unsupported),
|
||||
|
||||
@@ -11,28 +11,33 @@ use crate::isa::Builder as IsaBuilder;
|
||||
use crate::machinst::pretty_print::ShowWithRRU;
|
||||
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{self, Flags};
|
||||
use crate::settings::{self as shared_settings, Flags};
|
||||
|
||||
use crate::isa::x64::inst::regs::create_reg_universe_systemv;
|
||||
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
|
||||
|
||||
use super::TargetIsa;
|
||||
|
||||
mod abi;
|
||||
mod inst;
|
||||
mod lower;
|
||||
mod settings;
|
||||
|
||||
/// An X64 backend.
|
||||
pub(crate) struct X64Backend {
|
||||
triple: Triple,
|
||||
flags: Flags,
|
||||
_x64_flags: x64_settings::Flags,
|
||||
reg_universe: RealRegUniverse,
|
||||
}
|
||||
|
||||
impl X64Backend {
|
||||
/// Create a new X64 backend with the given (shared) flags.
|
||||
fn new_with_flags(triple: Triple, flags: Flags) -> Self {
|
||||
fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self {
|
||||
let reg_universe = create_reg_universe_systemv(&flags);
|
||||
Self {
|
||||
triple,
|
||||
flags,
|
||||
_x64_flags: x64_flags,
|
||||
reg_universe,
|
||||
}
|
||||
}
|
||||
@@ -103,10 +108,17 @@ impl MachBackend for X64Backend {
|
||||
pub(crate) fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: settings::builder(),
|
||||
constructor: |triple: Triple, flags: Flags, _arch_flag_builder: settings::Builder| {
|
||||
let backend = X64Backend::new_with_flags(triple, flags);
|
||||
Box::new(TargetIsaAdapter::new(backend))
|
||||
},
|
||||
setup: x64_settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
triple: Triple,
|
||||
shared_flags: Flags,
|
||||
builder: shared_settings::Builder,
|
||||
) -> Box<dyn TargetIsa> {
|
||||
let isa_flags = x64_settings::Flags::new(&shared_flags, builder);
|
||||
let backend = X64Backend::new_with_flags(triple, shared_flags, isa_flags);
|
||||
Box::new(TargetIsaAdapter::new(backend))
|
||||
}
|
||||
|
||||
9
cranelift/codegen/src/isa/x64/settings.rs
Normal file
9
cranelift/codegen/src/isa/x64/settings.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a
|
||||
// public `Flags` struct with an impl for all of the settings defined in
|
||||
// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
|
||||
@@ -57,20 +57,12 @@ fn isa_constructor(
|
||||
|
||||
let isa_flags = settings::Flags::new(&shared_flags, builder);
|
||||
|
||||
if isa_flags.use_new_backend() {
|
||||
#[cfg(not(feature = "x64"))]
|
||||
panic!("new backend x86 support not included by cargo features!");
|
||||
|
||||
#[cfg(feature = "x64")]
|
||||
super::x64::isa_builder(triple).finish(shared_flags)
|
||||
} else {
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags,
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TargetIsa for Isa {
|
||||
|
||||
@@ -19,10 +19,24 @@ use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::types::{I32, I64};
|
||||
use crate::ir::{self, InstBuilder, MemFlags};
|
||||
use crate::isa::TargetIsa;
|
||||
|
||||
#[cfg(any(
|
||||
feature = "x86",
|
||||
feature = "arm32",
|
||||
feature = "arm64",
|
||||
feature = "riscv"
|
||||
))]
|
||||
use crate::predicates;
|
||||
#[cfg(any(
|
||||
feature = "x86",
|
||||
feature = "arm32",
|
||||
feature = "arm64",
|
||||
feature = "riscv"
|
||||
))]
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::timing;
|
||||
use alloc::collections::BTreeSet;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
mod boundary;
|
||||
mod call;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
test binemit
|
||||
target x86_64
|
||||
set enable_simd
|
||||
target x86_64 nehalem
|
||||
|
||||
; Ensure raw_bitcast emits no instructions.
|
||||
function %raw_bitcast_i16x8_to_b32x4() {
|
||||
@@ -10,8 +11,16 @@ block0:
|
||||
return
|
||||
}
|
||||
|
||||
function %fcvt_32(i32x4) {
|
||||
block0(v0: i32x4 [%xmm6]):
|
||||
[-, %xmm2] v1 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6
|
||||
function %conversions_i32x4(i32x4, i32x4) {
|
||||
block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]):
|
||||
[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6
|
||||
[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03
|
||||
return
|
||||
}
|
||||
|
||||
function %conversions_i16x8(i16x8) {
|
||||
block0(v0: i16x8 [%xmm6]):
|
||||
[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6
|
||||
[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de
|
||||
return
|
||||
}
|
||||
|
||||
@@ -52,3 +52,19 @@ block0(v0:f32x4):
|
||||
; nextln: v1 = iadd v12, v11
|
||||
return v1
|
||||
}
|
||||
|
||||
function %uwiden_high(i8x16) -> i16x8 {
|
||||
block0(v0: i8x16):
|
||||
v1 = uwiden_high v0
|
||||
; check: v2 = x86_palignr v0, v0, 8
|
||||
; nextln: v1 = uwiden_low v2
|
||||
return v1
|
||||
}
|
||||
|
||||
function %swiden_high(i16x8) -> i32x4 {
|
||||
block0(v0: i16x8):
|
||||
v1 = swiden_high v0
|
||||
; check: v2 = x86_palignr v0, v0, 8
|
||||
; nextln: v1 = swiden_low v2
|
||||
return v1
|
||||
}
|
||||
|
||||
@@ -1582,17 +1582,39 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
||||
let (a, b) = pop2_with_bitcast(state, I32X4, builder);
|
||||
state.push1(builder.ins().unarrow(a, b))
|
||||
}
|
||||
Operator::I16x8WidenLowI8x16S { .. }
|
||||
| Operator::I16x8WidenHighI8x16S { .. }
|
||||
| Operator::I16x8WidenLowI8x16U { .. }
|
||||
| Operator::I16x8WidenHighI8x16U { .. }
|
||||
| Operator::I32x4WidenLowI16x8S { .. }
|
||||
| Operator::I32x4WidenHighI16x8S { .. }
|
||||
| Operator::I32x4WidenLowI16x8U { .. }
|
||||
| Operator::I32x4WidenHighI16x8U { .. }
|
||||
| Operator::I8x16Bitmask
|
||||
| Operator::I16x8Bitmask
|
||||
| Operator::I32x4Bitmask => {
|
||||
Operator::I16x8WidenLowI8x16S => {
|
||||
let a = pop1_with_bitcast(state, I8X16, builder);
|
||||
state.push1(builder.ins().swiden_low(a))
|
||||
}
|
||||
Operator::I16x8WidenHighI8x16S => {
|
||||
let a = pop1_with_bitcast(state, I8X16, builder);
|
||||
state.push1(builder.ins().swiden_high(a))
|
||||
}
|
||||
Operator::I16x8WidenLowI8x16U => {
|
||||
let a = pop1_with_bitcast(state, I8X16, builder);
|
||||
state.push1(builder.ins().uwiden_low(a))
|
||||
}
|
||||
Operator::I16x8WidenHighI8x16U => {
|
||||
let a = pop1_with_bitcast(state, I8X16, builder);
|
||||
state.push1(builder.ins().uwiden_high(a))
|
||||
}
|
||||
Operator::I32x4WidenLowI16x8S => {
|
||||
let a = pop1_with_bitcast(state, I16X8, builder);
|
||||
state.push1(builder.ins().swiden_low(a))
|
||||
}
|
||||
Operator::I32x4WidenHighI16x8S => {
|
||||
let a = pop1_with_bitcast(state, I16X8, builder);
|
||||
state.push1(builder.ins().swiden_high(a))
|
||||
}
|
||||
Operator::I32x4WidenLowI16x8U => {
|
||||
let a = pop1_with_bitcast(state, I16X8, builder);
|
||||
state.push1(builder.ins().uwiden_low(a))
|
||||
}
|
||||
Operator::I32x4WidenHighI16x8U => {
|
||||
let a = pop1_with_bitcast(state, I16X8, builder);
|
||||
state.push1(builder.ins().uwiden_high(a))
|
||||
}
|
||||
Operator::I8x16Bitmask | Operator::I16x8Bitmask | Operator::I32x4Bitmask => {
|
||||
return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
|
||||
}
|
||||
|
||||
|
||||
@@ -515,8 +515,7 @@ typedef own wasm_trap_t* (*wasmtime_func_callback_t)(const wasmtime_caller_t* ca
|
||||
*
|
||||
* This function is the same as #wasm_func_callback_with_env_t except that its
|
||||
* first argument is a #wasmtime_caller_t which allows learning information
|
||||
* about the
|
||||
* caller.
|
||||
* about the caller.
|
||||
*/
|
||||
typedef own wasm_trap_t* (*wasmtime_func_callback_with_env_t)(const wasmtime_caller_t* caller, void* env, const wasm_val_t args[], wasm_val_t results[]);
|
||||
|
||||
@@ -544,6 +543,28 @@ WASM_API_EXTERN own wasm_func_t* wasmtime_func_new_with_env(
|
||||
void (*finalizer)(void*)
|
||||
);
|
||||
|
||||
/**
|
||||
* \brief Creates a new `funcref` value referencing `func`.
|
||||
*
|
||||
* Create a `funcref` value that references `func` and writes it to `funcrefp`.
|
||||
*
|
||||
* Gives ownership of the `funcref` value written to `funcrefp`.
|
||||
*
|
||||
* Both `func` and `funcrefp` must not be NULL.
|
||||
*/
|
||||
WASM_API_EXTERN void wasmtime_func_as_funcref(const wasm_func_t* func, wasm_val_t* funcrefp);
|
||||
|
||||
/**
|
||||
* \brief Get the `wasm_func_t*` referenced by the given `funcref` value.
|
||||
*
|
||||
* Gets an owning handle to the `wasm_func_t*` that the given `funcref` value is
|
||||
* referencing. Returns NULL if the value is not a `funcref`, or if the value is
|
||||
* a null function reference.
|
||||
*
|
||||
* The `val` pointer must not be NULL.
|
||||
*/
|
||||
WASM_API_EXTERN own wasm_func_t* wasmtime_funcref_as_func(const wasm_val_t* val);
|
||||
|
||||
/**
|
||||
* \brief Loads a #wasm_extern_t from the caller's context
|
||||
*
|
||||
@@ -740,7 +761,7 @@ WASM_API_EXTERN own wasmtime_error_t *wasmtime_instance_new(
|
||||
* returned error and module are owned by the caller.
|
||||
*/
|
||||
WASM_API_EXTERN own wasmtime_error_t *wasmtime_module_new(
|
||||
wasm_store_t *store,
|
||||
wasm_engine_t *engine,
|
||||
const wasm_byte_vec_t *binary,
|
||||
own wasm_module_t **ret
|
||||
);
|
||||
@@ -845,8 +866,10 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_funcref_table_grow(
|
||||
* This function does not take an associated finalizer to clean up the data when
|
||||
* the reference is reclaimed. If you need a finalizer to clean up the data,
|
||||
* then use #wasmtime_externref_new_with_finalizer.
|
||||
*
|
||||
* Gives ownership of the newly created `externref` value.
|
||||
*/
|
||||
WASM_API_EXTERN void wasmtime_externref_new(void *data, wasm_val_t *valp);
|
||||
WASM_API_EXTERN void wasmtime_externref_new(own void *data, wasm_val_t *valp);
|
||||
|
||||
/**
|
||||
* \brief A finalizer for an `externref`'s wrapped data.
|
||||
@@ -866,9 +889,11 @@ typedef void (*wasmtime_externref_finalizer_t)(void*);
|
||||
* When the reference is reclaimed, the wrapped data is cleaned up with the
|
||||
* provided finalizer. If you do not need to clean up the wrapped data, then use
|
||||
* #wasmtime_externref_new.
|
||||
*
|
||||
* Gives ownership of the newly created `externref` value.
|
||||
*/
|
||||
WASM_API_EXTERN void wasmtime_externref_new_with_finalizer(
|
||||
void *data,
|
||||
own void *data,
|
||||
wasmtime_externref_finalizer_t finalizer,
|
||||
wasm_val_t *valp
|
||||
);
|
||||
@@ -887,7 +912,8 @@ WASM_API_EXTERN void wasmtime_externref_new_with_finalizer(
|
||||
* If the given value is not an `externref`, returns `false` and leaves `datap`
|
||||
* unmodified.
|
||||
*
|
||||
* Does not take ownership of `val`.
|
||||
* Does not take ownership of `val`. Does not give up ownership of the `void*`
|
||||
* data written to `datap`.
|
||||
*
|
||||
* Both `val` and `datap` must not be `NULL`.
|
||||
*/
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::mem::MaybeUninit;
|
||||
use std::panic::{self, AssertUnwindSafe};
|
||||
use std::ptr;
|
||||
use std::str;
|
||||
use wasmtime::{Caller, Extern, Func, Trap};
|
||||
use wasmtime::{Caller, Extern, Func, Trap, Val};
|
||||
|
||||
#[derive(Clone)]
|
||||
#[repr(transparent)]
|
||||
@@ -275,3 +275,21 @@ pub extern "C" fn wasmtime_caller_export_get(
|
||||
let which = caller.caller.get_export(name)?;
|
||||
Some(Box::new(wasm_extern_t { which }))
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn wasmtime_func_as_funcref(
|
||||
func: &wasm_func_t,
|
||||
funcrefp: &mut MaybeUninit<wasm_val_t>,
|
||||
) {
|
||||
let funcref = wasm_val_t::from_val(Val::FuncRef(Some(func.func().clone())));
|
||||
crate::initialize(funcrefp, funcref);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn wasmtime_funcref_as_func(val: &wasm_val_t) -> Option<Box<wasm_func_t>> {
|
||||
if let Val::FuncRef(Some(f)) = val.val() {
|
||||
Some(Box::new(f.into()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::{
|
||||
handle_result, wasm_byte_vec_t, wasm_exporttype_t, wasm_exporttype_vec_t, wasm_importtype_t,
|
||||
wasm_importtype_vec_t, wasm_store_t, wasmtime_error_t,
|
||||
handle_result, wasm_byte_vec_t, wasm_engine_t, wasm_exporttype_t, wasm_exporttype_vec_t,
|
||||
wasm_importtype_t, wasm_importtype_vec_t, wasm_store_t, wasmtime_error_t,
|
||||
};
|
||||
use std::ptr;
|
||||
use wasmtime::{Engine, Module};
|
||||
@@ -29,7 +29,10 @@ pub extern "C" fn wasm_module_new(
|
||||
binary: &wasm_byte_vec_t,
|
||||
) -> Option<Box<wasm_module_t>> {
|
||||
let mut ret = ptr::null_mut();
|
||||
match wasmtime_module_new(store, binary, &mut ret) {
|
||||
let engine = wasm_engine_t {
|
||||
engine: store.store.engine().clone(),
|
||||
};
|
||||
match wasmtime_module_new(&engine, binary, &mut ret) {
|
||||
Some(_err) => None,
|
||||
None => {
|
||||
assert!(!ret.is_null());
|
||||
@@ -40,13 +43,12 @@ pub extern "C" fn wasm_module_new(
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn wasmtime_module_new(
|
||||
store: &wasm_store_t,
|
||||
engine: &wasm_engine_t,
|
||||
binary: &wasm_byte_vec_t,
|
||||
ret: &mut *mut wasm_module_t,
|
||||
) -> Option<Box<wasmtime_error_t>> {
|
||||
let binary = binary.as_slice();
|
||||
let store = &store.store;
|
||||
handle_result(Module::from_binary(store.engine(), binary), |module| {
|
||||
handle_result(Module::from_binary(&engine.engine, binary), |module| {
|
||||
let imports = module
|
||||
.imports()
|
||||
.map(|i| wasm_importtype_t::new(i.module().to_owned(), i.name().to_owned(), i.ty()))
|
||||
|
||||
@@ -91,7 +91,7 @@ pub extern "C" fn wasm_table_get(
|
||||
index: wasm_table_size_t,
|
||||
) -> Option<Box<wasm_ref_t>> {
|
||||
let val = t.table().get(index)?;
|
||||
Some(val_into_ref(val).unwrap())
|
||||
val_into_ref(val)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
||||
@@ -26,7 +26,9 @@ impl Drop for wasm_val_t {
|
||||
fn drop(&mut self) {
|
||||
match into_valtype(self.kind) {
|
||||
ValType::ExternRef => unsafe {
|
||||
if !self.of.ref_.is_null() {
|
||||
drop(Box::from_raw(self.of.ref_));
|
||||
}
|
||||
},
|
||||
_ => {}
|
||||
}
|
||||
@@ -116,7 +118,20 @@ impl wasm_val_t {
|
||||
ValType::I64 => Val::from(unsafe { self.of.i64 }),
|
||||
ValType::F32 => Val::from(unsafe { self.of.f32 }),
|
||||
ValType::F64 => Val::from(unsafe { self.of.f64 }),
|
||||
ValType::ExternRef | ValType::FuncRef => ref_to_val(unsafe { &*self.of.ref_ }),
|
||||
ValType::ExternRef => unsafe {
|
||||
if self.of.ref_.is_null() {
|
||||
Val::ExternRef(None)
|
||||
} else {
|
||||
ref_to_val(&*self.of.ref_)
|
||||
}
|
||||
},
|
||||
ValType::FuncRef => unsafe {
|
||||
if self.of.ref_.is_null() {
|
||||
Val::FuncRef(None)
|
||||
} else {
|
||||
ref_to_val(&*self.of.ref_)
|
||||
}
|
||||
},
|
||||
_ => unimplemented!("wasm_val_t::val {:?}", self.kind),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,12 +98,13 @@ fn apply_reloc(
|
||||
write_unaligned(reloc_address as *mut u32, reloc_delta_u32);
|
||||
},
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
(RelocationKind::Relative, RelocationEncoding::X86Branch, 32) => unsafe {
|
||||
(RelocationKind::Relative, RelocationEncoding::Generic, 32) => unsafe {
|
||||
let reloc_address = body.add(offset as usize) as usize;
|
||||
let reloc_addend = r.addend() as isize;
|
||||
let reloc_delta_u64 = (target_func_address as u64)
|
||||
.wrapping_sub(reloc_address as u64)
|
||||
.wrapping_add(reloc_addend as u64);
|
||||
// TODO implement far calls mode in x64 new backend.
|
||||
assert!(
|
||||
reloc_delta_u64 as isize <= i32::max_value() as isize,
|
||||
"relocation too large to fit in i32"
|
||||
|
||||
@@ -10,6 +10,7 @@ snapshot of what the current state of the world looks like.
|
||||
All features of `wasmtime` should work on the following platforms:
|
||||
|
||||
* Linux x86\_64
|
||||
* Linux aarch64
|
||||
* macOS x86\_64
|
||||
* Windows x86\_64
|
||||
|
||||
@@ -18,9 +19,8 @@ sections below!
|
||||
|
||||
## JIT compiler support
|
||||
|
||||
The JIT compiler, backed by either `lightbeam` or `cranelift` supports only the
|
||||
x86\_64 architecture at this time. Support for at least ARM, AArch64, and x86 is
|
||||
planned at this time.
|
||||
The JIT compiler, backed by Cranelift, supports the x86\_64 and aarch64
|
||||
architectures at this time. Support for at least ARM and x86 is planned as well.
|
||||
|
||||
Usage of the JIT compiler will require a host operating system which supports
|
||||
creating executable memory pages on-the-fly. In Rust terms this generally means
|
||||
@@ -39,5 +39,6 @@ much else will be needed.
|
||||
The `wasmtime` project does not currently use `#[no_std]` for its crates, but
|
||||
this is not because it won't support it! At this time we're still gathering use
|
||||
cases for what `#[no_std]` might entail, so if you're interested in this
|
||||
we'd love to hear about your use case! Feel free to open an issue on the
|
||||
we'd love to hear about your use case! Feel free to [open an
|
||||
issue](https://github.com/bytecodealliance/wasmtime/issues/new) on the
|
||||
`wasmtime` repository to discuss this.
|
||||
|
||||
@@ -66,7 +66,7 @@ int main() {
|
||||
// Now that we've got our binary webassembly we can compile our module.
|
||||
printf("Compiling module...\n");
|
||||
wasm_module_t *module = NULL;
|
||||
error = wasmtime_module_new(store, &wasm, &module);
|
||||
error = wasmtime_module_new(engine, &wasm, &module);
|
||||
wasm_byte_vec_delete(&wasm);
|
||||
if (error != NULL)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
|
||||
@@ -43,7 +43,7 @@ int main(int argc, const char* argv[]) {
|
||||
// Compile.
|
||||
printf("Compiling module...\n");
|
||||
wasm_module_t *module = NULL;
|
||||
wasmtime_error_t* error = wasmtime_module_new(store, &binary, &module);
|
||||
wasmtime_error_t* error = wasmtime_module_new(engine, &binary, &module);
|
||||
if (!module)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
wasm_byte_vec_delete(&binary);
|
||||
|
||||
@@ -59,7 +59,7 @@ int main() {
|
||||
|
||||
// Compile and instantiate our module
|
||||
wasm_module_t *module = NULL;
|
||||
error = wasmtime_module_new(store, &wasm, &module);
|
||||
error = wasmtime_module_new(engine, &wasm, &module);
|
||||
if (module == NULL)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
wasm_byte_vec_delete(&wasm);
|
||||
|
||||
@@ -67,7 +67,7 @@ int main() {
|
||||
// Now that we've got our binary webassembly we can compile our module.
|
||||
printf("Compiling module...\n");
|
||||
wasm_module_t *module = NULL;
|
||||
error = wasmtime_module_new(store, &wasm, &module);
|
||||
error = wasmtime_module_new(engine, &wasm, &module);
|
||||
wasm_byte_vec_delete(&wasm);
|
||||
if (error != NULL)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
|
||||
@@ -67,7 +67,7 @@ int main() {
|
||||
// Now that we've got our binary webassembly we can compile our module.
|
||||
printf("Compiling module...\n");
|
||||
wasm_module_t *module = NULL;
|
||||
error = wasmtime_module_new(store, &wasm, &module);
|
||||
error = wasmtime_module_new(engine, &wasm, &module);
|
||||
wasm_byte_vec_delete(&wasm);
|
||||
if (error != NULL)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
|
||||
@@ -89,7 +89,7 @@ int main() {
|
||||
wasm_module_t *module = NULL;
|
||||
wasm_trap_t *trap = NULL;
|
||||
wasm_instance_t *instance = NULL;
|
||||
error = wasmtime_module_new(store, &wasm, &module);
|
||||
error = wasmtime_module_new(engine, &wasm, &module);
|
||||
wasm_byte_vec_delete(&wasm);
|
||||
if (error != NULL)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
|
||||
@@ -45,10 +45,10 @@ int main() {
|
||||
wasmtime_error_t *error;
|
||||
wasm_module_t *linking1_module = NULL;
|
||||
wasm_module_t *linking2_module = NULL;
|
||||
error = wasmtime_module_new(store, &linking1_wasm, &linking1_module);
|
||||
error = wasmtime_module_new(engine, &linking1_wasm, &linking1_module);
|
||||
if (error != NULL)
|
||||
exit_with_error("failed to compile linking1", error, NULL);
|
||||
error = wasmtime_module_new(store, &linking2_wasm, &linking2_module);
|
||||
error = wasmtime_module_new(engine, &linking2_wasm, &linking2_module);
|
||||
if (error != NULL)
|
||||
exit_with_error("failed to compile linking2", error, NULL);
|
||||
wasm_byte_vec_delete(&linking1_wasm);
|
||||
|
||||
@@ -158,7 +158,7 @@ int main(int argc, const char* argv[]) {
|
||||
// Compile.
|
||||
printf("Compiling module...\n");
|
||||
wasm_module_t* module = NULL;
|
||||
error = wasmtime_module_new(store, &binary, &module);
|
||||
error = wasmtime_module_new(engine, &binary, &module);
|
||||
if (error)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
wasm_byte_vec_delete(&binary);
|
||||
|
||||
@@ -91,7 +91,7 @@ int main(int argc, const char* argv[]) {
|
||||
// Compile.
|
||||
printf("Compiling module...\n");
|
||||
wasm_module_t* module = NULL;
|
||||
error = wasmtime_module_new(store, &binary, &module);
|
||||
error = wasmtime_module_new(engine, &binary, &module);
|
||||
if (error)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ int main() {
|
||||
|
||||
// Compile our modules
|
||||
wasm_module_t *module = NULL;
|
||||
wasmtime_error_t *error = wasmtime_module_new(store, &wasm, &module);
|
||||
wasmtime_error_t *error = wasmtime_module_new(engine, &wasm, &module);
|
||||
if (!module)
|
||||
exit_with_error("failed to compile module", error, NULL);
|
||||
wasm_byte_vec_delete(&wasm);
|
||||
|
||||
Reference in New Issue
Block a user